@@ -387,7 +387,7 @@ def test_pipeline_e2e_reader_ndd_writer(
     httpserver: pytest_httpserver.HTTPServer,
     tmp_path: Path,
 ) -> None:
-    """Realistic e2e: N rows × M files → JsonlReader(files_per_partition=1) → NDD → JsonlWriter.
+    """Realistic e2e: N rows x M files → JsonlReader(files_per_partition=1) → NDD → JsonlWriter.
 
     Asserts: M output files, each with same row count as input file and new column from NDD;
     for each output task, verifies _metadata (e.g. source_files) and _stage_perf for all stages.
@@ -396,8 +396,8 @@ def test_pipeline_e2e_reader_ndd_writer(
     """
     from nemo_curator.backends.xenna import XennaExecutor
 
-    N_ROWS_PER_FILE = 3
-    M_FILES = 4
+    n_rows_per_file = 3
+    m_files = 4
 
     input_dir = tmp_path / "input"
     output_dir = tmp_path / "output"
@@ -406,11 +406,11 @@ def test_pipeline_e2e_reader_ndd_writer(
 
     # 1. Create M JSONL files with N rows each
     input_files = []
-    for fi in range(M_FILES):
+    for fi in range(m_files):
         path = input_dir / f"doc_{fi}.jsonl"
         input_files.append(str(path))
         with open(path, "w") as f:
-            for ri in range(N_ROWS_PER_FILE):
+            for ri in range(n_rows_per_file):
                 rec = {"text": f"file{fi}_row{ri}"}
                 f.write(json.dumps(rec) + "\n")
 
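For reference, the loop above writes n_rows_per_file one-key records per file, so doc_0.jsonl contains exactly:

{"text": "file0_row0"}
{"text": "file0_row1"}
{"text": "file0_row2"}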
@@ -459,8 +459,8 @@ def test_pipeline_e2e_reader_ndd_writer(
 
     # 4. Output is M tasks (FileGroupTask from writer), one per input file
     assert result_tasks is not None
-    assert len(result_tasks) == M_FILES, (
-        f"Expected {M_FILES} output tasks (one per file), got {len(result_tasks)}"
+    assert len(result_tasks) == m_files, (
+        f"Expected {m_files} output tasks (one per file), got {len(result_tasks)}"
     )
     assert all(isinstance(t, FileGroupTask) for t in result_tasks)
 
@@ -469,12 +469,12 @@ def test_pipeline_e2e_reader_ndd_writer(
     for task in result_tasks:
         assert task.data, f"Task {task.task_id} should have written file path(s)"
         output_paths.extend(task.data)
-    assert len(output_paths) == M_FILES
+    assert len(output_paths) == m_files
     for out_path in output_paths:
         with open(out_path) as f:
             lines = f.readlines()
-        assert len(lines) == N_ROWS_PER_FILE, (
-            f"Output file {out_path} should have {N_ROWS_PER_FILE} rows, got {len(lines)}"
+        assert len(lines) == n_rows_per_file, (
+            f"Output file {out_path} should have {n_rows_per_file} rows, got {len(lines)}"
         )
         for line in lines:
            obj = json.loads(line)
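Per the docstring, each parsed record is expected to keep its input "text" value and gain the NDD-generated column; that column's name is not visible in this diff, so schematically a row looks like {"text": "file0_row0", <NDD column>: ...}.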
@@ -486,8 +486,8 @@ def test_pipeline_e2e_reader_ndd_writer(
     # jsonl_reader reports 1 item (one file-group task); NDD and writer report row count
     expected_items_per_stage = {
         "jsonl_reader": 1,
-        "DataDesignerStage": N_ROWS_PER_FILE,
-        "jsonl_writer": N_ROWS_PER_FILE,
+        "DataDesignerStage": n_rows_per_file,
+        "jsonl_writer": n_rows_per_file,
     }
     for task in result_tasks:
         assert task._metadata, "Output task should have _metadata"
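The diff cuts off inside this loop. A minimal sketch of how expected_items_per_stage could be checked against each task's _stage_perf — assuming, hypothetically, that each perf entry exposes stage_name and num_items attributes (the actual nemo_curator field names may differ):

for task in result_tasks:
    assert task._metadata, "Output task should have _metadata"
    assert task._stage_perf, "Output task should have _stage_perf for all stages"
    # stage_name / num_items are assumed attribute names, not confirmed API
    items_by_stage = {p.stage_name: p.num_items for p in task._stage_perf}
    for stage, expected in expected_items_per_stage.items():
        assert items_by_stage.get(stage) == expected, (
            f"Stage {stage}: expected {expected} items, got {items_by_stage.get(stage)}"
        )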