
Commit bf38aa9

Author: Huy Vu2
Commit message: lint
Parent: 30f5d05

File tree

1 file changed: +12 −12 lines


tests/stages/synthetic/nemo_data_designer/test_data_designer.py

Lines changed: 12 additions & 12 deletions
@@ -387,7 +387,7 @@ def test_pipeline_e2e_reader_ndd_writer(
     httpserver: pytest_httpserver.HTTPServer,
     tmp_path: Path,
 ) -> None:
-    """Realistic e2e: N rows × M files → JsonlReader(files_per_partition=1) → NDD → JsonlWriter.
+    """Realistic e2e: N rows x M files → JsonlReader(files_per_partition=1) → NDD → JsonlWriter.
 
     Asserts: M output files, each with same row count as input file and new column from NDD;
     for each output task, verifies _metadata (e.g. source_files) and _stage_perf for all stages.
@@ -396,8 +396,8 @@ def test_pipeline_e2e_reader_ndd_writer(
     """
     from nemo_curator.backends.xenna import XennaExecutor
 
-    N_ROWS_PER_FILE = 3
-    M_FILES = 4
+    n_rows_per_file = 3
+    m_files = 4
 
     input_dir = tmp_path / "input"
     output_dir = tmp_path / "output"
@@ -406,11 +406,11 @@ def test_pipeline_e2e_reader_ndd_writer(
 
     # 1. Create M JSONL files with N rows each
     input_files = []
-    for fi in range(M_FILES):
+    for fi in range(m_files):
         path = input_dir / f"doc_{fi}.jsonl"
         input_files.append(str(path))
         with open(path, "w") as f:
-            for ri in range(N_ROWS_PER_FILE):
+            for ri in range(n_rows_per_file):
                 rec = {"text": f"file{fi}_row{ri}"}
                 f.write(json.dumps(rec) + "\n")
 
@@ -459,8 +459,8 @@ def test_pipeline_e2e_reader_ndd_writer(
 
     # 4. Output is M tasks (FileGroupTask from writer), one per input file
     assert result_tasks is not None
-    assert len(result_tasks) == M_FILES, (
-        f"Expected {M_FILES} output tasks (one per file), got {len(result_tasks)}"
+    assert len(result_tasks) == m_files, (
+        f"Expected {m_files} output tasks (one per file), got {len(result_tasks)}"
     )
     assert all(isinstance(t, FileGroupTask) for t in result_tasks)
 
@@ -469,12 +469,12 @@ def test_pipeline_e2e_reader_ndd_writer(
     for task in result_tasks:
         assert task.data, f"Task {task.task_id} should have written file path(s)"
         output_paths.extend(task.data)
-    assert len(output_paths) == M_FILES
+    assert len(output_paths) == m_files
    for out_path in output_paths:
         with open(out_path) as f:
             lines = f.readlines()
-        assert len(lines) == N_ROWS_PER_FILE, (
-            f"Output file {out_path} should have {N_ROWS_PER_FILE} rows, got {len(lines)}"
+        assert len(lines) == n_rows_per_file, (
+            f"Output file {out_path} should have {n_rows_per_file} rows, got {len(lines)}"
         )
         for line in lines:
             obj = json.loads(line)
@@ -486,8 +486,8 @@ def test_pipeline_e2e_reader_ndd_writer(
     # jsonl_reader reports 1 item (one file-group task); NDD and writer report row count
     expected_items_per_stage = {
         "jsonl_reader": 1,
-        "DataDesignerStage": N_ROWS_PER_FILE,
-        "jsonl_writer": N_ROWS_PER_FILE,
+        "DataDesignerStage": n_rows_per_file,
+        "jsonl_writer": n_rows_per_file,
     }
     for task in result_tasks:
         assert task._metadata, "Output task should have _metadata"
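
Note on the change: the commit message says only "lint", but the diff follows a standard Python convention: UPPER_CASE names signal module-level constants, so naming linters (e.g. ruff's pep8-naming rule N806, "variable in function should be lowercase" — the specific rule ID is an assumption, not stated in this commit) flag UPPER_CASE locals such as N_ROWS_PER_FILE. The docstring change swaps the Unicode multiplication sign "×" for a plain "x", the kind of confusable character that ambiguous-Unicode lint rules (e.g. ruff RUF002, again assumed) report; the "→" arrows were evidently not flagged and remain. A minimal sketch of the naming pattern, with hypothetical names (MAX_RETRIES, make_rows):

# Module-level constants may stay UPPER_CASE ...
MAX_RETRIES = 5  # fine: module-level constant

def make_rows() -> list[dict]:
    # ... but function-local names should be snake_case; a naming linter
    # would flag `N_ROWS = 3` here, which is why this commit renames
    # N_ROWS_PER_FILE / M_FILES to n_rows_per_file / m_files.
    n_rows = 3
    return [{"text": f"row{ri}"} for ri in range(n_rows)]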
