Commit 9afbfe7

Make test method more readable

1 parent 2dfaeb3 commit 9afbfe7

File tree: 1 file changed, +39 -33 lines changed

tests/test_snakemake_integration.py

Lines changed: 39 additions & 33 deletions
@@ -137,8 +137,6 @@ def _make_test_input_csv(tmp_path, t: TestFileDescription) -> list[list[Decimal]

 def test_snakemake_pipeline_runs_via_exporter_wrapper(tmp_path: Path):
     # ARRANGE
-    repo_root = Path(__file__).resolve().parents[1]
-    compose_file = repo_root / "docker-compose.yml"

     # all fields that need to be de-IDed should contain the string "SECRET" so we can search for it later
     file1 = TestFileDescription(
@@ -194,8 +192,9 @@ def test_snakemake_pipeline_runs_via_exporter_wrapper(tmp_path: Path):
     )
     test_data_files = []
     for f in [file1, file2, file3, file4]:
-        test_data = _make_test_input_csv(tmp_path, f)
-        test_data_files.append((f, test_data))
+        test_data_values = _make_test_input_csv(tmp_path, f)
+        test_data_files.append((f, test_data_values))
+
     expected_hash_summaries = {
         "2025-01-01": [
             {
@@ -228,6 +227,42 @@ def test_snakemake_pipeline_runs_via_exporter_wrapper(tmp_path: Path):
     }

     # ACT
+    run_snakemake(tmp_path)
+
+    # ASSERT (data files)
+    for filename, expected_data in test_data_files:
+        original_parquet_path = (
+            tmp_path / "original-parquet" / filename.get_orig_parquet()
+        )
+        pseudon_path = tmp_path / "pseudonymised" / filename.get_pseudon_parquet()
+
+        assert original_parquet_path.exists()
+        assert pseudon_path.exists()
+
+        _compare_original_parquet_to_expected(original_parquet_path, expected_data)
+        _compare_parquets(expected_data, original_parquet_path, pseudon_path)
+
+    # ASSERT (hash summaries)
+    # Hash summaries are one per day, not per input file
+    for datestr, expected_summary in expected_hash_summaries.items():
+        expected_path = tmp_path / "hash-lookups" / f"{datestr}.hashes.json"
+        actual_hash_lookup_data = json.loads(expected_path.read_text())
+        assert isinstance(actual_hash_lookup_data, list)
+        # sort order to match expected
+        actual_hash_lookup_data.sort(key=lambda x: x["csn"])
+        assert expected_summary == actual_hash_lookup_data
+
+    # check no extraneous files
+    assert 4 == len(list((tmp_path / "original-csv").iterdir()))
+    assert 4 == len(list((tmp_path / "original-parquet").iterdir()))
+    assert 4 == len(list((tmp_path / "pseudonymised").iterdir()))
+    assert 2 == len(list((tmp_path / "hash-lookups").iterdir()))
+
+
+def run_snakemake(tmp_path):
+    repo_root = Path(__file__).resolve().parents[1]
+    compose_file = repo_root / "docker-compose.yml"
+
     compose_args = [
         "run",
         "--rm",
@@ -259,35 +294,6 @@ def test_snakemake_pipeline_runs_via_exporter_wrapper(tmp_path: Path):
     print(f"stdout:\n{result.stdout}\n" f"stderr:\n{result.stderr}")
     result.check_returncode()

-    # ASSERT
-    for filename, expected_data in test_data_files:
-        original_parquet_path = (
-            tmp_path / "original-parquet" / filename.get_orig_parquet()
-        )
-        pseudon_path = tmp_path / "pseudonymised" / filename.get_pseudon_parquet()
-
-        assert original_parquet_path.exists()
-        assert pseudon_path.exists()
-
-        _compare_original_parquet_to_expected(original_parquet_path, expected_data)
-        _compare_parquets(expected_data, original_parquet_path, pseudon_path)
-
-    # Check hash summaries: one per day, not per input file
-    # inspect our CSN -> hashed_csn lookup file
-    for datestr, expected_summary in expected_hash_summaries.items():
-        expected_path = tmp_path / "hash-lookups" / f"{datestr}.hashes.json"
-        actual_hash_lookup_data = json.loads(expected_path.read_text())
-        assert isinstance(actual_hash_lookup_data, list)
-        # sort order to match expected
-        actual_hash_lookup_data.sort(key=lambda x: x["csn"])
-        assert expected_summary == actual_hash_lookup_data
-
-    # check no extraneous files
-    assert 4 == len(list((tmp_path / "original-csv").iterdir()))
-    assert 4 == len(list((tmp_path / "original-parquet").iterdir()))
-    assert 4 == len(list((tmp_path / "pseudonymised").iterdir()))
-    assert 2 == len(list((tmp_path / "hash-lookups").iterdir()))
-

 def _compare_original_parquet_to_expected(original_parquet: Path, expected_test_values):
     # CSV should always match original parquet
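
Side note: the diff elides the middle of the extracted run_snakemake helper (everything between compose_args and the result handling), so the following is only a hypothetical sketch of the full helper, assuming docker compose is driven via subprocess.run with captured output. The "snakemake" service name and the volume mount are invented for illustration and do not appear in this commit.

    # Hypothetical reconstruction for illustration only -- the commit elides
    # the middle of run_snakemake, so the compose invocation is an assumption.
    import subprocess
    from pathlib import Path


    def run_snakemake(tmp_path: Path) -> None:
        repo_root = Path(__file__).resolve().parents[1]
        compose_file = repo_root / "docker-compose.yml"

        compose_args = [
            "run",
            "--rm",
            # assumption: the pytest tmp dir is mounted so the pipeline
            # reads its input CSVs and writes its outputs there
            "--volume",
            f"{tmp_path}:/data",
            # assumption: "snakemake" is the service name in docker-compose.yml
            "snakemake",
        ]
        result = subprocess.run(
            ["docker", "compose", "--file", str(compose_file), *compose_args],
            capture_output=True,
            text=True,
        )
        # surface container output for debugging, then fail on non-zero exit
        print(f"stdout:\n{result.stdout}\n" f"stderr:\n{result.stderr}")
        result.check_returncode()

Whatever the exact body, extracting the subprocess plumbing this way leaves the test itself as a straight ARRANGE / ACT / ASSERT sequence, which is the readability gain the commit message describes.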

0 commit comments