@@ -36,15 +36,11 @@ def test_mindlogger_data_create_empty_raises_error(tmp_path: Path):
@WITH_REPORT
def test_mindlogger_source_users(datafiles: Path):
    """Test MindloggerData.source_users.

    Builds a ``MindloggerData`` from the ``datafiles`` directory and checks
    that exactly two source users are reported, that each one has
    ``UserType.SOURCE`` as its user type, and that their ids match the
    expected value.
    """
    mindlogger_data = MindloggerData.create(datafiles)
    source_users = mindlogger_data.source_users

    assert len(source_users) == 2
    assert all(user.user_type == UserType.SOURCE for user in source_users)

    # NOTE(review): both entries are compared against the same id. Presumably
    # the fixture contains two source-user records sharing one id -- confirm
    # this is intentional and not a copy-paste of the first assertion.
    assert source_users[0].id == "1e15e0bf-1b81-418e-9b80-20b0cb4cac33"
    assert source_users[1].id == "1e15e0bf-1b81-418e-9b80-20b0cb4cac33"
5046
@@ -196,275 +192,3 @@ def test_expand_responses(report):
def test_data_dictionary(report):
    """Check that the data dictionary derived from ``report`` is not empty."""
    dictionary_entries = list(MindloggerData(report).data_dictionary)
    assert len(dictionary_entries) != 0
199-
200-
201- # def test_long_response():
202- # """Test UnnestingResponsePreprocessor on data with multiple rows."""
203- # report = pl.DataFrame(
204- # {
205- # "parsed_response": [
206- # {"type": "raw_value", "raw_value": "10"},
207- # {"type": "text", "text": "Some text here"},
208- # {"type": "text", "text": "Some multiline\ntext here"},
209- # {"type": "null", "null_value": True},
210- # {"type": "value", "value": [2]},
211- # {"type": "value", "value": [1, 2, 3]},
212- # {"type": "file", "file": "./path/to/file.mp4"},
213- # {"type": "date", "date": date(2021, 2, 1)},
214- # {"type": "date", "date": date(2021, 5, 4)},
215- # {"type": "time", "time": time(12, 30)},
216- # {"type": "time_range", "time_range": timedelta(hours=3, minutes=-25)},
217- # {"type": "geo", "geo": {"latitude": 40.7128, "longitude": -74.0060}},
218- # {
219- # "type": "matrix",
220- # "matrix": [
221- # {"row": "row1", "value": [1]},
222- # {"row": "row2", "value": [2]},
223- # ],
224- # },
225- # {
226- # "type": "matrix",
227- # "matrix": [
228- # {"row": "row1", "value": [1, 2]},
229- # {"row": "row2", "value": [3, 4]},
230- # ],
231- # },
232- # ],
233- # },
234- # schema={
235- # "parsed_response": pl.Struct(
236- # {
237- # "type": pl.String,
238- # "raw_value": pl.String,
239- # "null_value": pl.Boolean,
240- # "value": pl.List(pl.Int64),
241- # "text": pl.String,
242- # "file": pl.String,
243- # "date": date,
244- # "time": time,
245- # "time_range": timedelta,
246- # "geo": pl.Struct({"latitude": pl.Float64, "longitude": pl.Float64}),
247- # "matrix": pl.List(
248- # pl.Struct({"row": pl.String, "value": pl.List(pl.Int64)})
249- # ),
250- # }
251- # )
252- # },
253- # )
254- # expected_df = {
255- # "response_raw_value": ["10"] + [None] * 19,
256- # "response_text": [None]
257- # + ["Some text here", "Some multiline\ntext here"]
258- # + [None] * 17,
259- # "response_null_value": [None] * 3 + [True] + [None] * 16,
260- # "response_value": [None] * 4 + [2, 1, 2, 3] + [None] * 12,
261- # "response_value_index": [None] * 4 + [0, 0, 1, 2] + [None] * 12,
262- # "response_file": [None] * 8 + ["./path/to/file.mp4"] + [None] * 11,
263- # "response_date": [None] * 9 + [date(2021, 2, 1), date(2021, 5, 4)] + [None] * 9,
264- # "response_time": [None] * 11 + [time(12, 30)] + [None] * 8,
265- # "response_time_range": [None] * 12
266- # + [timedelta(hours=3, minutes=-25)]
267- # + [None] * 7,
268- # "response_geo_latitude": [None] * 13 + [40.7128] + [None] * 6,
269- # "response_geo_longitude": [None] * 13 + [-74.0060] + [None] * 6,
270- # "response_matrix_row": [None] * 14
271- # + ["row1", "row2"]
272- # + ["row1", "row1", "row2", "row2"], # [None] * 4,
273- # "response_matrix_value": [None] * 14 + [1, 2] + [1, 2, 3, 4], # [None] * 4,
274- # "response_matrix_value_index": [None] * 14
275- # + [0, 0]
276- # + [0, 1, 0, 1], # [None] * 4,
277- # "response_type": [
278- # "raw_value",
279- # "text",
280- # "text",
281- # "null",
282- # "value",
283- # "value",
284- # "value",
285- # "value",
286- # "file",
287- # "date",
288- # "date",
289- # "time",
290- # "time_range",
291- # "geo",
292- # "matrix",
293- # "matrix",
294- # "matrix",
295- # "matrix",
296- # "matrix",
297- # "matrix",
298- # ],
299- # }
300- # expected_df = pl.DataFrame(
301- # expected_df,
302- # schema={
303- # "response_type": pl.String,
304- # "response_raw_value": pl.String,
305- # "response_text": pl.String,
306- # "response_null_value": pl.Boolean,
307- # "response_file": pl.String,
308- # "response_value": pl.Int64,
309- # "response_value_index": pl.Int64,
310- # "response_date": pl.Date,
311- # "response_time": pl.Time,
312- # "response_time_range": pl.Duration,
313- # "response_geo_latitude": pl.Float64,
314- # "response_geo_longitude": pl.Float64,
315- # "response_matrix_row": pl.String,
316- # "response_matrix_value": pl.Int64,
317- # "response_matrix_value_index": pl.Int64,
318- # },
319- # )
320- # expanded_report = MindloggerData.expand_responses(report).drop("parsed_response")
321- # assert_frame_equal(
322- # expanded_report,
323- # expected_df,
324- # check_column_order=False,
325- # )
326-
327-
328- # def test_score_value_mapping_processor():
329- # """Test ScoreValueMappingProcessor."""
330- # preprocessor = ScoredTypedData()
331- # item_id_cols = [
332- # "version",
333- # "activity_flow_id",
334- # "activity_flow_name",
335- # "activity_id",
336- # "activity_name",
337- # "item_id",
338- # "item",
339- # "prompt",
340- # ]
341- # report = pl.DataFrame(
342- # {
343- # "version": ["1.0", "1.0", "1.0"],
344- # "activity_flow_id": [
345- # "ACTIVITY_FLOW_ID_1",
346- # "ACTIVITY_FLOW_ID_2",
347- # "ACTIVITY_FLOW_ID_3",
348- # ],
349- # "activity_flow_name": [
350- # "ACTIVITY_FLOW_NAME_1",
351- # "ACTIVITY_FLOW_NAME_2",
352- # "ACTIVITY_FLOW_NAME_3",
353- # ],
354- # "activity_id": ["ACTIVITY_ID_1", "ACTIVITY_ID_2", "ACTIVITY_ID_3"],
355- # "activity_name": ["ACTIVITY_NAME_1", "ACTIVITY_NAME_2", "ACTIVITY_NAME_3"],
356- # "item_id": ["ITEM_ID_1", "ITEM_ID_2", "ITEM_ID_3"],
357- # "item": ["ITEM_1", "ITEM_2", "ITEM_3"],
358- # "prompt": ["PROMPT_1", "PROMPT_2", "PROMPT_3"],
359- # "options": [
360- # "Max: 2, Min: 0",
361- # "1: 0, 2: 1, 3: 2",
362- # "1: 0 (score: 3), 2: 1 (score: 4), 3: 2 (score: 5)",
363- # ],
364- # "response": ["value: 1", "value: 2", "value: 2"],
365- # },
366- # )
367- # expected_df = report.with_columns(
368- # option_name=pl.Series(["1", "3", "3"]),
369- # option_score=pl.Series([1, None, 5]),
370- # ).drop("options", "response")
371- # processed_report = preprocessor.process(report)
372-
373- # processed_report = processed_report.select(
374- # item_id_cols + ["option_name", "option_score"]
375- # )
376- # assert_frame_equal(
377- # processed_report,
378- # expected_df,
379- # check_column_order=False,
380- # )
381-
382-
383- # def test_mindlogger_items(mindlogger_export_config: MindloggerExportConfig):
384- # """Test MindloggerData.items."""
385- # mindlogger_report = mindlogger_export_config.input_dir / "report.csv" # noqa: ERA001
386- # mindlogger_data = MindloggerData(pl.read_csv(mindlogger_report))
387- # # items = mindlogger_data.items
388- # # assert len(items) == 23
389-
390- # # 15 Item IDs
391- # item_ids = {i.id for i in items}
392- # assert len(item_ids) == 15
393- # assert item_ids == {
394- # "4260fed8-d266-4f13-a543-817ca946c47d",
395- # "d95159b5-f44c-4975-ae24-1d26022afe9c",
396- # "f197953f-aa8a-4ac0-97a2-87bc7b634306",
397- # "5d48d463-5fb7-48a6-8d77-a864e66efa6e",
398- # "a9b58769-7473-4127-8c39-813c0c3ecf4a",
399- # "a44a3ca9-19d0-48cc-b200-293f454597b7",
400- # "1b91619e-cf50-4743-b7e6-381a768bb68d",
401- # "9d9f8dda-d6ca-496b-b20c-b992d74bd91f",
402- # "ce9424bd-5fce-4926-96f2-63a2dec27dfe",
403- # "12d9f51a-3988-4515-9b3d-df5a13035917",
404- # "4285ee68-6905-4d9b-be4d-940f3a805027",
405- # "cd926b89-06a9-4de4-956e-6879d55e2258",
406- # "57179d77-244a-4132-95bd-d29609ccfd68",
407- # "6ce16878-2261-458f-b746-7cb6bbd0173f",
408- # "76c0b654-a4c6-4dd7-8270-33f4ee06d57b",
409- # }
410-
411- # # 20 Item names
412- # item_names = {i.name for i in items}
413- # assert len(item_names) == 20
414- # assert item_names == {
415- # "Item4-Text",
416- # "Itemms",
417- # "Item2-Multiple_Selection",
418- # "Item3-Slider",
419- # "age_screen",
420- # "Itemns",
421- # "Itemsl",
422- # "Itemss",
423- # "slider_alert_item",
424- # "Item5-Number_Selection",
425- # "suicide_alert",
426- # "gender_screen",
427- # "q4",
428- # "Itemst",
429- # "Date",
430- # "Item2_test",
431- # "Item1",
432- # "Item1-Single_Selection",
433- # "q2",
434- # "q6",
435- # }
436-
437- # item_prompts = {i.prompt for i in items}
438- # assert len(item_prompts) == 11
439- # assert item_prompts == {
440- # "date",
441- # "select",
442- # "Itemms",
443- # "Itemns",
444- # "How do you describe yourself?<br><br>*Please provide your response as accurately as possible. The information you provide is important for ensuring the accuracy of your results. If you have any concerns about how your information will be used, please refer to our Terms of Service.*",
445- # "How old are you?<br><br>*Please provide your response as accurately as possible. The information you provide is important for ensuring the accuracy of your results. If you have any concerns about how your information will be used, please refer to our Terms of Service.*",
446- # "Itemsl",
447- # "suicide alert",
448- # "Itemss",
449- # "Itemst",
450- # "slider_alert",
451- # }
452-
453- # item_options = {i.options for i in items}
454- # assert len(item_options) == 14
455- # assert item_options == {
456- # "0: 0 (score: 1), 1: 1 (score: 2), 2: 2 (score: 3), 3: 3 (score: 4), 4: 4 (score: 5), 5: 5 (score: 6), 6: 6 (score: 7), 7: 7 (score: 8), 8: 8 (score: 9), 9: 9 (score: 10), 10: 10 (score: 11), 11: 11 (score: 12), 12: 12 (score: 13)",
457- # None,
458- # "Male: 0, Female: 1",
459- # "Min: 0, Max: 10",
460- # "4: 0 (score: 4), 8: 1 (score: 8), None: 2 (score: 0)",
461- # "1: 0 (score: 1), 2: 1 (score: 2), 3: 2 (score: 3)",
462- # "1: 0 (score: 0), 2: 1 (score: 2), 3: 2 (score: 3), 4: 3 (score: 4), 5: 4 (score: 5)",
463- # "4: 0, 8: 1, None: 2",
464- # "1: 0 (score: 1), 2: 1 (score: 2), 3: 2 (score: 3), 4: 3 (score: 4)",
465- # "1: 0, 2: 1, 3: 2",
466- # "0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12",
467- # "No: 0 (score: 0), Yes: 1 (score: 1)",
468- # "0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10",
469- # "Yes: 0, No: 1",
470- # }
0 commit comments