44import pytest
55from pydantic import ValidationError
66
7- from data_designer .config .dataset_builders import BuildStage
87from data_designer .config .errors import InvalidConfigError
98from data_designer .config .processors import (
109 DropColumnsProcessorConfig ,
1615
1716
def test_drop_columns_processor_config_creation():
    """Build a drop-columns config from a name and column list and check its fields.

    The config is constructed without a ``build_stage`` argument; the test
    asserts the stored column names, the reported processor type, and that the
    object is a ``ProcessorConfig``.
    """
    config = DropColumnsProcessorConfig(name="drop_columns_processor", column_names=["col1", "col2"])

    assert config.column_names == ["col1", "col2"]
    assert config.processor_type == ProcessorType.DROP_COLUMNS
    assert isinstance(config, ProcessorConfig)
2723
2824
def test_drop_columns_processor_config_validation():
    """Constructing a drop-columns config without ``column_names`` must fail validation."""
    # Test missing required field raises error
    with pytest.raises(ValidationError, match="Field required"):
        DropColumnsProcessorConfig(name="drop_columns_processor")
3929
4030
def test_drop_columns_processor_config_serialization():
    """Round-trip a drop-columns config through ``model_dump`` / ``model_validate``.

    The dumped dict must carry the column names, and the config rebuilt from
    that dict must have the same column names as the original.
    """
    config = DropColumnsProcessorConfig(name="drop_columns_processor", column_names=["col1", "col2"])

    # Serialize to dict
    config_dict = config.model_dump()
    assert config_dict["column_names"] == ["col1", "col2"]

    # Deserialize from dict
    config_restored = DropColumnsProcessorConfig.model_validate(config_dict)
    assert config_restored.column_names == config.column_names
5541
5642
def test_schema_transform_processor_config_creation():
    """Build a schema-transform config from a name and template and check its fields.

    The config is constructed without a ``build_stage`` argument; the test
    asserts the stored template, the reported processor type, and that the
    object is a ``ProcessorConfig``.
    """
    config = SchemaTransformProcessorConfig(
        name="output_format_processor",
        template={"text": "{{ col1 }}"},
    )

    assert config.template == {"text": "{{ col1 }}"}
    assert config.processor_type == ProcessorType.SCHEMA_TRANSFORM
    assert isinstance(config, ProcessorConfig)
6852
6953
def test_schema_transform_processor_config_validation():
    """Check the two rejection paths exercised for a schema-transform config.

    A missing ``template`` must raise a ``ValidationError``; a template holding
    a ``set`` value must raise ``InvalidConfigError`` with the
    "Template must be JSON serializable" message.
    """
    # Test missing required field raises error
    with pytest.raises(ValidationError, match="Field required"):
        SchemaTransformProcessorConfig(name="schema_transform_processor")

    # Test invalid template raises error
    with pytest.raises(InvalidConfigError, match="Template must be JSON serializable"):
        # The set literal is the deliberately invalid template value.
        SchemaTransformProcessorConfig(name="schema_transform_processor", template={"text": {1, 2, 3}})
8862
8963
def test_schema_transform_processor_config_serialization():
    """Round-trip a schema-transform config through ``model_dump`` / ``model_validate``.

    The dumped dict must carry the template, and the config rebuilt from that
    dict must have the same template as the original.
    """
    config = SchemaTransformProcessorConfig(
        name="schema_transform_processor",
        template={"text": "{{ col1 }}"},
    )

    # Serialize to dict
    config_dict = config.model_dump()
    assert config_dict["template"] == {"text": "{{ col1 }}"}

    # Deserialize from dict
    config_restored = SchemaTransformProcessorConfig.model_validate(config_dict)
    assert config_restored.template == config.template
10677
10778
@@ -110,7 +81,6 @@ def test_get_processor_config_from_kwargs():
11081 config_drop_columns = get_processor_config_from_kwargs (
11182 ProcessorType .DROP_COLUMNS ,
11283 name = "drop_columns_processor" ,
113- build_stage = BuildStage .POST_BATCH ,
11484 column_names = ["col1" ],
11585 )
11686 assert isinstance (config_drop_columns , DropColumnsProcessorConfig )
@@ -120,7 +90,6 @@ def test_get_processor_config_from_kwargs():
12090 config_schema_transform = get_processor_config_from_kwargs (
12191 ProcessorType .SCHEMA_TRANSFORM ,
12292 name = "output_format_processor" ,
123- build_stage = BuildStage .POST_BATCH ,
12493 template = {"text" : "{{ col1 }}" },
12594 )
12695 assert isinstance (config_schema_transform , SchemaTransformProcessorConfig )
@@ -134,6 +103,6 @@ class UnknownProcessorType(str, Enum):
134103 UNKNOWN = "unknown"
135104
136105 result = get_processor_config_from_kwargs (
137- UnknownProcessorType .UNKNOWN , name = "unknown_processor" , build_stage = BuildStage . POST_BATCH , column_names = ["col1" ]
106+ UnknownProcessorType .UNKNOWN , name = "unknown_processor" , column_names = ["col1" ]
138107 )
139108 assert result is None
0 commit comments