Skip to content

Commit a19a950

Browse files
authored
Merge pull request #165 from perib/columnonehotencoder_fix
Fix bug where the wrong parameter was passed into ColumnOneHotEncoder (`strategy='most_frequent'` replaced with `min_frequency=0.0001`)
2 parents 7ed71d4 + 0e4c323 commit a19a950

File tree

2 files changed

+4 -4 lines changed

tpot2/tpot_estimator/estimator.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -581,15 +581,15 @@ def fit(self, X, y):
581581
if self.categorical_features is not None: #if categorical features are specified, use those
582582
pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnSimpleImputer(self.categorical_features, strategy='most_frequent')))
583583
pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("numeric", strategy='mean')))
584-
pipeline_steps.append(("ColumnOneHotEncoder", tpot2.builtin_modules.ColumnOneHotEncoder(self.categorical_features, strategy='most_frequent')))
584+
pipeline_steps.append(("ColumnOneHotEncoder", tpot2.builtin_modules.ColumnOneHotEncoder(self.categorical_features, min_frequency=0.0001)))
585585

586586
else:
587587
if isinstance(X, pd.DataFrame):
588588
categorical_columns = X.select_dtypes(include=['object']).columns
589589
if len(categorical_columns) > 0:
590590
pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnSimpleImputer("categorical", strategy='most_frequent')))
591591
pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("numeric", strategy='mean')))
592-
pipeline_steps.append(("ColumnOneHotEncoder", tpot2.builtin_modules.ColumnOneHotEncoder("categorical", strategy='most_frequent')))
592+
pipeline_steps.append(("ColumnOneHotEncoder", tpot2.builtin_modules.ColumnOneHotEncoder("categorical", min_frequency=0.0001)))
593593
else:
594594
pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("all", strategy='mean')))
595595
else:

tpot2/tpot_estimator/steady_state_estimator.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -624,15 +624,15 @@ def fit(self, X, y):
624624
if self.categorical_features is not None: #if categorical features are specified, use those
625625
pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnSimpleImputer(self.categorical_features, strategy='most_frequent')))
626626
pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("numeric", strategy='mean')))
627-
pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnOneHotEncoder(self.categorical_features, strategy='most_frequent')))
627+
pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnOneHotEncoder(self.categorical_features, min_frequency=0.0001)))
628628

629629
else:
630630
if isinstance(X, pd.DataFrame):
631631
categorical_columns = X.select_dtypes(include=['object']).columns
632632
if len(categorical_columns) > 0:
633633
pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnSimpleImputer("categorical", strategy='most_frequent')))
634634
pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("numeric", strategy='mean')))
635-
pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnOneHotEncoder("categorical", strategy='most_frequent')))
635+
pipeline_steps.append(("impute_categorical", tpot2.builtin_modules.ColumnOneHotEncoder("categorical", min_frequency=0.0001)))
636636
else:
637637
pipeline_steps.append(("impute_numeric", tpot2.builtin_modules.ColumnSimpleImputer("all", strategy='mean')))
638638
else:

0 commit comments

Comments
 (0)