@@ -1060,25 +1060,6 @@ def _process_pandas_column(X_col, is_initial, feature_type, min_unique_continuous
             X_col.codes,
             None,
         )
-    elif isinstance(dt, pd.StringDtype):
-        # this handles pd.StringDtype both the numpy and arrow versions
-        # StringDtype is similar to object dtype but with proper NA handling
-        if X_col.hasnans:
-            # if hasnans is true then there is definitely a real missing value in there and not just a mask
-            return _process_ndarray(
-                X_col.dropna().values.astype(np.str_, copy=False),
-                X_col.notna().values,
-                is_initial,
-                feature_type,
-                min_unique_continuous,
-            )
-        return _process_ndarray(
-            X_col.values.astype(np.str_, copy=False),
-            None,
-            is_initial,
-            feature_type,
-            min_unique_continuous,
-        )
     elif issubclass(tt, _intbool_types):
         # this handles Int8Dtype to Int64Dtype, UInt8Dtype to UInt64Dtype, and BooleanDtype
         if X_col.hasnans:
@@ -1101,51 +1082,22 @@ def _process_pandas_column(X_col, is_initial, feature_type, min_unique_continuous
         )
     elif isinstance(dt, pd.StringDtype):
         # this handles pd.StringDtype both the numpy and arrow versions
-        # Pass StringArray to _process_arrayish to avoid inefficient conversion
+        # StringDtype is similar to object dtype but with proper NA handling
         if X_col.hasnans:
             # if hasnans is true then there is definitely a real missing value in there and not just a mask
-            if feature_type == "continuous":
-                # called under: fit or predict
-                # Convert to string for continuous processing
-                return (
-                    feature_type,
-                    None,
-                    None,
-                    *_process_continuous(
-                        X_col.dropna().values.astype(np.str_, copy=False),
-                        X_col.notna().values,
-                    ),
-                )
-            if is_predict:
-                # called under: predict. feature_type == "nominal" or feature_type == "ordinal"
-                return (
-                    None,
-                    *_encode_categorical_existing(
-                        X_col.dropna().values, X_col.notna().values
-                    ),
-                    None,
-                )
-            return _process_arrayish(
-                X_col.dropna().values,
+            return _process_ndarray(
+                X_col.dropna().values.astype(np.str_, copy=False),
                 X_col.notna().values,
+                is_initial,
                 feature_type,
                 min_unique_continuous,
             )
-
-        if feature_type == "continuous":
-            # called under: fit or predict
-            # Convert to string for continuous processing
-            return (
-                feature_type,
-                None,
-                None,
-                *_process_continuous(X_col.values.astype(np.str_, copy=False), None),
-            )
-        if is_predict:
-            # called under: predict. feature_type == "nominal" or feature_type == "ordinal"
-            return None, *_encode_categorical_existing(X_col.values, None), None
-        return _process_arrayish(
-            X_col.values, None, feature_type, min_unique_continuous
+        return _process_ndarray(
+            X_col.values.astype(np.str_, copy=False),
+            None,
+            is_initial,
+            feature_type,
+            min_unique_continuous,
         )
 
     # TODO: implement pd.SparseDtype
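
For context, the conversion that both StringDtype branches now funnel into _process_ndarray can be sketched outside the library. The snippet below is a minimal illustration and not part of this commit: it only shows how a nullable StringDtype column is reduced to a plain numpy unicode array plus an optional boolean validity mask, the pair that _process_ndarray receives alongside is_initial, feature_type, and min_unique_continuous. The example Series and the variable names are made up; pandas >= 1.0 is assumed for pd.StringDtype.

import numpy as np
import pandas as pd

# hypothetical column with a real missing value (pd.NA), using the "string" dtype
X_col = pd.Series(["low", pd.NA, "high", "low"], dtype="string")

if X_col.hasnans:
    # drop the missing entries and keep a boolean mask marking the non-missing rows
    values = X_col.dropna().values.astype(np.str_, copy=False)  # ['low' 'high' 'low'] as <U4
    nonmissings = X_col.notna().values                          # [ True False  True  True]
else:
    # no missing values: convert the whole column and pass None as the mask
    values = X_col.values.astype(np.str_, copy=False)
    nonmissings = None

print(values, nonmissings)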