Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/source/libraries/sklearn.rst
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,8 @@ Currently the following transformers are supported out of the box:
* SelectorMixin-based transformers: SelectPercentile_,
SelectKBest_, GenericUnivariateSelect_, VarianceThreshold_,
RFE_, RFECV_, SelectFromModel_, RandomizedLogisticRegression_;
* stability selection-based transformers: RandomizedLogisticRegression_,
RandomizedLasso_, StabilitySelection_;
* scalers from sklearn.preprocessing: MinMaxScaler_, StandardScaler_,
MaxAbsScaler_, RobustScaler_.

Expand All @@ -276,6 +278,8 @@ Currently the following transformers are supported out of the box:
.. _RFECV: http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.RFECV.html
.. _VarianceThreshold: http://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.VarianceThreshold.html
.. _RandomizedLogisticRegression: http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RandomizedLogisticRegression.html
.. _RandomizedLasso: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.RandomizedLasso.html
.. _StabilitySelection: https://github.com/scikit-learn-contrib/stability-selection
.. _Pipeline: http://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html#sklearn.pipeline.Pipeline
.. _singledispatch: https://pypi.python.org/pypi/singledispatch

Expand Down
43 changes: 33 additions & 10 deletions eli5/sklearn/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,21 @@
import numpy as np # type: ignore
from sklearn.pipeline import Pipeline, FeatureUnion # type: ignore
from sklearn.feature_selection.base import SelectorMixin # type: ignore
try:
from sklearn.linear_model import ( # type: ignore
RandomizedLogisticRegression,
RandomizedLasso,
)
except ImportError:
# randomized_l1 feature selectors are not available (removed in scikit-learn 0.21)
RandomizedLogisticRegression = None
RandomizedLasso = None
try:
from stability_selection import StabilitySelection # type: ignore
# TODO: add support for stability_selection.RandomizedLogisticRegression and stability_selection.RandomizedLasso ?
except ImportError:
# scikit-learn-contrib/stability-selection is not available
StabilitySelection = None

from sklearn.preprocessing import ( # type: ignore
MinMaxScaler,
Expand All @@ -16,25 +31,33 @@
from eli5.sklearn.utils import get_feature_names as _get_feature_names


def register_notnone(generic_func, cls):
    """
    Build a decorator that registers an implementation of a generic
    function for ``cls``, unless ``cls`` is None.

    ``generic_func`` must expose a ``register(cls)`` method that returns a
    decorator (as single-dispatch generic functions do). ``cls`` is either
    the type to register the implementation for, or None; in this module
    None is the placeholder bound to a class whose import failed.

    The returned decorator gives back the undecorated function unchanged
    when ``cls`` is None, and otherwise returns whatever
    ``generic_func.register(cls)(func)`` returns. This allows several
    ``register_notnone`` decorators to be stacked on one function.
    """
    def inner_register(func):
        if cls is None:
            # Nothing to register against: hand the function back untouched
            # so that the remaining decorators in the stack still apply.
            return func
        # Delegate to the generic function's own registration machinery.
        return generic_func.register(cls)(func)
    return inner_register


# Feature selection:

@transform_feature_names.register(SelectorMixin)
@register_notnone(transform_feature_names, RandomizedLogisticRegression)
@register_notnone(transform_feature_names, RandomizedLasso)
@register_notnone(transform_feature_names, StabilitySelection)
def _select_names(est, in_names=None):
    """
    Return the names of the input features that ``est`` keeps.

    The boolean support mask is read from ``est.get_support``; its length
    is used as the number of input features when resolving ``in_names``
    through ``_get_feature_names``.
    """
    support_mask = est.get_support(indices=False)
    names = _get_feature_names(
        est,
        feature_names=in_names,
        num_features=len(support_mask),
    )
    # Positions of the non-zero (selected) mask entries, in ascending order.
    kept_positions = np.flatnonzero(support_mask)
    return [names[pos] for pos in kept_positions]

# Try to import the randomized L1 selectors from sklearn.linear_model and,
# if the import succeeds, register _select_names as the
# transform_feature_names implementation for each of them.
# If the import fails, nothing is registered.
try:
from sklearn.linear_model import ( # type: ignore
RandomizedLogisticRegression,
RandomizedLasso,
)
_select_names = transform_feature_names.register(RandomizedLasso)(_select_names)
_select_names = transform_feature_names.register(RandomizedLogisticRegression)(_select_names)
except ImportError: # Removed in scikit-learn 0.21
pass


# Scaling

Expand Down
62 changes: 54 additions & 8 deletions tests/test_sklearn_transform.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import absolute_import, print_function
import re

import pytest
Expand All @@ -15,11 +17,21 @@
RFECV,
SelectFromModel,
)
from sklearn.linear_model import (
LogisticRegression,
RandomizedLogisticRegression,
RandomizedLasso, # TODO: add tests and document
)
from sklearn.linear_model import LogisticRegression
try:
from sklearn.linear_model import (
RandomizedLogisticRegression,
RandomizedLasso,
)
except ImportError:
# randomized_l1 feature selectors are not available (removed in scikit-learn 0.21)
RandomizedLogisticRegression = None
RandomizedLasso = None
try:
from stability_selection import StabilitySelection
except ImportError:
# scikit-learn-contrib/stability-selection is not available
StabilitySelection = None
from sklearn.preprocessing import (
MinMaxScaler,
StandardScaler,
Expand All @@ -28,6 +40,7 @@
)
from sklearn.pipeline import FeatureUnion, make_pipeline

from .utils import sklearn_version
from eli5 import transform_feature_names
from eli5.sklearn import PermutationImportance

Expand All @@ -47,6 +60,10 @@ def selection_score_func(X, y):
return np.array([1, 2, 3, 4])


def instantiate_notnone(cls, *args, **kwargs):
    """
    Instantiate ``cls`` with the given arguments, or return None when
    ``cls`` itself is None.

    This lets a parametrize list be built even when ``cls`` is bound to
    None (the placeholder this module uses for a class whose import
    failed), instead of raising ``TypeError`` by calling None.
    """
    if cls is None:
        return None
    return cls(*args, **kwargs)


@pytest.mark.parametrize('transformer,expected', [
(MyFeatureExtractor(), ['f1', 'f2', 'f3']),

Expand Down Expand Up @@ -88,8 +105,37 @@ def selection_score_func(X, y):
['<NAME1>', '<NAME3>']),
(RFECV(LogisticRegression(random_state=42)),
['<NAME0>', '<NAME1>', '<NAME2>', '<NAME3>']),
(RandomizedLogisticRegression(random_state=42),
['<NAME1>', '<NAME2>', '<NAME3>']),

pytest.param(
instantiate_notnone(RandomizedLogisticRegression, random_state=42),
['<NAME1>', '<NAME2>', '<NAME3>'],
marks=pytest.mark.skipif(RandomizedLogisticRegression is None,
reason='scikit-learn RandomizedLogisticRegression is not available')
),
pytest.param(
instantiate_notnone(RandomizedLasso, random_state=42),
['<NAME1>', '<NAME2>', '<NAME3>'],
marks=[
pytest.mark.skipif(RandomizedLasso is None,
reason='RandomizedLasso is not available'),
pytest.mark.skipif(sklearn_version() < '0.19',
reason='scikit-learn < 0.19')]
),
pytest.param(
instantiate_notnone(RandomizedLasso, random_state=42),
['<NAME0>', '<NAME1>', '<NAME2>', '<NAME3>'],
marks=[
pytest.mark.skipif(RandomizedLasso is None,
reason='RandomizedLasso is not available'),
pytest.mark.skipif('0.19' <= sklearn_version(),
reason='scikit-learn >= 0.19')]
),
pytest.param(
instantiate_notnone(StabilitySelection, random_state=42),
['<NAME2>'],
marks=pytest.mark.skipif(StabilitySelection is None,
reason='scikit-learn-contrib/stability-selection is not available')
),
])
def test_transform_feature_names_iris(transformer, expected, iris_train):
X, y, _, _ = iris_train
Expand All @@ -102,4 +148,4 @@ def test_transform_feature_names_iris(transformer, expected, iris_train):
# Test in_names being None
expected_default_names = [re.sub('<NAME([0-9]+)>', r'x\1', name)
for name in expected]
assert transform_feature_names(transformer, None) == expected_default_names
assert transform_feature_names(transformer, None) == expected_default_names