Commit 5cee7ef

metrics and usability
1 parent 0a36157 commit 5cee7ef

File tree

6 files changed: +217 -16 lines changed

CompStats/interface.py

Lines changed: 36 additions & 3 deletions

@@ -47,8 +47,12 @@ class Perf(object):
     :type n_jobs: int
     :param num_samples: Number of bootstrap samples, default=500.
     :type num_samples: int
+    :param use_tqdm: Whether to use tqdm.tqdm to visualize the progress, default=True.
+    :type use_tqdm: bool
+
 
     >>> from sklearn.svm import LinearSVC
+    >>> from sklearn.linear_model import LogisticRegression
     >>> from sklearn.ensemble import RandomForestClassifier
     >>> from sklearn.datasets import load_iris
     >>> from sklearn.model_selection import train_test_split
@@ -65,22 +69,36 @@ class Perf(object):
     <Perf>
     Prediction statistics with standard error
     alg-1 = 1.000 (0.000)
-    forest = 0.978 (0.019)
+    forest = 0.946 (0.038)
+
+    If an algorithm's prediction is missing, it can be added by calling the instance, as shown in the following instruction. Note that the algorithm's name can also be given with the keyword :py:attr:`name`.
+
+    >>> lr = LogisticRegression().fit(X_train, y_train)
+    >>> perf(lr.predict(X_val), name='Log. Reg.')
+    <Perf>
+    Prediction statistics with standard error
+    alg-1 = 1.000 (0.000)
+    forest = 0.946 (0.038)
+    Log. Reg. = 0.946 (0.038)
+
+    The performance function used to compare the algorithms can be changed; if the instance is cloned, the same bootstrap samples are kept, so the values are computed on identical samples, as the following example shows.
 
     >>> perf_error = clone(perf)
     >>> perf_error.error_func = lambda y, hy: (y != hy).mean()
     >>> perf_error
     <Perf>
     Prediction statistics with standard error
     alg-1 = 0.000 (0.000)
-    forest = 0.022 (0.018)
+    forest = 0.044 (0.030)
+    Log. Reg. = 0.044 (0.030)
 
     """
     def __init__(self, y_true, *args,
                  score_func=macro(f1_score),
                  error_func=None,
                  num_samples: int=500,
                  n_jobs: int=-1,
+                 use_tqdm=True,
                  **kwargs):
         assert (score_func is None) ^ (error_func is None)
         self.score_func = score_func
@@ -93,6 +111,7 @@ def __init__(self, y_true, *args,
         self.y_true = y_true
         self.num_samples = num_samples
         self.n_jobs = n_jobs
+        self.use_tqdm = use_tqdm
         self._init()
 
     def _init(self):
@@ -140,6 +159,20 @@ def __str__(self):
             output.append(f'{key} = {value:0.3f} ({se[key]:0.3f})')
         return "\n".join(output)
 
+    def __call__(self, y_pred, name=None):
+        """Add predictions"""
+        if name is None:
+            k = len(self.predictions) + 1
+            if k == 0:
+                k = 1
+            name = f'alg-{k}'
+        self.predictions[name] = np.asanyarray(y_pred)
+        samples = self._statistic_samples
+        calls = samples.calls
+        if name in calls:
+            del calls[name]
+        return self
+
     def difference(self, wrt_to: str=None):
         """Compute the difference w.r.t any algorithm by default is the best
 
@@ -285,7 +318,7 @@ def statistic_samples(self):
         algs = set(samples.calls.keys())
         algs = set(self.predictions.keys()) - algs
         if len(algs):
-            for key in progress_bar(algs):
+            for key in progress_bar(algs, use_tqdm=self.use_tqdm):
                 samples(self.y_true, self.predictions[key], name=key)
         return self._statistic_samples
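Taken together, the interface changes let a Perf be built incrementally and run silently. A minimal sketch of the new calling pattern, not part of the commit, assuming the iris split and the fitted models (m, ens, lr) from the docstring example above:

    from CompStats.interface import Perf

    # use_tqdm=False suppresses the progress bar while bootstrapping
    perf = Perf(y_val, m.predict(X_val), use_tqdm=False)
    perf(ens.predict(X_val), name='forest')  # __call__ adds predictions after construction
    perf(lr.predict(X_val))                  # without name=, a default alg-<k> label is generated
    print(perf)  # statistics are computed lazily, and only for the newly added predictions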
CompStats/metrics.py

Lines changed: 127 additions & 5 deletions

@@ -15,22 +15,143 @@
 from sklearn import metrics
 
 
+def accuracy_score(y_true, *args,
+                   normalize=True, sample_weight=None,
+                   num_samples: int=500,
+                   n_jobs: int=-1,
+                   use_tqdm=True,
+                   **kwargs):
+    """:py:class:`~CompStats.interface.Perf` with :py:func:`sklearn.metrics.accuracy_score` as :py:attr:`score_func`.
+
+    :param y_true: True measurement or could be a pandas.DataFrame where column label 'y' corresponds to the true measurement.
+    :type y_true: numpy.ndarray or pandas.DataFrame
+    :param args: Predictions, the algorithms will be identified with alg-k where k=1 is the first argument included in :py:attr:`args`.
+    :type args: numpy.ndarray
+    :param kwargs: Predictions, the algorithms will be identified using the keyword
+    :type kwargs: numpy.ndarray
+    :param num_samples: Number of bootstrap samples, default=500.
+    :type num_samples: int
+    :param n_jobs: Number of jobs to compute the statistic, default=-1 corresponding to use all threads.
+    :type n_jobs: int
+    :param use_tqdm: Whether to use tqdm.tqdm to visualize the progress, default=True.
+    :type use_tqdm: bool
+    :param normalize: see sklearn.metrics.accuracy_score
+    :param sample_weight: see sklearn.metrics.accuracy_score
+
+    >>> from sklearn.svm import LinearSVC
+    >>> from sklearn.ensemble import RandomForestClassifier
+    >>> from sklearn.datasets import load_iris
+    >>> from sklearn.model_selection import train_test_split
+    >>> from sklearn.base import clone
+    >>> from CompStats.metrics import accuracy_score
+    >>> X, y = load_iris(return_X_y=True)
+    >>> _ = train_test_split(X, y, test_size=0.3)
+    >>> X_train, X_val, y_train, y_val = _
+    >>> m = LinearSVC().fit(X_train, y_train)
+    >>> hy = m.predict(X_val)
+    >>> ens = RandomForestClassifier().fit(X_train, y_train)
+    >>> score = accuracy_score(y_val, hy,
+                               forest=ens.predict(X_val))
+    >>> score
+    <Perf>
+    Prediction statistics with standard error
+    forest = 0.978 (0.023)
+    alg-1 = 0.956 (0.030)
+    >>> diff = score.difference()
+    >>> diff
+    <Difference>
+    difference p-values w.r.t forest
+    alg-1 0.252
+    """
+
+    def inner(y, hy):
+        return metrics.accuracy_score(y, hy,
+                                      normalize=normalize,
+                                      sample_weight=sample_weight)
+    return Perf(y_true, *args, score_func=inner,
+                num_samples=num_samples, n_jobs=n_jobs,
+                use_tqdm=use_tqdm,
+                **kwargs)
+
+
+def balanced_accuracy_score(y_true, *args,
+                            sample_weight=None, adjusted=False,
+                            num_samples: int=500,
+                            n_jobs: int=-1,
+                            use_tqdm=True,
+                            **kwargs):
+    """:py:class:`~CompStats.interface.Perf` with :py:func:`sklearn.metrics.balanced_accuracy_score` as :py:attr:`score_func`.
+
+    :param y_true: True measurement or could be a pandas.DataFrame where column label 'y' corresponds to the true measurement.
+    :type y_true: numpy.ndarray or pandas.DataFrame
+    :param args: Predictions, the algorithms will be identified with alg-k where k=1 is the first argument included in :py:attr:`args`.
+    :type args: numpy.ndarray
+    :param kwargs: Predictions, the algorithms will be identified using the keyword
+    :type kwargs: numpy.ndarray
+    :param num_samples: Number of bootstrap samples, default=500.
+    :type num_samples: int
+    :param n_jobs: Number of jobs to compute the statistic, default=-1 corresponding to use all threads.
+    :type n_jobs: int
+    :param use_tqdm: Whether to use tqdm.tqdm to visualize the progress, default=True.
+    :type use_tqdm: bool
+    :param sample_weight: see sklearn.metrics.balanced_accuracy_score
+    :param adjusted: see sklearn.metrics.balanced_accuracy_score
+
+    >>> from sklearn.svm import LinearSVC
+    >>> from sklearn.ensemble import RandomForestClassifier
+    >>> from sklearn.datasets import load_iris
+    >>> from sklearn.model_selection import train_test_split
+    >>> from sklearn.base import clone
+    >>> from CompStats.metrics import balanced_accuracy_score
+    >>> X, y = load_iris(return_X_y=True)
+    >>> _ = train_test_split(X, y, test_size=0.3)
+    >>> X_train, X_val, y_train, y_val = _
+    >>> m = LinearSVC().fit(X_train, y_train)
+    >>> hy = m.predict(X_val)
+    >>> ens = RandomForestClassifier().fit(X_train, y_train)
+    >>> score = balanced_accuracy_score(y_val, hy,
+                                        forest=ens.predict(X_val))
+    >>> score
+    <Perf>
+    Prediction statistics with standard error
+    forest = 0.957 (0.031)
+    alg-1 = 0.935 (0.037)
+    >>> diff = score.difference()
+    >>> diff
+    <Difference>
+    difference p-values w.r.t forest
+    alg-1 0.254
+    """
+
+    def inner(y, hy):
+        return metrics.balanced_accuracy_score(y, hy,
+                                               adjusted=adjusted,
+                                               sample_weight=sample_weight)
+    return Perf(y_true, *args, score_func=inner,
+                num_samples=num_samples, n_jobs=n_jobs,
+                use_tqdm=use_tqdm,
+                **kwargs)
+
+
 def f1_score(y_true, *args, labels=None, pos_label=1,
              average='binary', sample_weight=None,
             zero_division='warn', num_samples: int=500,
-             n_jobs: int=-1, **kwargs):
-    """:py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.f1_score` as :py:attr:`score_func.`
+             n_jobs: int=-1, use_tqdm=True,
+             **kwargs):
+    """:py:class:`~CompStats.interface.Perf` with :py:func:`sklearn.metrics.f1_score` as :py:attr:`score_func`.
 
     :param y_true: True measurement or could be a pandas.DataFrame where column label 'y' corresponds to the true measurement.
     :type y_true: numpy.ndarray or pandas.DataFrame
     :param args: Predictions, the algorithms will be identified with alg-k where k=1 is the first argument included in :py:attr:`args.`
     :type args: numpy.ndarray
     :param kwargs: Predictions, the algorithms will be identified using the keyword
     :type args: numpy.ndarray
-    :param n_jobs: Number of jobs to compute the statistic, default=-1 corresponding to use all threads.
-    :type n_jobs: int
     :param num_samples: Number of bootstrap samples, default=500.
     :type num_samples: int
+    :param n_jobs: Number of jobs to compute the statistic, default=-1 corresponding to use all threads.
+    :type n_jobs: int
+    :param use_tqdm: Whether to use tqdm.tqdm to visualize the progress, default=True.
+    :type use_tqdm: bool
     :param labels: see sklearn.metrics.f1_score
     :param pos_label: see sklearn.metrics.f1_score
     :param average: see sklearn.metrics.f1_score
@@ -72,4 +193,5 @@ def inner(y):
                       zero_division=zero_division)
     return Perf(y_true, *args, score_func=inner,
                 num_samples=num_samples, n_jobs=n_jobs,
-                **kwargs)
+                use_tqdm=use_tqdm,
+                **kwargs)
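
Both new wrappers follow the same template: the metric's own keyword arguments are captured in a closure (inner), while the predictions and the bootstrap settings are forwarded to Perf. A sketch of how a further metric could be wrapped in the same style; this recall_score wrapper is hypothetical and not part of the commit:

    from sklearn import metrics
    from CompStats.interface import Perf


    def recall_score(y_true, *args, average='binary',
                     num_samples: int=500, n_jobs: int=-1,
                     use_tqdm=True, **kwargs):
        """Hypothetical Perf wrapper for sklearn.metrics.recall_score."""

        def inner(y, hy):
            # close over the metric's keyword arguments
            return metrics.recall_score(y, hy, average=average)

        # predictions and bootstrap settings go straight to Perf
        return Perf(y_true, *args, score_func=inner,
                    num_samples=num_samples, n_jobs=n_jobs,
                    use_tqdm=use_tqdm, **kwargs)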

CompStats/tests/test_interface.py

Lines changed: 21 additions & 1 deletion

@@ -130,4 +130,24 @@ def test_Perf_dataframe():
 
     df = pd.read_csv(DATA)
     perf = Perf(df, num_samples=50)
-    assert 'INGEOTEC' in perf.statistic()
+    assert 'INGEOTEC' in perf.statistic()
+
+
+def test_Perf_call():
+    """Test Perf call"""
+    from CompStats.interface import Perf
+
+    X, y = load_iris(return_X_y=True)
+    _ = train_test_split(X, y, test_size=0.3)
+    X_train, X_val, y_train, y_val = _
+    m = LinearSVC().fit(X_train, y_train)
+    hy = m.predict(X_val)
+    ens = RandomForestClassifier().fit(X_train, y_train)
+    hy2 = ens.predict(X_val)
+    perf = Perf(y_val, num_samples=50)
+    for xx in [hy, hy2]:
+        _ = perf(xx)
+        print(_)
+    perf(hy, name='alg-2')
+    assert 'alg-2' not in perf._statistic_samples.calls
+    assert 'alg-1' in perf._statistic_samples.calls

CompStats/tests/test_metrics.py

Lines changed: 27 additions & 1 deletion

@@ -27,4 +27,30 @@ def test_f1_score():
     ens = RandomForestClassifier().fit(X_train, y_train)
     perf = f1_score(y_val, forest=ens.predict(X_val),
                     num_samples=50, average='macro')
-    assert 'forest' in perf.statistic()
+    assert 'forest' in perf.statistic()
+
+
+def test_accuracy_score():
+    """Test accuracy_score"""
+    from CompStats.metrics import accuracy_score
+
+    X, y = load_iris(return_X_y=True)
+    _ = train_test_split(X, y, test_size=0.3)
+    X_train, X_val, y_train, y_val = _
+    ens = RandomForestClassifier().fit(X_train, y_train)
+    perf = accuracy_score(y_val, forest=ens.predict(X_val),
+                          num_samples=50)
+    assert 'forest' in perf.statistic()
+
+
+def test_balanced_accuracy_score():
+    """Test balanced_accuracy_score"""
+    from CompStats.metrics import balanced_accuracy_score
+
+    X, y = load_iris(return_X_y=True)
+    _ = train_test_split(X, y, test_size=0.3)
+    X_train, X_val, y_train, y_val = _
+    ens = RandomForestClassifier().fit(X_train, y_train)
+    perf = balanced_accuracy_score(y_val, forest=ens.predict(X_val),
+                                   num_samples=50)
+    assert 'forest' in perf.statistic()

CompStats/tests/test_performance.py

Lines changed: 2 additions & 2 deletions

@@ -73,7 +73,7 @@ def test_performance_multiple_metrics():
         {"func": mean_absolute_error, 'BiB': False}
     ]
     perf = performance_multiple_metrics(df, "y", metrics)
-    ins = plot_performance_multiple(perf)
+    plot_performance_multiple(perf)
     assert 'accuracy_score' in perf['samples']
     assert 'y' not in perf['samples']['accuracy_score']
     assert 'INGEOTEC' in perf['samples']['accuracy_score']
@@ -90,7 +90,7 @@ def test_difference_multiple():
     ]
     perf = performance_multiple_metrics(df, "y", metrics)
     diff = difference_multiple(perf)
-    ins = plot_difference_multiple(diff)
+    plot_difference_multiple(diff)
     assert diff['winner']['accuracy_score']['best'] == 'BoW'
     assert 'BoW' not in diff['winner']['accuracy_score']['diff'].keys()
     # assert isinstance(ins, sns.FacetGrid)

CompStats/utils.py

Lines changed: 4 additions & 4 deletions

@@ -18,8 +18,8 @@
 USE_TQDM = False
 
 
-def progress_bar(arg, **kwargs):
+def progress_bar(arg, use_tqdm: bool=True, **kwargs):
     """Progress bar using tqdm"""
-    if USE_TQDM:
-        return tqdm(arg, **kwargs)
-    return arg
+    if not USE_TQDM or not use_tqdm:
+        return arg
+    return tqdm(arg, **kwargs)
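
The rewritten progress_bar is effectively an AND gate: the bar appears only when both the module-level USE_TQDM flag and the new per-call use_tqdm argument are truthy, which is how Perf can switch the bar off per instance without touching the global. A small sketch of the behaviour, assuming the module is importable as CompStats.utils:

    from CompStats import utils

    utils.USE_TQDM = True
    it = utils.progress_bar(range(10))                  # tqdm bar: both switches on
    it = utils.progress_bar(range(10), use_tqdm=False)  # plain iterable: per-call switch wins
    utils.USE_TQDM = False
    it = utils.progress_bar(range(10))                  # plain iterable: global switch off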
