Commit a647a25

Merge pull request #16 from INGEOTEC/develop
Version - 0.1.4
2 parents: 2571f06 + 2601f88

File tree

6 files changed (+938, -125 lines)


CompStats/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = '0.1.3'
+__version__ = '0.1.4'
 from CompStats.bootstrap import StatisticSamples
 from CompStats.measurements import CI, SE, difference_p_value
 from CompStats.performance import performance, difference, all_differences, plot_performance, plot_difference
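
A quick sanity check once this release is installed; the only change in this file is the version string, and the re-exported names are unchanged.

import CompStats

# Version bump introduced by this commit.
assert CompStats.__version__ == '0.1.4'

# Names re-exported by CompStats/__init__.py remain importable as before.
from CompStats import StatisticSamples, CI, SE, difference_p_value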

CompStats/interface.py

Lines changed: 29 additions & 22 deletions
@@ -59,30 +59,33 @@ class Perf(object):
     >>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
     >>> perf
     <Perf>
-    Prediction statistics with standard error
-    alg-1 = 1.000 (0.000)
-    forest = 0.946 (0.038)
+    Statistic with its standard error (se)
+    statistic (se)
+    0.9792 (0.0221) <= alg-1
+    0.9744 (0.0246) <= forest

     If an algorithm's prediction is missing, it can be included by calling the instance, as can be seen in the following instruction. Note that the algorithm's name can also be given with the keyword :py:attr:`name`.

     >>> lr = LogisticRegression().fit(X_train, y_train)
     >>> perf(lr.predict(X_val), name='Log. Reg.')
     <Perf>
-    Prediction statistics with standard error
-    alg-1 = 1.000 (0.000)
-    forest = 0.946 (0.038)
-    Log. Reg. = 0.946 (0.038)
+    Statistic with its standard error (se)
+    statistic (se)
+    1.0000 (0.0000) <= Log. Reg.
+    0.9792 (0.0221) <= alg-1
+    0.9744 (0.0246) <= forest

     The performance function used to compare the algorithms can be changed, and the same bootstrap samples would be used if the instance were cloned. Consequently, the values are computed using the same samples, as can be seen in the following example.

     >>> perf_error = clone(perf)
     >>> perf_error.error_func = lambda y, hy: (y != hy).mean()
     >>> perf_error
     <Perf>
-    Prediction statistics with standard error
-    alg-1 = 0.000 (0.000)
-    forest = 0.044 (0.030)
-    Log. Reg. = 0.044 (0.030)
+    Statistic with its standard error (se)
+    statistic (se)
+    0.0000 (0.0000) <= Log. Reg.
+    0.0222 (0.0237) <= alg-1
+    0.0222 (0.0215) <= forest
     """
     def __init__(self, y_true, *y_pred,
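
To try the new output format, the docstring's session can be reproduced with a setup along the following lines. The dataset, the split, and the first classifier are assumptions; the diff only shows the docstring, not how X_train, y_val, or m were built.

# Hypothetical setup mirroring the docstring's names; load_iris and
# GaussianNB are assumptions, not taken from this commit.
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from CompStats.interface import Perf

X, y = load_iris(return_X_y=True)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3)
m = GaussianNB().fit(X_train, y_train)
hy = m.predict(X_val)
ens = RandomForestClassifier().fit(X_train, y_train)
perf = Perf(y_val, hy, forest=ens.predict(X_val))
print(perf)  # one '0.9792 (0.0221) <= alg-1'-style row per system, best first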
@@ -137,18 +140,19 @@ def __sklearn_clone__(self):
         ins.predictions = dict(self.predictions)
         ins._statistic_samples._samples = self.statistic_samples._samples
         return ins
-
+
     def __repr__(self):
         """Prediction statistics with standard error in parenthesis"""
         return f"<{self.__class__.__name__}>\n{self}"

     def __str__(self):
         """Prediction statistics with standard error in parenthesis"""

-        se = self.se()
-        output = ["Prediction statistics with standard error"]
-        for key, value in self.statistic().items():
-            output.append(f'{key} = {value:0.3f} ({se[key]:0.3f})')
+        se = self.se
+        output = ["Statistic with its standard error (se)"]
+        output.append("statistic (se)")
+        for key, value in self.statistic.items():
+            output.append(f'{value:0.4f} ({se[key]:0.4f}) <= {key}')
         return "\n".join(output)

     def __call__(self, y_pred, name=None):
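
In isolation, the new __str__ layout reduces to the following (the se and statistic values are copied from the docstring above, not computed):

se = {'alg-1': 0.0221, 'forest': 0.0246}
stats = {'alg-1': 0.9792, 'forest': 0.9744}
output = ["Statistic with its standard error (se)", "statistic (se)"]
for key, value in stats.items():
    # Same f-string as the new method: value first, se in parentheses, name last.
    output.append(f'{value:0.4f} ({se[key]:0.4f}) <= {key}')
print("\n".join(output))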
@@ -196,7 +200,7 @@ def difference(self, wrt_to: str=None):
                 continue
             diff[k] = sign * (base - v)
         diff_ins = Difference(statistic_samples=clone(self.statistic_samples),
-                              statistic=self.statistic(),
+                              statistic=self.statistic,
                               best=self.best[0])
         diff_ins.statistic_samples.calls = diff
         diff_ins.statistic_samples.info['best'] = self.best[0]
@@ -209,12 +213,13 @@ def best(self):
         try:
             return self._best
         except AttributeError:
-            statistic = [(k, v) for k, v in self.statistic().items()]
+            statistic = [(k, v) for k, v in self.statistic.items()]
             statistic = sorted(statistic, key=lambda x: x[1],
                                reverse=self.statistic_samples.BiB)
             self._best = statistic[0]
         return self._best

+    @property
     def statistic(self):
         """Statistic
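
Because statistic (and, below, se) becomes a property, code written against 0.1.3 has to drop the parentheses:

perf.statistic    # 0.1.4: attribute access returns the sorted dict
# perf.statistic()  # 0.1.3 call style now fails: the property already
#                   # returns a dict, so this raises
#                   # TypeError: 'dict' object is not callable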
@@ -230,7 +235,7 @@ def statistic(self):
         >>> hy = m.predict(X_val)
         >>> ens = RandomForestClassifier().fit(X_train, y_train)
         >>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
-        >>> perf.statistic()
+        >>> perf.statistic
         {'alg-1': 1.0, 'forest': 0.9500891265597148}
         """
@@ -239,6 +244,7 @@ def statistic(self):
                            key=lambda x: x[1], reverse=self.statistic_samples.BiB)
         return dict(data)

+    @property
     def se(self):
         """Standard Error
@@ -254,9 +260,10 @@ def se(self):
         >>> hy = m.predict(X_val)
         >>> ens = RandomForestClassifier().fit(X_train, y_train)
         >>> perf = Perf(y_val, hy, forest=ens.predict(X_val))
-        >>> print(perf.se())
+        >>> perf.se
         {'alg-1': 0.0, 'forest': 0.026945730782184187}
         """
+
         return SE(self.statistic_samples)

     def plot(self, **kwargs):
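
The se property delegates to CompStats.measurements.SE over the bootstrap samples. As a point of reference, the bootstrap standard error is usually the standard deviation of the statistic across resamples; a minimal sketch of that idea (not CompStats' implementation, and bootstrap_se is a hypothetical name):

import numpy as np

def bootstrap_se(y, hy, statistic, n_samples=500, seed=0):
    # Standard deviation of `statistic` over bootstrap resamples of (y, hy).
    rng = np.random.default_rng(seed)
    values = []
    for _ in range(n_samples):
        idx = rng.integers(0, len(y), len(y))  # resample with replacement
        values.append(statistic(y[idx], hy[idx]))
    return float(np.std(values))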
@@ -405,7 +412,7 @@ class Difference:
     >>> diff
     <Difference>
     difference p-values w.r.t alg-1
-    forest 0.3
+    0.0780 <= forest
     """

     statistic_samples:StatisticSamples=None
@@ -420,7 +427,7 @@ def __str__(self):
         """p-value"""
         output = [f"difference p-values w.r.t {self.best}"]
         for k, v in self.p_value().items():
-            output.append(f'{k} {v}')
+            output.append(f'{v:0.4f} <= {k}')
         return "\n".join(output)

     def p_value(self):
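
Stand-alone, the new p-value row format works out to the following (using the 0.0780 value from the Difference docstring above):

best = 'alg-1'
p_values = {'forest': 0.078}
output = [f"difference p-values w.r.t {best}"]
for k, v in p_values.items():
    output.append(f'{v:0.4f} <= {k}')
print("\n".join(output))  # last line: 0.0780 <= forest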
