
Commit 8bd7cec

Merge pull request #27 from INGEOTEC/develop
Version - 0.1.13
2 parents: 4b28ebe + 1acc151

File tree

3 files changed: +178, -3 lines changed


CompStats/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-__version__ = '0.1.12'
+__version__ = '0.1.13'
 from CompStats.bootstrap import StatisticSamples
 from CompStats.measurements import CI, SE, difference_p_value
 from CompStats.performance import performance, difference, all_differences, plot_performance, plot_difference

CompStats/metrics.py

Lines changed: 110 additions & 1 deletion
@@ -13,6 +13,7 @@
 # limitations under the License.
 from functools import wraps
 from sklearn import metrics
+from scipy import stats
 from CompStats.interface import Perf
 from CompStats.utils import metrics_docs

@@ -302,6 +303,81 @@ def inner(y, hy):
                 **kwargs)


+def macro_f1(y_true, *y_pred, labels=None,
+             sample_weight=None, zero_division='warn',
+             num_samples: int=500, n_jobs: int=-1, use_tqdm=True,
+             **kwargs):
+    """:py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.f1_score` (as :py:attr:`score_func`) with the parameters needed to compute the macro score. The parameters not described here can be found in :py:func:`~sklearn.metrics.f1_score`.
+
+    :param y_true: True measurement, or a pandas.DataFrame where the column labeled 'y' corresponds to the true measurement.
+    :type y_true: numpy.ndarray or pandas.DataFrame
+    :param y_pred: Predictions; the algorithms will be identified as alg-k, where k=1 is the first argument included in :py:attr:`y_pred`.
+    :type y_pred: numpy.ndarray
+    :param kwargs: Predictions; the algorithms will be identified using the keyword.
+    :type kwargs: numpy.ndarray
+    :param num_samples: Number of bootstrap samples, default=500.
+    :type num_samples: int
+    :param n_jobs: Number of jobs used to compute the statistic; default=-1 uses all threads.
+    :type n_jobs: int
+    :param use_tqdm: Whether to use tqdm.tqdm to visualize the progress, default=True.
+    :type use_tqdm: bool
+    """
+    return f1_score(y_true, *y_pred, labels=labels, average='macro',
+                    sample_weight=sample_weight, zero_division=zero_division,
+                    num_samples=num_samples, n_jobs=n_jobs,
+                    use_tqdm=use_tqdm, **kwargs)
+
+
+def macro_recall(y_true, *y_pred, labels=None,
+                 sample_weight=None, zero_division='warn',
+                 num_samples: int=500, n_jobs: int=-1, use_tqdm=True,
+                 **kwargs):
+    """:py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.recall_score` (as :py:attr:`score_func`) with the parameters needed to compute the macro score. The parameters not described here can be found in :py:func:`~sklearn.metrics.recall_score`.
+
+    :param y_true: True measurement, or a pandas.DataFrame where the column labeled 'y' corresponds to the true measurement.
+    :type y_true: numpy.ndarray or pandas.DataFrame
+    :param y_pred: Predictions; the algorithms will be identified as alg-k, where k=1 is the first argument included in :py:attr:`y_pred`.
+    :type y_pred: numpy.ndarray
+    :param kwargs: Predictions; the algorithms will be identified using the keyword.
+    :type kwargs: numpy.ndarray
+    :param num_samples: Number of bootstrap samples, default=500.
+    :type num_samples: int
+    :param n_jobs: Number of jobs used to compute the statistic; default=-1 uses all threads.
+    :type n_jobs: int
+    :param use_tqdm: Whether to use tqdm.tqdm to visualize the progress, default=True.
+    :type use_tqdm: bool
+    """
+    return recall_score(y_true, *y_pred, labels=labels, average='macro',
+                        sample_weight=sample_weight, zero_division=zero_division,
+                        num_samples=num_samples, n_jobs=n_jobs,
+                        use_tqdm=use_tqdm, **kwargs)
+
+
+def macro_precision(y_true, *y_pred, labels=None,
+                    sample_weight=None, zero_division='warn',
+                    num_samples: int=500, n_jobs: int=-1, use_tqdm=True,
+                    **kwargs):
+    """:py:class:`~CompStats.interface.Perf` with :py:func:`~sklearn.metrics.precision_score` (as :py:attr:`score_func`) with the parameters needed to compute the macro score. The parameters not described here can be found in :py:func:`~sklearn.metrics.precision_score`.
+
+    :param y_true: True measurement, or a pandas.DataFrame where the column labeled 'y' corresponds to the true measurement.
+    :type y_true: numpy.ndarray or pandas.DataFrame
+    :param y_pred: Predictions; the algorithms will be identified as alg-k, where k=1 is the first argument included in :py:attr:`y_pred`.
+    :type y_pred: numpy.ndarray
+    :param kwargs: Predictions; the algorithms will be identified using the keyword.
+    :type kwargs: numpy.ndarray
+    :param num_samples: Number of bootstrap samples, default=500.
+    :type num_samples: int
+    :param n_jobs: Number of jobs used to compute the statistic; default=-1 uses all threads.
+    :type n_jobs: int
+    :param use_tqdm: Whether to use tqdm.tqdm to visualize the progress, default=True.
+    :type use_tqdm: bool
+    """
+    return precision_score(y_true, *y_pred, labels=labels, average='macro',
+                           sample_weight=sample_weight, zero_division=zero_division,
+                           num_samples=num_samples, n_jobs=n_jobs,
+                           use_tqdm=use_tqdm, **kwargs)
+
+
 ########################################################
 #################### Regression ########################
 ########################################################

@@ -576,7 +652,6 @@ def inner(y, hy):
                 **kwargs)


-@metrics_docs(hy_name='y_pred', attr_name='score_func')
 def d2_absolute_error_score(y_true,
                             *y_pred,
                             sample_weight=None,

@@ -597,3 +672,37 @@ def inner(y, hy):
                 num_samples=num_samples, n_jobs=n_jobs,
                 use_tqdm=use_tqdm,
                 **kwargs)
+
+
+def pearsonr(y_true, *y_pred,
+             alternative='two-sided', method=None,
+             num_samples: int=500,
+             n_jobs: int=-1,
+             use_tqdm=True,
+             **kwargs):
+    """:py:class:`~CompStats.interface.Perf` with :py:func:`~scipy.stats.pearsonr` as :py:attr:`score_func`.
+
+    :param y_true: True measurement, or a pandas.DataFrame where the column labeled 'y' corresponds to the true measurement.
+    :type y_true: numpy.ndarray or pandas.DataFrame
+    :param y_pred: Predictions; the algorithms will be identified as alg-k, where k=1 is the first argument included in :py:attr:`y_pred`.
+    :type y_pred: numpy.ndarray
+    :param kwargs: Predictions; the algorithms will be identified using the keyword.
+    :type kwargs: numpy.ndarray
+    :param num_samples: Number of bootstrap samples, default=500.
+    :type num_samples: int
+    :param n_jobs: Number of jobs used to compute the statistic; default=-1 uses all threads.
+    :type n_jobs: int
+    :param use_tqdm: Whether to use tqdm.tqdm to visualize the progress, default=True.
+    :type use_tqdm: bool
+    """
+
+    @wraps(stats.pearsonr)
+    def inner(y, hy):
+        return stats.pearsonr(y, hy,
+                              alternative=alternative,
+                              method=method).statistic
+
+    return Perf(y_true, *y_pred, score_func=inner, error_func=None,
+                num_samples=num_samples, n_jobs=n_jobs,
+                use_tqdm=use_tqdm,
+                **kwargs)
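
For readers skimming this commit, a minimal usage sketch of the new macro_f1 wrapper. It assumes only what the diff and the new tests show (iris data, a random forest, a keyword-named prediction vector); the same pattern applies to macro_recall and macro_precision:

# A minimal sketch mirroring test_macro_f1_score below.
from sklearn.datasets import load_iris
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from CompStats.metrics import macro_f1

X, y = load_iris(return_X_y=True)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3)
hy = RandomForestClassifier().fit(X_train, y_train).predict(X_val)
# The keyword name ("forest") labels the algorithm; per the docstring,
# further systems can be compared by passing more keyword arguments.
perf = macro_f1(y_val, forest=hy, num_samples=50)
print(perf.statistic)  # equals sklearn's f1_score(y_val, hy, average='macro')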

CompStats/tests/test_metrics.py

Lines changed: 67 additions & 1 deletion
@@ -39,6 +39,21 @@ def test_f1_score():
     assert str(perf) is not None


+def test_macro_f1_score():
+    """Test macro_f1"""
+    from CompStats.metrics import macro_f1
+
+    X, y = load_iris(return_X_y=True)
+    _ = train_test_split(X, y, test_size=0.3)
+    X_train, X_val, y_train, y_val = _
+    ens = RandomForestClassifier().fit(X_train, y_train)
+    hy = ens.predict(X_val)
+    perf = macro_f1(y_val, forest=hy, num_samples=50)
+    assert isinstance(perf.statistic, float)
+    _ = metrics.f1_score(y_val, hy, average='macro')
+    assert _ == perf.statistic
+
+
 def test_accuracy_score():
     """Test f1_score"""
     from CompStats.metrics import accuracy_score

@@ -152,6 +167,23 @@ def test_precision_score():
     assert _ == perf.statistic


+def test_macro_precision():
+    """Test macro_precision"""
+    from CompStats.metrics import macro_precision
+    import numpy as np
+
+    X, y = load_iris(return_X_y=True)
+    _ = train_test_split(X, y, test_size=0.3, stratify=y)
+    X_train, X_val, y_train, y_val = _
+    ens = RandomForestClassifier().fit(X_train, y_train)
+    hy = ens.predict(X_val)
+    perf = macro_precision(y_val,
+                           forest=hy,
+                           num_samples=50)
+    _ = metrics.precision_score(y_val, hy, average='macro')
+    assert _ == perf.statistic
+
+
 def test_recall_score():
     """Test recall_score"""
     from CompStats.metrics import recall_score

@@ -169,6 +201,23 @@ def test_recall_score():
     assert _ == perf.statistic


+def test_macro_recall():
+    """Test macro_recall"""
+    from CompStats.metrics import macro_recall
+    import numpy as np
+
+    X, y = load_iris(return_X_y=True)
+    _ = train_test_split(X, y, test_size=0.3, stratify=y)
+    X_train, X_val, y_train, y_val = _
+    ens = RandomForestClassifier().fit(X_train, y_train)
+    hy = ens.predict(X_val)
+    perf = macro_recall(y_val,
+                        forest=hy,
+                        num_samples=50)
+    _ = metrics.recall_score(y_val, hy, average='macro')
+    assert _ == perf.statistic
+
+
 def test_jaccard_score():
     """jaccard_score"""
     from CompStats.metrics import jaccard_score

@@ -427,4 +476,21 @@ def test_d2_absolute_error_score():
                                    forest=hy,
                                    num_samples=50)
     _ = metrics.d2_absolute_error_score(y_val, hy)
-    assert _ == perf.statistic
+    assert _ == perf.statistic
+
+
+def test_pearsonr():
+    """Test pearsonr"""
+    from CompStats.metrics import pearsonr
+    from scipy import stats
+
+    X, y = load_diabetes(return_X_y=True)
+    _ = train_test_split(X, y, test_size=0.3)
+    X_train, X_val, y_train, y_val = _
+    ens = RandomForestRegressor().fit(X_train, y_train)
+    hy = ens.predict(X_val)
+    perf = pearsonr(y_val,
+                    forest=hy,
+                    num_samples=50)
+    _ = stats.pearsonr(y_val, hy)
+    assert _.statistic == perf.statistic
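
Analogously, the new pearsonr wrapper can be exercised outside the test suite. This sketch mirrors test_pearsonr above (diabetes data, a random forest regressor) and relies only on behavior the test asserts:

from scipy import stats
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from CompStats.metrics import pearsonr

X, y = load_diabetes(return_X_y=True)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3)
hy = RandomForestRegressor().fit(X_train, y_train).predict(X_val)
# The Perf point estimate is scipy's correlation coefficient (.statistic);
# the wrapper's inner function drops the p-value and bootstraps only the
# coefficient, with num_samples resamples.
perf = pearsonr(y_val, forest=hy, num_samples=50)
assert perf.statistic == stats.pearsonr(y_val, hy).statistic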
