ML-KULeuven · LouisCarpentier42 · Jan 24, 2025 · Jan 24, 2025 · Jan 24, 2025 · Jan 24, 2025
@@ -0,0 +1,43 @@
+name: Lint
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+  lint:
+    runs-on: ubuntu-latest
+    steps:
+
+      # Check out the repository code
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      # Set up Python (current major of the action, in line with checkout@v4)
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'  # Only for one python version to save on resources
+
+      # Run black in check mode: reports unformatted files, never rewrites them
+      - name: black
+        uses: psf/black@stable
+        with:
+          options: "--check --verbose"
+          src: "./dtaianomaly"
+
+      # Install dtaianomaly (not required for black, hence done after the black step)
+      - name: Install dtaianomaly
+        run: |
+          python -m pip install --upgrade pip
+          pip install .[all]
+          pip list
+
+      # Check import ordering with isort (check-only: prints a diff, does not rewrite files)
+      - name: isort
+        uses: isort/isort-action@v1.1.1
+        with:
+          sortPaths: "dtaianomaly"
+          configuration: "--check-only --diff --profile black"
@@ -30,17 +30,9 @@ jobs:
     - name: Install dtaianomaly
       run: |
         python -m pip install --upgrade pip
-        pip install .[all]
-        pip install flake8
+        pip install .[tests]
         pip list
 
-    - name: Lint with flake8
-      run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-
     - name: Test with pytest
       run: |
         pytest --cov=dtaianomaly --cov-report term-missing
@@ -0,0 +1,21 @@
+repos:
+
+-   repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v5.0.0
+    hooks:
+    -   id: trailing-whitespace  # trims trailing whitespace from lines
+
+-   repo: https://github.com/pycqa/isort/
+    rev: 5.13.2
+    hooks:
+    -   id: isort
+        args: ["--profile", "black"]  # same profile as the `--profile black` used by the CI isort check
+        language: python
+
+-   repo: https://github.com/psf/black
+    rev: 24.10.0
+    hooks:
+    -   id: black
+        language: python
+
+files: "dtaianomaly"  # top-level pre-commit file filter (a regex): hooks only run on matching paths
@@ -17,7 +17,8 @@ Added
   everything via different requirements files.
 - Added support for Python 3.13.
 - Implemented the ``predict_confidence()`` method in the ``BaseDetector``, which 
-  computes a confidence score for each prediction of the anomaly detector. 
+  computes a confidence score for each prediction of the anomaly detector.
+- Integrated ``black`` and ``isort`` to format the code.
 
 Changed
 ^^^^^^^
@@ -35,7 +36,6 @@ Changed
   in ``compute_window_size()``, which will be returned (if provided) instead of raising
   an error to allow the system to continue.
 
-
 Fixed
 ^^^^^
 - Renamed ``ZNormalizer`` to ``StandardScaler``, to make it align with the Sklearn declaration.

@@ -92,6 +92,13 @@ run the following command:
 You should include the ``--editable`` flag to ensure that your
 changes to the code are actually reflected in the installed version.
 
+Next, install the pre-commit hooks for the project
+using the following command:
+
+.. code-block:: bash
+
+     pre-commit install
+
 To check that the environment is set up correctly, verify that all
 tests succeed by running the following command (which also checks the
 coverage of the unit tests):

@@ -1,4 +1,3 @@
-
 import abc
 import inspect
 
@@ -13,11 +12,22 @@ def initialization_call_string(o: object) -> str:
     parameters = {
         parameter: getattr(o, parameter)
         for parameter, value in inspect.signature(o.__init__).parameters.items()
-        if parameter not in ['args', 'kwargs'] and value.default != getattr(o, parameter)
+        if parameter not in ["args", "kwargs"]
+        and value.default != getattr(o, parameter)
     }
-    if hasattr(o, 'kwargs'):
+    if hasattr(o, "kwargs"):
         parameters.update(o.kwargs)
-    return o.__class__.__name__ + '(' + ','.join([f'{parameter}={string_with_apostrophe(value)}' for parameter, value in parameters.items()]) + ')'
+    return (
+        o.__class__.__name__
+        + "("
+        + ",".join(
+            [
+                f"{parameter}={string_with_apostrophe(value)}"
+                for parameter, value in parameters.items()
+            ]
+        )
+        + ")"
+    )
 
 
 def string_with_apostrophe(s):

@@ -1 +1 @@
-__version__ = '0.2.3.dev'
+__version__ = "0.2.3.dev"
@@ -1,15 +1,16 @@
 import abc
+import enum
 import os.path
 import pickle
-import enum
-import numpy as np
-import scipy
 from pathlib import Path
 from typing import Optional, Union
 
-from dtaianomaly.thresholding.thresholding import ContaminationRate
+import numpy as np
+import scipy
+
 from dtaianomaly import utils
 from dtaianomaly.PrettyPrintable import PrettyPrintable
+from dtaianomaly.thresholding.thresholding import ContaminationRate
 
 
 class Supervision(enum.Enum):
@@ -21,6 +22,7 @@ class Supervision(enum.Enum):
     - ``Semi-supervised``: The anomaly detector requires *normal* training data, but no training labels.
     - ``Supervised``: The anomaly detector requires both training data and training labels. The training data may contain anomalies.
     """
+
     UNSUPERVISED = 1
     SEMI_SUPERVISED = 2
     SUPERVISED = 3
@@ -40,6 +42,7 @@ class BaseDetector(PrettyPrintable):
     supervision: Supervision
         The type of supervision this anomaly detector requires.
     """
+
     supervision: Supervision
 
     def __init__(self, supervision: Supervision):
@@ -48,7 +51,9 @@ def __init__(self, supervision: Supervision):
         self.supervision = supervision
 
     @abc.abstractmethod
-    def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> 'BaseDetector':
+    def fit(
+        self, X: np.ndarray, y: Optional[np.ndarray] = None, **kwargs
+    ) -> "BaseDetector":
         """
         Abstract method, fit this detector to the given data.
 
@@ -85,7 +90,7 @@ def predict_proba(self, X: np.ndarray) -> np.ndarray:
         """
         Predict anomaly probabilities
 
-        Estimate the probability of a sample of `X` being anomalous, 
+        Estimate the probability of a sample of `X` being anomalous,
         based on the anomaly scores obtained from `decision_function`
         by rescaling them to the range of [0, 1] via min-max scaling.
 
@@ -120,14 +125,22 @@ def predict_proba(self, X: np.ndarray) -> np.ndarray:
         max_score = np.nanmax(raw_scores)
         if min_score == max_score:
             if not (0.0 <= min_score <= 1.0):
-                raise ValueError('The predicted anomaly scores are constant, but not in the interval [0, 1]. '
-                                 'It is not clear how to transform these unambiguously to anomaly-probabilities!')
+                raise ValueError(
+                    "The predicted anomaly scores are constant, but not in the interval [0, 1]. "
+                    "It is not clear how to transform these unambiguously to anomaly-probabilities!"
+                )
             return raw_scores
 
         else:
             return (raw_scores - min_score) / (max_score - min_score)
 
-    def predict_confidence(self, X: np.ndarray, X_train: np.ndarray = None, contamination: float = 0.05, decision_scores_given: bool = False):
+    def predict_confidence(
+        self,
+        X: np.ndarray,
+        X_train: np.ndarray = None,
+        contamination: float = 0.05,
+        decision_scores_given: bool = False,
+    ):
         """
         Predict the confidence of the anomaly scores on the given test data.
 
@@ -171,36 +184,54 @@ def predict_confidence(self, X: np.ndarray, X_train: np.ndarray = None, contamin
         # Set the decision scores
         if decision_scores_given:
             if len(X.shape) > 1:
-                raise ValueError("In the 'predict_confidence()' method, it was indicated that the decision scores are provided "
-                                 "as X (decision_scores_given=True), but the shape of X does not correspond to the shape of decision"
-                                 f"scores: {X.shape}!")
+                raise ValueError(
+                    "In the 'predict_confidence()' method, it was indicated that the decision scores are provided "
+                    "as X (decision_scores_given=True), but the shape of X does not correspond to the shape of decision "
+                    f"scores: {X.shape}!"
+                )
             if X_train is not None and len(X_train.shape) > 1:
-                raise ValueError("In the 'predict_confidence()' method, it was indicated that the decision scores are provided "
-                                 "as X (decision_scores_given=True), but the shape of X_train does not correspond to the shape of decision"
-                                 f"scores: {X.shape}!")
+                raise ValueError(
+                    "In the 'predict_confidence()' method, it was indicated that the decision scores are provided "
+                    "as X (decision_scores_given=True), but the shape of X_train does not correspond to the shape of decision "
+                    f"scores: {X_train.shape}!"  # report the offending array (X_train), not X
+                )
             decision_scores = X
             decision_scores_train = X_train if X_train is not None else decision_scores
 
         else:
             # Compute the decision scores
             decision_scores = self.decision_function(X)
-            decision_scores_train = self.decision_function(X_train) if X_train is not None else decision_scores
+            decision_scores_train = (
+                self.decision_function(X_train)
+                if X_train is not None
+                else decision_scores
+            )
 
         # Convert the decision scores to binary predictions
-        prediction = ContaminationRate(contamination_rate=contamination).threshold(decision_scores)
+        prediction = ContaminationRate(contamination_rate=contamination).threshold(
+            decision_scores
+        )
 
         # Apply the ExCeed method (https://github.com/Lorenzo-Perini/Confidence_AD/blob/master/ExCeeD.py)
         n = decision_scores.shape[0]
 
-        count_instances = np.vectorize(lambda x: np.count_nonzero(decision_scores_train <= x))
+        count_instances = np.vectorize(
+            lambda x: np.count_nonzero(decision_scores_train <= x)
+        )
         n_instances = count_instances(decision_scores)
 
         prob_func = np.vectorize(lambda x: (1 + x) / (2 + n))
-        posterior_prob = prob_func(n_instances)  # Outlier probability according to ExCeeD
+        posterior_prob = prob_func(
+            n_instances
+        )  # Outlier probability according to ExCeeD
 
-        conf_func = np.vectorize(lambda p: 1 - scipy.stats.binom.cdf(n - int(n * contamination), n, p))
+        conf_func = np.vectorize(
+            lambda p: 1 - scipy.stats.binom.cdf(n - int(n * contamination), n, p)
+        )
         exWise_conf = conf_func(posterior_prob)
-        np.place(exWise_conf, prediction == 0, 1 - exWise_conf[prediction == 0])  # if the example is classified as normal, use 1 - confidence.
+        np.place(
+            exWise_conf, prediction == 0, 1 - exWise_conf[prediction == 0]
+        )  # if the example is classified as normal, use 1 - confidence.
 
         return exWise_conf
 
@@ -216,15 +247,15 @@ def save(self, path: Union[str, Path]) -> None:
             Location where to store the detector.
         """
         # Add the '.dtai' extension
-        if Path(path).suffix != '.dtai':
-            path = f'{path}.dtai'
+        if Path(path).suffix != ".dtai":
+            path = f"{path}.dtai"
 
         # Create the subdirectory, if it doesn't exist
         if not os.path.exists(Path(path).parent):
             os.makedirs(Path(path).parent)
 
         # Effectively write the anomaly detector to disk
-        with open(path, 'wb') as f:
+        with open(path, "wb") as f:
             pickle.dump(self, f)
 
 
@@ -244,6 +275,6 @@ def load_detector(path: Union[str, Path]) -> BaseDetector:
     detector: BaseDetector
         The loaded detector.
     """
-    with open(path, 'rb') as f:
+    with open(path, "rb") as f:
         detector = pickle.load(f)
     return detector
@@ -1,5 +1,5 @@
-
 from pyod.models.cblof import CBLOF
+
 from dtaianomaly.anomaly_detection.BaseDetector import Supervision
 from dtaianomaly.anomaly_detection.PyODAnomalyDetector import PyODAnomalyDetector
 

@@ -1,5 +1,5 @@
-
 from pyod.models.copod import COPOD
+
 from dtaianomaly.anomaly_detection.BaseDetector import Supervision
 from dtaianomaly.anomaly_detection.PyODAnomalyDetector import PyODAnomalyDetector
 

@@ -1,5 +1,5 @@
-
 from pyod.models.hbos import HBOS
+
 from dtaianomaly.anomaly_detection.BaseDetector import Supervision
 from dtaianomaly.anomaly_detection.PyODAnomalyDetector import PyODAnomalyDetector
 

@@ -1,5 +1,5 @@
-
 from pyod.models.iforest import IForest
+
 from dtaianomaly.anomaly_detection.BaseDetector import Supervision
 from dtaianomaly.anomaly_detection.PyODAnomalyDetector import PyODAnomalyDetector
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		__version__ = '0.2.3.dev'
		__version__ = "0.2.3.dev"