Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
name: Lint

on:
push:
branches: [ "main" ]
pull_request:
branches: [ "main" ]

jobs:
lint:
runs-on: ubuntu-latest
steps:

# Check out the repository code
- name: Checkout repository
uses: actions/checkout@v4

# Set up Python
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.10' # Only for one python version to save on resources

# Run black
- name: black
uses: psf/black@stable
with:
options: "--check --verbose"
src: "./dtaianomaly"

# Install dtaianomaly (not required for black)
- name: Install dtaianomaly
run: |
python -m pip install --upgrade pip
pip install .[all]
pip list

# Apply isort
- name: isort
uses: isort/isort-action@v1.1.1
with:
sortPaths: "dtaianomaly"
configuration: "--check-only --diff --profile black"
10 changes: 1 addition & 9 deletions .github/workflows/unit-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,9 @@ jobs:
- name: Install dtaianomaly
run: |
python -m pip install --upgrade pip
pip install .[all]
pip install flake8
pip install .[tests]
pip list

- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

- name: Test with pytest
run: |
pytest --cov=dtaianomaly --cov-report term-missing
21 changes: 21 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
repos:

- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
hooks:
- id: trailing-whitespace

- repo: https://github.com/pycqa/isort/
rev: 5.13.2
hooks:
- id: isort
args: ["--profile", "black"]
language: python

- repo: https://github.com/psf/black
rev: 24.10.0
hooks:
- id: black
language: python

files: "dtaianomaly"
4 changes: 2 additions & 2 deletions docs/additional_information/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ Added
everything via different requirements files.
- Added support for Python 3.13.
- Implemented the ``predict_confidence()`` method in the ``BaseDetector``, which
computes a confidence score for each prediction of the anomaly detector.
computes a confidence score for each prediction of the anomaly detector.
- Integrated ``black`` and ``isort`` to format the code.

Changed
^^^^^^^
Expand All @@ -35,7 +36,6 @@ Changed
in ``compute_window_size()``, which will be returned (if provided) instead of raising
an error to allow the system to continue.


Fixed
^^^^^
- Renamed ``ZNormalizer`` to ``StandardScaler`` to align with the naming used in scikit-learn.
Expand Down
7 changes: 7 additions & 0 deletions docs/additional_information/contributing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,13 @@ run the following command:
You should include the ``--editable`` flag to ensure that your
changes to the code are actually reflected in the installed version.

Next, make sure to install the pre-commit hooks in the project,
using the following command:

.. code-block:: bash

pre-commit install

To check that the environment is set up correctly, verify that all tests
succeed by running the following command (which also checks the
coverage of the unit tests):
Expand Down
18 changes: 14 additions & 4 deletions dtaianomaly/PrettyPrintable.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

import abc
import inspect

Expand All @@ -13,11 +12,22 @@ def initialization_call_string(o: object) -> str:
parameters = {
parameter: getattr(o, parameter)
for parameter, value in inspect.signature(o.__init__).parameters.items()
if parameter not in ['args', 'kwargs'] and value.default != getattr(o, parameter)
if parameter not in ["args", "kwargs"]
and value.default != getattr(o, parameter)
}
if hasattr(o, 'kwargs'):
if hasattr(o, "kwargs"):
parameters.update(o.kwargs)
return o.__class__.__name__ + '(' + ','.join([f'{parameter}={string_with_apostrophe(value)}' for parameter, value in parameters.items()]) + ')'
return (
o.__class__.__name__
+ "("
+ ",".join(
[
f"{parameter}={string_with_apostrophe(value)}"
for parameter, value in parameters.items()
]
)
+ ")"
)


def string_with_apostrophe(s):
Expand Down
2 changes: 1 addition & 1 deletion dtaianomaly/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.2.3.dev'
__version__ = "0.2.3.dev"
81 changes: 56 additions & 25 deletions dtaianomaly/anomaly_detection/BaseDetector.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
import abc
import enum
import os.path
import pickle
import enum
import numpy as np
import scipy
from pathlib import Path
from typing import Optional, Union

from dtaianomaly.thresholding.thresholding import ContaminationRate
import numpy as np
import scipy

from dtaianomaly import utils
from dtaianomaly.PrettyPrintable import PrettyPrintable
from dtaianomaly.thresholding.thresholding import ContaminationRate


class Supervision(enum.Enum):
Expand All @@ -21,6 +22,7 @@ class Supervision(enum.Enum):
- ``Semi-supervised``: The anomaly detector requires *normal* training data, but no training labels.
- ``Supervised``: The anomaly detector requires both training data and training labels. The training data may contain anomalies.
"""

UNSUPERVISED = 1
SEMI_SUPERVISED = 2
SUPERVISED = 3
Expand All @@ -40,6 +42,7 @@ class BaseDetector(PrettyPrintable):
supervision: Supervision
The type of supervision this anomaly detector requires.
"""

supervision: Supervision

def __init__(self, supervision: Supervision):
Expand All @@ -48,7 +51,9 @@ def __init__(self, supervision: Supervision):
self.supervision = supervision

@abc.abstractmethod
def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> 'BaseDetector':
def fit(
self, X: np.ndarray, y: Optional[np.ndarray] = None, **kwargs
) -> "BaseDetector":
"""
Abstract method, fit this detector to the given data.

Expand Down Expand Up @@ -85,7 +90,7 @@ def predict_proba(self, X: np.ndarray) -> np.ndarray:
"""
Predict anomaly probabilities

Estimate the probability of a sample of `X` being anomalous,
Estimate the probability of a sample of `X` being anomalous,
based on the anomaly scores obtained from `decision_function`
by rescaling them to the range of [0, 1] via min-max scaling.

Expand Down Expand Up @@ -120,14 +125,22 @@ def predict_proba(self, X: np.ndarray) -> np.ndarray:
max_score = np.nanmax(raw_scores)
if min_score == max_score:
if not (0.0 <= min_score <= 1.0):
raise ValueError('The predicted anomaly scores are constant, but not in the interval [0, 1]. '
'It is not clear how to transform these unambiguously to anomaly-probabilities!')
raise ValueError(
"The predicted anomaly scores are constant, but not in the interval [0, 1]. "
"It is not clear how to transform these unambiguously to anomaly-probabilities!"
)
return raw_scores

else:
return (raw_scores - min_score) / (max_score - min_score)

def predict_confidence(self, X: np.ndarray, X_train: np.ndarray = None, contamination: float = 0.05, decision_scores_given: bool = False):
def predict_confidence(
self,
X: np.ndarray,
X_train: np.ndarray = None,
contamination: float = 0.05,
decision_scores_given: bool = False,
):
"""
Predict the confidence of the anomaly scores on the given test data.

Expand Down Expand Up @@ -171,36 +184,54 @@ def predict_confidence(self, X: np.ndarray, X_train: np.ndarray = None, contamin
# Set the decision scores
if decision_scores_given:
if len(X.shape) > 1:
raise ValueError("In the 'predict_confidence()' method, it was indicated that the decision scores are provided "
"as X (decision_scores_given=True), but the shape of X does not correspond to the shape of decision"
f"scores: {X.shape}!")
raise ValueError(
"In the 'predict_confidence()' method, it was indicated that the decision scores are provided "
"as X (decision_scores_given=True), but the shape of X does not correspond to the shape of decision"
f"scores: {X.shape}!"
)
if X_train is not None and len(X_train.shape) > 1:
raise ValueError("In the 'predict_confidence()' method, it was indicated that the decision scores are provided "
"as X (decision_scores_given=True), but the shape of X_train does not correspond to the shape of decision"
f"scores: {X.shape}!")
raise ValueError(
"In the 'predict_confidence()' method, it was indicated that the decision scores are provided "
"as X (decision_scores_given=True), but the shape of X_train does not correspond to the shape of decision"
f"scores: {X.shape}!"
)
decision_scores = X
decision_scores_train = X_train if X_train is not None else decision_scores

else:
# Compute the decision scores
decision_scores = self.decision_function(X)
decision_scores_train = self.decision_function(X_train) if X_train is not None else decision_scores
decision_scores_train = (
self.decision_function(X_train)
if X_train is not None
else decision_scores
)

# Convert the decision scores to binary predictions
prediction = ContaminationRate(contamination_rate=contamination).threshold(decision_scores)
prediction = ContaminationRate(contamination_rate=contamination).threshold(
decision_scores
)

# Apply the ExCeed method (https://github.com/Lorenzo-Perini/Confidence_AD/blob/master/ExCeeD.py)
n = decision_scores.shape[0]

count_instances = np.vectorize(lambda x: np.count_nonzero(decision_scores_train <= x))
count_instances = np.vectorize(
lambda x: np.count_nonzero(decision_scores_train <= x)
)
n_instances = count_instances(decision_scores)

prob_func = np.vectorize(lambda x: (1 + x) / (2 + n))
posterior_prob = prob_func(n_instances) # Outlier probability according to ExCeeD
posterior_prob = prob_func(
n_instances
) # Outlier probability according to ExCeeD

conf_func = np.vectorize(lambda p: 1 - scipy.stats.binom.cdf(n - int(n * contamination), n, p))
conf_func = np.vectorize(
lambda p: 1 - scipy.stats.binom.cdf(n - int(n * contamination), n, p)
)
exWise_conf = conf_func(posterior_prob)
np.place(exWise_conf, prediction == 0, 1 - exWise_conf[prediction == 0]) # if the example is classified as normal, use 1 - confidence.
np.place(
exWise_conf, prediction == 0, 1 - exWise_conf[prediction == 0]
) # if the example is classified as normal, use 1 - confidence.

return exWise_conf

Expand All @@ -216,15 +247,15 @@ def save(self, path: Union[str, Path]) -> None:
Location where to store the detector.
"""
# Add the '.dtai' extension
if Path(path).suffix != '.dtai':
path = f'{path}.dtai'
if Path(path).suffix != ".dtai":
path = f"{path}.dtai"

# Create the subdirectory, if it doesn't exist
if not os.path.exists(Path(path).parent):
os.makedirs(Path(path).parent)

# Effectively write the anomaly detector to disk
with open(path, 'wb') as f:
with open(path, "wb") as f:
pickle.dump(self, f)


Expand All @@ -244,6 +275,6 @@ def load_detector(path: Union[str, Path]) -> BaseDetector:
detector: BaseDetector
The loaded detector.
"""
with open(path, 'rb') as f:
with open(path, "rb") as f:
detector = pickle.load(f)
return detector
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from pyod.models.cblof import CBLOF

from dtaianomaly.anomaly_detection.BaseDetector import Supervision
from dtaianomaly.anomaly_detection.PyODAnomalyDetector import PyODAnomalyDetector

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from pyod.models.copod import COPOD

from dtaianomaly.anomaly_detection.BaseDetector import Supervision
from dtaianomaly.anomaly_detection.PyODAnomalyDetector import PyODAnomalyDetector

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from pyod.models.hbos import HBOS

from dtaianomaly.anomaly_detection.BaseDetector import Supervision
from dtaianomaly.anomaly_detection.PyODAnomalyDetector import PyODAnomalyDetector

Expand Down
2 changes: 1 addition & 1 deletion dtaianomaly/anomaly_detection/IsolationForest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

from pyod.models.iforest import IForest

from dtaianomaly.anomaly_detection.BaseDetector import Supervision
from dtaianomaly.anomaly_detection.PyODAnomalyDetector import PyODAnomalyDetector

Expand Down
Loading
Loading