From c2de7456449b937fa1900ca9ce3ef2d2d70e551d Mon Sep 17 00:00:00 2001 From: LouisCarpentier42 Date: Fri, 3 Oct 2025 14:56:07 +0200 Subject: [PATCH 1/3] feat: Implemented ROCKAD Signed-off-by: LouisCarpentier42 --- docs/additional_information/changelog.rst | 4 + docs/api/anomaly_detection.rst | 1 + docs/bibliography.bib | 27 ++- .../_KShapeAnomalyDetector.py | 14 +- dtaianomaly/anomaly_detection/_ROCKAD.py | 208 ++++++++++++++++++ dtaianomaly/anomaly_detection/__init__.py | 2 + pyproject.toml | 2 +- tests/anomaly_detection/test_ROCKAD.py | 22 ++ tests/utils/test_discovery.py | 1 + 9 files changed, 272 insertions(+), 9 deletions(-) create mode 100644 dtaianomaly/anomaly_detection/_ROCKAD.py create mode 100644 tests/anomaly_detection/test_ROCKAD.py diff --git a/docs/additional_information/changelog.rst b/docs/additional_information/changelog.rst index ced5c17..19d7fb3 100644 --- a/docs/additional_information/changelog.rst +++ b/docs/additional_information/changelog.rst @@ -30,6 +30,7 @@ Added - Implement ``HybridKNearestNeighbors`` anomaly detector. - Implement ``SquaredDifference`` baseline anomaly detector. - Implement ``MovingWindowVariance`` baseline anomaly detector. +- Implement ``ROCKAD`` anomaly detector. Changed ^^^^^^^ @@ -38,6 +39,9 @@ Changed - In the neural methods, simplified the options for passing losses and activation functions to only include predefined types. - The documentation is restructured (although it still follows the same style). +- Replaced dependency on ``tslearn>=0.6.3`` by ``sktime[clustering]``, which includes the ``tslearn`` + dependency. Before, ``tslearn`` was only used for ``KShapeAnomalyDetector`` to do the clustering. This + capability is also offered by ``sktime`` through a direct interface to ``tslearn``. 
Fixed ^^^^^ diff --git a/docs/api/anomaly_detection.rst b/docs/api/anomaly_detection.rst index 603019e..b2a73a3 100644 --- a/docs/api/anomaly_detection.rst +++ b/docs/api/anomaly_detection.rst @@ -61,6 +61,7 @@ Time series statistical methods MatrixProfileDetector MedianMethod RobustRandomCutForestAnomalyDetector + ROCKAD SpectralResidual Neural methods diff --git a/docs/bibliography.bib b/docs/bibliography.bib index 003f1b4..2482aee 100644 --- a/docs/bibliography.bib +++ b/docs/bibliography.bib @@ -536,6 +536,31 @@ @article{song2017hybrid year = {2017} } +@InProceedings{theissler2023rockad, + author="Theissler, Andreas + and Wengert, Manuel + and Gerschner, Felix", + editor="Cr{\'e}milleux, Bruno + and Hess, Sibylle + and Nijssen, Siegfried", + title="ROCKAD: Transferring ROCKET to Whole Time Series Anomaly Detection", + booktitle="Advances in Intelligent Data Analysis XXI", + year="2023", + publisher="Springer Nature Switzerland", + address="Cham", + pages="419--432", + isbn="978-3-031-30047-9" +} - +@article{dempster2020rocket, + title={ROCKET: exceptionally fast and accurate time series classification using random convolutional kernels}, + author={Dempster, Angus and Petitjean, Fran{\c{c}}ois and Webb, Geoffrey I}, + journal={Data Mining and Knowledge Discovery}, + volume={34}, + number={5}, + pages={1454--1495}, + year={2020}, + publisher={Springer}, + doi={https://doi.org/10.1007/s10618-020-00701-z} +} diff --git a/dtaianomaly/anomaly_detection/_KShapeAnomalyDetector.py b/dtaianomaly/anomaly_detection/_KShapeAnomalyDetector.py index eab5626..7ca3c36 100644 --- a/dtaianomaly/anomaly_detection/_KShapeAnomalyDetector.py +++ b/dtaianomaly/anomaly_detection/_KShapeAnomalyDetector.py @@ -1,7 +1,7 @@ import numpy as np import stumpy from scipy.spatial.distance import pdist, squareform -from tslearn.clustering import KShape +from sktime.clustering.k_shapes import TimeSeriesKShapes from dtaianomaly import utils from dtaianomaly.anomaly_detection._BaseDetector import 
BaseDetector, Supervision @@ -56,13 +56,13 @@ class KShapeAnomalyDetector(BaseDetector): Attributes ---------- window_size_ : int - The effectively used window size for computing the matrix profile + The effectively used window size for detecting anomalies. centroids_ : list of array-like of shape (window_size_*sequence_length_multiplier,) The centroids computed by KShape clustering. weights_ : list of float The normalized weights corresponding to each cluster. - kshape_ : KShape - The fitted KShape-object of tslearn, used to cluster the data. + kshape_ : TimeSeriesKShapes + The fitted KShape-object of sktime, used to cluster the data. Notes ----- @@ -87,7 +87,7 @@ class KShapeAnomalyDetector(BaseDetector): window_size_: int centroids_: list[np.array] weights_: np.array - kshape_: KShape + kshape_: TimeSeriesKShapes attribute_validation = { "window_size": WindowSizeAttribute(), @@ -105,7 +105,7 @@ def __init__( **kwargs, ): # Check if KShape can be initialized - KShape(n_clusters=n_clusters, **kwargs) + TimeSeriesKShapes(n_clusters=n_clusters, **kwargs) super().__init__(Supervision.UNSUPERVISED) self.window_size = window_size @@ -146,7 +146,7 @@ def _fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> None: windows = sliding_window(X, sequence_length, stride) # Apply K-Shape clustering - self.kshape_ = KShape(n_clusters=self.n_clusters, **self.kwargs) + self.kshape_ = TimeSeriesKShapes(n_clusters=self.n_clusters, **self.kwargs) cluster_labels = self.kshape_.fit_predict(windows) # Extract the centroids diff --git a/dtaianomaly/anomaly_detection/_ROCKAD.py b/dtaianomaly/anomaly_detection/_ROCKAD.py new file mode 100644 index 0000000..6799d0a --- /dev/null +++ b/dtaianomaly/anomaly_detection/_ROCKAD.py @@ -0,0 +1,208 @@ +import numpy as np +from sklearn.neighbors import NearestNeighbors +from sklearn.preprocessing import PowerTransformer +from sklearn.utils import resample +from sktime.transformations.panel.rocket import Rocket + +from 
dtaianomaly.anomaly_detection._BaseDetector import BaseDetector, Supervision +from dtaianomaly.type_validation import ( + BoolAttribute, + IntegerAttribute, + NoneAttribute, + WindowSizeAttribute, +) +from dtaianomaly.utils import get_dimension +from dtaianomaly.windowing import ( + WINDOW_SIZE_TYPE, + compute_window_size, + reverse_sliding_window, +) + +__all__ = ["ROCKAD"] + + +class ROCKAD(BaseDetector): + """ + Detect anomalies in time series subsequences with ROCKAD :cite:`theissler2023rockad`. + + ROCKAD uses the ROCKET transformation :cite:`dempster2020rocket` as an unsupervised + feature extractor from time series subsequences. Then, a bagging-based ensemble of + k-NN models using the ROCKET-features is used to detect anomalous time series + subsequences, in which the anomaly score of each individual instance is computed as + the distance to the k-th nearest neighbor within each bagging subset. As discussed + by :cite:t:`theissler2023rockad`, first applying a power-transform and then standard + scaling the ROCKET features improves separation of the normal and anomalous sequences. + + Parameters + ---------- + window_size : int or str + The window size, the length of the subsequences that will be detected as anomalies. This + value will be passed to :py:meth:`~dtaianomaly.anomaly_detection.compute_window_size`. + stride : int, default=1 + The stride, i.e., the step size for extracting sliding windows from the time series. + n_kernels : int, default=100 + The number of kernels to use in the ROCKET-transformation. + power_transform : bool, default=True + Whether to perform a power-transformation or not. + n_estimators : int, default=10 + The number of k-NN estimators to include in the detection ensemble. + n_neighbors : int, default=5 + The number of neighbors to use for the nearest neighbor queries. + metric : str, default='euclidean' + Distance metric for distance computations. Any metric of scikit-learn and + scipy.spatial.distance can be used.
+ n_jobs : int, default=1 + The number of jobs to use, which is passed to the scikit-learn components. + seed : int or None, default=None + The random seed used to initialise the kernels and draw the bootstrap samples. + + Attributes + ---------- + window_size_ : int + The effectively used window size for detecting anomalies. + rocket_ : Rocket + The ``sktime`` Rocket transformer object. + power_transformer_ : PowerTransformer + The ``sklearn`` power transformer object. The object will only be fitted if + ``power_transform=True``. + nearest_neighbors_ : list of NearestNeighbors + The fitted nearest neighbor instances, one per estimator in the ensemble. + + Examples + -------- + >>> from dtaianomaly.anomaly_detection import ROCKAD + >>> from dtaianomaly.data import demonstration_time_series + >>> x, y = demonstration_time_series() + >>> rockad = ROCKAD(64, seed=0).fit(x) + >>> rockad.decision_function(x) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + array([5.30759668, 5.25451016, 4.80149563, ..., 3.40483896, 3.72443581, + 3.74599171]) + """ + + window_size: WINDOW_SIZE_TYPE + stride: int + n_kernels: int + power_transform: bool + n_estimators: int + n_neighbors: int + metric: str + n_jobs: int + seed: int | None + + window_size_: int + rocket_: Rocket + power_transformer_: PowerTransformer + nearest_neighbors_: list[NearestNeighbors] + + attribute_validation = { + "window_size": WindowSizeAttribute(), + "stride": IntegerAttribute(1), + "n_kernels": IntegerAttribute(1), + "n_estimators": IntegerAttribute(1), + "n_neighbors": IntegerAttribute(1), + "n_jobs": IntegerAttribute(1), + "power_transform": BoolAttribute(), + "seed": IntegerAttribute() | NoneAttribute(), + } + + def __init__( + self, + window_size: WINDOW_SIZE_TYPE, + stride: int = 1, + n_kernels: int = 100, + power_transform: bool = True, + n_estimators: int = 10, + n_neighbors: int = 5, + metric: str = "euclidean", + n_jobs: int = 1, + seed: int | None = None, + ): + super().__init__(Supervision.UNSUPERVISED) + self.window_size =
window_size + self.stride = stride + self.n_kernels = n_kernels + self.n_estimators = n_estimators + self.n_neighbors = n_neighbors + self.metric = metric + self.n_jobs = n_jobs + self.power_transform = power_transform + self.seed = seed + + def _fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> None: + self.window_size_ = compute_window_size(X, self.window_size, **kwargs) + windows = self._sliding_window(X) + + # Apply ROCKET + self.rocket_ = Rocket( + num_kernels=self.n_kernels, n_jobs=self.n_jobs, random_state=self.seed + ) + features = self.rocket_.fit_transform(windows) + + # Apply power-transformation + self.power_transformer_ = PowerTransformer(standardize=True) + if self.power_transform: + features = self.power_transformer_.fit_transform(features) + + # Train the ensemble of nearest neighbor models. + self.nearest_neighbors_ = [] + for i in range(self.n_estimators): + + # Define a seed for this estimator + seed_ = self.seed + if self.seed is not None: + seed_ += i + + # Initialize the NN object + nearest_neighbors = NearestNeighbors( + n_neighbors=self.n_neighbors, metric=self.metric, n_jobs=self.n_jobs + ) + + # Draw a bootstrap sample (resample returns a new array) + bootstrap = resample( + features, + replace=True, + n_samples=None, + random_state=seed_, + stratify=None, + ) + + # Fit the nearest neighbor instance on the bootstrap sample + nearest_neighbors.fit(bootstrap) + self.nearest_neighbors_.append(nearest_neighbors) + + def _decision_function(self, X: np.ndarray) -> np.array: + + # Create the sliding windows + windows = self._sliding_window(X) + + # Extract the ROCKET features + features = self.rocket_.transform(windows) + + # Apply power transform + if self.power_transform: + features = self.power_transformer_.transform(features) + + # Compute the k-th nearest neighbor distance to each ensemble item + nearest_neighbors_distances = np.empty( + shape=(windows.shape[0], self.n_estimators) + ) + for i, nearest_neighbors in enumerate(self.nearest_neighbors_): +
nearest_neighbors_distances[:, i] = nearest_neighbors.kneighbors(features)[ + 0 + ][:, -1] + + # Aggregate the scores + decision_scores = nearest_neighbors_distances.mean(axis=1) + return reverse_sliding_window( + decision_scores, self.window_size_, self.stride, X.shape[0] + ) + + def _sliding_window(self, X: np.ndarray) -> np.ndarray: + """Custom method to format the windows according to sktime format.""" + X = X.reshape(X.shape[0], get_dimension(X)) + windows = [ + X[t : t + self.window_size_, :].T + for t in range(0, X.shape[0] - self.window_size_, self.stride) + ] + windows.append(X[-self.window_size_ :].T) + return np.array(windows) diff --git a/dtaianomaly/anomaly_detection/__init__.py b/dtaianomaly/anomaly_detection/__init__.py index 2e2ead7..2063ba6 100644 --- a/dtaianomaly/anomaly_detection/__init__.py +++ b/dtaianomaly/anomaly_detection/__init__.py @@ -71,6 +71,7 @@ from ._PrincipalComponentAnalysis import PrincipalComponentAnalysis from ._RobustPrincipalComponentAnalysis import RobustPrincipalComponentAnalysis from ._RobustRandomCutForestAnomalyDetector import RobustRandomCutForestAnomalyDetector +from ._ROCKAD import ROCKAD from ._SpectralResidual import SpectralResidual from ._TimeMoE import TimeMoE from ._TorchTimeSeriesDataSet import ( @@ -134,4 +135,5 @@ "TimeMoE", "RobustRandomCutForestAnomalyDetector", "HybridKNearestNeighbors", + "ROCKAD", ] diff --git a/pyproject.toml b/pyproject.toml index a6d61b0..a29aa38 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "matplotlib>=3.7", "statsmodels>=0.6", "pyod>=2.0.0", - "tslearn>=0.6.3", + "sktime[clustering]", "toml", "torch>=1.8.0", ] diff --git a/tests/anomaly_detection/test_ROCKAD.py b/tests/anomaly_detection/test_ROCKAD.py new file mode 100644 index 0000000..61e80c8 --- /dev/null +++ b/tests/anomaly_detection/test_ROCKAD.py @@ -0,0 +1,22 @@ +import numpy as np + +from dtaianomaly.anomaly_detection import ROCKAD, Supervision + + +class TestROCKAD: + + def 
test_supervision(self): + assert ROCKAD(128).supervision == Supervision.UNSUPERVISED + + def test_seed(self, univariate_time_series): + detector1 = ROCKAD(128, seed=0) + y_pred1 = detector1.fit(univariate_time_series).decision_function( + univariate_time_series + ) + + detector2 = ROCKAD(128, seed=0) + y_pred2 = detector2.fit(univariate_time_series).decision_function( + univariate_time_series + ) + + assert np.array_equal(y_pred1, y_pred2) diff --git a/tests/utils/test_discovery.py b/tests/utils/test_discovery.py index c14b758..a4abe0c 100644 --- a/tests/utils/test_discovery.py +++ b/tests/utils/test_discovery.py @@ -38,6 +38,7 @@ anomaly_detection.HybridKNearestNeighbors, anomaly_detection.MovingWindowVariance, anomaly_detection.SquaredDifference, + anomaly_detection.ROCKAD, ] data_loaders = [ data.DemonstrationTimeSeriesLoader, From 5cdd7b321b9df50d1a35e102507290dcf0e770c7 Mon Sep 17 00:00:00 2001 From: LouisCarpentier42 Date: Fri, 3 Oct 2025 15:45:33 +0200 Subject: [PATCH 2/3] fix: skip doctest for ROCKAD due to different return for different OS Signed-off-by: LouisCarpentier42 --- dtaianomaly/anomaly_detection/_ROCKAD.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dtaianomaly/anomaly_detection/_ROCKAD.py b/dtaianomaly/anomaly_detection/_ROCKAD.py index 6799d0a..a981d30 100644 --- a/dtaianomaly/anomaly_detection/_ROCKAD.py +++ b/dtaianomaly/anomaly_detection/_ROCKAD.py @@ -74,7 +74,7 @@ class ROCKAD(BaseDetector): >>> from dtaianomaly.data import demonstration_time_series >>> x, y = demonstration_time_series() >>> rockad = ROCKAD(64, seed=0).fit(x) - >>> rockad.decision_function(x) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE + >>> rockad.decision_function(x) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE, +SKIP array([5.30759668, 5.25451016, 4.80149563, ..., 3.40483896, 3.72443581, 3.74599171]) """ From fdb7a4c76a84748969f61869e9c31ed6f95c725b Mon Sep 17 00:00:00 2001 From: LouisCarpentier42 Date: Fri, 3 Oct 2025 16:02:00 +0200 Subject: [PATCH 
3/3] fix: changed doctest in KShapeAD Signed-off-by: LouisCarpentier42 --- dtaianomaly/anomaly_detection/_KShapeAnomalyDetector.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/dtaianomaly/anomaly_detection/_KShapeAnomalyDetector.py b/dtaianomaly/anomaly_detection/_KShapeAnomalyDetector.py index 7ca3c36..68971b6 100644 --- a/dtaianomaly/anomaly_detection/_KShapeAnomalyDetector.py +++ b/dtaianomaly/anomaly_detection/_KShapeAnomalyDetector.py @@ -75,7 +75,8 @@ class KShapeAnomalyDetector(BaseDetector): >>> x, y = demonstration_time_series() >>> kshape = KShapeAnomalyDetector(window_size=50).fit(x) >>> kshape.decision_function(x) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE - array([1.01942655, 1.03008335, 1.03906465, ..., 1.29643677, 1.3256903 , 1.34704128]...) + array([7.07106781, 7.07106781, 7.07106781, ..., 7.07106781, 7.07106781, + 7.07106781]) """ window_size: WINDOW_SIZE_TYPE @@ -202,3 +203,9 @@ def _ncc_c(x: np.array, y: np.array) -> np.array: cc = np.fft.ifft(np.fft.fft(x, fft_size) * np.conj(np.fft.fft(y, fft_size))) cc = np.concatenate((cc[-(x.shape[0] - 1) :], cc[: x.shape[0]])) return np.real(cc) / den + + +if __name__ == "__main__": + import doctest + + doctest.testmod()