Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/additional_information/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Added
- Implement ``HybridKNearestNeighbors`` anomaly detector.
- Implement ``SquaredDifference`` baseline anomaly detector.
- Implement ``MovingWindowVariance`` baseline anomaly detector.
- Implement ``ROCKAD`` anomaly detector.

Changed
^^^^^^^
Expand All @@ -38,6 +39,9 @@ Changed
- In the neural methods, simplified the options for passing losses and activation functions to
only include predefined types.
- The documentation is restructured (although it still follows the same style).
- Replaced the dependency on ``tslearn>=0.6.3`` with ``sktime[clustering]``, which itself includes the
``tslearn`` dependency. Previously, ``tslearn`` was only used by ``KShapeAnomalyDetector`` for clustering;
``sktime`` offers the same capability through a direct interface to ``tslearn``.

Fixed
^^^^^
Expand Down
1 change: 1 addition & 0 deletions docs/api/anomaly_detection.rst
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ Time series statistical methods
MatrixProfileDetector
MedianMethod
RobustRandomCutForestAnomalyDetector
ROCKAD
SpectralResidual

Neural methods
Expand Down
27 changes: 26 additions & 1 deletion docs/bibliography.bib
Original file line number Diff line number Diff line change
Expand Up @@ -536,6 +536,31 @@ @article{song2017hybrid
year = {2017}
}

@InProceedings{theissler2023rockad,
author="Theissler, Andreas
and Wengert, Manuel
and Gerschner, Felix",
editor="Cr{\'e}milleux, Bruno
and Hess, Sibylle
and Nijssen, Siegfried",
title="ROCKAD: Transferring ROCKET to Whole Time Series Anomaly Detection",
booktitle="Advances in Intelligent Data Analysis XXI",
year="2023",
publisher="Springer Nature Switzerland",
address="Cham",
pages="419--432",
isbn="978-3-031-30047-9"
}


@article{dempster2020rocket,
title={ROCKET: exceptionally fast and accurate time series classification using random convolutional kernels},
author={Dempster, Angus and Petitjean, Fran{\c{c}}ois and Webb, Geoffrey I},
journal={Data Mining and Knowledge Discovery},
volume={34},
number={5},
pages={1454--1495},
year={2020},
publisher={Springer},
doi={10.1007/s10618-020-00701-z}
}

23 changes: 15 additions & 8 deletions dtaianomaly/anomaly_detection/_KShapeAnomalyDetector.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import stumpy
from scipy.spatial.distance import pdist, squareform
from tslearn.clustering import KShape
from sktime.clustering.k_shapes import TimeSeriesKShapes

from dtaianomaly import utils
from dtaianomaly.anomaly_detection._BaseDetector import BaseDetector, Supervision
Expand Down Expand Up @@ -56,13 +56,13 @@ class KShapeAnomalyDetector(BaseDetector):
Attributes
----------
window_size_ : int
The effectively used window size for computing the matrix profile
The effectively used window size for detecting anomalies.
centroids_ : list of array-like of shape (window_size_*sequence_length_multiplier,)
The centroids computed by KShape clustering.
weights_ : list of float
The normalized weights corresponding to each cluster.
kshape_ : KShape
The fitted KShape-object of tslearn, used to cluster the data.
kshape_ : TimeSeriesKShapes
The fitted KShape-object of sktime, used to cluster the data.

Notes
-----
Expand All @@ -75,7 +75,8 @@ class KShapeAnomalyDetector(BaseDetector):
>>> x, y = demonstration_time_series()
>>> kshape = KShapeAnomalyDetector(window_size=50).fit(x)
>>> kshape.decision_function(x) # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE
array([1.01942655, 1.03008335, 1.03906465, ..., 1.29643677, 1.3256903 , 1.34704128]...)
array([7.07106781, 7.07106781, 7.07106781, ..., 7.07106781, 7.07106781,
7.07106781])
"""

window_size: WINDOW_SIZE_TYPE
Expand All @@ -87,7 +88,7 @@ class KShapeAnomalyDetector(BaseDetector):
window_size_: int
centroids_: list[np.array]
weights_: np.array
kshape_: KShape
kshape_: TimeSeriesKShapes

attribute_validation = {
"window_size": WindowSizeAttribute(),
Expand All @@ -105,7 +106,7 @@ def __init__(
**kwargs,
):
# Check if KShape can be initialized
KShape(n_clusters=n_clusters, **kwargs)
TimeSeriesKShapes(n_clusters=n_clusters, **kwargs)

super().__init__(Supervision.UNSUPERVISED)
self.window_size = window_size
Expand Down Expand Up @@ -146,7 +147,7 @@ def _fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> None:
windows = sliding_window(X, sequence_length, stride)

# Apply K-Shape clustering
self.kshape_ = KShape(n_clusters=self.n_clusters, **self.kwargs)
self.kshape_ = TimeSeriesKShapes(n_clusters=self.n_clusters, **self.kwargs)
cluster_labels = self.kshape_.fit_predict(windows)

# Extract the centroids
Expand Down Expand Up @@ -202,3 +203,9 @@ def _ncc_c(x: np.array, y: np.array) -> np.array:
cc = np.fft.ifft(np.fft.fft(x, fft_size) * np.conj(np.fft.fft(y, fft_size)))
cc = np.concatenate((cc[-(x.shape[0] - 1) :], cc[: x.shape[0]]))
return np.real(cc) / den


# When this module is executed directly as a script, run the doctests
# embedded in its docstrings.
if __name__ == "__main__":
    import doctest

    doctest.testmod()
208 changes: 208 additions & 0 deletions dtaianomaly/anomaly_detection/_ROCKAD.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import PowerTransformer
from sklearn.utils import resample
from sktime.transformations.panel.rocket import Rocket

from dtaianomaly.anomaly_detection._BaseDetector import BaseDetector, Supervision
from dtaianomaly.type_validation import (
BoolAttribute,
IntegerAttribute,
NoneAttribute,
WindowSizeAttribute,
)
from dtaianomaly.utils import get_dimension
from dtaianomaly.windowing import (
WINDOW_SIZE_TYPE,
compute_window_size,
reverse_sliding_window,
)

__all__ = ["ROCKAD"]


class ROCKAD(BaseDetector):
    """
    Detect anomalies in time series subsequences with ROCKAD :cite:`theissler2023rockad`.

    ROCKAD uses the ROCKET transformation :cite:`dempster2020rocket` as an unsupervised
    feature extractor from time series subsequences. Then, a bagging-based ensemble of
    k-NN models using the ROCKET-features is used to detect anomalous time series
    subsequences, in which the anomaly score of each individual instance is computed as
    the distance to the k-th nearest neighbor within each bagging subset. As discussed
    by :cite:t:`theissler2023rockad`, first applying a power-transform and then standard
    scaling the ROCKET features improves separation of the normal and anomalous sequences.

    Parameters
    ----------
    window_size : int or str
        The window size, the length of the subsequences that will be detected as anomalies. This
        value will be passed to :py:meth:`~dtaianomaly.anomaly_detection.compute_window_size`.
    stride : int, default=1
        The stride, i.e., the step size for extracting sliding windows from the time series.
    n_kernels : int, default=100
        The number of kernels to use in the ROCKET-transformation.
    power_transform : bool, default=True
        Whether to perform a power-transformation or not.
    n_estimators : int, default=10
        The number of k-NN estimators to include in the detection ensemble.
    n_neighbors : int, default=5
        The number of neighbors to use for the nearest neighbor queries.
    metric : str, default='euclidean'
        Distance metric for distance computations. Any metric of scikit-learn and
        scipy.spatial.distance can be used.
    n_jobs : int, default=1
        The number of jobs to use, which is passed to the scikit-learn components.
    seed : int or None, default=None
        The random seed used to split the data and initialise the kernels.

    Attributes
    ----------
    window_size_ : int
        The effectively used window size for detecting anomalies.
    rocket_ : Rocket
        The ``sktime`` Rocket transformer object.
    power_transformer_ : PowerTransformer
        The ``sklearn`` power transformer object. The object will only be fitted if
        ``power_transform=True``.
    nearest_neighbors_ : list of NearestNeighbors
        The nearest neighbor instances, each fitted on a different bootstrap
        sample of the instances.

    Examples
    --------
    >>> from dtaianomaly.anomaly_detection import ROCKAD
    >>> from dtaianomaly.data import demonstration_time_series
    >>> x, y = demonstration_time_series()
    >>> rockad = ROCKAD(64, seed=0).fit(x)
    >>> rockad.decision_function(x)  # doctest: +ELLIPSIS, +NORMALIZE_WHITESPACE, +SKIP
    array([...])
    """

    window_size: WINDOW_SIZE_TYPE
    stride: int
    n_kernels: int
    power_transform: bool
    n_estimators: int
    n_neighbors: int
    metric: str
    n_jobs: int
    seed: int | None

    window_size_: int
    rocket_: Rocket
    power_transformer_: PowerTransformer
    nearest_neighbors_: list[NearestNeighbors]

    attribute_validation = {
        "window_size": WindowSizeAttribute(),
        "stride": IntegerAttribute(1),
        "n_kernels": IntegerAttribute(1),
        "n_estimators": IntegerAttribute(1),
        "n_neighbors": IntegerAttribute(1),
        "n_jobs": IntegerAttribute(1),
        "power_transform": BoolAttribute(),
        "seed": IntegerAttribute() | NoneAttribute(),
    }

    def __init__(
        self,
        window_size: WINDOW_SIZE_TYPE,
        stride: int = 1,
        n_kernels: int = 100,
        power_transform: bool = True,
        n_estimators: int = 10,
        n_neighbors: int = 5,
        metric: str = "euclidean",
        n_jobs: int = 1,
        seed: int | None = None,
    ):
        super().__init__(Supervision.UNSUPERVISED)
        self.window_size = window_size
        self.stride = stride
        self.n_kernels = n_kernels
        self.n_estimators = n_estimators
        self.n_neighbors = n_neighbors
        self.metric = metric
        self.n_jobs = n_jobs
        self.power_transform = power_transform
        self.seed = seed

    def _fit(self, X: np.ndarray, y: np.ndarray = None, **kwargs) -> None:
        """
        Fit the ROCKET transformer, the optional power transformer and the
        bagged ensemble of nearest neighbor models on the windows of ``X``.

        Parameters
        ----------
        X : np.ndarray
            The time series to fit on.
        y : np.ndarray, default=None
            Not used by this method.
        **kwargs
            Forwarded to ``compute_window_size``.
        """
        self.window_size_ = compute_window_size(X, self.window_size, **kwargs)
        windows = self._sliding_window(X)

        # Apply ROCKET
        self.rocket_ = Rocket(
            num_kernels=self.n_kernels, n_jobs=self.n_jobs, random_state=self.seed
        )
        features = self.rocket_.fit_transform(windows)

        # Apply power-transformation. The transformer object is always created,
        # but only fitted when ``power_transform`` is enabled.
        self.power_transformer_ = PowerTransformer(standardize=True)
        if self.power_transform:
            features = self.power_transformer_.fit_transform(features)

        # Train the ensemble of nearest neighbor models.
        self.nearest_neighbors_ = []
        for i in range(self.n_estimators):

            # Give every estimator its own seed so the bootstrap samples differ
            # between estimators while remaining reproducible.
            seed_ = None if self.seed is None else self.seed + i

            # Draw a bootstrap sample (with replacement, same size as the data).
            features_sample = resample(
                features,
                replace=True,
                n_samples=None,
                random_state=seed_,
                stratify=None,
            )

            # Fit the nearest neighbor instance on the bootstrap sample, not on
            # the full feature set; otherwise all estimators would be identical
            # and the ensemble would not perform any bagging.
            nearest_neighbors = NearestNeighbors(
                n_neighbors=self.n_neighbors, metric=self.metric, n_jobs=self.n_jobs
            )
            nearest_neighbors.fit(features_sample)
            self.nearest_neighbors_.append(nearest_neighbors)

    def _decision_function(self, X: np.ndarray) -> np.array:
        """
        Compute the anomaly scores of ``X``.

        Each window is scored by every ensemble member as the distance to its
        k-th nearest neighbor; these distances are averaged over the ensemble
        and passed to ``reverse_sliding_window`` together with the window
        size, the stride and the length of ``X``.

        Parameters
        ----------
        X : np.ndarray
            The time series to score.

        Returns
        -------
        np.array
            The output of ``reverse_sliding_window`` for the per-window scores.
        """
        # Create the sliding windows
        windows = self._sliding_window(X)

        # Extract the ROCKET features
        features = self.rocket_.transform(windows)

        # Apply power transform
        if self.power_transform:
            features = self.power_transformer_.transform(features)

        # Compute the k-th nearest neighbor distance to each ensemble item
        nearest_neighbors_distances = np.empty(
            shape=(windows.shape[0], self.n_estimators)
        )
        for i, nearest_neighbors in enumerate(self.nearest_neighbors_):
            distances, _ = nearest_neighbors.kneighbors(features)
            # The last column holds the distance to the k-th (farthest) neighbor.
            nearest_neighbors_distances[:, i] = distances[:, -1]

        # Aggregate the scores
        decision_scores = nearest_neighbors_distances.mean(axis=1)
        return reverse_sliding_window(
            decision_scores, self.window_size_, self.stride, X.shape[0]
        )

    def _sliding_window(self, X: np.ndarray) -> np.ndarray:
        """
        Custom method to format the windows according to sktime format.

        Each window is transposed so that the dimensions come before the time
        steps. A window ending at the last observation is always appended, so
        the end of the time series is covered regardless of the stride.

        Parameters
        ----------
        X : np.ndarray
            The time series to cut into windows.

        Returns
        -------
        np.ndarray
            The stacked, transposed windows.
        """
        X = X.reshape(X.shape[0], get_dimension(X))
        windows = [
            X[t : t + self.window_size_, :].T
            for t in range(0, X.shape[0] - self.window_size_, self.stride)
        ]
        windows.append(X[-self.window_size_ :].T)
        return np.array(windows)
2 changes: 2 additions & 0 deletions dtaianomaly/anomaly_detection/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@
from ._PrincipalComponentAnalysis import PrincipalComponentAnalysis
from ._RobustPrincipalComponentAnalysis import RobustPrincipalComponentAnalysis
from ._RobustRandomCutForestAnomalyDetector import RobustRandomCutForestAnomalyDetector
from ._ROCKAD import ROCKAD
from ._SpectralResidual import SpectralResidual
from ._TimeMoE import TimeMoE
from ._TorchTimeSeriesDataSet import (
Expand Down Expand Up @@ -134,4 +135,5 @@
"TimeMoE",
"RobustRandomCutForestAnomalyDetector",
"HybridKNearestNeighbors",
"ROCKAD",
]
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ dependencies = [
"matplotlib>=3.7",
"statsmodels>=0.6",
"pyod>=2.0.0",
"tslearn>=0.6.3",
"sktime[clustering]",
"toml",
"torch>=1.8.0",
]
Expand Down
22 changes: 22 additions & 0 deletions tests/anomaly_detection/test_ROCKAD.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import numpy as np

from dtaianomaly.anomaly_detection import ROCKAD, Supervision


class TestROCKAD:
    """Basic checks for the ROCKAD anomaly detector."""

    def test_supervision(self):
        """ROCKAD reports itself as an unsupervised detector."""
        detector = ROCKAD(128)
        assert detector.supervision == Supervision.UNSUPERVISED

    def test_seed(self, univariate_time_series):
        """Two detectors created with the same seed produce identical scores."""
        # Fit and score two independent, identically seeded detectors.
        scores = [
            ROCKAD(128, seed=0)
            .fit(univariate_time_series)
            .decision_function(univariate_time_series)
            for _ in range(2)
        ]

        first, second = scores
        assert np.array_equal(first, second)
1 change: 1 addition & 0 deletions tests/utils/test_discovery.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
anomaly_detection.HybridKNearestNeighbors,
anomaly_detection.MovingWindowVariance,
anomaly_detection.SquaredDifference,
anomaly_detection.ROCKAD,
]
data_loaders = [
data.DemonstrationTimeSeriesLoader,
Expand Down