diff --git a/docs/additional_information/changelog.rst b/docs/additional_information/changelog.rst index d993162..cbf4b96 100644 --- a/docs/additional_information/changelog.rst +++ b/docs/additional_information/changelog.rst @@ -11,6 +11,7 @@ Added Changed ^^^^^^^ +- Moved ``InTimeAD`` to a separate project at https://github.com/ML-KULeuven/InTimeAD. Fixed ^^^^^ diff --git a/docs/getting_started/installation.rst b/docs/getting_started/installation.rst index a9de574..c72d445 100644 --- a/docs/getting_started/installation.rst +++ b/docs/getting_started/installation.rst @@ -35,7 +35,6 @@ Currently, following subsets are available: installing ``dtaianomaly[all]``! - ``time_moe``: Install transformers==4.40.1, necessary for running Time-MoE. **Warning:** Not included when installing ``dtaianomaly[all]``! -- ``in_time_ad``: Dependencies for running the demonstrator. To install version ``X.Y.Z``, use the following command: diff --git a/dtaianomaly/in_time_ad/_CustomDetectorVisualizer.py b/dtaianomaly/in_time_ad/_CustomDetectorVisualizer.py deleted file mode 100644 index aea5a65..0000000 --- a/dtaianomaly/in_time_ad/_CustomDetectorVisualizer.py +++ /dev/null @@ -1,62 +0,0 @@ -import abc - -from dtaianomaly.anomaly_detection import BaseDetector - -__all__ = ["CustomDetectorVisualizer"] - - -class CustomDetectorVisualizer(abc.ABC): - """ - Base class for custom detector visualizations. - - A base class for showing custom visualizations for anomaly detectors - within InTimeAD. - - Parameters - ---------- - name : str - The name to use for this visualizer. - icon : str, default=None - The icon to show along the visualization. If None, then no icon will - be shown. - """ - - name: str - icon: str | None - - def __init__(self, name: str, icon: str | None): - self.name = name - self.icon = icon - - @abc.abstractmethod - def is_compatible(self, detector: type[BaseDetector]) -> bool: - """ - Check compatibility of the given detector. 
- - Check whether the given detector is compatible with this visualizer. - - Parameters - ---------- - detector : BaseDetector-object - The type of the anomaly detector to check if it is compatible. - - Returns - ------- - bool - True if and only if this visualizer is compatible with the given - detector, and thus the visualization could be made for the detector. - """ - - @abc.abstractmethod - def show_custom_visualization(self, detector: BaseDetector) -> None: - """ - Show the custom visualization for the given anomaly detector. - - Show the additional information of the given anomaly detector that - is useful for understanding the model. - - Parameters - ---------- - detector : BaseDetector - The anomaly detector for which the visualization should be made. - """ diff --git a/dtaianomaly/in_time_ad/__init__.py b/dtaianomaly/in_time_ad/__init__.py deleted file mode 100644 index 6ab17d6..0000000 --- a/dtaianomaly/in_time_ad/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from ._configuration import load_configuration, load_default_configuration -from ._CustomDetectorVisualizer import CustomDetectorVisualizer -from ._run import run - -__all__ = [ - "run", - "load_configuration", - "load_default_configuration", - "CustomDetectorVisualizer", -] diff --git a/dtaianomaly/in_time_ad/_app.py b/dtaianomaly/in_time_ad/_app.py deleted file mode 100644 index f4fd516..0000000 --- a/dtaianomaly/in_time_ad/_app.py +++ /dev/null @@ -1,341 +0,0 @@ -import os.path -import sys -import warnings - -import streamlit as st - -from dtaianomaly.in_time_ad._configuration import load_configuration -from dtaianomaly.in_time_ad._st_AnomalyDetector import StAnomalyDetectorLoader -from dtaianomaly.in_time_ad._st_DataLoader import StDataLoader -from dtaianomaly.in_time_ad._st_QualitativeEvaluator import StQualitativeEvaluator -from dtaianomaly.in_time_ad._st_QuantitativeEvaluator import ( - StEvaluationScores, - StQualitativeEvaluationLoader, -) -from dtaianomaly.in_time_ad._utils import ( - 
error_no_detectors, - error_no_metrics, - load_custom_models, - show_header, - show_section_description, - write_code_lines, -) -from dtaianomaly.utils import all_classes - -################################################################### -# LAYOUT -################################################################### - -st.set_page_config( - page_title="InTimeAD", - page_icon="https://raw.githubusercontent.com/ML-KULeuven/dtaianomaly/main/docs/logo/favicon.svg", - layout="wide", -) -st.logo( - "https://raw.githubusercontent.com/ML-KULeuven/dtaianomaly/main/docs/logo/readme.svg", - link="https://github.com/ML-KULeuven/dtaianomaly", - icon_image="https://raw.githubusercontent.com/ML-KULeuven/dtaianomaly/main/docs/logo/favicon.svg", -) - - -################################################################### -# SESSION STATE CONFIGURATION -################################################################### - -if "configuration" not in st.session_state: - config_path = sys.argv[1] - if config_path == "default": - config = load_configuration() - elif not os.path.isfile(config_path): - warnings.warn( - f"The given configuration file does not exist: '{config_path}'. Using the default configuration." 
- ) - else: - config = load_configuration(config_path) - st.session_state.configuration = load_configuration() -if "custom_models" not in st.session_state: - st.session_state.custom_models = load_custom_models(sys.argv[2]) -if "st_data_loader" not in st.session_state: - st.session_state.st_data_loader = StDataLoader( - all_data_loaders=all_classes(type_filter="data-loader", return_names=True) - + st.session_state.custom_models["data_loaders"], - configuration=st.session_state.configuration["data-loader"], - ) -if "st_anomaly_detector_loader" not in st.session_state: - st.session_state.st_anomaly_detector_loader = StAnomalyDetectorLoader( - all_anomaly_detectors=all_classes( - type_filter="anomaly-detector", return_names=True - ) - + st.session_state.custom_models["anomaly_detectors"], - all_custom_detector_visualizers=all_classes( - type_filter="custom-demonstrator-visualizers", return_names=True - ) - + st.session_state.custom_models["custom_visualizers"], - configuration=st.session_state.configuration["detector"], - ) -if "loaded_detectors" not in st.session_state: - st.session_state.loaded_detectors = ( - st.session_state.st_anomaly_detector_loader.select_default_anomaly_detector() - ) - for st_detector in st.session_state.loaded_detectors: - st_detector.fit_predict(st.session_state.st_data_loader.data_set) -if "st_metric_loader" not in st.session_state: - st.session_state.st_metric_loader = StQualitativeEvaluationLoader( - all_metrics=all_classes(type_filter="metric", return_names=True) - + st.session_state.custom_models["metrics"], - configuration=st.session_state.configuration["metric"], - ) -if "loaded_metrics" not in st.session_state: - st.session_state.loaded_metrics = ( - st.session_state.st_metric_loader.select_default_metrics() - ) -if "st_evaluation_scores" not in st.session_state: - st.session_state.st_evaluation_scores = StEvaluationScores( - detectors=st.session_state.loaded_detectors, - metrics=st.session_state.loaded_metrics, - 
y_test=st.session_state.st_data_loader.data_set.y_test, - ) - - -################################################################### -# INTRODUCTION -################################################################### - -st.title("Welcome to ``InTimeAD``!") -st.subheader("Interactive Time Series Anomaly Detection") -show_section_description( - """ - InTimeAD is tool for *In*teractive *Time* Series *A*nomaly *D*etection, which offers a - simple webinterface to apply state-of-the-art time series anomaly detection. InTimeAD - builds on [``dtaianomaly``](https://github.com/ML-KULeuven/dtaianomaly), an easy-to-use - Python package for time series anomaly detection, to let you detect anomalies without - writing any code. This makes it possible to quickly explore and compare different - models, including on your own data. Once you’ve identified suitable models, you can - switch to Python for more in-depth validation. To help with this transition, code - snippets are provided throughout this InTimeAD (marked with a "💻") and can be copy-pasted - directly in your code-base. - """ -) -with st.expander("What is anomaly detection?", expanded=False, icon="💡"): - show_section_description( - """ - A **time series** is an ordered sequence of observations measured over time. - For example, the CPU usage of a server recorded every minute. A time series - is **univariate** if it tracks only a single variable (e.g., just CPU usage), or - **multivariate** if it tracks 2 or more variables simultaneously (e.g., CPU usage, - memory load, network traffic, ...). - - An **anomaly** in a time series is an observation or a sequence of observations that - deviate from the normal behavior, from the expected. For example, high CPU usage in - off-peak hours may indicate a security breach. Anomalies can signal system malfunctioning - or other critical issues that need to be resolved. - - **Time series anomaly detection** is the task of automatically identifying these unexpected - patterns. 
The automated detection of anomalies helps to maintain system health, to reduce - downtime, and to improve reliability. - - Typically, an anomaly detection model will compute continuous **anomaly scores**: a - numeric value for each observation in the time series which indicates how anomalous that - observation is. For anomalous measurements, the anomaly score will be large, while the - score will be small for normal observations. - """ - ) - - -################################################################### -# DATA LOADING -################################################################### - -show_header("Time series data") -show_section_description( - """ - To get started, load a time series into InTimeAD. You can either use one of - the built-in data loaders or upload your own data. The time series will be shown - immediately to help you understand its structure. Once you're familiar with the data, - you can begin detecting anomalies. - """ -) -data_updated = st.session_state.st_data_loader.select_data_loader() -st.session_state.st_data_loader.show_data() -write_code_lines(st.session_state.st_data_loader.get_code_lines()) - -# Retrain the anomaly detectors if the data was updated -if data_updated: - for detector in st.session_state.loaded_detectors: - detector.fit_predict(st.session_state.st_data_loader.data_set) - for metric in st.session_state.loaded_metrics: - st.session_state.st_evaluation_scores.add( - detector, metric, st.session_state.st_data_loader.data_set.y_test - ) - -################################################################### -# ANOMALY DETECTION -################################################################### - -show_header("Anomaly detection") -show_section_description( - """ - Over the years, many anomaly detection models have been developed. Each of these - models detects anomalies in a different manner, based on different assumptions - of what constitutes to an anomaly. 
Below you can select and configure one or - more anomaly detectors to apply on the time series. All the hyperparameters are - filled in by default, but you can tune these in order to better detect the - anomalies or to analyze their effect on the performance. - """ -) - -new_detector = st.session_state.st_anomaly_detector_loader.select_anomaly_detector() - -if new_detector is not None: - st.session_state.loaded_detectors.append(new_detector) - new_detector.fit_predict(st.session_state.st_data_loader.data_set) - for metric in st.session_state.loaded_metrics: - st.session_state.st_evaluation_scores.add( - new_detector, metric, st.session_state.st_data_loader.data_set.y_test - ) - -if len(st.session_state.loaded_detectors) == 0: - error_no_detectors() - -for i, detector in enumerate(st.session_state.loaded_detectors): - updated_detector, remove_detector, old_detector, error_container = ( - detector.show_anomaly_detector() - ) - - if remove_detector: - st.session_state.st_evaluation_scores.remove_detector(detector) - del st.session_state.loaded_detectors[i] - st.rerun() # To make sure that the detector is effectively removed - - if updated_detector: - st.session_state.st_evaluation_scores.remove_detector(old_detector) - detector.fit_predict(st.session_state.st_data_loader.data_set) - for metric in st.session_state.loaded_metrics: - st.session_state.st_evaluation_scores.add( - detector, metric, st.session_state.st_data_loader.data_set.y_test - ) - - if detector.decision_function_ is None: - error_message = "Something went wrong while detecting anomalies! No predictions are available." 
- if hasattr(detector, "exception_"): - error_message += f"\n\nError message: {detector.exception_}" - - # Handle errors - error_container.error(error_message, icon="🚨") - else: - detector.show_custom_visualizations() - write_code_lines(detector.get_code_lines(st.session_state.st_data_loader.data_set)) - - -################################################################### -# VISUAL ANALYSIS -################################################################### - -show_header("Visual analysis of the anomaly scores") -show_section_description( - """ - The advantage of time series is that they are inherently visual. Because of this, - we can easily verify models by simply plotting the data and the predicted - anomalies. Below, you can analyze the predicted anomaly scores (_"How anomalous is an - observation?"_) as well as the detected anomalies (_"Is the observation an anomaly or not?_). - """ -) -tab_anomaly_scores, tab_predicted_anomalies = st.tabs( - ["Anomaly scores", "Detected anomalies"] -) -with tab_anomaly_scores: - write_code_lines( - StQualitativeEvaluator.plot_anomaly_scores( - data_set=st.session_state.st_data_loader.data_set, - st_anomaly_detectors=st.session_state.loaded_detectors, - ) - ) -with tab_predicted_anomalies: - write_code_lines( - StQualitativeEvaluator.plot_detected_anomalies( - data_set=st.session_state.st_data_loader.data_set, - st_anomaly_detectors=st.session_state.loaded_detectors, - ) - ) - -################################################################### -# NUMERICAL ANALYSIS -################################################################### - -show_header("Numerical analysis of the anomaly detectors") -show_section_description( - """ - While visual inspection gives a good idea of how well a model performs, it's often - useful to summarize performance with a single score, a task made easy by ``dtaianomaly``. 
- Below, you can choose various evaluation metrics, configure them to fit your application, - and quantitatively assess each anomaly detector. At the bottom, you'll find both the raw - scores of each metric and model, but also a bar plot for a quick comparison of model performance. - """ -) -new_metric = st.session_state.st_metric_loader.select_metric() - -# Add a new metric -if new_metric is not None: - st.session_state.loaded_metrics.append(new_metric) - for detector in st.session_state.loaded_detectors: - st.session_state.st_evaluation_scores.add( - detector, new_metric, st.session_state.st_data_loader.data_set.y_test - ) - -# Cope with issues -if len(st.session_state.loaded_detectors) == 0: - error_no_detectors() -if len(st.session_state.loaded_metrics) == 0: - error_no_metrics() - -# Show all the metrics -for i, metric in enumerate(st.session_state.loaded_metrics): - update_metric, remove_metric, old_name_metric = metric.show_metric() - write_code_lines(metric.get_code_lines(st.session_state.st_data_loader.data_set)) - - if remove_metric: - st.session_state.st_evaluation_scores.remove_metric(metric) - del st.session_state.loaded_metrics[i] - st.rerun() # To make sure that the metric is effectively removed - - if update_metric: - st.session_state.st_evaluation_scores.remove_metric(old_name_metric) - for detector in st.session_state.loaded_detectors: - st.session_state.st_evaluation_scores.add( - detector, metric, st.session_state.st_data_loader.data_set.y_test - ) - -# Show the scores -st.session_state.st_evaluation_scores.show_scores() - -################################################################### -# ACKNOWLEDGEMENTS -################################################################### - -show_header("Acknowledgements") -st.write( - "If you find ``dtaianomaly`` or ``InTimeAD`` useful for your work, we would appreciate the following [citation](https://arxiv.org/abs/2502.14381):" -) -st.code( - """ - @article{carpentier2025dtaianomaly, - 
title={{dtaianomaly: A Python library for time series anomaly detection}}, - author={Louis Carpentier and Nick Seeuws and Wannes Meert and Mathias Verbeke}, - year={2025}, - eprint={2502.14381}, - archivePrefix={arXiv}, - primaryClass={cs.LG}, - url={https://arxiv.org/abs/2502.14381}, - } - """, - language="bibtex", -) -st.markdown( - "> Carpentier, L., Seeuws, N., Meert, W., Verbeke, M.: dtaianomaly: A Python library for time series anomaly detection (2025), https://arxiv.org/abs/2502.14381" -) - -cols = st.columns(5, vertical_alignment="bottom") -cols[1].image("https://upload.wikimedia.org/wikipedia/commons/4/49/KU_Leuven_logo.svg") -cols[3].image( - "https://raw.githubusercontent.com/FHannes/dtai-logo/master/DTAI_Logo.svg" -) diff --git a/dtaianomaly/in_time_ad/_configuration.py b/dtaianomaly/in_time_ad/_configuration.py deleted file mode 100644 index 3def201..0000000 --- a/dtaianomaly/in_time_ad/_configuration.py +++ /dev/null @@ -1,551 +0,0 @@ -import json - -__all__ = ["load_configuration", "load_default_configuration"] - - -def load_configuration(path: str = None) -> dict: - """ - Load a configuration. - - Load the configuration file at the given path, if it is given. - If no path is given, the default configuration will be loaded. - - Parameters - ---------- - path : str, default=None - The path where the configuration to load is located. - - Returns - ------- - dict - The loaded configuration. - """ - if path is None: - return load_default_configuration() - else: - with open(path, "r") as f: - return json.load(f) - - -def load_default_configuration() -> dict: - """ - Load the default configuration. - - Load the default configuration file for running InTimeAD. - - Returns - ------- - dict - The default configuration. 
- """ - return { - "data-loader": {"default": "DemonstrationTimeSeriesLoader", "exclude": []}, - "detector": { - "default": "IsolationForest", - "exclude": [ - "MultivariateDetector", - "AlwaysNormal", - "AlwaysAnomalous", - "RandomDetector", - ], - "parameters-required": {"window_size": 64, "neighborhood_size_before": 64}, - "parameters-optional": { - "window_size_selection": { - "label": "Window size", - "options": [ - ["Manual", "Manual"], - ["Dominant Fourier Frequency", "fft"], - ["Highest Autocorrelation", "acf"], - ["Summary Statistics Subsequence", "suss"], - ["Multi-Window-Finder", "mwf"], - ], - "help": "The used method for setting the window size. Options are:\n\n" - "- **Manual:** Manually set the window size to a specific size.\n" - "- **Dominant Fourier Frequency:** Use the window size which corresponds to the dominant frequency in the Fourier domain.\n" - "- **Highest Autocorrelation:** Use the window size which corresponds to maximum autocorrelation.\n" - "- **Summary Statistics Subsequence:** Find a window size such that the statics of that window are similar to those of the full time series. \n" - "- **Multi-Window-Finder:** Find a window size such that the moving average is small. 
", - }, - "window_size": { - "label": "Manual window size", - "type": "number_input", - "min_value": 1, - "step": 1, - "value": 64, - "help": "The manually-set size of the sliding window.", - }, - "stride": { - "label": "Stride", - "type": "number_input", - "min_value": 1, - "step": 1, - "value": 1, - "help": "The stride of a sliding window is the number of steps the window moves forward each time.", - }, - "start_level": { - "type": "number_input", - "label": "Start level", - "min_value": 0, - "value": 3, - "step": 1, - "help": "The first level for computing the Discrete Wavelet Transform.", - }, - "quantile_epsilon": { - "type": "slider", - "label": "Quantile", - "min_value": 0.0, - "max_value": 1.0, - "step": 0.01, - "value": 0.01, - "help": "The percentile used as threshold on the likelihood estimates.", - }, - "padding_mode": { - "type": "selectbox", - "label": "Padding", - "options": ["wrap", "symmetric"], - "index": 0, - "help": "How the time series is padded:" - "\n-**wrap:** Use the first values to pad at the end and the last values to pad at the beginning." - "\n- **symmetric:** Pads with the reflection of the time series.", - }, - "sequence_length_multiplier": { - "type": "number_input", - "label": "Sequence length multiplier", - "min_value": 1.0, - "value": 4.0, - "step": 0.1, - "help": "The amount by which the window size should be multiplied to create sliding windows for clustering the data using KShape.", - }, - "overlap_rate": { - "type": "slider", - "label": "Overlap rate", - "min_value": 0.01, - "max_value": 1.0, - "step": 0.01, - "value": 0.5, - "help": "The overlap of the sliding windows for clustering the data. 
Will be used to compute a relative stride to avoid trivial matches when clustering subsequences.", - }, - "normalize": { - "type": "toggle", - "label": "Z-scale", - "value": True, - "help": "Whether to Z-scale the time series.", - }, - "p": { - "type": "number_input", - "label": "Norm", - "min_value": 1.0, - "value": 2.0, - "step": 0.1, - "help": "The used norm for computing the distances with the matrix profile.", - }, - "k": { - "type": "number_input", - "label": "K", - "min_value": 1, - "value": 1, - "step": 1, - "help": "Use the distance to the K-th nearest neighbor as an anomaly score.", - }, - "novelty": { - "type": "toggle", - "label": "Novelty detection", - "value": False, - "help": "If novelty detection should be performed, i.e., detect anomalies with regards to a normal time series.", - }, - "neighborhood_size_before": { - "type": "number_input", - "label": "Neighborhood size before the sample", - "min_value": 1, - "value": 64, - "step": 1, - "help": "The number of observations before the sample to include in the neighborhood.", - }, - "neighborhood_size_after": { - "type": "number_input", - "label": "Neighborhood size after the sample", - "min_value": 0, - "value": None, - "step": 1, - "help": "The number of observations after the sample to include in the neighborhood. 
If no value " - "is given, the neighborhood size after the window will be set to the same value as the " - "neighborhood size before the window.", - }, - "seed": { - "type": "number_input", - "label": "Seed", - "min_value": 1, - "value": None, - "step": 1, - "help": "The random seed to set.", - }, - "max_iter": { - "type": "number_input", - "label": "Maximum number of iterations", - "min_value": 1, - "value": 1000, - "step": 1, - "help": "The maximum number of iterations to perform during optimization.", - }, - "error_metric": { - "type": "selectbox", - "label": "Anomaly score metric", - "options": ["mean-absolute-error", "mean-squared-error"], - "index": 0, - "help": "The error measure to use as anomaly scores between the predicted values and the actually observed values in the time series.", - }, - "latent_space_dimension": { - "type": "number_input", - "label": "Dimension of the latent space", - "min_value": 1, - "value": 32, - "step": 1, - "help": "The dimension of the latent space of the auto encoder, i.e., the number of neurons.", - }, - "dropout_rate": { - "type": "slider", - "label": "Dropout rate", - "help": "The drop out rate to use within the network, i.e., the percentage of weights that are frozen during training.", - "min_value": 0.0, - "max_value": 0.99, - "step": 0.01, - "value": 0.0, - }, - "activation_function": { - "type": "selectbox", - "label": "Activation function", - "options": ["linear", "relu", "sigmoid", "tanh"], - "index": 1, - "help": "The activation function to use for including non-linearity in the network.", - }, - "batch_normalization": { - "type": "toggle", - "label": "Apply batch normalization", - "value": True, - "help": "Whether to add batch normalization after each layer or not.", - }, - "standard_scaling": { - "type": "toggle", - "label": "Apply standard scaling", - "value": True, - "help": "Whether to apply standard scaling to each window before feeding it to the neural network.", - }, - "batch_size": { - "type": 
"number_input", - "label": "Batch size", - "min_value": 1, - "value": 32, - "step": 1, - "help": "The batch size to use for training the network, i.e., the number of samples to feed simultaneously for computing the loss and updating the weights.", - }, - "loss": { - "type": "selectbox", - "label": "Loss function", - "options": ["mse", "l1", "huber"], - "index": 0, - "help": "The loss function to use when training the network. Options are:" - "\n-**mse:** Use the Mean Squared Error loss." - "\n-**l1:** Use the L1-loss or the mean absolute error." - "\n-**huber:** Use the huber loss, which smoothly combines the MSE-loss with the L1-loss.", - }, - "optimizer": { - "type": "selectbox", - "label": "Optimizer", - "options": ["adam", "sgd"], - "index": 0, - "help": "The optimizer to use for updating the weights during training.", - }, - "learning_rate": { - "type": "select_slider", - "label": "Learning rate", - "options": [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3], - "value": 1e-3, - "help": "The number of training iterations.", - }, - "n_epochs": { - "type": "number_input", - "label": "Number of epochs", - "min_value": 1, - "value": 10, - "step": 1, - "help": "The number of training iterations.", - }, - "forecast_length": { - "type": "number_input", - "label": "Forecast length", - "min_value": 1, - "value": 1, - "step": 1, - "help": "The number of observations in time that should be forecasted for each window.", - }, - "n_clusters": { - "type": "number_input", - "label": "Number of clusters", - "min_value": 1, - "value": 4, - "step": 1, - "help": "The number of clusters to use in the clustering algorithm.", - }, - "alpha": { - "HistogramBasedOutlierScore": { - "type": "slider", - "label": "Alpha", - "help": "Parameter used for regularization and for preventing overflow.", - "min_value": 0.01, - "max_value": 0.99, - "step": 0.01, - "value": 0.1, - }, - "ClusterBasedLocalOutlierFactor": { - "type": "slider", - "label": "Alpha", - "help": "The ratio for 
deciding small and large clusters.", - "min_value": 0.5, - "max_value": 1.0, - "step": 0.01, - "value": 0.9, - }, - }, - "beta": { - "type": "number_input", - "label": "Beta", - "min_value": 1.0, - "value": 5.0, - "step": 0.1, - "help": "Parameter used for splitting the clusters in 'large' and 'small' clusters.", - }, - "kernel_size": { - "type": "number_input", - "label": "Kernel size", - "min_value": 1, - "value": 3, - "step": 1, - "help": "The size each kernel in the convolutional layers should have.", - }, - "n_bins": { - "type": "number_input", - "label": "Number of bins", - "min_value": 1, - "value": 10, - "step": 1, - "help": "The number of bins to use for each feature.", - }, - "tol": { - "type": "slider", - "label": "Tolerance", - "help": "Parameter defining the flexibility for dealing with samples that fall outside the bins.", - "min_value": 0.01, - "max_value": 0.99, - "step": 0.01, - "value": 0.5, - }, - "n_estimators": { - "type": "number_input", - "label": "Number of estimators", - "min_value": 1, - "value": 100, - "step": 1, - "help": "The number of base trees to include in the ensemble.", - }, - "max_samples": { - "type": "number_input", - "label": "Maximum number of samples", - "help": "The number of samples to use for learning each base estimator. ", - "min_value": 1, - "step": 1, - "value": 256, - }, - "max_features": { - "type": "slider", - "label": "Maximum number of features", - "help": "The maximum number of features to use for training each base estimator. 
This value represents the percentage of features to use.", - "min_value": 0.01, - "max_value": 1.0, - "step": 0.01, - "value": 1.0, - }, - "n_neighbors": { - "type": "number_input", - "label": "Number of neighbors", - "help": "The number of neighbors to include when computing anomaly scores", - "min_value": 1, - "step": 1, - "value": 10, - }, - "method": { - "type": "selectbox", - "label": "Handling nearest neighbors", - "options": ["largest", "mean", "median"], - "index": 0, - "help": "Method for computing the anomaly scores based on the nearest neighbors. Valid options are:" - "\n-**largest:** Use the distance to the kth neighbor." - "\n-**mean:** Use the mean distance to the nearest neighbors" - "\n-**median:** Use the median distance to the nearest neighbors", - }, - "metric": { - "type": "selectbox", - "label": "Distance metric", - "options": [ - "minkowski", - "euclidean", - "jaccard", - "hamming", - "mahalanobis", - "cosine", - "correlation", - ], - "index": 0, - "help": "The distance metric to use when computing the distances between samples.", - }, - "n_components": { - "type": "slider", - "label": "Percentage of components", - "help": "The percentage of PCA-components to use", - "min_value": 0.01, - "max_value": 1.0, - "step": 0.01, - "value": 1.0, - }, - "kernel": { - "type": "selectbox", - "label": "Kernel", - "options": ["linear", "poly", "rbf", "sigmoid", "cosine"], - "index": 2, - "help": "The kernel to use to map the data into a new space.", - }, - "hidden_units": { - "type": "number_input", - "label": "Number of hidden units", - "help": "The number of hidden units (i.e., LSTM-cells) to include in each LSTM-layer.", - "min_value": 1, - "step": 1, - "value": 8, - }, - "num_lstm_layers": { - "type": "number_input", - "label": "Number of LSTM-layers", - "help": "The number of LSTM-layers to include in the network.", - "min_value": 1, - "step": 1, - "value": 1, - }, - "bias": { - "type": "toggle", - "label": "Include bias", - "value": True, - "help": 
"Whether to include a learnable bias at the end of each layer.", - }, - "num_heads": { - "type": "number_input", - "label": "Number of heads", - "help": "The number of attention heads to include in each attention-layer.", - "min_value": 1, - "step": 1, - "value": 12, - }, - "num_transformer_layers": { - "type": "number_input", - "label": "Number of attention layers", - "help": "The number of attention layers to include in the transformer.", - "min_value": 1, - "step": 1, - "value": 1, - }, - "dimension_feedforward": { - "type": "number_input", - "label": "Dimension of the feed forward layer", - "help": "The dimension of the linear layer at the end of each attention layer.", - "min_value": 1, - "step": 1, - "value": 32, - }, - }, - }, - "metric": { - "default": ["EventWiseFBeta", "VolumeUnderPrCurve"], - "exclude": ["BestThresholdMetric", "ThresholdMetric"], - "parameters-required": {"cutoff": 0.9}, - "parameters-optional": { - "cutoff": { - "type": "slider", - "label": "Cutoff", - "help": "The cutoff for converting the anomaly scores to a binary prediction. The cutoff is done on the predicted anomaly scores after min-max scaling.", - "min_value": 0.0, - "max_value": 1.0, - "step": 0.01, - "value": 0.9, - }, - "beta": { - "type": "number_input", - "label": "Beta", - "min_value": 0.0, - "step": 0.01, - "value": 1.0, - "help": "Determines the weight of recall in the combined score.", - }, - "buffer_size": { - "type": "number_input", - "label": "Buffer size", - "min_value": 1, - "step": 1, - "value": 100, - "help": "Size of the buffer region around an anomaly. 
Half of the buffer added before the anomalous event and half of the buffer is added after the anomaly.", - }, - "compatibility_mode": { - "type": "toggle", - "label": "Use original version", - "value": False, - "help": "Whether to use the originally proposed version of this metric or the implementation of TimeEval:" - "\n- For the recall (FPR) existence reward, anomalies are counted as separate events, even if the added slopes overlap;" - "\n- Overlapping slopes don't sum up in their anomaly weight, the anomaly weight for each point in the ground truth is maximized;" - "\n- The original slopes are asymmetric: the slopes at the end of anomalies are a single point shorter than the ones at the beginning of anomalies. Symmetric slopes are used, with the same size for the beginning and end of anomalies;" - "\n- A linear approximation of the slopes is used instead of the convex slope shape presented in the paper.", - }, - "max_samples": { - "type": "number_input", - "label": "Maximum number of thresholds", - "value": 100, - "min_value": 1, - "step": 1, - "help": "The number of thresholds to put on the anomaly scores. This offers a trade-off between exactness of the metric and computation time.", - }, - "alpha": { - "type": "slider", - "label": "Alpha", - "min_value": 0.0, - "max_value": 1.0, - "value": 0.5, - "step": 0.01, - "help": "The importance of detecting the events (even if it is only a single detected point) compared to detecting a large portion of the ground truth events.", - }, - "delta": { - "type": "selectbox", - "label": "Delta", - "options": ["flat", "front", "back", "middle"], - "index": 0, - "help": "Bias for the position of the predicted anomaly in the ground truth anomalous range:" - "\n- **flat:** Equal bias towards all positions in the ground truth anomalous range." - "\n- **front:** Predictions that are near the front of the ground truth anomaly (i.e. early detection) have a higher weight." 
- "\n- **back:** Predictions that are near the end of the ground truth anomaly (i.e. late detection) have a higher weight." - "\n- **middle:** Predictions that are near the center of the ground truth anomaly have a higher weight.", - }, - "gamma": { - "type": "selectbox", - "label": "Gamma", - "options": ["one", "reciprocal"], - "index": 0, - "help": "Penalization approach for detecting multiple ranges with a single range: " - "\n- **one:** Fragmented detection should not be penalized." - "\n- **reciprocal:** Weight fragmented detection of $N$ ranges with as single range by a factor of $1/N$.", - }, - "max_buffer_size": { - "type": "number_input", - "label": "Maximum buffer size", - "min_value": 1, - "step": 1, - "value": 250, - "help": "Maximum size of the buffer region around an anomaly. Half of the buffer added before the anomalous event and half of the buffer is added after the anomaly. The metric iterates over all the buffer sizes to to create a volume.", - }, - }, - }, - } diff --git a/dtaianomaly/in_time_ad/_run.py b/dtaianomaly/in_time_ad/_run.py deleted file mode 100644 index f893031..0000000 --- a/dtaianomaly/in_time_ad/_run.py +++ /dev/null @@ -1,112 +0,0 @@ -import pathlib -import sys -import warnings - -import torch -from streamlit.web import cli as stcli - -from dtaianomaly.anomaly_detection import BaseDetector -from dtaianomaly.data import LazyDataLoader -from dtaianomaly.evaluation import Metric -from dtaianomaly.in_time_ad._CustomDetectorVisualizer import CustomDetectorVisualizer -from dtaianomaly.utils import convert_to_list - -torch.classes.__path__ = [] # To avoid torch-warning - -__all__ = ["run"] - - -def run( - configuration_path: str = None, - custom_data_loaders: type[LazyDataLoader] | list[type[LazyDataLoader]] = None, - custom_anomaly_detectors: type[BaseDetector] | list[type[BaseDetector]] = None, - custom_metrics: type[Metric] | list[type[Metric]] = None, - custom_visualizers: ( - type[CustomDetectorVisualizer] | 
list[type[CustomDetectorVisualizer]] - ) = None, -): - """ - Run InTimeAD. - - Start up the demonstrator for ``dtaianomaly``. This function will start a web-application - on your local host, to which you can navigate in order to see InTimeAD. - - Parameters - ---------- - configuration_path : str, default=None - The path to the configuration file for the demonstrator. The configuration file - must be in a json format. - custom_data_loaders : LazyDataLoader object or list of LazyDataLoader objects, default=None - Additional data loaders which must be available within the demonstrator. - custom_anomaly_detectors : BaseDetector object or list of BaseDetector objects, default=None - Additional anomaly detectors which must be available within the demonstrator. - custom_metrics : Metric object or list of Metric objects, default=None - Additional evaluation metrics which must be available within the demonstrator. - custom_visualizers : CustomDetectorVisualizer object or list of CustomDetectorVisualizer objects, default=None - Additional custom visualizations for certain anomaly detectors. 
- """ - # Run the applications - sys.argv = [ - "streamlit", - "run", - str(pathlib.Path(__file__).parent / "_app.py"), - configuration_path or "default", - str( - _custom_model_config( - custom_data_loaders=custom_data_loaders, - custom_anomaly_detectors=custom_anomaly_detectors, - custom_metrics=custom_metrics, - custom_visualizers=custom_visualizers, - ) - ), - ] - sys.exit(stcli.main()) - - -def _custom_model_config( - custom_data_loaders: type[LazyDataLoader] | list[type[LazyDataLoader]] = None, - custom_anomaly_detectors: type[BaseDetector] | list[type[BaseDetector]] = None, - custom_metrics: type[Metric] | list[type[Metric]] = None, - custom_visualizers: ( - type[CustomDetectorVisualizer] | list[type[CustomDetectorVisualizer]] - ) = None, -) -> dict[str, list[str]]: - - def _is_valid(cls: type) -> bool: - # In case the model is defined in __main__ - if cls.__module__ == "__main__": - warnings.warn( - "Including a custom model in the demonstrator which is defined in the " - "same file from which the demonstrator is started leads to run-time issues." - "Please define these models in a separate .py file and import them into " - f"your main script. The model {cls.__qualname__} will be ignored." 
- ) - return False - else: - return True - - def _format(cls: type) -> str: - return f"{cls.__module__}.{cls.__qualname__}" - - return { - "data_loaders": [ - _format(data_loader) - for data_loader in convert_to_list(custom_data_loaders or []) - if _is_valid(data_loader) - ], - "anomaly_detectors": [ - _format(anomaly_detector) - for anomaly_detector in convert_to_list(custom_anomaly_detectors or []) - if _is_valid(anomaly_detector) - ], - "metrics": [ - _format(metric) - for metric in convert_to_list(custom_metrics or []) - if _is_valid(metric) - ], - "custom_visualizers": [ - _format(visualizer) - for visualizer in convert_to_list(custom_visualizers or []) - if _is_valid(visualizer) - ], - } diff --git a/dtaianomaly/in_time_ad/_st_AnomalyDetector.py b/dtaianomaly/in_time_ad/_st_AnomalyDetector.py deleted file mode 100644 index d44bdd3..0000000 --- a/dtaianomaly/in_time_ad/_st_AnomalyDetector.py +++ /dev/null @@ -1,320 +0,0 @@ -import copy - -import numpy as np -import streamlit as st - -from dtaianomaly.anomaly_detection import BaseDetector, Supervision -from dtaianomaly.data import DataSet -from dtaianomaly.in_time_ad._CustomDetectorVisualizer import CustomDetectorVisualizer -from dtaianomaly.in_time_ad._utils import ( - get_parameters, - input_widget_hyperparameter, - show_class_summary, - show_small_header, - update_object, -) -from dtaianomaly.type_validation import WindowSizeAttribute - - -class StAnomalyDetector: - - __DETECTOR_COUNTER: int = 0 - detector_id: int - detector: BaseDetector - parameters: dict - custom_visualizers: list[CustomDetectorVisualizer] - decision_function_: np.array - exception_: Exception - - def __init__( - self, - detector: type[BaseDetector], - custom_visualizers: list[CustomDetectorVisualizer], - configuration: dict, - ): - self.detector_id = StAnomalyDetector.__DETECTOR_COUNTER - StAnomalyDetector.__DETECTOR_COUNTER += 1 - - self.custom_visualizers = custom_visualizers - - parameters, required_parameters = 
get_parameters(detector) - self.parameters = load_parameters(parameters, configuration, detector) - self.detector = detector( - **{ - key: value - for key, value in configuration["parameters-required"].items() - if key in required_parameters - } - ) - set_parameters = { - key: value["value"] - for key, value in self.parameters.items() - if "value" in value - } - update_object(self.detector, set_parameters) - - if "window_size" in self.parameters: - self.parameters["window_size_selection"] = configuration[ - "parameters-optional" - ]["window_size_selection"] - - def show_anomaly_detector(self) -> (bool, bool, "StAnomalyDetector", st.container): - old_detector = copy.deepcopy(self) - - # Save some space for the header - header = st.container() - - # Show an explanation of the detector - show_class_summary(self.detector) - - # Reserve space for a potential error - error_container = st.container() - - # Select the hyperparameters, and update the detector if necessary - col_select_hyperparameters, col_update_hyperparameters, remove_col = st.columns( - 3 - ) - with col_select_hyperparameters.popover( - label="Configure", icon="⚙️", use_container_width=True - ): - hyperparameters = self.select_hyperparameters() - - # Update the model if requested - do_update = col_update_hyperparameters.button( - label="Update hyperparameters", - key=f"update-detector-hyperparameters-{self.detector_id}", - use_container_width=True, - ) - if do_update: - do_update = update_object(self.detector, hyperparameters) - - # Add a button to remove the detecor - remove_detector = remove_col.button( - label="Remove detector", - icon="❌", - key=f"remove_detector_{self.detector_id}", - use_container_width=True, - ) - - with header: - show_small_header(self.detector) - - return do_update, remove_detector, old_detector, error_container - - def select_hyperparameters(self) -> dict[str, any]: - - # A dictionary for the selected hyperparameters - selected_hyperparameters = {} - - # Add the other parameters 
- for parameter, config in self.parameters.items(): - if parameter == "window_size_selection": - continue - - # Format the kwargs for the widget - input_widget_kwargs = { - key: value for key, value in config.items() if key != "type" - } - input_widget_kwargs["key"] = "-".join( - [parameter, str(self.detector_id), config["type"], "detector"] - ) - if "label" not in input_widget_kwargs: - input_widget_kwargs["label"] = parameter - - # For the window size, we add some additional logic - if parameter == "window_size": - # Select the way in which the window size is computed - col_select, col_value = st.columns(2) - _, selected_window_size = col_select.selectbox( - format_func=lambda t: t[0], - key=f"window_size_select_{self.detector_id}", - **self.parameters["window_size_selection"], - ) - try: - WindowSizeAttribute().raise_error_if_invalid( - selected_window_size, - "window_size", - self.detector.__class__.__name__, - ) - valid_window_size = True - except: - valid_window_size = False - - input_widget_kwargs["disabled"] = valid_window_size - - # Select the manual window size - with col_value: - selected_window_size_integer = input_widget_hyperparameter( - config["type"], **input_widget_kwargs - ) - - # Add the value to the dictionary of selected hyperparameters - if not valid_window_size: - selected_hyperparameters[parameter] = selected_window_size_integer - else: - selected_hyperparameters[parameter] = selected_window_size - - else: - selected_hyperparameters[parameter] = input_widget_hyperparameter( - config["type"], **input_widget_kwargs - ) - - return selected_hyperparameters - - def fit_predict(self, data_set: DataSet): - # Check for compatibility - if not data_set.is_compatible(self.detector): - - error_message = ( - f"Anomaly detector {self.detector} is not compatible with the data! 
" - ) - if self.detector.supervision == Supervision.SUPERVISED: - error_message += f"{self.detector.__class__.__name__} requires labeled training data with ground truth anomalies, but this is not available." - - elif self.detector.supervision == Supervision.SEMI_SUPERVISED: - error_message += f"{self.detector.__class__.__name__} requires normal training, but this is not available." - - self.decision_function_ = None - self.exception_ = Exception(error_message) - return - - try: - # Retrieve the correct data - if Supervision.SEMI_SUPERVISED in data_set.compatible_supervision(): - X_train, y_train = data_set.X_train, data_set.y_train - else: - X_train, y_train = data_set.X_test, data_set.y_test - - # Fit the detector - with st.spinner(f"Fitting {self.detector}"): - self.detector.fit(X_train, y_train) - - # Compute the decision scores - with st.spinner(f"Detecting anomalies with {self.detector}"): - self.decision_function_ = self.detector.decision_function( - data_set.X_test - ) - - # The exception is no longer relevant - if hasattr(self, "exception_"): - delattr(self, "exception_") - - except Exception as exception: - self.decision_function_ = None - self.exception_ = exception - - def get_code_lines(self, data_set: DataSet) -> list[str]: - - compatible_supervision = data_set.compatible_supervision() - if Supervision.SUPERVISED in compatible_supervision: - train_data = "X_train, y_train" - test_data = "X_test" - elif Supervision.SEMI_SUPERVISED in compatible_supervision: - train_data = "X_train" - test_data = "X_test" - else: - train_data = "X" - test_data = "X" - - module = self.detector.__module__ - if module.startswith("dtaianomaly.anomaly_detection."): - module = "dtaianomaly.anomaly_detection" - - return [ - f"from {module} import {self.detector.__class__.__name__}", - f"detector = {self.detector}.fit({train_data})", - f"y_pred = detector.predict_proba({test_data})", - ] - - def show_custom_visualizations(self): - # Show the custom visualizations - for 
visualizer in self.custom_visualizers: - with st.expander(label=visualizer.name, icon=visualizer.icon): - visualizer.show_custom_visualization(self.detector) - - def __str__(self) -> str: - return str(self.detector) - - -class StAnomalyDetectorLoader: - - default_detectors: list[type[BaseDetector]] - all_anomaly_detectors: list[(str, type[BaseDetector])] - all_custom_detector_visualizers: list[CustomDetectorVisualizer] - configuration: dict - - def __init__( - self, - all_anomaly_detectors: list[(str, type[BaseDetector])], - all_custom_detector_visualizers: list[(str, type[CustomDetectorVisualizer])], - configuration: dict, - ): - self.all_anomaly_detectors = [] - self.default_detectors = [] - for name, cls in all_anomaly_detectors: - if name not in configuration["exclude"]: - self.all_anomaly_detectors.append((name, cls)) - if name in configuration["default"] or name == configuration["default"]: - self.default_detectors.append(cls) - - self.all_anomaly_detectors = sorted( - self.all_anomaly_detectors, key=lambda x: x[0] - ) - - self.all_custom_detector_visualizers = [ - visualizer() for _, visualizer in all_custom_detector_visualizers - ] - self.configuration = configuration - - def select_anomaly_detector(self) -> StAnomalyDetector | None: - col_selection, col_button = st.columns([3, 1]) - - selected_detector = col_selection.selectbox( - label="Select anomaly detector", - options=self.all_anomaly_detectors, - index=None, - format_func=lambda t: t[0], - label_visibility="collapsed", - ) - - if selected_detector is not None: - show_class_summary(selected_detector[1]) - - button_clicked = col_button.button( - label="Load detector", use_container_width=True - ) - if button_clicked and selected_detector is not None: - return self._load_detector(selected_detector[1]) - - def select_default_anomaly_detector(self) -> list[StAnomalyDetector]: - return [self._load_detector(detector) for detector in self.default_detectors] - - def _load_detector( - self, anomaly_detector: 
type[BaseDetector] - ) -> "StAnomalyDetector": - return StAnomalyDetector( - detector=anomaly_detector, - custom_visualizers=[ - visualizer - for visualizer in self.all_custom_detector_visualizers - if visualizer.is_compatible(anomaly_detector) - ], - configuration=self.configuration, - ) - - -def load_parameters( - parameters, configuration: dict, detector: type[BaseDetector] -) -> dict: - all_parameters = { - key: value - for key, value in configuration["parameters-optional"].items() - if key in parameters - } - ok_parameters = {} - for key, value in all_parameters.items(): - if "type" in value: - ok_parameters[key] = value - elif detector.__name__ in value and "type" in value[detector.__name__]: - ok_parameters[key] = value[detector.__name__] - return ok_parameters diff --git a/dtaianomaly/in_time_ad/_st_DataLoader.py b/dtaianomaly/in_time_ad/_st_DataLoader.py deleted file mode 100644 index c9d9729..0000000 --- a/dtaianomaly/in_time_ad/_st_DataLoader.py +++ /dev/null @@ -1,175 +0,0 @@ -import tempfile - -import streamlit as st - -from dtaianomaly.anomaly_detection import Supervision -from dtaianomaly.data import CustomDataLoader, DataSet, LazyDataLoader, PathDataLoader -from dtaianomaly.in_time_ad._utils import show_class_summary -from dtaianomaly.in_time_ad._visualization import plot_data - - -class StDataLoader: - - data_set: DataSet - initial_index: int - data_loader: LazyDataLoader - all_data_loaders: list[(str, type[LazyDataLoader])] - - def __init__( - self, - all_data_loaders: list[(str, type[LazyDataLoader])], - configuration: dict, - ): - self.all_data_loaders = [] - for name, cls in all_data_loaders: - if name not in configuration["exclude"]: - self.all_data_loaders.append((name, cls)) - self.all_data_loaders = sorted(self.all_data_loaders, key=lambda x: x[0]) - - # Get the index of the default detector - self.initial_index = 0 - for i, (name, _) in enumerate(self.all_data_loaders): - if name == configuration["default"]: - self.initial_index = i - 
break - - # Set-up the default data and data loader - self.data_loader = self.all_data_loaders[self.initial_index][1]() - self.data_set = self.data_loader.load() - - self.configuration = configuration - - def select_data_loader(self) -> bool: - col_selection, col_configuration, col_button = st.columns([1, 0.5, 0.5]) - - # Select a data loader - _, data_loader_cls = col_selection.selectbox( - label="Select an anomaly detector", - options=self.all_data_loaders, - index=self.initial_index, - format_func=lambda t: t[0], - label_visibility="collapsed", - ) - - # Show the summary of the selected data loader - show_class_summary(data_loader_cls) - - # Configure the data loader - # At this point, we only care about a loaders that use a file and no other parameters (as other ones don't exist as of now) - parameters = {} - with col_configuration.popover( - "Configuration", icon="⚙️", use_container_width=True - ): - if issubclass(data_loader_cls, PathDataLoader): - uploaded_file = st.file_uploader("Upload a file") - if uploaded_file is not None: - file_name_path = uploaded_file.name - with tempfile.NamedTemporaryFile( - delete=False, suffix=file_name_path - ) as tmp_file: - tmp_file.write(uploaded_file.read()) - parameters["path"] = tmp_file.name - - if issubclass(data_loader_cls, CustomDataLoader): - train_data = st.file_uploader("Upload train data (optional)") - if train_data is not None: - file_name_train = train_data.name - with tempfile.NamedTemporaryFile( - delete=False, suffix=file_name_train - ) as tmp_file: - tmp_file.write(train_data.read()) - parameters["train_path"] = tmp_file.name - - test_data = st.file_uploader("Upload test data") - if test_data is not None: - file_name_test = test_data.name - with tempfile.NamedTemporaryFile( - delete=False, suffix=file_name_test - ) as tmp_file: - tmp_file.write(test_data.read()) - parameters["test_path"] = tmp_file.name - - # A button to actually load the data - button_clicked = col_button.button(label="Load data", 
use_container_width=True) - - if button_clicked: - try: - self.data_loader = data_loader_cls(**parameters) - self.data_set = self.data_loader.load() - if "path" in parameters: - # This is not recommended in practice, but for showing a nice file (without the temporary path) - self.data_loader.path = file_name_path - if "test_path" in parameters: - self.data_loader.test_path = file_name_test - if "train_path" in parameters: - self.data_loader.train_path = file_name_train - - except Exception as e: - if "missing 1 required positional argument: 'path'" in str(e): - st.error("Select a file before loading the data!", icon="🚨") - else: - st.error( - "Something went wrong, make sure you correctly configure the data loader.", - icon="🚨", - ) - - # Return whether the data was updated - return button_clicked - - def show_data(self): - figs = {"train": None, "test": None} - - # Create the figure for the train data - if self.data_set.X_train is not None: - figs["train"] = plot_data( - X=self.data_set.X_train, - y=self.data_set.y_train, - feature_names=self.data_set.feature_names, - time_steps=self.data_set.time_steps_train, - ) - - # Create figure for the train data - figs["test"] = plot_data( - X=self.data_set.X_test, - y=self.data_set.y_test, - feature_names=self.data_set.feature_names, - time_steps=self.data_set.time_steps_test, - ) - - # Add the figures to the streamlit-page - if self.data_set.X_train is not None: - tabs = st.tabs(["Train data", "Test data"]) - tabs[0].plotly_chart(figs["train"], key="loaded-data-train") - tabs[1].plotly_chart(figs["test"], key="loaded-data-test") - else: - st.plotly_chart(figs["test"], key="loaded-data-test") - - def get_code_lines(self) -> list[str]: - - module = self.data_loader.__module__ - if module.startswith("dtaianomaly.data."): - module = "dtaianomaly.data" - - code_lines = [ - f"from {module} import {self.data_loader.__class__.__name__}", - f"data_loader = {self.data_loader}", - "data_set = data_loader.load()", - ] - - # Load the 
data arrays - compatible_supervision = self.data_set.compatible_supervision() - if Supervision.SUPERVISED in compatible_supervision: - code_lines += [ - "X_train, y_train = data_set.X_train, data_set.y_train", - "X_test, y_test = data_set.X_test, data_set.y_test", - ] - elif Supervision.SEMI_SUPERVISED in compatible_supervision: - code_lines += [ - "X_train = data_set.X_train", - "X_test, y_test = data_set.X_test, data_set.y_test", - ] - else: - code_lines += ["X, y = data_set.X_test, data_set.y_test"] - - # Return the code lines - return code_lines diff --git a/dtaianomaly/in_time_ad/_st_QualitativeEvaluator.py b/dtaianomaly/in_time_ad/_st_QualitativeEvaluator.py deleted file mode 100644 index 777a6be..0000000 --- a/dtaianomaly/in_time_ad/_st_QualitativeEvaluator.py +++ /dev/null @@ -1,192 +0,0 @@ -from typing import List - -import streamlit as st - -from dtaianomaly.anomaly_detection import Supervision -from dtaianomaly.data import DataSet -from dtaianomaly.in_time_ad._st_AnomalyDetector import StAnomalyDetector -from dtaianomaly.in_time_ad._utils import error_no_detectors -from dtaianomaly.in_time_ad._visualization import ( - plot_anomaly_scores, - plot_detected_anomalies, -) -from dtaianomaly.preprocessing import MinMaxScaler -from dtaianomaly.thresholding import FixedCutoffThreshold - - -class StQualitativeEvaluator: - - @staticmethod - def plot_anomaly_scores( - data_set: DataSet, st_anomaly_detectors: List[StAnomalyDetector] - ) -> List[str]: - st.markdown( - """ - Below figure shows the time series again, as well as the predicted anomaly scores. The - higher the anomaly score, the more likely the observation is anomalous. A high-performing - anomaly detector should predict large anomaly scores for the true anomalies! 
- """ - ) - - # Retrieve the decision scores - decision_functions = StQualitativeEvaluator._get_decision_functions( - st_anomaly_detectors, "plot_anomaly_scores" - ) - if decision_functions is None: - return [] - - # Plot the decision functions - st.plotly_chart( - plot_anomaly_scores( - X=data_set.X_test, - y=data_set.y_test, - feature_names=data_set.feature_names, - time_steps=data_set.time_steps_test, - anomaly_scores=decision_functions, - ) - ) - - return [ - "from dtaianomaly.visualization import plot_anomaly_scores", - f"plot_anomaly_scores({StQualitativeEvaluator._get_used_data(data_set)}, y_pred)", - ] - - @staticmethod - def plot_detected_anomalies( - data_set: DataSet, st_anomaly_detectors: List[StAnomalyDetector] - ) -> List[str]: - - st.markdown( - """ - To see which observations are identified as anomalies, we must convert the continuous - anomaly scores into binary events: anomaly or not? Various strategies for this exist, - of which arguably the easiest one is setting a threshold. First select one of the anomaly - detectors on the left, then select a threshold, and see which points are detected as an - anomaly! - - Instead of just showing which observations are flagged as an anomaly, we divide the - observations into three categories: - - **True Positives (TP):** the predicted anomalies that are actually anomalies. - - **False Positives (FP):** the predicted anomalies that are no real anomalies but normal! - - **False Negatives (FN):** the real anomalies that were not detected! 
- """ - ) - - # A container for the ordering - container = st.container() - - # Retrieve the decision scores - decision_functions = StQualitativeEvaluator._get_decision_functions( - st_anomaly_detectors, "plot_detected_anomalies" - ) - if decision_functions is None: - return [] - - with container: - - # Configure the anomaly detector - col_detector, col_threshold = st.columns(2) - selected_detector = col_detector.selectbox( - label="Anomaly detector", - options=decision_functions, - label_visibility="collapsed", - ) - - # Configure the cutoff - min_value = decision_functions[selected_detector].min() - max_value = decision_functions[selected_detector].max() - cutoff = col_threshold.slider( - label="Threshold", - min_value=min_value, - max_value=max_value, - value=0.9 * (max_value - min_value) + min_value, - step=0.01, - label_visibility="collapsed", - ) - - # Compute the binary decisions - y_pred = FixedCutoffThreshold(cutoff=cutoff).threshold( - decision_functions[selected_detector] - ) - - # Plot the detected anomalies - st.plotly_chart( - plot_detected_anomalies( - X=data_set.X_test, - y=data_set.y_test, - y_pred=y_pred, - feature_names=data_set.feature_names, - time_steps=data_set.time_steps_test, - ) - ) - - # Load the data arrays - compatible_supervision = data_set.compatible_supervision() - if Supervision.SEMI_SUPERVISED in compatible_supervision: - ground_truth = "y_test" - else: - ground_truth = "y" - - return [ - "from dtaianomaly.thresholding import FixedCutoff", - "from dtaianomaly.visualization import plot_time_series_anomalies", - f"y_pred_bin = FixedCutoff(cutoff={cutoff}).threshold(y_pred)", - f"plot_time_series_anomalies({StQualitativeEvaluator._get_used_data(data_set)}, {ground_truth}, y_pred_bin)", - ] - - @staticmethod - def _get_used_data(data_set: DataSet) -> str: - if Supervision.SEMI_SUPERVISED in data_set.compatible_supervision(): - return "X_test, y_test" - else: - return "X, y" - - @staticmethod - def _get_decision_functions( - 
st_anomaly_detectors: List[StAnomalyDetector], pills_key: str - ): - - # Retrieve the raw anomaly scores - decision_functions = { - str(anomaly_detector.detector): anomaly_detector.decision_function_ - for anomaly_detector in st_anomaly_detectors - if hasattr(anomaly_detector, "decision_function_") - and anomaly_detector.decision_function_ is not None - } - if len(decision_functions) == 0: - error_no_detectors() - return - - # Normalize the anomaly scores - with st.expander("Scaling anomaly scores", icon="🛠️"): - st.markdown( - """ - The meaning of the predicted anomaly scores depend on the used - anomaly detectors. For example, the anomaly scores of distance-based - methods like Matrix Profile correspond to distances, while the - anomaly scores of Isolation Forest is related to the depth of the - the trees. Because of this, it might often be difficult to compare - the raw anomaly scores of multiple detectors directly. To cope - with this, we offer methods to scale the anomaly scores and make - them comparable. The following approaches are available: - - - **Raw decision scores.** Do not apply any scaling and show the - raw predicted anomaly scores. - - **Min-max scaled.** Scale the anomaly scores linearly such that - they fall in the interval [0, 1]. 
- """ - ) - normalization_technique = st.pills( - label="Anomaly scores normalization", - options=["Raw decision scores", "Min-max scaled"], - default="Min-max scaled", - label_visibility="collapsed", - key=pills_key, - ) - if normalization_technique == "Min-max scaled": - decision_functions = { - name: MinMaxScaler().fit_transform(decision_function)[0] - for name, decision_function in decision_functions.items() - } - - return decision_functions diff --git a/dtaianomaly/in_time_ad/_st_QuantitativeEvaluator.py b/dtaianomaly/in_time_ad/_st_QuantitativeEvaluator.py deleted file mode 100644 index 66a8174..0000000 --- a/dtaianomaly/in_time_ad/_st_QuantitativeEvaluator.py +++ /dev/null @@ -1,329 +0,0 @@ -import copy -from collections import defaultdict - -import numpy as np -import pandas as pd -import plotly.express as px -import streamlit as st - -from dtaianomaly.anomaly_detection import Supervision -from dtaianomaly.data import DataSet -from dtaianomaly.evaluation import Metric, ProbaMetric, ThresholdMetric -from dtaianomaly.in_time_ad._st_AnomalyDetector import StAnomalyDetector -from dtaianomaly.in_time_ad._utils import ( - get_parameters, - input_widget_hyperparameter, - show_class_summary, - show_small_header, - update_object, -) -from dtaianomaly.in_time_ad._visualization import get_detector_color_map -from dtaianomaly.preprocessing import MinMaxScaler -from dtaianomaly.thresholding import FixedCutoffThreshold - - -class StMetric: - __METRIC_COUNTER: int = 0 - metric_id: int - metric: Metric - parameters: dict - thresholding: FixedCutoffThreshold | None - - def __init__(self, metric: type[Metric], configuration: dict): - self.metric_id = StMetric.__METRIC_COUNTER - StMetric.__METRIC_COUNTER += 1 - - # Initialize the parameters - parameters, required_parameters = get_parameters(metric) - self.parameters = { - key: value - for key, value in configuration["parameters-optional"].items() - if key in parameters - } - - # Initialize the metric - self.metric = metric( - 
**{ - key: value - for key, value in configuration["parameters-required"].items() - if key in required_parameters - } - ) - - # Initialize the thresholding if no proba metric is given - if not issubclass(metric, ProbaMetric): - self.thresholding = FixedCutoffThreshold( - configuration["parameters-required"]["cutoff"] - ) - self.parameters["cutoff"] = configuration["parameters-optional"]["cutoff"] - else: - self.thresholding = None - - def show_metric(self) -> (bool, bool, "StMetric"): - - old_metric = copy.deepcopy(self) - - # Save some space for the header - header = st.container() - - # Show an explanation of the detector - show_class_summary(self.metric) - - # Select the hyperparameters, and update the detector if necessary - col_select_parameters, col_update_parameters, col_remove = st.columns(3) - with col_select_parameters.popover( - label="Configure", icon=":material/settings:", use_container_width=True - ): - parameters = self.select_parameters() - - # Update the metric if requested - do_update = col_update_parameters.button( - label="Update parameters", - key=f"update-metric-parameters-{self.metric_id}", - use_container_width=True, - ) - if do_update: - thresholding_parameters = {} - if "cutoff" in parameters: - thresholding_parameters["cutoff"] = parameters.pop("cutoff") - do_update = update_object(self.metric, parameters) or update_object( - self.thresholding, thresholding_parameters - ) - - # Add a button to remove the detecor - remove_metric = col_remove.button( - label="Remove metric", - icon="❌", - key=f"remove_metric_{self.metric_id}", - use_container_width=True, - ) - - with header: - show_small_header(str(self)) - - return do_update, remove_metric, old_metric - - def select_parameters(self) -> dict[str, any]: - selected_parameters = {} - - # Add the other parameters - for parameter, config in self.parameters.items(): - - # Format the kwargs for the widget - input_widget_kwargs = { - key: value for key, value in config.items() if key != "type" - } 
- input_widget_kwargs["key"] = "-".join( - [parameter, str(self.metric_id), config["type"], "metric"] - ) - if "label" not in input_widget_kwargs: - input_widget_kwargs["label"] = parameter - - selected_parameters[parameter] = input_widget_hyperparameter( - config["type"], **input_widget_kwargs - ) - - return selected_parameters - - def compute_score(self, y_true: np.array, y_pred: np.array) -> float: - if y_pred is None: # If no scores are available - return np.nan - - y_pred = MinMaxScaler().fit_transform(y_pred)[0] - if self.thresholding is not None: - return ThresholdMetric( - thresholder=self.thresholding, metric=self.metric - ).compute(y_true, y_pred) - else: - return self.metric.compute(y_true, y_pred) - - def __str__(self): - if self.thresholding is None: - return str(self.metric) - else: - return f"{self.metric} [cutoff={self.thresholding.cutoff}]" - - def get_code_lines(self, data_set: DataSet) -> list[str]: - # Load the data arrays - compatible_supervision = data_set.compatible_supervision() - if Supervision.SEMI_SUPERVISED in compatible_supervision: - ground_truth = "y_test" - else: - ground_truth = "y" - - if self.metric.__module__.startswith("dtaianomaly.evaluation."): - imports = [ - f"from dtaianomaly.evaluation import {self.metric.__class__.__name__}, ThresholdMetric" - ] - else: - imports = [ - f"from dtaianomaly.evaluation import ThresholdMetric", - f"from {self.metric.__module__} import {self.metric.__class__.__name__}", - ] - - # Optional import of thresholding - if isinstance(self.thresholding, FixedCutoffThreshold): - return ( - [ - f"from dtaianomaly.thresholding import {self.thresholding.__class__.__name__}", - ] - + imports - + [ - f"metric = ThresholdMetric(", - f" thresholder={self.thresholding}", - f" metric={self.metric}", - f")", - "score = metric.compute(y_test, y_pred)", - ] - ) - else: - - module = self.metric.__module__ - if module.startswith("dtaianomaly.evaluation."): - module = "dtaianomaly.evaluation" - - return [ - f"from 
{module} import {self.metric.__class__.__name__}", - f"metric = {self.metric}", - f"score = metric.compute({ground_truth}, y_pred)", - ] - - -class StQualitativeEvaluationLoader: - - default_metrics: list[type[Metric]] - all_metrics: list[(str, type[Metric])] - configuration: dict - - def __init__(self, all_metrics: list[(str, type[Metric])], configuration: dict): - self.all_metrics = [] - self.default_metrics = [] - for name, cls in all_metrics: - if name not in configuration["exclude"]: - self.all_metrics.append((name, cls)) - if name in configuration["default"] or name == configuration["default"]: - self.default_metrics.append(cls) - self.all_metrics = sorted(self.all_metrics, key=lambda x: x[0]) - - self.configuration = configuration - - def select_metric(self) -> StMetric | None: - col_selection, col_button = st.columns([3, 1]) - selected_metric = col_selection.selectbox( - label="Select anomaly detector", - options=self.all_metrics, - index=None, - format_func=lambda t: t[0], - label_visibility="collapsed", - ) - - if selected_metric is not None: - show_class_summary(selected_metric[1]) - - button_clicked = col_button.button( - label="Load metric", use_container_width=True - ) - - if button_clicked and selected_metric is not None: - return self._load_metric(selected_metric[1]) - - return None - - def select_default_metrics(self) -> list[StMetric]: - return [self._load_metric(metric) for metric in self.default_metrics] - - def _load_metric(self, metric: type[Metric]) -> StMetric: - return StMetric( - metric=metric, - configuration=self.configuration, - ) - - -class StEvaluationScores: - detectors: dict[int, str] - metrics: dict[int, str] - scores: pd.DataFrame - - def __init__( - self, - detectors: list[StAnomalyDetector], - metrics: list[StMetric], - y_test: np.array, - ): - self.detectors = {} - self.metrics = {} - self.scores = pd.DataFrame( - index=[detector.detector_id for detector in detectors], - columns=[metric.metric_id for metric in metrics], - ) - 
for detector in detectors: - for metric in metrics: - self.add(detector, metric, y_test) - - def show_scores(self) -> None: - - # Identify duplicated metrics and decide which columns to drop - metric_ids = defaultdict(list) - for metric_id, metric_name in self.metrics.items(): - metric_ids[metric_name].append(metric_id) - to_drop = [] - for metric, ids in metric_ids.items(): - if len(ids) > 1: - to_drop.extend(ids[1:]) - st.warning( - f"Metric '{metric}' is defined {len(ids)} times. The evaluation will only be shown once." - ) - formatted_scores = self.scores.drop(columns=to_drop).rename( - columns=self.metrics, index=self.detectors - ) - - # Define a color map - color_map = get_detector_color_map(formatted_scores.index) - - # Show the scores in a bar-plot - df_melted = formatted_scores.T.melt( - ignore_index=False, var_name="Metric", value_name="value" - ) - df_melted["x"] = df_melted.index - - fig = px.bar( - df_melted, - x="x", - y="value", - color="Metric", - barmode="group", - color_discrete_map=color_map, - ) - fig.update_layout( - height=300, xaxis_title=None, yaxis_title=None, legend_title_text=None - ) - st.plotly_chart(fig) - - # Show the raw scores - st.dataframe(formatted_scores) - - # Download the - st.download_button( - label="Download the scores as a csv-file", - data=formatted_scores.to_csv().encode("utf-8"), - file_name="scores.csv", - mime="text/csv", - icon=":material/download:", - ) - - def add( - self, detector: StAnomalyDetector, metric: StMetric, y_test: np.array - ) -> None: - self.scores.loc[detector.detector_id, metric.metric_id] = metric.compute_score( - y_test, detector.decision_function_ - ) - self.detectors[detector.detector_id] = str(detector) - self.metrics[metric.metric_id] = str(metric) - - def remove_detector(self, detector: StAnomalyDetector) -> None: - self.scores = self.scores.drop(index=detector.detector_id) - self.detectors.pop(detector.detector_id) - - def remove_metric(self, metric: StMetric) -> None: - self.scores = 
self.scores.drop(columns=metric.metric_id) - self.metrics.pop(metric.metric_id) diff --git a/dtaianomaly/in_time_ad/_utils.py b/dtaianomaly/in_time_ad/_utils.py deleted file mode 100644 index 416497a..0000000 --- a/dtaianomaly/in_time_ad/_utils.py +++ /dev/null @@ -1,157 +0,0 @@ -import ast -import importlib -import inspect - -import streamlit as st - - -def error_no_detectors(): - st.error("There are no valid anomaly detectors selected", icon="🚨") - - -def error_no_metrics(): - st.error("There are no evaluation metrics selected", icon="🚨") - - -def write_code_lines(lines, use_expander: bool = True): - if len(lines) == 0: - return - if use_expander: - with st.expander("Show code for ``dtaianomaly``", icon="💻"): - st.code(body="\n".join(lines), language="python", line_numbers=True) - else: - st.code(body="\n".join(lines), language="python", line_numbers=True) - - -def get_class_summary(cls) -> str | None: - doc = cls.__doc__ - if not doc: - return None - # Split by blank lines to get the first paragraph - paragraphs = doc.split("\n\n") - if len(paragraphs) < 2: - return None - return paragraphs[1] if paragraphs else doc - - -def show_class_summary(cls) -> None: - summary = get_class_summary(cls) - if summary is not None: - st.markdown(summary) - - -def show_header(text: str) -> None: - background = "#F8F9FB" - highlight = "#158237" - st.markdown( - f""" -
-

{text}

-
- """, - unsafe_allow_html=True, - ) - - -def show_small_header(o) -> None: - st.markdown(f"###### {o}") - - -def show_section_description(s) -> None: - st.markdown(s) - - -def get_parameters(cls): - signature = inspect.signature(cls.__init__) - params = [] - required_params = [] - - for name, param in signature.parameters.items(): - - # skip 'self' and 'kwargs' - if name == "self" or name == "kwargs": - continue - - # Add the parameter - params.append(name) - - # Check if the parameter is required - if param.default is inspect.Parameter.empty and param.kind in ( - inspect.Parameter.POSITIONAL_OR_KEYWORD, - inspect.Parameter.KEYWORD_ONLY, - inspect.Parameter.POSITIONAL_ONLY, - ): - required_params.append(name) - - return params, required_params - - -def remove_set_values(o: object, hyperparameters: dict[str, any]) -> dict[str, any]: - """ - Given a dictionary which maps attributes to values, return a new dictionary which only contains the - items that have actually a different value from the one set in the object. - """ - return { - param: value - for param, value in hyperparameters.items() - if getattr(o, param) != value - } - - -def update_object(o: object, hyperparameters: dict[str, any]) -> bool: - """ - Given a dictionary which maps attributes to values, return a new dictionary which only contains the - items that have actually a different value from the one set in the object. 
- """ - updated = False - for param, value in hyperparameters.items(): - recursive_params = param.split(".") - inner_object = o - for p in recursive_params[:-1]: - inner_object = getattr(inner_object, p) - if getattr(inner_object, recursive_params[-1]) != value: - updated = True - setattr(inner_object, param, value) - return updated - - -def input_widget_hyperparameter(widget_type: str, **kwargs) -> any: - if widget_type == "number_input": - return st.number_input(**kwargs) - elif widget_type == "select_slider": - return st.select_slider(**kwargs) - elif widget_type == "toggle": - return st.toggle(**kwargs) - elif widget_type == "checkbox": - return st.checkbox(**kwargs) - elif widget_type == "pills": - return st.pills(**kwargs) - elif widget_type == "segmented_control": - return st.segmented_control(**kwargs) - elif widget_type == "selectbox": - return st.selectbox(**kwargs) - elif widget_type == "slider": - return st.slider(**kwargs) - - -def load_custom_models(custom_models_str: str) -> dict[str, list[(str, type)]]: - - def _load_cls(class_path: str) -> (str, type): - module_path, class_name = class_path.rsplit(".", 1) - module = importlib.import_module(module_path) - return class_name, getattr(module, class_name) - - custom_models = ast.literal_eval(custom_models_str) - return { - "data_loaders": [ - _load_cls(data_loader) for data_loader in custom_models["data_loaders"] - ], - "anomaly_detectors": [ - _load_cls(anomaly_detector) - for anomaly_detector in custom_models["anomaly_detectors"] - ], - "metrics": [_load_cls(metric) for metric in custom_models["metrics"]], - "custom_visualizers": [ - _load_cls(visualizer) for visualizer in custom_models["custom_visualizers"] - ], - } diff --git a/dtaianomaly/in_time_ad/_visualization.py b/dtaianomaly/in_time_ad/_visualization.py deleted file mode 100644 index 5e2b13b..0000000 --- a/dtaianomaly/in_time_ad/_visualization.py +++ /dev/null @@ -1,191 +0,0 @@ -from typing import Dict, List, Optional - -import numpy as np 
-import plotly.express as px -import plotly.graph_objects as go -from plotly.subplots import make_subplots - -from dtaianomaly.utils import get_dimension, is_univariate, make_intervals - - -def plot_data( - X: np.ndarray, - y: Optional[np.array], - feature_names: Optional[List[str]], - time_steps: Optional[np.array], - fig: go.Figure = None, - row: int = 1, - col: int = 1, -) -> go.Figure: - - X = X.squeeze() - - if fig is None: - fig = make_subplots(rows=1, cols=1) - - # Format the time steps - if time_steps is None: - time_steps = np.arange(X.shape[0]) - - # Format the feature names - if feature_names is None: - feature_names = [f"Feature {i+1}" for i in range(get_dimension(X))] - - # Plot the data - if is_univariate(X): - fig.add_trace( - go.Scatter(x=time_steps, y=X, mode="lines", name=feature_names[0]), - row=row, - col=col, - ) - else: - for d in range(get_dimension(X)): - fig.add_trace( - go.Scatter( - x=time_steps, y=X[:, d], mode="lines", name=feature_names[d] - ), - row=row, - col=col, - ) - - # Plot the labels - if y is not None: - starts, ends = make_intervals(y) - for s, e in zip(starts, ends): - fig.add_vrect( - x0=s, - x1=e, - line_width=3, - line_color="red", - fillcolor="red", - opacity=0.2, - row=row, - col=col, - ) - - # Format the figure - fig.update_layout( - height=300, - xaxis_title="Time", - margin=dict(l=0, r=0, t=0, b=0), - ) - - # Return the figure - return fig - - -def plot_anomaly_scores( - X: np.ndarray, - y: Optional[np.array], - feature_names: Optional[List[str]], - time_steps: Optional[np.array], - anomaly_scores: Dict[str, np.array], -) -> go.Figure: - - # Initialize the figure - fig = make_subplots(rows=2, cols=1, shared_xaxes=True) - - # Plot the data - plot_data( - X=X, - y=y, - feature_names=feature_names, - time_steps=time_steps, - fig=fig, - row=1, - col=1, - ) - - # Plot the anomaly scores - cmap = get_detector_color_map(anomaly_scores.keys()) - for name, score in anomaly_scores.items(): - fig.add_trace( - go.Scatter( - 
x=time_steps, - y=score, - mode="lines", - name=name, - line=dict(color=cmap[name]), - ), - row=2, - col=1, - ) - - return fig - - -def plot_detected_anomalies( - X: np.ndarray, - y: np.array, - y_pred: np.array, - feature_names: Optional[List[str]], - time_steps: Optional[np.array], -): - - # Format the time steps - if time_steps is None: - time_steps = np.arange(X.shape[0]) - - # Plot the data already - fig = plot_data( - X=X, - y=None, # No need to mark the ground truth anomaly directly - feature_names=feature_names, - time_steps=time_steps, - ) - - # Handle both multivariate an univariate time series (creat new variable to avoid modifying the array) - if is_univariate(X): - X_ = np.reshape(X, shape=(X.shape[0], 1)) - else: - X_ = X - - for d in range(get_dimension(X)): - # Plot the true positives - true_positive = (y == 1) & (y_pred == 1) - fig.add_trace( - go.Scatter( - x=time_steps[true_positive], - y=X_[true_positive, d], - mode="markers", - name=f"TP ({true_positive.sum()})", - marker={"color": "green"}, - showlegend=(d == 0), - ) - ) - - # Plot the false positives - false_positive = (y == 0) & (y_pred == 1) - fig.add_trace( - go.Scatter( - x=time_steps[false_positive], - y=X_[false_positive, d], - mode="markers", - name=f"FP ({false_positive.sum()})", - marker={"color": "red"}, - showlegend=(d == 0), - ) - ) - - # Plot the false negatives - false_negative = (y == 1) & (y_pred == 0) - fig.add_trace( - go.Scatter( - x=time_steps[false_negative], - y=X_[false_negative, d], - mode="markers", - name=f"FN ({false_negative.sum()})", - marker={"color": "orange"}, - showlegend=(d == 0), - ) - ) - - return fig - - -def get_detector_color_map(detectors): - detectors = sorted(detectors) - colors = px.colors.qualitative.Safe - return { - detector: colors[j % len(detectors)] for j, detector in enumerate(detectors) - } diff --git a/dtaianomaly/in_time_ad/custom_visualizers/CentroidVisualizer.py b/dtaianomaly/in_time_ad/custom_visualizers/CentroidVisualizer.py deleted 
file mode 100644 index be476e9..0000000 --- a/dtaianomaly/in_time_ad/custom_visualizers/CentroidVisualizer.py +++ /dev/null @@ -1,94 +0,0 @@ -import numpy as np -import plotly.graph_objects as go -import streamlit as st -from plotly.subplots import make_subplots - -from dtaianomaly.anomaly_detection import ( - BaseDetector, - ClusterBasedLocalOutlierFactor, - KMeansAnomalyDetector, - KShapeAnomalyDetector, -) -from dtaianomaly.in_time_ad._CustomDetectorVisualizer import CustomDetectorVisualizer - - -class CentroidVisualizer(CustomDetectorVisualizer): - - def __init__(self): - super().__init__(name="Show the centroids", icon="📍") - - def is_compatible(self, detector_type: type[BaseDetector]) -> bool: - return ( - detector_type == KMeansAnomalyDetector - or detector_type == KShapeAnomalyDetector - or detector_type == ClusterBasedLocalOutlierFactor - ) - - def show_custom_visualization(self, detector: BaseDetector) -> None: - - st.markdown( - "Below, you can see the centroids of the different clusters. These " - "represent the different normal behaviors. If a subsequence has a " - "large distance to all these subsequences, then it is different from" - "the normal behaviors, and consequently an anomaly." 
- ) - - centroids = self._get_centroids(detector) - cols = st.columns([len(c) for c in centroids.values()]) - for col, (title, separate_centroids) in zip(cols, centroids.items()): - - # Create subplots: one column per time series - fig = make_subplots( - rows=1, - cols=len(separate_centroids), - shared_yaxes=True, - shared_xaxes=True, - ) - # Add each time series to its own column - for i, centroid in enumerate(separate_centroids): - if len(centroid.shape) > 1: - for j in range(centroid.shape[1]): - fig.add_trace(go.Scatter(y=centroid[:, j]), row=1, col=i + 1) - else: - fig.add_trace(go.Scatter(y=centroid), row=1, col=i + 1) - - # Layout options - fig.update_layout( - title_text=title, - height=150, - xaxis_title="Time", - margin=dict(l=0, r=0, t=20, b=0), - showlegend=False, - ) - - # Show the data - col.plotly_chart(fig) - - @staticmethod - def _get_centroids(detector: BaseDetector) -> dict[str, list[np.array]]: - if isinstance(detector, KShapeAnomalyDetector): - return {"The centroids": detector.centroids_} - elif isinstance(detector, KMeansAnomalyDetector): - return { - "The centroids": [ - detector.k_means_.cluster_centers_[i, :].reshape( - detector.window_size_, -1 - ) - for i in range(detector.k_means_.cluster_centers_.shape[0]) - ] - } - elif isinstance(detector, ClusterBasedLocalOutlierFactor): - return { - "The centroids of large clusters": [ - detector.pyod_detector_.cluster_centers_[i, :].reshape( - detector.window_size_, -1 - ) - for i in detector.pyod_detector_.large_cluster_labels_ - ], - "The centroids of small clusters": [ - detector.pyod_detector_.cluster_centers_[i, :].reshape( - detector.window_size_, -1 - ) - for i in detector.pyod_detector_.small_cluster_labels_ - ], - } diff --git a/dtaianomaly/in_time_ad/custom_visualizers/NeuralNetVisualizer.py b/dtaianomaly/in_time_ad/custom_visualizers/NeuralNetVisualizer.py deleted file mode 100644 index 543bce8..0000000 --- a/dtaianomaly/in_time_ad/custom_visualizers/NeuralNetVisualizer.py +++ /dev/null 
@@ -1,21 +0,0 @@ -import streamlit as st - -from dtaianomaly.anomaly_detection import BaseDetector, BaseNeuralDetector -from dtaianomaly.in_time_ad._CustomDetectorVisualizer import CustomDetectorVisualizer - - -class NeuralNetVisualizer(CustomDetectorVisualizer): - - def __init__(self): - super().__init__(name="Show the network architecture", icon="🔗") - - def is_compatible(self, detector_type: type[BaseDetector]) -> bool: - return issubclass(detector_type, BaseNeuralDetector) - - def show_custom_visualization(self, detector: BaseDetector) -> None: - # Show the data - st.markdown( - "Below, you can see a simple overview of the network architecture, to get an " - "idea of the different layers and how they are connected to each other." - ) - st.write(detector.neural_network_) diff --git a/dtaianomaly/in_time_ad/custom_visualizers/__init__.py b/dtaianomaly/in_time_ad/custom_visualizers/__init__.py deleted file mode 100644 index 77a3835..0000000 --- a/dtaianomaly/in_time_ad/custom_visualizers/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .CentroidVisualizer import CentroidVisualizer -from .NeuralNetVisualizer import NeuralNetVisualizer - -__all__ = ["CentroidVisualizer", "NeuralNetVisualizer"] diff --git a/dtaianomaly/utils/_discovery.py b/dtaianomaly/utils/_discovery.py index 92d0a4c..dbd5795 100644 --- a/dtaianomaly/utils/_discovery.py +++ b/dtaianomaly/utils/_discovery.py @@ -92,7 +92,6 @@ def _has_valid_type(cls: type): from dtaianomaly.anomaly_detection import BaseDetector from dtaianomaly.data import LazyDataLoader from dtaianomaly.evaluation import Metric - from dtaianomaly.in_time_ad import CustomDetectorVisualizer from dtaianomaly.preprocessing import Preprocessor from dtaianomaly.thresholding import Thresholding @@ -104,7 +103,6 @@ def _has_valid_type(cls: type): Metric, Preprocessor, Thresholding, - CustomDetectorVisualizer, ] ) @@ -115,7 +113,6 @@ def _filter_types( from dtaianomaly.anomaly_detection import BaseDetector from dtaianomaly.data import 
LazyDataLoader from dtaianomaly.evaluation import BinaryMetric, Metric, ProbaMetric - from dtaianomaly.in_time_ad import CustomDetectorVisualizer from dtaianomaly.preprocessing import Preprocessor from dtaianomaly.thresholding import Thresholding @@ -127,7 +124,6 @@ def _filter_types( "proba-metric": ProbaMetric, "preprocessor": Preprocessor, "thresholder": Thresholding, - "custom-demonstrator-visualizers": CustomDetectorVisualizer, } if not isinstance(types, list): diff --git a/pyproject.toml b/pyproject.toml index 6f81fc0..c9e93c5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -74,9 +74,7 @@ all = [ # All the optional dependencies "isort", 'tqdm', "sphinxcontrib-bibtex", - 'tqdm', - "streamlit", - "plotly" + 'tqdm' ] tests = [ # For testing "pytest", @@ -109,7 +107,3 @@ moment = [ time_moe = [ "transformers==4.40.1" ] -in_time_ad = [ - "streamlit", - "plotly" -] diff --git a/tests/utils/test_discovery.py b/tests/utils/test_discovery.py index a4abe0c..8365b6b 100644 --- a/tests/utils/test_discovery.py +++ b/tests/utils/test_discovery.py @@ -1,7 +1,6 @@ import pytest from dtaianomaly import anomaly_detection, data, evaluation, preprocessing, thresholding -from dtaianomaly.in_time_ad import CustomDetectorVisualizer, custom_visualizers from dtaianomaly.utils import all_classes anomaly_detectors = [ @@ -117,18 +116,7 @@ thresholding.ContaminationRateThreshold, thresholding.TopNThreshold, ] -custom_demonstrator_visualizers = [ - custom_visualizers.CentroidVisualizer, - custom_visualizers.NeuralNetVisualizer, -] -everything = ( - anomaly_detectors - + data_loaders - + metrics - + preprocessors - + thresholders - + custom_demonstrator_visualizers -) +everything = anomaly_detectors + data_loaders + metrics + preprocessors + thresholders @pytest.mark.parametrize("return_names", [True, False]) @@ -143,7 +131,6 @@ (evaluation.BinaryMetric, binary_metrics), (preprocessing.Preprocessor, preprocessors), (thresholding.Thresholding, thresholders), - 
(CustomDetectorVisualizer, custom_demonstrator_visualizers), ], ) class TestAllClasses: diff --git a/tests/workflow/test_workflow_from_config.py b/tests/workflow/test_workflow_from_config.py index 9f34adf..dc5dcb3 100644 --- a/tests/workflow/test_workflow_from_config.py +++ b/tests/workflow/test_workflow_from_config.py @@ -11,7 +11,6 @@ anomaly_detection, data, evaluation, - in_time_ad, preprocessing, thresholding, utils, @@ -24,9 +23,7 @@ ) DATA_PATH = f"{pathlib.Path(__file__).parent.parent.parent}/data" -ALL_CLASSES = utils.all_classes( - return_names=False, exclude_types=in_time_ad.CustomDetectorVisualizer -) +ALL_CLASSES = utils.all_classes(return_names=False) @pytest.fixture