
Commit 3f1e7e4

Author jpaulrajredhat committed: otel-iceberg integration

2 parents f044520 + cfd6d10, commit 3f1e7e4


50 files changed, +9985 −0 lines changed

.github/workflows/main.yaml

Lines changed: 108 additions & 0 deletions
@@ -0,0 +1,108 @@
name: Run Podman Compose (Build & Deploy)

on:
  push:
    branches: [ main ]
  pull_request:

jobs:
  podman-compose:
    runs-on: ubuntu-latest

    steps:
      # Step 1: Checkout repository
      - name: Checkout code
        uses: actions/checkout@v4

      # Step 2: Install Podman & podman-compose
      - name: Install Podman
        run: |
          sudo apt-get update -y
          sudo apt-get install -y podman python3-pip
          pip install podman-compose
          echo "Podman version:"
          podman --version

      # Step 3: Configure Podman storage correctly
      - name: Configure Podman storage
        run: |
          echo "Configuring Podman storage..."

          # Podman default storage root under runner home
          STORAGE_ROOT="/home/runner/work/_containers"

          sudo mkdir -p $STORAGE_ROOT
          sudo mkdir -p /etc/containers

          # Create storage.conf if missing
          if [ ! -f /etc/containers/storage.conf ]; then
            echo "[storage]" | sudo tee /etc/containers/storage.conf
          fi

          # Apply correct graphroot path
          sudo sed -i '/graphroot/d' /etc/containers/storage.conf
          echo "graphroot=\"$STORAGE_ROOT\"" | sudo tee -a /etc/containers/storage.conf

          echo "Final /etc/containers/storage.conf:"
          cat /etc/containers/storage.conf

          # Initialize storage
          podman system migrate

      # Step 4: Ensure Podman network exists
      - name: Ensure Podman network
        run: |
          NETWORK_NAME="anomaly-network"
          if ! podman network exists "$NETWORK_NAME"; then
            echo "Creating network $NETWORK_NAME..."
            podman network create "$NETWORK_NAME"
          else
            echo "Network $NETWORK_NAME already exists."
          fi

      # Step 5: Build images using Podman Compose
      - name: Build Podman images
        working-directory: demo
        run: |
          echo "Building images using podman-compose..."
          podman-compose -f docker-compose.yaml build
          echo "Build completed."

      # Step 6: Start containers
      - name: Run Podman Compose
        working-directory: demo
        run: |
          echo "Starting Podman Compose containers..."
          podman-compose -f docker-compose.yaml up -d
          echo "Containers running."
          podman ps

      # Step 7: Verify Podman environment
      - name: Verify Podman environment
        run: |
          echo "Networks:"
          podman network ls

          echo "Containers:"
          podman ps -a

      # Step 8: Show logs of each container
      - name: Show Container Logs
        working-directory: demo
        run: |
          echo "Displaying logs for all containers..."
          podman ps --format "{{.Names}}" | while read -r c; do
            echo "----------------------------"
            echo "Logs for: $c"
            echo "----------------------------"
            podman logs "$c" || echo "No logs for $c"
          done

      # Step 9: Cleanup (always runs)
      - name: Stop and Clean Up
        if: always()
        working-directory: demo
        run: |
          echo "Stopping Podman Compose..."
          podman-compose -f docker-compose.yaml down
          echo "Cleanup completed."

AIOpsOverview.png

56 KB

demo/README.md

Lines changed: 144 additions & 0 deletions
@@ -0,0 +1,144 @@
# AIOPS Demo - Anomaly Detection & Remedy Generation using LLM

A multi-stage pipeline for detecting anomalies in CPU and memory usage of edge devices or Kubernetes clusters and generating intelligent remedy content using AI/ML and LLMs.

# Overview

This demo provides an end-to-end pipeline that simulates resource usage, detects anomalies, and generates actionable remedies:

1. Simulation: Generates synthetic CPU and memory consumption for edge devices or Kubernetes clusters.

2. Anomaly Detection: Uses a Random Forest model to identify anomalies.

3. Remedy Generation: Uses an LLM-Faiss system to produce context-aware remediation steps.

The pipeline is fully asynchronous and uses Redis queues for communication between components.

## AIOps Demo workflow and high-level component overview

[Anomaly Simulation API (CPU/Memory)] --> [Redis Queue] --> [Anomaly Consumer] --> [Random Forest Model]

--> [Redis Queue] --> [LLM Consumer] --> [LLM-Faiss] --> Remedy Content

![Alt text](demo-flow.png)
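
Because every hop in the diagram above is a Redis queue hand-off, each component only needs to push JSON events and block on the next queue. Below is a minimal sketch of that pattern with the `redis` Python client; the queue name `anomaly_events` and the event fields are illustrative assumptions, not the demo's actual names.

```python
# Minimal producer/consumer sketch for the Redis-queue hand-off between components.
# Assumptions: redis-py is installed, Redis runs on localhost:6379, and the queue
# name "anomaly_events" is hypothetical; the real names live in the demo services.
import json
import redis

r = redis.Redis(host="localhost", port=6379, decode_responses=True)

def publish_event(cpu: float, memory: float) -> None:
    """Simulation side: push a synthetic usage sample onto the queue."""
    event = {"device": "edge-node-1", "cpu": cpu, "memory": memory}
    r.lpush("anomaly_events", json.dumps(event))

def consume_events() -> None:
    """Consumer side: block until an event arrives, then hand it to the detector."""
    while True:
        _, raw = r.brpop("anomaly_events")   # blocking pop from the queue
        event = json.loads(raw)
        print("received event:", event)      # here the demo would run inference

if __name__ == "__main__":
    publish_event(cpu=93.5, memory=88.2)
```
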
# Components

**1. Anomaly Simulation API**

Simulates CPU and memory usage of edge devices or Kubernetes clusters.

Pushes synthetic anomaly events to Redis for downstream processing.

**2. Anomaly Consumer**

Consumes messages from Redis.

Sends the data to the Anomaly Isolation Random Forest Model for anomaly detection.

**3. Anomaly Isolation (Random Forest Model)**

Detects anomalies in resource usage.

Annotates and classifies anomalies.

Pushes detected anomalies back to Redis.

**4. LLM Consumer**

Consumes anomaly messages from Redis.

Sends anomaly data to LLM-Faiss for context-aware remedy generation.

**5. LLM-Faiss**

Generates relevant remedy content using vector search.

Produces actionable insights for alerting or automated remediation.
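
To make the vector-search step concrete, here is a minimal sketch of how a FAISS index over remedy snippets can be queried with an embedded anomaly description. The remedy texts and the `all-MiniLM-L6-v2` embedding model are assumptions for illustration; the demo's actual index and model live in the llm-faiss service.

```python
# Sketch of remedy retrieval with FAISS vector search.
# Assumptions: faiss-cpu and sentence-transformers are installed; the remedy
# texts and the embedding model are illustrative only.
import faiss
from sentence_transformers import SentenceTransformer

remedies = [
    "Scale the deployment by adding one replica to relieve CPU pressure.",
    "Increase the pod memory limit and restart the affected pod.",
    "Evict low-priority workloads from the edge node to free resources.",
]

model = SentenceTransformer("all-MiniLM-L6-v2")
vectors = model.encode(remedies).astype("float32")

index = faiss.IndexFlatL2(vectors.shape[1])   # exact L2 index over remedy embeddings
index.add(vectors)

anomaly = "CPU usage on edge-node-1 stayed above 95% for ten minutes"
query = model.encode([anomaly]).astype("float32")
_, hits = index.search(query, k=1)            # nearest remedy for the anomaly
print("suggested remedy:", remedies[hits[0][0]])
```
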
# Key Features

- Simulated CPU/Memory metrics for edge devices or Kubernetes clusters.

- Random Forest-based anomaly detection (see the sketch after this list).

- LLM-Faiss integration for intelligent remedy content.

- Fully asynchronous architecture using Redis queues.
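
The anomaly detection feature above is described as a Random Forest model, and the detection service announces itself as an Isolation Forest API. A minimal, self-contained sketch of that idea with scikit-learn's `IsolationForest` follows; the synthetic CPU/memory samples, feature layout, and contamination value are illustrative assumptions, not the demo's trained model.

```python
# Sketch of anomaly detection on CPU/memory samples with an isolation-forest model.
# Assumptions: scikit-learn is installed; the data and parameters are illustrative
# and do not reflect the demo's trained model.
import numpy as np
from sklearn.ensemble import IsolationForest

rng = np.random.default_rng(42)

# Normal behaviour: CPU and memory hover around 50-55%.
normal = rng.normal(loc=[50.0, 55.0], scale=5.0, size=(500, 2))
# A few spikes that should be flagged as anomalies.
spikes = np.array([[97.0, 92.0], [95.0, 99.0]])

model = IsolationForest(contamination=0.01, random_state=42).fit(normal)

samples = np.vstack([normal[:3], spikes])
labels = model.predict(samples)               # +1 = normal, -1 = anomaly
for (cpu, mem), label in zip(samples, labels):
    status = "ANOMALY" if label == -1 else "ok"
    print(f"cpu={cpu:5.1f} mem={mem:5.1f} -> {status}")
```
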
# Getting Started: how to run the demo on your local laptop/desktop

**Step 1:** Clone the repository and navigate to the demo folder as shown below.

```bash
git clone https://github.com/lfedgeai/AIOps.git
```

**Step 2:** Build the images using Docker Compose:

```bash
cd AIOps/demo
docker compose build --no-cache
```

**Step 3:** Run Docker Compose to start the Postgres database and the Redis cache/queue. Make sure the database and Redis start with no errors.

```bash
docker compose up -d anomaly-db redis
```

**Step 4:** Run Docker Compose to start the LLM component. Make sure the LLM component starts with no errors.

```bash
docker compose up -d llm-faiss
```

**Step 5:** Run Docker Compose to start the anomaly detection component. Make sure the anomaly component starts with no errors.

```bash
docker compose up -d anomaly-detection
```

A built-in Swagger UI is provided so you can interact with the API directly from your browser.

Open your browser and go to the Swagger API URL (for example: http://localhost:8001/docs).

## How to trigger an anomaly

Open your browser and go to the Swagger API URL http://localhost:8002/

Use the provided endpoint to generate synthetic CPU/Memory anomaly data, which triggers anomaly inference:

**GET /generate-anomaly-data/{10}**

The events generated from the synthetic data will automatically flow through the pipeline:

Simulation API → Redis Queue → Anomaly Consumer → Random Forest Model → Redis Queue → LLM Consumer → LLM-Faiss → Remedy Content.

You can verify the anomaly detection and the LLM remedy content from the Docker console output in your terminal.

- First, create new anomalies through the Swagger API:
<img width="1447" height="1177" alt="image" src="https://github.com/user-attachments/assets/02ea0368-51c3-4788-8164-404a9f71748c" />

- Then you'll see the entries in the terminal window:

<img width="1701" height="585" alt="image" src="https://github.com/user-attachments/assets/caa75a65-9733-4975-8938-d5733ec34ef5" />

- And the recommended remediation steps:
<img width="1707" height="604" alt="image" src="https://github.com/user-attachments/assets/3670c597-c054-4aa8-a4c9-116aaeb3156c" />

**To view the remedy content generated for the detected anomalies**, use the provided endpoint **http://localhost:8002/get-processed-anomalies**

<img width="1912" height="1422" alt="image" src="https://github.com/user-attachments/assets/67473426-6a15-491f-a71e-09c638541fd2" />
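
If you prefer to drive the demo from a script rather than the Swagger UI, the following is a minimal sketch using the `requests` library. It assumes the simulation API is reachable on http://localhost:8002 as described above; the count of 10 simply mirrors the example endpoint.

```python
# Sketch: trigger synthetic anomalies and fetch the processed results over HTTP.
# Assumptions: the simulation API is on http://localhost:8002 and the endpoint
# paths match the README above; adjust the host/port for your environment.
import requests

BASE_URL = "http://localhost:8002"

# Generate 10 synthetic CPU/memory anomaly events (same as the Swagger call above).
resp = requests.get(f"{BASE_URL}/generate-anomaly-data/10", timeout=30)
resp.raise_for_status()
print("trigger response:", resp.json())

# Later, read back the anomalies and the remedy content attached by the LLM stage.
processed = requests.get(f"{BASE_URL}/get-processed-anomalies", timeout=30)
processed.raise_for_status()
print("processed anomalies:", processed.json())
```
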
Lines changed: 6 additions & 0 deletions
@@ -0,0 +1,6 @@
logs/
tmp/
release.sh
vscode
*.pyc
__pycache__/

demo/anomaly-llm-faiss/Dockerfile

Lines changed: 36 additions & 0 deletions
@@ -0,0 +1,36 @@
# Use a lightweight Python image
FROM python:3.11-slim

# Install build dependencies
RUN apt-get update && apt-get install -y curl \
    gcc \
    python3-dev \
    build-essential \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Prepare world-writable directories (OpenShift runs containers as a random UID by default)
RUN mkdir -p /app /tmp/huggingface /mnt/hf_cache && chmod -R 777 /app /tmp/huggingface /mnt/hf_cache

# Set the working directory
WORKDIR /app

# Set environment variables for Hugging Face
ENV HF_HOME=/tmp/huggingface
ENV TRANSFORMERS_CACHE=/tmp/huggingface/transformers

# Copy requirement files and install dependencies
COPY requirements.txt .

RUN pip install --no-cache-dir -r requirements.txt

# Copy the rest of the application
COPY . .

# Set PYTHONPATH so the 'app' package is importable
ENV PYTHONPATH=/app

# Command to run the app
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8002", "--log-level", "debug", "--access-log"]

demo/anomaly-llm-faiss/app/README.md

Whitespace-only changes.

demo/anomaly-llm-faiss/app/__init__.py

Whitespace-only changes.

demo/anomaly-llm-faiss/app/api/__init__.py

Whitespace-only changes.
Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
# FastAPI Routes for Anomaly Detection
from fastapi import FastAPI, APIRouter, HTTPException
from app.models.models import AnomalyData

# router = APIRouter()
from typing import Dict

app = FastAPI()


@app.get("/")
async def root():
    return {"message": "Isolation Forest Anomaly Detection API is running."}


@app.post("/detect-anomaly/")
async def handle_anomaly(anomaly: AnomalyData):
    try:
        # AnomalyData is a Pydantic model, so convert it to a dict before using .get()
        data = anomaly.dict()
        app_name = data.get("app_name", "unknown_app")
        pod_name = data.get("pod_name", "unknown_pod")
        cluster_info = data.get("cluster_info", "unknown_cluster")
        # response = analyze_anomaly_with_llm(anomaly)
        # Placeholder until the LLM analysis call above is wired in: echo the parsed fields.
        response = {"app_name": app_name, "pod_name": pod_name, "cluster_info": cluster_info}
        return {"resolution": response}
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"
local_dir = "./llmmodels/gpt2"

# Download the model and tokenizer from the Hugging Face Hub
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Save both locally
model.save_pretrained(local_dir)
tokenizer.save_pretrained(local_dir)
