63 commits
dfb11bd
add 2 test case and config files
vince-leaf Apr 16, 2025
2fd90a3
updated and fixed front-man effective response
vince-leaf Apr 26, 2025
6a653b2
Added 1st e2e infrastructure with 1 test case
vince-leaf Apr 26, 2025
87bb429
Merge branch 'main' into un-3096_add_test_case_all_agent_cli_connections
vince-leaf Apr 26, 2025
fe68a4e
clean-up
vince-leaf Apr 26, 2025
539cd33
clean-up
vince-leaf Apr 26, 2025
9119952
fixed broken flake8
vince-leaf Apr 26, 2025
ca4cdd0
flake8 reported no newline at end of file, but none
vince-leaf Apr 26, 2025
993cac0
fixed flake8
vince-leaf Apr 26, 2025
219a0b1
added my test dependencies to requirements-build.txt
vince-leaf Apr 28, 2025
e60fcad
add pytest command for path on e2e tests
vince-leaf Apr 28, 2025
a1bfefd
fixed typo
vince-leaf Apr 28, 2025
66fc08e
updated to make flake8 happy
vince-leaf Apr 28, 2025
c1273fc
Made flake8 happy
vince-leaf Apr 28, 2025
4294245
Merge branch 'main' into un-3096_add_test_case_all_agent_cli_connections
vince-leaf Apr 28, 2025
c6215e4
tweaked e2e pytest
vince-leaf Apr 28, 2025
f5f6d9d
edit e2e pytest
vince-leaf Apr 28, 2025
769c352
Update cost values
vince-leaf Apr 28, 2025
2b4a38a
Merge branch 'main' into un-3096_add_test_case_all_agent_cli_connections
vince-leaf Apr 28, 2025
5802313
Merge branch 'main' into un-3096_add_test_case_all_agent_cli_connections
vince-leaf Apr 29, 2025
f57ceec
removed extra
vince-leaf Apr 29, 2025
e58d261
combined fileterwarning to top pytest.ini
vince-leaf Apr 29, 2025
c138c5f
Added server service utility
vince-leaf Apr 30, 2025
5ae944e
added start and stop server service
vince-leaf Apr 30, 2025
d669421
added ignore warning
vince-leaf Apr 30, 2025
bdd9548
renamed to smoketest
vince-leaf Apr 30, 2025
a74094c
updated to run smoke test
vince-leaf Apr 30, 2025
abdcde8
made flake8 happy
vince-leaf May 1, 2025
e028a2e
ignore pytest warning
vince-leaf May 1, 2025
475a76d
debug
vince-leaf May 1, 2025
cfe348f
debug
vince-leaf May 1, 2025
2c2d694
debug failure
vince-leaf May 1, 2025
67649c1
debug
vince-leaf May 1, 2025
db5a2a1
fixed flake8
vince-leaf May 1, 2025
38c79c5
debug
vince-leaf May 1, 2025
c250fda
increased timeout on wait for prompt
vince-leaf May 1, 2025
700459f
added logging
vince-leaf May 1, 2025
db10cf7
make flake8 happy
vince-leaf May 1, 2025
fadcd3b
Made Flake8 happy
vince-leaf May 1, 2025
4526cbd
made flake8 happy
vince-leaf May 1, 2025
3832285
add condition
vince-leaf May 1, 2025
a84833d
a major refactor to support start&stop server service
vince-leaf May 6, 2025
19b5fd8
update trigger smoke-test
vince-leaf May 6, 2025
a37c0a7
made flake8 happy
vince-leaf May 6, 2025
aab8039
add test requirement
vince-leaf May 6, 2025
476d9c6
tweaked stop all servers script
vince-leaf May 6, 2025
469f982
more tweaks
vince-leaf May 6, 2025
3c09c55
fixed a minor info message
vince-leaf May 6, 2025
00782ef
Tweaked timeout
vince-leaf May 6, 2025
bfa15b8
Changes Smoke-test to run after Unit tests
vince-leaf May 6, 2025
739a198
updated readme
vince-leaf May 7, 2025
40de058
Merge branch 'main' into un-3096_add_test_case_all_agent_cli_connections
vince-leaf May 7, 2025
84168d4
renamed the files
vince-leaf May 7, 2025
c778f69
renamed hocon
vince-leaf May 7, 2025
a9e4aee
added more comment
vince-leaf May 7, 2025
ea1d37b
added comment
vince-leaf May 7, 2025
d983628
added comment
vince-leaf May 7, 2025
be264d4
added comment
vince-leaf May 7, 2025
7e0c486
add smoketest cron job
vince-leaf May 8, 2025
7c30777
Removed smoke test build test
vince-leaf May 8, 2025
b4ffbd8
updated test result text
vince-leaf May 8, 2025
417f680
Merge branch 'main' into un-3096_add_test_case_all_agent_cli_connections
vince-leaf May 8, 2025
4b952a7
removed requirement file
vince-leaf May 9, 2025
53 changes: 53 additions & 0 deletions .github/workflows/smoke.yml
@@ -0,0 +1,53 @@
πŸ’¬ vince-leaf (Contributor Author): I added a new GitHub CRON Job smoke test trigger file, as @donn-leaf suggested.

name: Smoke Test

on:
  schedule:
    - cron: '0 12 * * *'  # 12:00 PM UTC = 4:00 AM PT (standard)
  workflow_dispatch:      # enables manual triggering

jobs:
  test:
    runs-on: ubuntu-latest
    container:
      image: python:3.12-slim

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Install dependencies
        run: |
          apt-get update && apt-get install -y shellcheck
          pip install -r requirements-build.txt
          pip install -r requirements.txt

      - name: Show installed packages
        run: pip freeze

      - name: Run Smoke Test [excluding all other tests]
        run: python -v -m tests.e2e.tools.smoke_test_runner
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

      - name: Notify Slack on success
        if: success()
        uses: slackapi/slack-github-action@v1.24.0
        with:
          payload: |
            {
              "text": "βœ… *Smoke Tests Passed* for `${{ github.repository }}` on `${{ github.ref_name }}`"
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}

      - name: Notify Slack on failure
        if: failure()
        uses: slackapi/slack-github-action@v1.24.0
        with:
          payload: |
            {
              "text": "❌ *Smoke Tests Failed* for `${{ github.repository }}` on `${{ github.ref_name }}`"
            }
        env:
          SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }}
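The success and failure notification steps build the same payload shape inline via the `payload` field. A minimal sketch of that message construction (the function name `smoke_slack_payload` is illustrative, not part of the workflow or the Slack action):

```python
import json


def smoke_slack_payload(passed: bool, repo: str, ref: str) -> str:
    """Build the Slack message body used by the success/failure steps."""
    emoji, word = ("βœ…", "Passed") if passed else ("❌", "Failed")
    return json.dumps({"text": f"{emoji} *Smoke Tests {word}* for `{repo}` on `{ref}`"})


print(smoke_slack_payload(True, "org/repo", "main"))
```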
6 changes: 3 additions & 3 deletions .github/workflows/tests.yml
@@ -36,10 +36,10 @@ jobs:
run: build_scripts/run_shellcheck.sh

- name: Run flake8
run: flake8
run: flake8

- name: Run pytest (excluding integration tests)
run: pytest --verbose -m "not integration" --timer-top-n 10
- name: Run pytest Run All Other Tests (excluding integration and e2e)
πŸ’¬ Contributor: This name seems redundant. How about just "Run pytest excluding integration, e2e and smoke tests"?

run: pytest --verbose -m "not integration and not smoke and not e2e" --timer-top-n 10
πŸ’¬ Contributor: Is your smoke test just an instance of e2e tests? Maybe saying 'not smoke' and 'not e2e' is redundant? I'm not sure.

env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
AGENT_TOOL_PATH: "./neuro_san/coded_tools"
8 changes: 8 additions & 0 deletions pytest.ini
@@ -4,6 +4,14 @@
markers =
    integration: Mark a test as an integration test. These generally take > 30 seconds to complete.

    # Prevents PytestUnknownMarkWarning
    e2e: marks tests as end-to-end tests
    smoke: marks tests as smoke tests

filterwarnings =
    # Ignore warnings about protobuf 4
    ignore:Type google._upb._message.* uses PyType_Spec with a metaclass that has custom tp_new:DeprecationWarning

    # Ignore warning about pexpect
    ignore:.*use of forkpty.*:DeprecationWarning:pty
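These `filterwarnings` entries use the same `action:message:category[:module]` matching rules as Python's stdlib `warnings` filters, where the message part is a regex matched against the start of the warning text. A minimal stdlib sketch of the `forkpty` filter above:

```python
import warnings

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")  # record every warning by default
    # Equivalent of the ini entry: ignore:.*use of forkpty.*:DeprecationWarning
    warnings.filterwarnings("ignore", message=".*use of forkpty.*",
                            category=DeprecationWarning)
    warnings.warn("The use of forkpty is deprecated", DeprecationWarning)
    warnings.warn("some other warning", UserWarning)

# Only the non-matching warning survives the ignore filter.
print(len(caught))                  # β†’ 1
print(caught[0].category.__name__)  # β†’ UserWarning
```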
5 changes: 5 additions & 0 deletions requirements-build.txt
@@ -8,6 +8,11 @@ timeout-decorator==0.5.0
coverage==7.6.1
pytest-cov==5.0.0
parameterized
pexpect
pyhocon
pytest-xdist
pytest-timeout
πŸ’¬ vince-leaf (Contributor Author, Apr 28, 2025): Added requirement for e2e tests

πŸ’¬ Contributor: Should these requirements go to tests/e2e/requirements.txt then?

psutil

# Code quality
flake8==7.1.1
99 changes: 99 additions & 0 deletions tests/e2e/README.md
@@ -0,0 +1,99 @@
# πŸ§ͺ End-to-End Testing Suite for `music_nerd_pro`

This directory contains the full end-to-end (E2E) test infrastructure for the `music_nerd_pro` agent, including configuration, reusable utilities, test cases, and server lifecycle control tools.

---

## πŸ“ Directory Structure

```text
tests/e2e/
β”œβ”€β”€ README.md # βœ… You're here
β”œβ”€β”€ configs/
β”‚ └── config.hocon # HOCON config defining all agent connections
β”œβ”€β”€ conftest.py # Shared pytest setup, CLI options, parametrization, server startup
β”œβ”€β”€ requirements.txt # Pip requirements for test environment
β”œβ”€β”€ test_cases_data/
β”‚ └── mnpt_data.hocon # Input data and expectations for test runner
β”œβ”€β”€ tests/
β”‚ └── test_run_agent_cli_music_nerd_pro.py # Main test case driver (used by orchestrators)
β”œβ”€β”€ tools/
β”‚ β”œβ”€β”€ smoke_test_runner.py # Orchestrator: start β†’ test β†’ stop
β”‚ β”œβ”€β”€ start_server_manual.py # Manual: starts server and stores PID
β”‚ β”œβ”€β”€ stop_all_servers.py # Manual: stops all running agent servers from PID file
β”‚ └── stop_last_server.py # Manual: stops only the most recently started server
└── utils/
β”œβ”€β”€ logging_config.py # Shared logging setup (file + console)
β”œβ”€β”€ music_nerd_pro_hocon_loader.py # Extracts structured test data from HOCON config
β”œβ”€β”€ music_nerd_pro_output_parser.py # Parses CLI outputs for verification
β”œβ”€β”€ music_nerd_pro_runner.py # Executes the CLI test logic
β”œβ”€β”€ server_manager.py # Manages agent server lifecycle (start, stop, PID tracking)
β”œβ”€β”€ server_state.py # In-memory + file-based PID state tracking
β”œβ”€β”€ thinking_file_builder.py # Generates `thinking_file` argument path
└── verifier.py # Assertion helper for output validation
```

---

## 🚦 How to Run E2E Tests

### πŸ” Option 1: Manual Mode

```bash
# 1. Start agent server manually
python tests/e2e/tools/start_server_manual.py

# 2. Run E2E CLI tests
pytest tests/e2e/tests/test_run_agent_cli_music_nerd_pro.py \
--capture=no --connection grpc --thinking-file --repeat 1 -n auto

# 3. Stop all running agent servers
python tests/e2e/tools/stop_all_servers.py
```

---

### ⚑ Option 2: Orchestrated Smoke Test

Run everything in one go:

```bash
python -m tests.e2e.tools.smoke_test_runner
```
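The orchestrated mode wraps the manual steps into one start β†’ test β†’ stop sequence. A hedged sketch of that control flow (the function and command names here are illustrative, not the actual `smoke_test_runner` API):

```python
import subprocess


def run_smoke(start_cmd, test_cmd, stop_cmd, run=subprocess.call):
    """Start the server, run the tests, and always stop the server.

    Returns the test exit code, or 1 if the server failed to start.
    """
    if run(start_cmd) != 0:
        return 1  # server never came up; nothing to stop
    try:
        return run(test_cmd)
    finally:
        run(stop_cmd)  # stop even if the tests failed or raised
```

In the real runner, `start_cmd` and `stop_cmd` would correspond to `start_server_manual.py` and `stop_all_servers.py`; the `finally` block guarantees servers are not left running after a failed test pass.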

---

## βœ… Test CLI Options

| Option | Description |
|------------------|--------------------------------------------------|
| `--connection` | One of: `direct`, `grpc`, `http` |
| `--repeat` | Number of repetitions per connection |
| `--thinking-file`| Enables logging of agent `thinking_file` output |
| `-n` | pytest-xdist option: number of parallel workers (`auto` = one per CPU core) |

---

## πŸ“¦ Test Environment Setup

```bash
pip install -r tests/e2e/requirements.txt
```

You must also have the `neuro_san` package accessible via `PYTHONPATH`.

---

## 🧠 Notes

- PID tracking is handled via `/tmp/neuro_san_server.pid`.
- Multiple PIDs are supported and cleaned up automatically.
- The test file `test_run_agent_cli_music_nerd_pro.py` is ignored during normal discovery unless triggered explicitly.
- Logging is unified under `/tmp/e2e_server.log`.

---
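The PID-file tracking described in the notes can be approximated with the stdlib alone. This is a hedged sketch of the liveness check, not the actual `server_manager.py` logic (the real code uses `psutil`; `live_pids` is an illustrative name):

```python
import os
from pathlib import Path


def live_pids(pid_file):
    """Return the PIDs in pid_file that still refer to running processes."""
    path = Path(pid_file)
    if not path.exists():
        return []
    alive = []
    for token in path.read_text().split():
        try:
            pid = int(token)
        except ValueError:
            continue  # malformed entry; skip it
        try:
            os.kill(pid, 0)  # signal 0: existence check, delivers no signal
        except ProcessLookupError:
            continue  # stale PID; process already exited
        except PermissionError:
            pass  # process exists but is owned by another user
        alive.append(pid)
    return alive
```

Stale entries filtered out this way can then be rewritten back to the PID file, which is what "cleaned up automatically" amounts to.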

## πŸ› οΈ Authors & Maintenance

Maintained by QA & Platform Engineering.
Contact: `@vincent.nguyen`
8 changes: 8 additions & 0 deletions tests/e2e/configs/share_agent_config.hocon
@@ -0,0 +1,8 @@
# config.hocon
# Agent config & connection setup

connection = ["direct", "grpc", "http"]
πŸ’¬ vince-leaf (Contributor Author): This is a shared config hocon for all e2e tests.

agent = [music_nerd_pro]

model_llm = ["gpt-4o", "llama3.1"]

164 changes: 164 additions & 0 deletions tests/e2e/conftest.py
@@ -0,0 +1,164 @@
# conftest.py

# ------------------------------------------------------------------------
# Pytest configuration for shared CLI options, dynamic test generation,
# session-wide logging setup, and agent server lifecycle management.
# ------------------------------------------------------------------------

import pytest
import os
import logging
from pyhocon import ConfigFactory
from pathlib import Path
from utils.logging_config import setup_logging, DEFAULT_LOG_PATH
setup_logging() # Make sure logger is initialized


# ------------------------------------------------------------------------------
# Constants
# ------------------------------------------------------------------------------

THINKING_FILE_PATH = "/private/tmp/agent_thinking"
LOG_PATH = DEFAULT_LOG_PATH # shared with logging_config
NAME_CONFIG_HOCON = "share_agent_config"

# ------------------------------------------------------------------------------
# One-time Log Cleanup + Logging Setup
# ------------------------------------------------------------------------------

try:
    # Truncate the log file for a clean start (don't delete it)
    open(LOG_PATH, "w").close()
πŸ’¬ vince-leaf (Contributor Author, May 7, 2025): Pytest truncates all existing e2e logs at the start of the test run.


    print(f"[setup] Truncated log file: {LOG_PATH}")
except Exception as e:
    print(f"[setup] WARNING: Could not prepare log file: {e}")


# Initialize shared logging (both file and console)
setup_logging(log_path=LOG_PATH)
logging.info("βœ… Logging system initialized by conftest.py")

# ------------------------------------------------------------------------------
# Load Static Agent Configuration (HOCON)
# ------------------------------------------------------------------------------

CONFIG_HOCON_PATH = os.path.join(os.path.dirname(__file__), "configs", NAME_CONFIG_HOCON + ".hocon")

config = ConfigFactory.parse_file(CONFIG_HOCON_PATH)
πŸ’¬ vince-leaf (Contributor Author): Parse the config hocon to get the connections.


# ------------------------------------------------------------------------------
# Pytest Hooks
# ------------------------------------------------------------------------------


def pytest_ignore_collect(collection_path: Path, config):
    """
    Prevents pytest from collecting a specific test file during discovery.

    This is used to ignore test_agent_cli_music_nerd_pro.py during normal pytest runs,
    because:
    - It depends on a pre-started server (via start_server_manual.py)
    - It is intended to be run only as part of tools/smoke_test_runner.py
    - This helps avoid accidental test failures or unwanted execution

    Note: Uses pathlib.Path as required by pytest 9+ (fix for PytestRemovedIn9Warning).
    """
    return "test_agent_cli_music_nerd_pro.py" in str(collection_path)
πŸ’¬ vince-leaf (Contributor Author): This tells pytest to skip (never collect) this test file during normal runs.



def pytest_configure(config):
    """
    Pytest hook: called once at the start of the test session.
    This function logs useful context about the test configuration.

    - Logs the repeat count from `--repeat` CLI option (default = 1)
    - Detects if pytest-xdist is enabled (i.e., running in parallel)
    """
    # Fetch repeat count from command-line option or default to 1
    repeat = config.getoption("repeat", default=1)

    # Check if we are in a worker process (i.e., xdist parallel run)
    is_parallel = hasattr(config, "workerinput")

    # Emit a log entry showing test mode
    logging.info(f"πŸ§ͺ Test mode: repeat={repeat}, parallel={is_parallel}")
    logging.info("Custom Environment Info")
    logging.info(f"thinking-file path : {THINKING_FILE_PATH}")


# This is a special pytest hook. Do not rename it!
# Pytest uses this to register custom CLI options.
def pytest_addoption(parser):
    """
    Defines CLI options:
      --connection: Limit tests to a specific connection (e.g., direct/grpc/http)
      --repeat: Repeat each test multiple times
      --thinking-file: Enables optional thinking_file logging
    """
    group = parser.getgroup("custom options")
    group.addoption("--connection", action="store", default=None,
                    help="Specify a connection to test: direct, grpc, or http.")
    group.addoption("--repeat", action="store", type=int, default=1,
                    help="Number of times to repeat each test.")
    group.addoption("--thinking-file", action="store_true", default=False,
                    help="Enable thinking_file output per test run.")


def pytest_generate_tests(metafunc):
    # πŸ›‘ Skip parametrization if running the orchestrator module (test_*.py)
    # This avoids injecting parameters into the orchestration entrypoint file,
    # which is responsible for launching tests, not running them directly.
    if metafunc.module.__name__.endswith("test_none"):
        return

    # βœ… Only proceed if the test function expects 'connection_name' as a fixture
    if "connection_name" in metafunc.fixturenames:
        # Load all available connection types from the HOCON config (e.g., ['grpc', 'http', 'direct'])
        all_connections = load_connections()
πŸ’¬ vince-leaf (Contributor Author): By default, all three connections are tested.


# Read CLI overrides (if any)
selected = metafunc.config.getoption("connection") # --connection grpc
repeat = metafunc.config.getoption("repeat") # --repeat 3

# πŸ” If a specific connection is requested, validate and filter
if selected:
if selected not in all_connections:
raise ValueError(f"Connection '{selected}' not in config: {all_connections}")
all_connections = [selected]

# πŸ§ͺ Build parameter combinations: (connection, repeat_index)
# -----------------------------------------------------------------------------
# This block is responsible for *generating the test matrix*.
# It determines how many test cases will be launched based on:
# - the list of connections (e.g., grpc, http, direct)
# - the --repeat CLI argument (e.g., --repeat 3)
#
# Example:
# If connections = ['grpc', 'http'] and repeat = 2, this will produce:
# - grpc_run1
# - grpc_run2
# - http_run1
# - http_run2
#
# These become individual pytest cases, allowing for:
# βœ… Parallel execution (when using `-n auto`)
# βœ… Fine-grained control over test case identifiers and logs
#
# The generated values are injected into the test function via parametrize.
test_params = [
pytest.param(conn, i, id=f"{conn}_run{i+1}")
for conn in all_connections
for i in range(repeat)
πŸ’¬ vince-leaf (Contributor Author): Generates the matrix of runners.

        ]

        # Inject parameters into the test function
        # This allows dynamic test generation using standard pytest features
        metafunc.parametrize("connection_name, repeat_index", test_params)


def load_connections():
    """
    Returns the list of connections from the test config.
    """
    return config.get("connection")
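The parametrization in `pytest_generate_tests` reduces to a simple cross-product of connections and repeat indices. A standalone sketch of the ID scheme it produces (`build_test_ids` is an illustrative name, not part of the conftest):

```python
def build_test_ids(connections, repeat):
    """Cross connections with repeat indices, mirroring the pytest.param IDs."""
    return [f"{conn}_run{i + 1}" for conn in connections for i in range(repeat)]


print(build_test_ids(["grpc", "http"], 2))
# β†’ ['grpc_run1', 'grpc_run2', 'http_run1', 'http_run2']
```

With `--repeat 2` and no `--connection` filter, each of the three connections therefore yields two independent pytest cases that xdist can schedule in parallel.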
13 changes: 13 additions & 0 deletions tests/e2e/requirements.txt
@@ -0,0 +1,13 @@
# Note: json, logging, os, sys, subprocess, and re are Python stdlib modules
# and need no pip entry; only third-party packages belong here.
pexpect
psutil
pyhocon
pytest
pytest-xdist
pytest-timeout
pytest-timer