63 commits
dfb11bd
add 2 test case and config files
vince-leaf Apr 16, 2025
2fd90a3
updated and fixed front-man effective response
vince-leaf Apr 26, 2025
6a653b2
Added 1st e2e infrastructure with 1 test case
vince-leaf Apr 26, 2025
87bb429
Merge branch 'main' into un-3096_add_test_case_all_agent_cli_connections
vince-leaf Apr 26, 2025
fe68a4e
clean-up
vince-leaf Apr 26, 2025
539cd33
clean-up
vince-leaf Apr 26, 2025
9119952
fixed broken flake8
vince-leaf Apr 26, 2025
ca4cdd0
flake8 reported no newline at end of file, but none
vince-leaf Apr 26, 2025
993cac0
fixed flake8
vince-leaf Apr 26, 2025
219a0b1
added my test dependencies to requirements-build.txt
vince-leaf Apr 28, 2025
e60fcad
add pytest command for path on e2e tests
vince-leaf Apr 28, 2025
a1bfefd
fixed typo
vince-leaf Apr 28, 2025
66fc08e
updated to make flake8 happy
vince-leaf Apr 28, 2025
c1273fc
Made flake8 happy
vince-leaf Apr 28, 2025
4294245
Merge branch 'main' into un-3096_add_test_case_all_agent_cli_connections
vince-leaf Apr 28, 2025
c6215e4
tweaked e2e pytest
vince-leaf Apr 28, 2025
f5f6d9d
edit e2e pytest
vince-leaf Apr 28, 2025
769c352
Update cost values
vince-leaf Apr 28, 2025
2b4a38a
Merge branch 'main' into un-3096_add_test_case_all_agent_cli_connections
vince-leaf Apr 28, 2025
5802313
Merge branch 'main' into un-3096_add_test_case_all_agent_cli_connections
vince-leaf Apr 29, 2025
f57ceec
removed extra
vince-leaf Apr 29, 2025
e58d261
combined fileterwarning to top pytest.ini
vince-leaf Apr 29, 2025
c138c5f
Added server service utility
vince-leaf Apr 30, 2025
5ae944e
added start and stop server service
vince-leaf Apr 30, 2025
d669421
added ignore warning
vince-leaf Apr 30, 2025
bdd9548
renamed to smoketest
vince-leaf Apr 30, 2025
a74094c
updated to run smoke test
vince-leaf Apr 30, 2025
abdcde8
made flake8 happy
vince-leaf May 1, 2025
e028a2e
ignore pytest warning
vince-leaf May 1, 2025
475a76d
debug
vince-leaf May 1, 2025
cfe348f
debug
vince-leaf May 1, 2025
2c2d694
debug failure
vince-leaf May 1, 2025
67649c1
debug
vince-leaf May 1, 2025
db5a2a1
fixed flake8
vince-leaf May 1, 2025
38c79c5
debug
vince-leaf May 1, 2025
c250fda
increased timeout on wait for prompt
vince-leaf May 1, 2025
700459f
added logging
vince-leaf May 1, 2025
db10cf7
make flake8 happy
vince-leaf May 1, 2025
fadcd3b
Made Flake8 happy
vince-leaf May 1, 2025
4526cbd
made flake8 happy
vince-leaf May 1, 2025
3832285
add condition
vince-leaf May 1, 2025
a84833d
a major refactor to support start&stop server service
vince-leaf May 6, 2025
19b5fd8
update trigger smoke-test
vince-leaf May 6, 2025
a37c0a7
made flake8 happy
vince-leaf May 6, 2025
aab8039
add test requirement
vince-leaf May 6, 2025
476d9c6
tweaked stop all servers script
vince-leaf May 6, 2025
469f982
more tweaks
vince-leaf May 6, 2025
3c09c55
fixed a minor info message
vince-leaf May 6, 2025
00782ef
Tweaked timeout
vince-leaf May 6, 2025
bfa15b8
Changes Smoke-test to run after Unit tests
vince-leaf May 6, 2025
739a198
updated readme
vince-leaf May 7, 2025
40de058
Merge branch 'main' into un-3096_add_test_case_all_agent_cli_connections
vince-leaf May 7, 2025
84168d4
renamed the files
vince-leaf May 7, 2025
c778f69
renamed hocon
vince-leaf May 7, 2025
a9e4aee
added more comment
vince-leaf May 7, 2025
ea1d37b
added comment
vince-leaf May 7, 2025
d983628
added comment
vince-leaf May 7, 2025
be264d4
added comment
vince-leaf May 7, 2025
7e0c486
add smoketest cron job
vince-leaf May 8, 2025
7c30777
Removed smoke test build test
vince-leaf May 8, 2025
b4ffbd8
updated test result text
vince-leaf May 8, 2025
417f680
Merge branch 'main' into un-3096_add_test_case_all_agent_cli_connections
vince-leaf May 8, 2025
4b952a7
removed requirement file
vince-leaf May 9, 2025
2 changes: 1 addition & 1 deletion neuro_san/coded_tools/music_nerd_pro/accounting.py
@@ -30,7 +30,7 @@ def invoke(self, args: Dict[str, Any], sly_data: Dict[str, Any]) -> Dict[str, Any]:
         running_cost: float = float(args.get("running_cost"))

         # Increment the running cost
-        updated_running_cost: float = running_cost + 1.0
+        updated_running_cost: float = running_cost + 3.0

         tool_response = {
             "running_cost": updated_running_cost
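The changed hunk can be exercised in isolation. Below is a minimal sketch, with the class context omitted and the signature taken from the hunk header, showing the new per-question increment of 3.0:

```python
from typing import Any, Dict


def invoke(args: Dict[str, Any], sly_data: Dict[str, Any]) -> Dict[str, Any]:
    """Standalone sketch of the diffed Accountant logic (class context omitted)."""
    running_cost: float = float(args.get("running_cost"))

    # Increment the running cost: each question now costs 3.0 (was 1.0)
    updated_running_cost: float = running_cost + 3.0

    tool_response = {
        "running_cost": updated_running_cost
    }
    return tool_response


# Two questions in a row: 0.0 -> 3.0 -> 6.0, matching the expected
# costs ("3.0", "6.0") in the e2e test data below
first = invoke({"running_cost": 0.0}, {})
second = invoke({"running_cost": first["running_cost"]}, {})
print(first["running_cost"], second["running_cost"])  # 3.0 6.0
```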
7 changes: 6 additions & 1 deletion neuro_san/registries/music_nerd_pro.hocon
@@ -50,7 +50,12 @@ You’re equal parts playlist curator, music historian, and pop culture mythbust
 This service comes for a fee. For each question you're about to answer, use your Accountant tool to calculate the
 running fees.

-Return your answer and the running cost in a JSON message.
+#Return your answer and the running cost in a JSON message.
+This service comes at a cost. For every question:
+1. Use your Accountant tool to calculate the updated running cost.
+2. Return your response in **two parts**:
+   - First, give your full music answer as plain text.
+   - Then, on a **separate line**, return a **valid JSON object** with the updated cost only.
 """,
 "tools": ["Accountant"]
 },
114 changes: 114 additions & 0 deletions tests/e2e/README.md
@@ -0,0 +1,114 @@
# πŸ§ͺ End-to-End Agent Testing Framework

This project provides an extensible, reusable **pytest**-based test system to validate AI agent behavior through real CLI interactions.

It supports:
- Running **multiple connections** (`grpc`, `http`, `direct`)
- **Parallel execution** with **pytest-xdist**
- Optional **thinking file capture** for agent internals
- Config-driven prompts using **HOCON** files

---

## πŸ“¦ Project Structure

```bash
e2e/
β”œβ”€β”€ README.md # This documentation
β”œβ”€β”€ configs/ # Static agent configuration
# Reviewer (Contributor Author): Added README file
# Reviewer (Collaborator): It's good that you have all your e2e stuff together under its own directory.
β”‚ └── config.hocon
β”œβ”€β”€ conftest.py # Pytest customizations (CLI args, test discovery)
β”œβ”€β”€ pytest.ini # Pytest settings
β”œβ”€β”€ requirements.txt # Python dependencies
β”œβ”€β”€ test_cases_data/ # Test data for each agent
β”‚ └── mnpt_data.hocon
β”œβ”€β”€ tests/ # Test case source files
β”‚ └── test_music_nerd_pro.py
└── utils/ # Helper modules (parsing, building commands, etc.)
β”œβ”€β”€ mnpt_hocon_loader.py
β”œβ”€β”€ mnpt_output_parser.py
β”œβ”€β”€ mnpt_test_runner.py
β”œβ”€β”€ thinking_file_builder.py
└── verifier.py
```

---

## πŸš€ Running Tests

### Install Dependencies

```bash
pip install -r requirements.txt
```

### Basic Test Command

Run a test (default: **all connections**):

```bash
pytest tests/ --verbose
```

Run for specific connection only:

```bash
pytest tests/ --connection grpc --verbose
```

Run and enable thinking file output:

```bash
pytest tests/ --thinking-file --verbose
```

Enable parallel test execution:

```bash
pytest tests/ --connection grpc --repeat 5 --thinking-file -n auto --verbose
```

> πŸ’‘ When using `-n auto`, pytest-xdist distributes the repeated tests across all available CPU cores.

---

## βš™οΈ CLI Options

| Option | Description |
|:------------------|:------------|
| `--connection` | Run tests only for a specific connection (e.g., `grpc`, `http`, `direct`). |
| `--repeat` | Repeat each test multiple times. |
| `--thinking-file` | Save the agent's internal "thinking" to a temp directory during the test. |

---

# 🧠 Agent: MusicNerdPro Test (test_music_nerd_pro.py)

This suite tests the `music_nerd_pro` agent over all connection types.

### Test Logic

- Load prompt/expected outputs from **HOCON** config files
- Spawn a CLI agent process
- Send user questions
- Verify that:
- Correct keyword appears in the response
- Correct cost value is returned
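Given the two-part response format the agent is prompted to use (a plain-text answer, then a JSON line with the cost), the two verification bullets could be implemented with a check like the following. The helper name and the `running_cost` JSON key are illustrative here, not necessarily what `verifier.py` actually uses:

```python
import json


def verify_response(raw: str, keyword: str, expected_cost: float) -> bool:
    """Check that the keyword appears in the plain-text answer and that
    the trailing JSON line carries the expected running cost."""
    lines = [ln for ln in raw.strip().splitlines() if ln.strip()]
    answer, json_line = "\n".join(lines[:-1]), lines[-1]
    payload = json.loads(json_line)
    return keyword in answer and float(payload["running_cost"]) == expected_cost


sample = 'Yellow Submarine is by the Beatles.\n{"running_cost": 3.0}'
print(verify_response(sample, "Beatles", 3.0))  # True
```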

### Related Files

| File | Purpose |
|:-----|:--------|
| `tests/test_music_nerd_pro.py` | Main test case (pytest function) |
| `test_cases_data/mnpt_data.hocon` | Prompt/expected answer definitions |
| `configs/config.hocon` | Static agent config (connections list) |
| `utils/*.py` | Reusable helpers for all agent tests |

---

# πŸ“ Notes

- **Thinking files** are stored under `/private/tmp/agent_thinking/`
- If `-n auto` is used, **worker-specific** folders are created (e.g., `run_gw0_1`).
- **PEXPECT** is used to fully simulate CLI typing behavior.
- Future agents can be easily added following the same pattern as MusicNerdPro!
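The pexpect-driven session can be approximated with nothing but the standard library. The stand-in below spawns a toy echo "agent" instead of the real CLI (which the suite drives via pexpect), but the send-prompt / read-reply / send-quit loop has the same shape:

```python
import subprocess
import sys

# Toy "agent": echoes each prompt back until it reads "quit".
# Purely illustrative; the real suite talks to the agent CLI via pexpect.
AGENT = r'''
import sys
for line in sys.stdin:
    q = line.strip()
    if q == "quit":
        break
    print(f"echo: {q}", flush=True)
'''

proc = subprocess.Popen(
    [sys.executable, "-c", AGENT],
    stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True,
)
replies = []
for prompt in ["Who did yellow submarine?", "quit"]:
    proc.stdin.write(prompt + "\n")
    proc.stdin.flush()
    if prompt != "quit":
        replies.append(proc.stdout.readline().strip())
proc.wait(timeout=10)
print(replies)
```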
8 changes: 8 additions & 0 deletions tests/e2e/configs/config.hocon
@@ -0,0 +1,8 @@
# config.hocon
# Agent config & connection setup

connection = ["direct", "grpc", "http"]
agent = [music_nerd_pro]

model_llm = ["gpt-4o", "llama3.1"]
> **Collaborator (@d1donlydfink, Apr 29, 2025):** LLMs should be a property of the agent, not the test. It would make more sense to have different (very simple) agents set up to test specific LLMs, I think.

> **Contributor Author:** I listed them here because I was thinking of performance test cases. For example: suppose we know, hypothetically, that llama3.1 should be faster than gpt-4o on the x hocon. The test case would run the agent with the first model, then rerun it with the second model, and compare the results of both.

> **Contributor Author:** Alternatively, we could utilize the existing infrastructure on the sly_data feature to perform the comparison.


105 changes: 105 additions & 0 deletions tests/e2e/conftest.py
@@ -0,0 +1,105 @@
# conftest.py
# ------------------------------------------------------------------------
# Pytest configuration for MusicNerdPro tests.
# Provides custom CLI flags, dynamic test generation, and environment setup.
# ------------------------------------------------------------------------

import pytest
import os
from pyhocon import ConfigFactory

# ------------------------------------------------------------------------------
# Constants
# ------------------------------------------------------------------------------

# Directory where agent CLI thinking files will be written (optional feature)
THINKING_FILE_PATH = "/private/tmp/agent_thinking"

# Static agent config (HOCON) loaded once for all tests
CONFIG_HOCON_PATH = os.path.join(os.path.dirname(__file__), "configs", "config.hocon")
config = ConfigFactory.parse_file(CONFIG_HOCON_PATH)
> **Contributor Author:** Parse the config hocon to get connections.


# ------------------------------------------------------------------------------
# Hooks
# ------------------------------------------------------------------------------

def pytest_configure(config):
    """
    Prints custom environment info when pytest starts.
    Helps verify environment settings.
    """
    print("\nCustom Environment Info")
    print(f"thinking-file path : {THINKING_FILE_PATH}")

def pytest_addoption(parser):
    """
    Adds custom command-line options for pytest to control the test suite:
      --connection    -> Filter tests by specific connection method (direct/grpc/http)
      --repeat        -> Repeat the same test multiple times (for stability/reliability)
      --thinking-file -> Enable writing out agent thinking_file logs during test
    """
    group = parser.getgroup("custom options")
    group.addoption(
        "--connection",
        action="store",
        default=None,
        help="Specify a connection name to test (e.g., direct, grpc, http). If omitted, all will be tested."
    )
    group.addoption(
        "--repeat",
        action="store",
        type=int,
        default=1,
        help="Number of times to repeat each test (for stress or reliability testing)."
    )
    group.addoption(
        "--thinking-file",
        action="store_true",
        default=False,
        help="If enabled, agent will write a thinking_file log per test case (grpc/http/direct)."
    )

def pytest_generate_tests(metafunc):
    """
    Dynamically parameterizes the tests based on the connection(s) and repetition requested.

    Example:
        --connection grpc --repeat 3
            β†’ Runs 3 tests against 'grpc' connection.

        --repeat 2 (with no connection)
            β†’ Runs 2 tests for each connection (direct, grpc, http).

    This auto-expands into (connection_name, repeat_index) fixture pairs.
    """
    if "connection_name" in metafunc.fixturenames:
        # By default, the connection is all three
        all_connections = load_connections()
        selected_connection = metafunc.config.getoption("connection")
        repeat = metafunc.config.getoption("repeat")

        # Filter if a specific connection is selected
        if selected_connection:
            if selected_connection not in all_connections:
                raise ValueError(f"Connection '{selected_connection}' not found in config: {all_connections}")
            all_connections = [selected_connection]

        # Generate the matrix of (connection_name, repeat_index) runners
        test_params = [
            pytest.param(conn, i, id=f"{conn}_run{i+1}")
            for conn in all_connections
            for i in range(repeat)
        ]

        # Parametrize the test function
        metafunc.parametrize("connection_name, repeat_index", test_params)
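Stripped of pytest specifics, the matrix that `pytest_generate_tests` expands into can be sketched as plain data; the IDs follow the same `conn_runN` pattern as the `pytest.param` calls above:

```python
def build_matrix(connections, repeat):
    """Expand (connection, repeat) into the same matrix pytest parametrizes,
    as (connection_name, repeat_index, test_id) tuples."""
    return [(conn, i, f"{conn}_run{i + 1}")
            for conn in connections
            for i in range(repeat)]


# All three connections, repeated twice, yields six test IDs
ids = [test_id for _, _, test_id in build_matrix(["direct", "grpc", "http"], 2)]
print(ids)
# ['direct_run1', 'direct_run2', 'grpc_run1', 'grpc_run2', 'http_run1', 'http_run2']
```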

# ------------------------------------------------------------------------------
# Utilities
# ------------------------------------------------------------------------------

def load_connections():
    """
    Loads the list of supported connection names from the static config file.
    """
    return config.get("connection")

5 changes: 5 additions & 0 deletions tests/e2e/pytest.ini
@@ -0,0 +1,5 @@
# pytest.ini
[pytest]
filterwarnings =
ignore:.*use of forkpty.*:DeprecationWarning:pty

6 changes: 6 additions & 0 deletions tests/e2e/requirements.txt
@@ -0,0 +1,6 @@
pexpect
pyhocon
pytest
pytest-xdist
pytest-timeout
pytest-timer
29 changes: 29 additions & 0 deletions tests/e2e/test_cases_data/mnpt_data.hocon
@@ -0,0 +1,29 @@
# mnpt_data.hocon
# Input/output test pairs
> **Collaborator:** Don't abbreviate, leaving people guessing as to what this file is for. No one will instantly know what "mnpt" actually means.


test = [
    {
        input_1: {
            user_text: "Who did yellow submarine?"
            answer: {
                type_match: "keyword"
                word: "Beatles"
                cost: "3.0"
> **Collaborator (@d1donlydfink, Apr 29, 2025):** That you have cost built in as a key likely means that this format is very tightly coupled to a particular test. It would be worth your while to deeply understand the hocon format used in the tests/fixtures area.

            }
        }
    },
    {
        input_2: {
            user_text: "Where were they from?"
            answer: {
                type_match: "keyword"
                word: "Liverpool"
                cost: "6.0"
            }
        }
    },
    {
        input_done: "quit"
    }
> **Collaborator:** You should be able to make this test using the existing infrastructure. See https://github.com/leaf-ai/neuro-san/blob/main/tests/fixtures/music_nerd/beatles_with_history.hocon as a basis for continuing infrastructure and https://github.com/leaf-ai/neuro-san/blob/main/tests/fixtures/math_guy/basic_sly_data.hocon for sly_data.

> **Contributor Author:** Yes, I would like to use the sly_data feature, but I haven't got to it yet. That's an excellent suggestion; I'll look into it.

]

21 changes: 21 additions & 0 deletions tests/e2e/tests/test_music_nerd_pro.py
@@ -0,0 +1,21 @@
# test_music_nerd_pro.py
# ---------------------------------------------------------
# Parametrized test case that drives CLI interaction test
# ---------------------------------------------------------

import pytest
from utils.mnpt_hocon_loader import extract_test_values
from utils.mnpt_test_runner import run_test


@pytest.mark.timeout(120)
def test_run_connection(connection_name, repeat_index, request):
    """
    Main test entry point for testing music_nerd_pro agent over various connections.
    """
    use_thinking_file = request.config.getoption("--thinking-file")

    # Only the connection name is passed; the loader resolves everything else
    result = extract_test_values(connection_name)

    run_test(*result, repeat_index, use_thinking_file)

69 changes: 69 additions & 0 deletions tests/e2e/utils/mnpt_hocon_loader.py
@@ -0,0 +1,69 @@
# ------------------------------------------------------------------------
# mnpt_hocon_loader.py
# ------------------------------------------------------------------------
# Utility functions for loading test prompt/response values from HOCON files.
# Separates test data loading from agent configuration loading.
# ------------------------------------------------------------------------

import os
from pyhocon import ConfigFactory

# ------------------------------------------------------------------------
# Path to the TEST DATA HOCON file
# - This file contains input prompts and expected agent outputs.
# - NOTE: Only test cases, no agent config.
# ------------------------------------------------------------------------

TEST_DATA_HOCON_PATH = os.path.join(
    os.path.dirname(__file__),               # This utils/ folder
    "../test_cases_data/mnpt_data.hocon"     # Relative path to test_cases_data/
)

# ------------------------------------------------------------------------
# Load the test data once at import time
# ------------------------------------------------------------------------
test_data = ConfigFactory.parse_file(os.path.abspath(TEST_DATA_HOCON_PATH))

# ------------------------------------------------------------------------
# Function: extract_test_values
# Description:
# - Loads the prompts and expected answer keywords/costs
# - Validates the connection name if needed
# - Returns extracted values for CLI interaction testing
# ------------------------------------------------------------------------
def extract_test_values(connection_name):
    """
    Loads test prompts and expected outputs for a given connection
    from the test data HOCON file.

    Args:
        connection_name (str): The type of connection to validate (e.g., "grpc", "http")

    Returns:
        tuple: (connection_name, prompt_1, prompt_2, word_1, word_2, cost_1, cost_2, input_done)
    """

    # If you want to validate connection types, you can add that here.
    # Example connection list: ["direct", "grpc", "http"]

    # Pull the list of test prompts and expected outputs
    test_entries = test_data.get("test")

    # Extract the first test input
    input_1 = next(item["input_1"] for item in test_entries if "input_1" in item)
    prompt_1 = input_1.get("user_text")
    word_1 = input_1.get("answer.word")
    cost_1 = input_1.get("answer.cost")

    # Extract the second test input
    input_2 = next(item["input_2"] for item in test_entries if "input_2" in item)
    prompt_2 = input_2.get("user_text")
    word_2 = input_2.get("answer.word")
    cost_2 = input_2.get("answer.cost")

    # Extract the input for termination (e.g., "quit")
    input_done = next((item.get("input_done") for item in test_entries if "input_done" in item), None)

    # Return all values required for the test runner
    return connection_name, prompt_1, prompt_2, word_1, word_2, cost_1, cost_2, input_done
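The loader leans on pyhocon's dotted-path `get` (e.g. `input_1.get("answer.word")`). On plain dicts the same lookup can be sketched as follows; `dotted_get` is a hypothetical helper for illustration, not part of the suite:

```python
def dotted_get(tree, path, default=None):
    """Mimic pyhocon's ConfigTree.get("a.b.c") dotted lookup on nested dicts."""
    node = tree
    for key in path.split("."):
        if not isinstance(node, dict) or key not in node:
            return default
        node = node[key]
    return node


# Shaped like one input_1 entry from mnpt_data.hocon
entry = {"user_text": "Who did yellow submarine?",
         "answer": {"type_match": "keyword", "word": "Beatles", "cost": "3.0"}}
print(dotted_get(entry, "answer.word"), dotted_get(entry, "answer.cost"))
```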
