Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
397 changes: 397 additions & 0 deletions week4/community-contributions/johngorithm/week4 exercise.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,397 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b7f43957",
"metadata": {},
"source": [
"# Week 4 Exercise - Multi-Language Test Case Generator\n",
"\n",
"Write or paste source code, select a language and LLM model, then generate and run unit tests.\n",
"\n",
"**Supported languages:** Python, JavaScript, TypeScript, Java, Ruby, Go, Rust, C++, C, C#, PHP, Swift, Kotlin"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d3f0f314",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import re\n",
"import subprocess\n",
"import tempfile\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"import gradio as gr"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "50218fa6",
"metadata": {},
"outputs": [],
"source": [
"# environment and client setup\n",
"\n",
"load_dotenv(override=True)\n",
"\n",
"# Fail fast if the OpenAI key is missing; OpenAI() reads it from the environment.\n",
"openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n",
"if not openai_api_key:\n",
" raise ValueError(\"OPENAI_API_KEY is not set\")\n",
"\n",
"# Ollama exposes an OpenAI-compatible endpoint locally; its api_key is a placeholder.\n",
"openai_client = OpenAI()\n",
"ollama_client = OpenAI(base_url=\"http://localhost:11434/v1\", api_key=\"ollama\")\n",
"\n",
"# UI model name -> client + model id used for chat completions.\n",
"MODELS = {\n",
" \"gpt-oss\": {\"client\": ollama_client, \"model\": \"gpt-oss\"},\n",
" \"llama3.2\": {\"client\": ollama_client, \"model\": \"llama3.2\"},\n",
" \"gpt-5-nano\": {\"client\": openai_client, \"model\": \"gpt-5-nano\"},\n",
"}\n",
"\n",
"# language -> (temp-file suffix, command builder) for languages we can execute locally.\n",
"RUNNERS = {\n",
" \"python\": (\".py\", lambda f: [\"python\", f]),\n",
" \"javascript\": (\".js\", lambda f: [\"node\", f]),\n",
" \"typescript\": (\".ts\", lambda f: [\"npx\", \"ts-node\", f]),\n",
" \"ruby\": (\".rb\", lambda f: [\"ruby\", f]),\n",
" \"php\": (\".php\", lambda f: [\"php\", f]),\n",
" \"swift\": (\".swift\", lambda f: [\"swift\", f]),\n",
"}\n",
"\n",
"# Editor dropdown choices — a superset of RUNNERS; the rest are generate-only.\n",
"EDITOR_LANGUAGES = [\n",
" \"python\", \"javascript\", \"typescript\", \"java\", \"ruby\", \"go\",\n",
" \"rust\", \"cpp\", \"c\", \"csharp\", \"php\", \"swift\", \"kotlin\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf65565c",
"metadata": {},
"outputs": [],
"source": [
"# core logic\n",
"\n",
"def build_system_prompt(language):\n",
"    \"\"\"Build the system prompt instructing the LLM to emit runnable unit tests.\n",
"\n",
"    Each list entry below is one instruction; they are joined with single\n",
"    spaces, producing exactly the same prompt text as before.\n",
"    \"\"\"\n",
"    instructions = [\n",
"        f\"You are an expert {language} developer specializing in unit testing.\",\n",
"        \"Given source code, generate a maximum of 7 unit tests covering normal behavior, edge cases, and error handling.\",\n",
"        f\"Output ONLY valid, runnable {language} code with no markdown fences and no prose outside of code comments.\",\n",
"        \"Include the original source code so the test file is self-contained and executable.\",\n",
"        \"Do not modify, rewrite, or refactor the original source code; write tests against it as-is.\",\n",
"        \"Configure the test runner for verbose output (e.g., unittest.main(verbosity=2) in Python).\",\n",
"        \"Do NOT guess behavior. If behavior is ambiguous, skip that test. Generate only high-confidence tests with deterministic assertions.\",\n",
"        \"Name tests with clear function-style identifiers (prefer test_* naming) so results can display exact test names.\",\n",
"    ]\n",
"    return \" \".join(instructions)\n",
"\n",
"\n",
"def clean_llm_output(text):\n",
"    \"\"\"Remove surrounding markdown code fences that LLMs sometimes add despite instructions.\"\"\"\n",
"    cleaned = text.strip()\n",
"    # Drop an opening fence line such as ```python (everything up to the first newline).\n",
"    if cleaned.startswith(\"```\"):\n",
"        _opening, newline, remainder = cleaned.partition(\"\\n\")\n",
"        if newline:\n",
"            cleaned = remainder\n",
"    # Drop a trailing closing fence, plus any whitespace left before it.\n",
"    if cleaned.endswith(\"```\"):\n",
"        cleaned = cleaned[:-3].rstrip()\n",
"    return cleaned\n",
"\n",
"\n",
"def generate_tests(code, model_name, language):\n",
"    \"\"\"Stream unit-test code for `code` from the selected LLM.\n",
"\n",
"    Args:\n",
"        code: source code to generate tests for (treated as read-only).\n",
"        model_name: key into MODELS selecting the client and model id.\n",
"        language: language name interpolated into the prompts.\n",
"\n",
"    Yields:\n",
"        The accumulated, fence-stripped reply after each streamed fragment.\n",
"    \"\"\"\n",
"    config = MODELS[model_name]\n",
"    system_prompt = build_system_prompt(language)\n",
"    user_prompt = (\n",
"        f\"Generate comprehensive unit tests for this {language} code. \"\n",
"        f\"Treat the source as read-only and do not edit the original implementation.\\n\\n{code}\"\n",
"    )\n",
"\n",
"    request_args = {\n",
"        \"model\": config[\"model\"],\n",
"        \"messages\": [\n",
"            {\"role\": \"system\", \"content\": system_prompt},\n",
"            {\"role\": \"user\", \"content\": user_prompt},\n",
"        ],\n",
"        \"stream\": True,\n",
"    }\n",
"\n",
"    # OpenAI models require max_completion_tokens; Ollama typically expects max_tokens.\n",
"    if model_name == \"llama3.2\":\n",
"        request_args[\"max_tokens\"] = 4096\n",
"    else:\n",
"        request_args[\"max_completion_tokens\"] = 4096\n",
"\n",
"    stream = config[\"client\"].chat.completions.create(**request_args)\n",
"\n",
"    reply = \"\"\n",
"    for chunk in stream:\n",
"        # BUGFIX: some backends emit keep-alive/usage chunks whose `choices`\n",
"        # list is empty; indexing chunk.choices[0] on those raised IndexError.\n",
"        if not chunk.choices:\n",
"            continue\n",
"        reply += chunk.choices[0].delta.content or \"\"\n",
"        yield clean_llm_output(reply)\n",
"\n",
"\n",
"def run_tests(test_code, language):\n",
"    \"\"\"Write `test_code` to a temp file and execute it with the language's runner.\n",
"\n",
"    Returns the combined stdout/stderr of the run, or an explanatory message\n",
"    when the language has no configured runner, the run times out (30s), or\n",
"    the runtime binary is missing. The temp file is always removed afterwards.\n",
"    \"\"\"\n",
"    if language not in RUNNERS:\n",
"        return (\n",
"            f\"Automatic test execution is not supported for '{language}'. \"\n",
"            \"Copy the generated tests and run them manually.\"\n",
"        )\n",
"\n",
"    suffix, build_command = RUNNERS[language]\n",
"    temp = tempfile.NamedTemporaryFile(mode=\"w\", suffix=suffix, delete=False)\n",
"    try:\n",
"        temp.write(test_code)\n",
"        temp.close()\n",
"        completed = subprocess.run(\n",
"            build_command(temp.name), capture_output=True, text=True, timeout=30\n",
"        )\n",
"        combined = (completed.stdout + \"\\n\" + completed.stderr).strip()\n",
"        return combined if combined else \"Tests completed with no output.\"\n",
"    except subprocess.TimeoutExpired:\n",
"        return \"Test execution timed out (30s limit).\"\n",
"    except FileNotFoundError as e:\n",
"        return f\"Runtime not found: {e}\\nMake sure the {language} runtime is installed.\"\n",
"    finally:\n",
"        os.unlink(temp.name)\n",
"\n",
"\n",
"def extract_test_name(raw_name):\n",
"    \"\"\"Reduce a raw test label to a bare test identifier where possible.\n",
"\n",
"    Falls back to returning the full (stripped) label when no function-style\n",
"    identifier can be recognized — never truncates arbitrarily.\n",
"    \"\"\"\n",
"    label = raw_name.strip()\n",
"\n",
"    # pytest node ids look like path::Class::test_function -> keep the last segment\n",
"    if \"::\" in label:\n",
"        last_segment = label.rsplit(\"::\", 1)[-1].strip()\n",
"        if last_segment:\n",
"            return last_segment\n",
"\n",
"    # unittest verbose labels look like \"test_xxx (ClassName)\" -> keep the head\n",
"    if \"(\" in label:\n",
"        candidate = label.partition(\"(\")[0].strip()\n",
"        if re.fullmatch(r\"[A-Za-z_][A-Za-z0-9_]*\", candidate):\n",
"            return candidate\n",
"\n",
"    # Otherwise prefer the first explicit test_* style identifier, if any.\n",
"    match = re.search(r\"\\b(test[_A-Za-z0-9]+)\\b\", label)\n",
"    return match.group(1) if match else label\n",
"\n",
"\n",
"def format_test_results(raw_output):\n",
"    \"\"\"Turn raw test-runner output into a numbered pass/fail summary.\n",
"\n",
"    Parses unittest-verbose lines (\"name ... ok|FAIL|ERROR\"), pytest verbose\n",
"    lines (\"node_id PASSED|FAILED|ERROR\", with or without the trailing\n",
"    \"[ NN%]\" progress marker pytest prints by default), and generic\n",
"    check-mark/cross lines. Returns raw_output unchanged if nothing parses.\n",
"    \"\"\"\n",
"    lines = raw_output.splitlines()\n",
"    parsed = []\n",
"\n",
"    for line in lines:\n",
"        text = line.strip()\n",
"\n",
"        # unittest verbose format: test_name (Class) ... ok|FAIL|ERROR\n",
"        m = re.match(r\"^(.+?)\\s+\\.{3}\\s+(ok|FAIL|ERROR)$\", text)\n",
"        if m:\n",
"            test_name = extract_test_name(m.group(1).strip())\n",
"            parsed.append((test_name, m.group(2) == \"ok\"))\n",
"            continue\n",
"\n",
"        # pytest format: path::class::test_name PASSED|FAILED|ERROR [ 45%]\n",
"        # BUGFIX: pytest -v appends a \"[ NN%]\" progress suffix by default;\n",
"        # the old pattern anchored $ right after the status and never matched.\n",
"        m = re.match(r\"^(.+?)\\s+(PASSED|FAILED|ERROR)(?:\\s+\\[\\s*\\d+%\\])?$\", text)\n",
"        if m:\n",
"            test_name = extract_test_name(m.group(1).strip())\n",
"            parsed.append((test_name, m.group(2) == \"PASSED\"))\n",
"            continue\n",
"\n",
"        # generic check/cross output\n",
"        m = re.match(r\"^[✓✔]\\s+(.+)$\", text)\n",
"        if m:\n",
"            parsed.append((extract_test_name(m.group(1).strip()), True))\n",
"            continue\n",
"        m = re.match(r\"^[✗✘×]\\s+(.+)$\", text)\n",
"        if m:\n",
"            parsed.append((extract_test_name(m.group(1).strip()), False))\n",
"            continue\n",
"\n",
"    if not parsed:\n",
"        return raw_output\n",
"\n",
"    passed = sum(1 for _, ok in parsed if ok)\n",
"    failed = len(parsed) - passed\n",
"\n",
"    # Prefer unittest's own \"Ran N tests in X.XXXs\" summary line when present.\n",
"    ran_line = \"\"\n",
"    for line in lines:\n",
"        m = re.search(r\"Ran\\s+(\\d+)\\s+tests?\\s+in\\s+([0-9.]+s)\", line)\n",
"        if m:\n",
"            ran_line = f\"Ran {m.group(1)} tests in {m.group(2)}\"\n",
"            break\n",
"    if not ran_line:\n",
"        ran_line = f\"Ran {len(parsed)} tests\"\n",
"\n",
"    result_lines = []\n",
"    for idx, (name, ok) in enumerate(parsed, start=1):\n",
"        icon = \"✅\" if ok else \"❌\"\n",
"        result_lines.append(f\"{idx}. {icon} {name}\")\n",
"\n",
"    if failed == 0:\n",
"        final_status = \"All tests passed\"\n",
"    else:\n",
"        final_status = f\"{passed} passed and {failed} failed\"\n",
"\n",
"    result_lines.extend([\n",
"        \"\",\n",
"        \"Final note:\",\n",
"        f\"• {ran_line}\",\n",
"        f\"• {final_status}\",\n",
"    ])\n",
"\n",
"    return \"\\n\".join(result_lines)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c34f0afd",
"metadata": {},
"outputs": [],
"source": [
"# gradio UI\n",
"\n",
"\n",
"def on_editor_lang_change(language, current_code):\n",
"    \"\"\"Sync the language state and re-render the editor with the new highlighting.\"\"\"\n",
"    refreshed_editor = gr.Code(value=current_code, language=language)\n",
"    return language, refreshed_editor\n",
"\n",
"\n",
"def on_generate(code, model_name, lang_state):\n",
"    \"\"\"Validate inputs, then stream generated tests into the output editor.\"\"\"\n",
"    # Guard clauses: surface user errors in the output panel instead of raising.\n",
"    if not (code and code.strip()):\n",
"        yield gr.Code(value=\"No source code provided. Type or paste code in the editor.\", language=\"python\")\n",
"        return\n",
"    if not lang_state:\n",
"        yield gr.Code(value=\"Language not selected. Choose a language from the dropdown.\", language=\"python\")\n",
"        return\n",
"\n",
"    for partial in generate_tests(code, model_name, lang_state):\n",
"        yield gr.Code(value=partial, language=lang_state)\n",
"\n",
"\n",
"def on_run_tests(test_code, lang_state):\n",
"    \"\"\"Execute the generated tests and return a formatted pass/fail summary.\"\"\"\n",
"    # Guard clauses: report missing inputs as plain messages in the results box.\n",
"    if not (test_code and test_code.strip()):\n",
"        return \"No test code to run. Generate tests first.\"\n",
"    if not lang_state:\n",
"        return \"Language not selected. Choose a language from the dropdown.\"\n",
"\n",
"    return format_test_results(run_tests(test_code, lang_state))\n",
"\n",
"\n",
"# CSS: cap the generated-tests panel (and its CodeMirror internals) at 80vh.\n",
"CUSTOM_CSS = \"\"\"\n",
"#generated-tests-box {\n",
" max-height: 80vh;\n",
"}\n",
"#generated-tests-box .cm-editor,\n",
"#generated-tests-box .cm-scroller,\n",
"#generated-tests-box textarea {\n",
" max-height: 80vh !important;\n",
" overflow: auto !important;\n",
"}\n",
"\"\"\"\n",
"\n",
"# Three-column layout: source editor | generated tests | run results.\n",
"with gr.Blocks(title=\"Test Case Generator\", theme=gr.themes.Soft(), css=CUSTOM_CSS) as ui:\n",
" gr.Markdown(\"# Test Case Generator\\nWrite code, select a language, generate tests with an LLM, and run them.\")\n",
" lang_state = gr.State(value=\"python\")\n",
"\n",
" with gr.Row(equal_height=True):\n",
" # Left column — code editor + language selector + model selector + generate button\n",
" with gr.Column(scale=1):\n",
" code_display = gr.Code(\n",
" label=\"Source Code\",\n",
" language=\"python\",\n",
" lines=18,\n",
" interactive=True,\n",
" )\n",
" editor_lang = gr.Dropdown(\n",
" choices=EDITOR_LANGUAGES,\n",
" value=\"python\",\n",
" label=\"Language\",\n",
" )\n",
" model_dropdown = gr.Dropdown(\n",
" choices=list(MODELS.keys()),\n",
" value=\"gpt-oss\",\n",
" label=\"LLM Model\",\n",
" )\n",
" generate_btn = gr.Button(\n",
" \"Validate & Generate Tests\", variant=\"primary\", size=\"lg\"\n",
" )\n",
"\n",
" # Middle column — generated tests (85%), run button (15%)\n",
" with gr.Column(scale=1):\n",
" test_output = gr.Code(\n",
" label=\"Generated Test Cases\",\n",
" language=\"python\",\n",
" lines=22,\n",
" interactive=True,\n",
" elem_id=\"generated-tests-box\",\n",
" )\n",
" run_btn = gr.Button(\"Run Tests\", variant=\"secondary\", size=\"lg\")\n",
"\n",
" # Right column — test execution results\n",
" with gr.Column(scale=1):\n",
" test_results = gr.Textbox(\n",
" label=\"Test Results\",\n",
" lines=25,\n",
" max_lines=40,\n",
" interactive=False,\n",
" show_copy_button=True,\n",
" )\n",
"\n",
" # Event wiring: dropdown updates state + editor highlighting; generate\n",
" # streams into test_output; run executes test_output and fills test_results.\n",
" editor_lang.change(\n",
" fn=on_editor_lang_change,\n",
" inputs=[editor_lang, code_display],\n",
" outputs=[lang_state, code_display],\n",
" )\n",
" generate_btn.click(\n",
" on_generate,\n",
" inputs=[code_display, model_dropdown, lang_state],\n",
" outputs=[test_output],\n",
" )\n",
" run_btn.click(\n",
" on_run_tests, inputs=[test_output, lang_state], outputs=[test_results],\n",
" )\n",
"\n",
"ui.launch(inbrowser=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a239440",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}