Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
397 changes: 397 additions & 0 deletions week4/community-contributions/johngorithm/week4 exercise.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,397 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "b7f43957",
"metadata": {},
"source": [
"# Week 4 Exercise - Multi-Language Test Case Generator\n",
"\n",
"Write or paste source code, select a language and LLM model, then generate and run unit tests.\n",
"\n",
"**Supported languages:** Python, JavaScript, TypeScript, Java, Ruby, Go, Rust, C++, C, C#, PHP, Swift, Kotlin"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d3f0f314",
"metadata": {},
"outputs": [],
"source": [
"# imports\n",
"\n",
"import os\n",
"import re\n",
"import subprocess\n",
"import tempfile\n",
"from dotenv import load_dotenv\n",
"from openai import OpenAI\n",
"import gradio as gr"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "50218fa6",
"metadata": {},
"outputs": [],
"source": [
"# environment and client setup\n",
"\n",
"load_dotenv(override=True)\n",
"\n",
"# Fail fast if the OpenAI key is missing; OpenAI() reads it from the environment.\n",
"openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n",
"if not openai_api_key:\n",
" raise ValueError(\"OPENAI_API_KEY is not set\")\n",
"\n",
"# Ollama exposes an OpenAI-compatible endpoint locally; its api_key is a placeholder.\n",
"openai_client = OpenAI()\n",
"ollama_client = OpenAI(base_url=\"http://localhost:11434/v1\", api_key=\"ollama\")\n",
"\n",
"# UI model name -> client + model id used for chat completions.\n",
"MODELS = {\n",
" \"gpt-oss\": {\"client\": ollama_client, \"model\": \"gpt-oss\"},\n",
" \"llama3.2\": {\"client\": ollama_client, \"model\": \"llama3.2\"},\n",
" \"gpt-5-nano\": {\"client\": openai_client, \"model\": \"gpt-5-nano\"},\n",
"}\n",
"\n",
"# language -> (temp-file suffix, command builder) for languages we can execute locally.\n",
"RUNNERS = {\n",
" \"python\": (\".py\", lambda f: [\"python\", f]),\n",
" \"javascript\": (\".js\", lambda f: [\"node\", f]),\n",
" \"typescript\": (\".ts\", lambda f: [\"npx\", \"ts-node\", f]),\n",
" \"ruby\": (\".rb\", lambda f: [\"ruby\", f]),\n",
" \"php\": (\".php\", lambda f: [\"php\", f]),\n",
" \"swift\": (\".swift\", lambda f: [\"swift\", f]),\n",
"}\n",
"\n",
"# Editor dropdown choices — a superset of RUNNERS; the rest are generate-only.\n",
"EDITOR_LANGUAGES = [\n",
" \"python\", \"javascript\", \"typescript\", \"java\", \"ruby\", \"go\",\n",
" \"rust\", \"cpp\", \"c\", \"csharp\", \"php\", \"swift\", \"kotlin\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bf65565c",
"metadata": {},
"outputs": [],
"source": [
"# core logic\n",
"\n",
"def build_system_prompt(language):\n",
"    \"\"\"Build the system prompt instructing the LLM to emit runnable unit tests.\n",
"\n",
"    Each list entry below is one instruction; they are joined with single\n",
"    spaces, producing exactly the same prompt text as before.\n",
"    \"\"\"\n",
"    instructions = [\n",
"        f\"You are an expert {language} developer specializing in unit testing.\",\n",
"        \"Given source code, generate a maximum of 7 unit tests covering normal behavior, edge cases, and error handling.\",\n",
"        f\"Output ONLY valid, runnable {language} code with no markdown fences and no prose outside of code comments.\",\n",
"        \"Include the original source code so the test file is self-contained and executable.\",\n",
"        \"Do not modify, rewrite, or refactor the original source code; write tests against it as-is.\",\n",
"        \"Configure the test runner for verbose output (e.g., unittest.main(verbosity=2) in Python).\",\n",
"        \"Do NOT guess behavior. If behavior is ambiguous, skip that test. Generate only high-confidence tests with deterministic assertions.\",\n",
"        \"Name tests with clear function-style identifiers (prefer test_* naming) so results can display exact test names.\",\n",
"    ]\n",
"    return \" \".join(instructions)\n",
"\n",
"\n",
"def clean_llm_output(text):\n",
"    \"\"\"Remove surrounding markdown code fences that LLMs sometimes add despite instructions.\"\"\"\n",
"    cleaned = text.strip()\n",
"    # Drop an opening fence line such as ```python (everything up to the first newline).\n",
"    if cleaned.startswith(\"```\"):\n",
"        _opening, newline, remainder = cleaned.partition(\"\\n\")\n",
"        if newline:\n",
"            cleaned = remainder\n",
"    # Drop a trailing closing fence, plus any whitespace left before it.\n",
"    if cleaned.endswith(\"```\"):\n",
"        cleaned = cleaned[:-3].rstrip()\n",
"    return cleaned\n",
"\n",
"\n",
"def generate_tests(code, model_name, language):\n",
"    \"\"\"Stream unit-test code for `code` from the selected LLM.\n",
"\n",
"    Args:\n",
"        code: source code to generate tests for (treated as read-only).\n",
"        model_name: key into MODELS selecting the client and model id.\n",
"        language: language name interpolated into the prompts.\n",
"\n",
"    Yields:\n",
"        The accumulated, fence-stripped reply after each streamed fragment.\n",
"    \"\"\"\n",
"    config = MODELS[model_name]\n",
"    system_prompt = build_system_prompt(language)\n",
"    user_prompt = (\n",
"        f\"Generate comprehensive unit tests for this {language} code. \"\n",
"        f\"Treat the source as read-only and do not edit the original implementation.\\n\\n{code}\"\n",
"    )\n",
"\n",
"    request_args = {\n",
"        \"model\": config[\"model\"],\n",
"        \"messages\": [\n",
"            {\"role\": \"system\", \"content\": system_prompt},\n",
"            {\"role\": \"user\", \"content\": user_prompt},\n",
"        ],\n",
"        \"stream\": True,\n",
"    }\n",
"\n",
"    # OpenAI models require max_completion_tokens; Ollama typically expects max_tokens.\n",
"    if model_name == \"llama3.2\":\n",
"        request_args[\"max_tokens\"] = 4096\n",
"    else:\n",
"        request_args[\"max_completion_tokens\"] = 4096\n",
"\n",
"    stream = config[\"client\"].chat.completions.create(**request_args)\n",
"\n",
"    reply = \"\"\n",
"    for chunk in stream:\n",
"        # BUGFIX: some backends emit keep-alive/usage chunks whose `choices`\n",
"        # list is empty; indexing chunk.choices[0] on those raised IndexError.\n",
"        if not chunk.choices:\n",
"            continue\n",
"        reply += chunk.choices[0].delta.content or \"\"\n",
"        yield clean_llm_output(reply)\n",
"\n",
"\n",
"def run_tests(test_code, language):\n",
"    \"\"\"Write `test_code` to a temp file and execute it with the language's runner.\n",
"\n",
"    Returns the combined stdout/stderr of the run, or an explanatory message\n",
"    when the language has no configured runner, the run times out (30s), or\n",
"    the runtime binary is missing. The temp file is always removed afterwards.\n",
"    \"\"\"\n",
"    if language not in RUNNERS:\n",
"        return (\n",
"            f\"Automatic test execution is not supported for '{language}'. \"\n",
"            \"Copy the generated tests and run them manually.\"\n",
"        )\n",
"\n",
"    suffix, build_command = RUNNERS[language]\n",
"    temp = tempfile.NamedTemporaryFile(mode=\"w\", suffix=suffix, delete=False)\n",
"    try:\n",
"        temp.write(test_code)\n",
"        temp.close()\n",
"        completed = subprocess.run(\n",
"            build_command(temp.name), capture_output=True, text=True, timeout=30\n",
"        )\n",
"        combined = (completed.stdout + \"\\n\" + completed.stderr).strip()\n",
"        return combined if combined else \"Tests completed with no output.\"\n",
"    except subprocess.TimeoutExpired:\n",
"        return \"Test execution timed out (30s limit).\"\n",
"    except FileNotFoundError as e:\n",
"        return f\"Runtime not found: {e}\\nMake sure the {language} runtime is installed.\"\n",
"    finally:\n",
"        os.unlink(temp.name)\n",
"\n",
"\n",
"def extract_test_name(raw_name):\n",
"    \"\"\"Reduce a raw test label to a bare test identifier where possible.\n",
"\n",
"    Falls back to returning the full (stripped) label when no function-style\n",
"    identifier can be recognized — never truncates arbitrarily.\n",
"    \"\"\"\n",
"    label = raw_name.strip()\n",
"\n",
"    # pytest node ids look like path::Class::test_function -> keep the last segment\n",
"    if \"::\" in label:\n",
"        last_segment = label.rsplit(\"::\", 1)[-1].strip()\n",
"        if last_segment:\n",
"            return last_segment\n",
"\n",
"    # unittest verbose labels look like \"test_xxx (ClassName)\" -> keep the head\n",
"    if \"(\" in label:\n",
"        candidate = label.partition(\"(\")[0].strip()\n",
"        if re.fullmatch(r\"[A-Za-z_][A-Za-z0-9_]*\", candidate):\n",
"            return candidate\n",
"\n",
"    # Otherwise prefer the first explicit test_* style identifier, if any.\n",
"    match = re.search(r\"\\b(test[_A-Za-z0-9]+)\\b\", label)\n",
"    return match.group(1) if match else label\n",
"\n",
"\n",
"def format_test_results(raw_output):\n",
"    \"\"\"Turn raw test-runner output into a numbered pass/fail summary.\n",
"\n",
"    Parses unittest-verbose lines (\"name ... ok|FAIL|ERROR\"), pytest verbose\n",
"    lines (\"node_id PASSED|FAILED|ERROR\", with or without the trailing\n",
"    \"[ NN%]\" progress marker pytest prints by default), and generic\n",
"    check-mark/cross lines. Returns raw_output unchanged if nothing parses.\n",
"    \"\"\"\n",
"    lines = raw_output.splitlines()\n",
"    parsed = []\n",
"\n",
"    for line in lines:\n",
"        text = line.strip()\n",
"\n",
"        # unittest verbose format: test_name (Class) ... ok|FAIL|ERROR\n",
"        m = re.match(r\"^(.+?)\\s+\\.{3}\\s+(ok|FAIL|ERROR)$\", text)\n",
"        if m:\n",
"            test_name = extract_test_name(m.group(1).strip())\n",
"            parsed.append((test_name, m.group(2) == \"ok\"))\n",
"            continue\n",
"\n",
"        # pytest format: path::class::test_name PASSED|FAILED|ERROR [ 45%]\n",
"        # BUGFIX: pytest -v appends a \"[ NN%]\" progress suffix by default;\n",
"        # the old pattern anchored $ right after the status and never matched.\n",
"        m = re.match(r\"^(.+?)\\s+(PASSED|FAILED|ERROR)(?:\\s+\\[\\s*\\d+%\\])?$\", text)\n",
"        if m:\n",
"            test_name = extract_test_name(m.group(1).strip())\n",
"            parsed.append((test_name, m.group(2) == \"PASSED\"))\n",
"            continue\n",
"\n",
"        # generic check/cross output\n",
"        m = re.match(r\"^[✓✔]\\s+(.+)$\", text)\n",
"        if m:\n",
"            parsed.append((extract_test_name(m.group(1).strip()), True))\n",
"            continue\n",
"        m = re.match(r\"^[✗✘×]\\s+(.+)$\", text)\n",
"        if m:\n",
"            parsed.append((extract_test_name(m.group(1).strip()), False))\n",
"            continue\n",
"\n",
"    if not parsed:\n",
"        return raw_output\n",
"\n",
"    passed = sum(1 for _, ok in parsed if ok)\n",
"    failed = len(parsed) - passed\n",
"\n",
"    # Prefer unittest's own \"Ran N tests in X.XXXs\" summary line when present.\n",
"    ran_line = \"\"\n",
"    for line in lines:\n",
"        m = re.search(r\"Ran\\s+(\\d+)\\s+tests?\\s+in\\s+([0-9.]+s)\", line)\n",
"        if m:\n",
"            ran_line = f\"Ran {m.group(1)} tests in {m.group(2)}\"\n",
"            break\n",
"    if not ran_line:\n",
"        ran_line = f\"Ran {len(parsed)} tests\"\n",
"\n",
"    result_lines = []\n",
"    for idx, (name, ok) in enumerate(parsed, start=1):\n",
"        icon = \"✅\" if ok else \"❌\"\n",
"        result_lines.append(f\"{idx}. {icon} {name}\")\n",
"\n",
"    if failed == 0:\n",
"        final_status = \"All tests passed\"\n",
"    else:\n",
"        final_status = f\"{passed} passed and {failed} failed\"\n",
"\n",
"    result_lines.extend([\n",
"        \"\",\n",
"        \"Final note:\",\n",
"        f\"• {ran_line}\",\n",
"        f\"• {final_status}\",\n",
"    ])\n",
"\n",
"    return \"\\n\".join(result_lines)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c34f0afd",
"metadata": {},
"outputs": [],
"source": [
"# gradio UI\n",
"\n",
"\n",
"def on_editor_lang_change(language, current_code):\n",
"    \"\"\"Sync the language state and re-render the editor with the new highlighting.\"\"\"\n",
"    refreshed_editor = gr.Code(value=current_code, language=language)\n",
"    return language, refreshed_editor\n",
"\n",
"\n",
"def on_generate(code, model_name, lang_state):\n",
"    \"\"\"Validate inputs, then stream generated tests into the output editor.\"\"\"\n",
"    # Guard clauses: surface user errors in the output panel instead of raising.\n",
"    if not (code and code.strip()):\n",
"        yield gr.Code(value=\"No source code provided. Type or paste code in the editor.\", language=\"python\")\n",
"        return\n",
"    if not lang_state:\n",
"        yield gr.Code(value=\"Language not selected. Choose a language from the dropdown.\", language=\"python\")\n",
"        return\n",
"\n",
"    for partial in generate_tests(code, model_name, lang_state):\n",
"        yield gr.Code(value=partial, language=lang_state)\n",
"\n",
"\n",
"def on_run_tests(test_code, lang_state):\n",
"    \"\"\"Execute the generated tests and return a formatted pass/fail summary.\"\"\"\n",
"    # Guard clauses: report missing inputs as plain messages in the results box.\n",
"    if not (test_code and test_code.strip()):\n",
"        return \"No test code to run. Generate tests first.\"\n",
"    if not lang_state:\n",
"        return \"Language not selected. Choose a language from the dropdown.\"\n",
"\n",
"    return format_test_results(run_tests(test_code, lang_state))\n",
"\n",
"\n",
"# CSS: cap the generated-tests panel (and its CodeMirror internals) at 80vh.\n",
"CUSTOM_CSS = \"\"\"\n",
"#generated-tests-box {\n",
" max-height: 80vh;\n",
"}\n",
"#generated-tests-box .cm-editor,\n",
"#generated-tests-box .cm-scroller,\n",
"#generated-tests-box textarea {\n",
" max-height: 80vh !important;\n",
" overflow: auto !important;\n",
"}\n",
"\"\"\"\n",
"\n",
"# Three-column layout: source editor | generated tests | run results.\n",
"with gr.Blocks(title=\"Test Case Generator\", theme=gr.themes.Soft(), css=CUSTOM_CSS) as ui:\n",
" gr.Markdown(\"# Test Case Generator\\nWrite code, select a language, generate tests with an LLM, and run them.\")\n",
" lang_state = gr.State(value=\"python\")\n",
"\n",
" with gr.Row(equal_height=True):\n",
" # Left column — code editor + language selector + model selector + generate button\n",
" with gr.Column(scale=1):\n",
" code_display = gr.Code(\n",
" label=\"Source Code\",\n",
" language=\"python\",\n",
" lines=18,\n",
" interactive=True,\n",
" )\n",
" editor_lang = gr.Dropdown(\n",
" choices=EDITOR_LANGUAGES,\n",
" value=\"python\",\n",
" label=\"Language\",\n",
" )\n",
" model_dropdown = gr.Dropdown(\n",
" choices=list(MODELS.keys()),\n",
" value=\"gpt-oss\",\n",
" label=\"LLM Model\",\n",
" )\n",
" generate_btn = gr.Button(\n",
" \"Validate & Generate Tests\", variant=\"primary\", size=\"lg\"\n",
" )\n",
"\n",
" # Middle column — generated tests (85%), run button (15%)\n",
" with gr.Column(scale=1):\n",
" test_output = gr.Code(\n",
" label=\"Generated Test Cases\",\n",
" language=\"python\",\n",
" lines=22,\n",
" interactive=True,\n",
" elem_id=\"generated-tests-box\",\n",
" )\n",
" run_btn = gr.Button(\"Run Tests\", variant=\"secondary\", size=\"lg\")\n",
"\n",
" # Right column — test execution results\n",
" with gr.Column(scale=1):\n",
" test_results = gr.Textbox(\n",
" label=\"Test Results\",\n",
" lines=25,\n",
" max_lines=40,\n",
" interactive=False,\n",
" show_copy_button=True,\n",
" )\n",
"\n",
" # Event wiring: dropdown updates state + editor highlighting; generate\n",
" # streams into test_output; run executes test_output and fills test_results.\n",
" editor_lang.change(\n",
" fn=on_editor_lang_change,\n",
" inputs=[editor_lang, code_display],\n",
" outputs=[lang_state, code_display],\n",
" )\n",
" generate_btn.click(\n",
" on_generate,\n",
" inputs=[code_display, model_dropdown, lang_state],\n",
" outputs=[test_output],\n",
" )\n",
" run_btn.click(\n",
" on_run_tests, inputs=[test_output, lang_state], outputs=[test_results],\n",
" )\n",
"\n",
"ui.launch(inbrowser=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a239440",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}