ed-donner · profe-ssor · Mar 5, 2026
diff --git a/week5/community-contributions/profe-ssor/week5 Exercise.ipynb b/week5/community-contributions/profe-ssor/week5 Exercise.ipynb
@@ -0,0 +1,328 @@
+{
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "# Week 5 Exercise — Research / Learning RAG\n",
+        "\n",
+        "**Course notes & research digest Q&A**\n",
+        "\n",
+        "A RAG assistant over your knowledge base (course notes, lecture summaries, or research docs). Answer questions using only the retrieved context.\n",
+        "\n",
+        "**LLM backends:** Ollama (local), OpenRouter, or Anthropic (Claude)."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 1,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "True"
+            ]
+          },
+          "execution_count": 1,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "import os\n",
+        "from pathlib import Path\n",
+        "from dotenv import load_dotenv\n",
+        "from langchain_openai import ChatOpenAI\n",
+        "from langchain_ollama import ChatOllama\n",
+        "from langchain_anthropic import ChatAnthropic\n",
+        "from langchain_chroma import Chroma\n",
+        "from langchain_core.messages import SystemMessage, HumanMessage\n",
+        "from langchain_community.document_loaders import DirectoryLoader, TextLoader\n",
+        "from langchain_text_splitters import RecursiveCharacterTextSplitter\n",
+        "from langchain_huggingface import HuggingFaceEmbeddings\n",
+        "import gradio as gr\n",
+        "\n",
+        "load_dotenv(override=True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Config\n",
+        "\n",
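+        "- **KNOWLEDGE_BASE**: folder of Markdown (`.md`) docs organised in subfolders; each subfolder's name is stored as the `doc_type` metadata of its documents. Default points to week5's `knowledge-base`; replace with your own course/research folder.\n",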
+        "- **LLM_PROVIDER**: `\"ollama\"` | `\"openrouter\"` | `\"anthropic\"`.\n",
+        "- Set `OPENROUTER_API_KEY` in `.env` for OpenRouter; `ANTHROPIC_API_KEY` for Claude."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 2,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Run notebook from week5/community-contributions/profe-ssor so paths resolve correctly\n",
+        "NOTEBOOK_DIR = Path(\".\").resolve()\n",
+        "\n",
+        "# Knowledge base: week5 default, or your own path (e.g. \"research_notes\", \"course_notes\")\n",
+        "KNOWLEDGE_BASE = NOTEBOOK_DIR / \"../../knowledge-base\"\n",
+        "DB_NAME = str(NOTEBOOK_DIR / \"research_vector_db\")\n",
+        "\n",
+        "LLM_PROVIDER = \"ollama\"  # \"ollama\" | \"openrouter\" | \"anthropic\"\n",
+        "OLLAMA_MODEL = \"llama3.2\"\n",
+        "OPENROUTER_MODEL = \"openai/gpt-4o-mini\"\n",
+        "ANTHROPIC_MODEL = \"claude-3-5-haiku-20241022\"\n",
+        "\n",
+        "CHUNK_SIZE = 500\n",
+        "CHUNK_OVERLAP = 100\n",
+        "RETRIEVAL_K = 6"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# One embedding model instance, shared by the ingest and retrieval cells below.\n",
+        "embeddings = HuggingFaceEmbeddings(\n",
+        "    model_name=\"all-MiniLM-L6-v2\",\n",
+        ")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Ingest: load docs, chunk, embed, store in Chroma\n",
+        "\n",
+        "Uses HuggingFace embeddings (no API key needed). Run once to build the vector store."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 4,
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Loaded 76 documents, 884 chunks.\n",
+            "Vector store saved to /home/professor/projects/llm_engineering/week5/community-contributions/profe-ssor/research_vector_db\n"
+          ]
+        }
+      ],
+      "source": [
+        "def load_documents():\n",
+        "    \"\"\"Load the Markdown files under KNOWLEDGE_BASE as LangChain documents.\n",
+        "\n",
+        "    Files inside a subfolder (searched recursively) get metadata[\"doc_type\"] set\n",
+        "    to that subfolder's name. .md files sitting directly in KNOWLEDGE_BASE are\n",
+        "    loaded as well, tagged with the knowledge-base folder's own name, so a flat\n",
+        "    folder of notes is indexed instead of silently yielding nothing.\n",
+        "\n",
+        "    Returns:\n",
+        "        list: the loaded documents, top-level files first, then subfolders in sorted order.\n",
+        "\n",
+        "    Raises:\n",
+        "        FileNotFoundError: if KNOWLEDGE_BASE is not an existing directory.\n",
+        "    \"\"\"\n",
+        "    base = Path(KNOWLEDGE_BASE)\n",
+        "    if not base.is_dir():\n",
+        "        raise FileNotFoundError(f\"Knowledge base not found: {base}. Point KNOWLEDGE_BASE to a folder of .md files.\")\n",
+        "\n",
+        "    # (folder, glob, doc_type) triples. \"*.md\" on the base picks up top-level files only;\n",
+        "    # each subfolder is searched recursively. Sorting makes the load order deterministic.\n",
+        "    sources = [(base, \"*.md\", base.resolve().name)]\n",
+        "    sources += [\n",
+        "        (folder, \"**/*.md\", folder.name)\n",
+        "        for folder in sorted(base.iterdir())\n",
+        "        if folder.is_dir()\n",
+        "    ]\n",
+        "\n",
+        "    docs = []\n",
+        "    for folder, pattern, doc_type in sources:\n",
+        "        loader = DirectoryLoader(\n",
+        "            str(folder),\n",
+        "            glob=pattern,\n",
+        "            loader_cls=TextLoader,\n",
+        "            loader_kwargs={\"encoding\": \"utf-8\"},\n",
+        "        )\n",
+        "        for doc in loader.load():\n",
+        "            doc.metadata[\"doc_type\"] = doc_type\n",
+        "            docs.append(doc)\n",
+        "    return docs\n",
+        "\n",
+        "text_splitter = RecursiveCharacterTextSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)\n",
+        "\n",
+        "documents = load_documents()\n",
+        "chunks = text_splitter.split_documents(documents)\n",
+        "print(f\"Loaded {len(documents)} documents, {len(chunks)} chunks.\")\n",
+        "\n",
+        "# Fail with a clear message instead of handing an empty list to the vector store.\n",
+        "if not chunks:\n",
+        "    raise ValueError(f\"No .md content found under {KNOWLEDGE_BASE}; nothing to index.\")\n",
+        "\n",
+        "# Chroma.from_documents adds to whatever is already persisted in DB_NAME, so drop the\n",
+        "# previous collection first; otherwise every re-run of this cell duplicates all chunks.\n",
+        "if os.path.exists(DB_NAME):\n",
+        "    Chroma(persist_directory=DB_NAME, embedding_function=embeddings).delete_collection()\n",
+        "\n",
+        "vectorstore = Chroma.from_documents(\n",
+        "    documents=chunks,\n",
+        "    embedding=embeddings,\n",
+        "    persist_directory=DB_NAME,\n",
+        ")\n",
+        "print(f\"Vector store saved to {DB_NAME}\")"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Connect to existing vector store and set up retriever + LLM\n",
+        "\n",
+        "If you already ran ingest above, you can skip re-running it and just load the store and pick the LLM."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "Using LLM: ollama\n"
+          ]
+        }
+      ],
+      "source": [
+        "# Re-open the persisted store so this cell works without re-running the ingest cell.\n",
+        "vectorstore = Chroma(persist_directory=DB_NAME, embedding_function=embeddings)\n",
+        "retriever = vectorstore.as_retriever(search_kwargs={\"k\": RETRIEVAL_K})\n",
+        "\n",
+        "# Build the chat model for the configured provider; temperature=0 on every backend.\n",
+        "if LLM_PROVIDER == \"ollama\":\n",
+        "    llm = ChatOllama(model=OLLAMA_MODEL, temperature=0)\n",
+        "elif LLM_PROVIDER == \"openrouter\":\n",
+        "    api_key = os.getenv(\"OPENROUTER_API_KEY\")\n",
+        "    if not api_key:\n",
+        "        raise ValueError(\"Set OPENROUTER_API_KEY in .env for OpenRouter.\")\n",
+        "    llm = ChatOpenAI(\n",
+        "        base_url=\"https://openrouter.ai/api/v1\",\n",
+        "        api_key=api_key,\n",
+        "        model=OPENROUTER_MODEL,\n",
+        "        temperature=0,\n",
+        "    )\n",
+        "elif LLM_PROVIDER == \"anthropic\":\n",
+        "    # Same early, readable failure as the OpenRouter branch when the key is missing.\n",
+        "    if not os.getenv(\"ANTHROPIC_API_KEY\"):\n",
+        "        raise ValueError(\"Set ANTHROPIC_API_KEY in .env for Anthropic.\")\n",
+        "    llm = ChatAnthropic(model=ANTHROPIC_MODEL, temperature=0)\n",
+        "else:\n",
+        "    raise ValueError(f\"Unknown LLM_PROVIDER: {LLM_PROVIDER}\")\n",
+        "\n",
+        "print(f\"Using LLM: {LLM_PROVIDER}\")"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 6,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "SYSTEM_PROMPT_TEMPLATE = \"\"\"\n",
+        "You are a research and learning assistant. Answer the user's question using ONLY the context below (course notes, summaries, or documents).\n",
+        "If the context does not contain enough information, say so. Do not invent facts.\n",
+        "\n",
+        "Context:\n",
+        "{context}\n",
+        "\"\"\""
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 7,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "def _history_as_turns(history):\n",
+        "    \"\"\"Convert Gradio chat history into (role, text) pairs accepted by llm.invoke.\"\"\"\n",
+        "    turns = []\n",
+        "    for entry in history or []:\n",
+        "        if isinstance(entry, dict):\n",
+        "            # \"messages\" format: {\"role\": ..., \"content\": ...}\n",
+        "            pairs = [(entry.get(\"role\"), entry.get(\"content\"))]\n",
+        "        elif isinstance(entry, (list, tuple)) and len(entry) == 2:\n",
+        "            # legacy \"tuples\" format: (user_text, assistant_text)\n",
+        "            pairs = [(\"user\", entry[0]), (\"assistant\", entry[1])]\n",
+        "        else:\n",
+        "            pairs = []\n",
+        "        for role, content in pairs:\n",
+        "            if isinstance(content, list):\n",
+        "                # NOTE(review): newer Gradio versions appear to send content as a list of\n",
+        "                # {\"type\": \"text\", \"text\": ...} parts - confirm against the installed version.\n",
+        "                content = \"\\n\".join(\n",
+        "                    part.get(\"text\", \"\")\n",
+        "                    for part in content\n",
+        "                    if isinstance(part, dict) and part.get(\"type\") == \"text\"\n",
+        "                )\n",
+        "            # Only non-empty plain-text turns are replayed; files and other rich content are skipped.\n",
+        "            if role in (\"user\", \"assistant\") and isinstance(content, str) and content:\n",
+        "                turns.append((role, content))\n",
+        "    return turns\n",
+        "\n",
+        "\n",
+        "def answer_question(question: str, history):\n",
+        "    \"\"\"Answer a question from retrieved context, keeping earlier chat turns in view.\n",
+        "\n",
+        "    Args:\n",
+        "        question: the user's latest message; also used as the retrieval query.\n",
+        "        history: earlier turns as passed by gr.ChatInterface, either (user, assistant)\n",
+        "            pairs or dicts with \"role\"/\"content\" keys. May be empty or None.\n",
+        "\n",
+        "    Returns:\n",
+        "        The content of the model's response.\n",
+        "    \"\"\"\n",
+        "    docs = retriever.invoke(question)\n",
+        "    context = \"\\n\\n\".join(doc.page_content for doc in docs)\n",
+        "    system_prompt = SYSTEM_PROMPT_TEMPLATE.format(context=context)\n",
+        "    # Replay previous turns between the system prompt and the new question so that\n",
+        "    # follow-ups such as \"tell me more about that\" still have their referent.\n",
+        "    response = llm.invoke([\n",
+        "        SystemMessage(content=system_prompt),\n",
+        "        *_history_as_turns(history),\n",
+        "        HumanMessage(content=question),\n",
+        "    ])\n",
+        "    return response.content"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Try a question"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 8,
+      "metadata": {},
+      "outputs": [
+        {
+          "data": {
+            "text/plain": [
+              "'I don\\'t have enough information to determine what \"RAG\" refers to in the context of the Contract with Harmony Health Plans for Healthllm. The provided text does not contain any definitions or explanations for this term. If you could provide more context or clarify what \"RAG\" stands for, I would be happy to try and assist you further.'"
+            ]
+          },
+          "execution_count": 8,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "# Smoke test with no prior chat history.\n",
+        "answer_question(question=\"What is RAG?\", history=[])"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "## Chat UI (Gradio)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 9,
+      "metadata": {},
+      "outputs": [
+        {
+          "name": "stderr",
+          "output_type": "stream",
+          "text": [
+            "/home/professor/projects/llm_engineering/.venv/lib/python3.12/site-packages/gradio/chat_interface.py:347: UserWarning: The 'tuples' format for chatbot messages is deprecated and will be removed in a future version of Gradio. Please set type='messages' instead, which uses openai-style 'role' and 'content' keys.\n",
+            "  self.chatbot = Chatbot(\n"
+          ]
+        },
+        {
+          "name": "stdout",
+          "output_type": "stream",
+          "text": [
+            "* Running on local URL:  http://127.0.0.1:7860\n",
+            "* To create a public link, set `share=True` in `launch()`.\n"
+          ]
+        },
+        {
+          "data": {
+            "text/html": [
+              "<div><iframe src=\"http://127.0.0.1:7860/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
+            ],
+            "text/plain": [
+              "<IPython.core.display.HTML object>"
+            ]
+          },
+          "metadata": {},
+          "output_type": "display_data"
+        },
+        {
+          "data": {
+            "text/plain": []
+          },
+          "execution_count": 9,
+          "metadata": {},
+          "output_type": "execute_result"
+        }
+      ],
+      "source": [
+        "# Wrap answer_question in a Gradio chat interface and launch it.\n",
+        "chat_ui = gr.ChatInterface(fn=answer_question, title=\"Research / Learning Q&A\")\n",
+        "chat_ui.launch()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": ".venv",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.12.12"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 4
+}