Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,314 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "4ef491ed",
"metadata": {},
"source": [
"# 📖 English Standard Version (ESV) Bible Companion\n",
"\n",
"## User Story\n",
"\n",
"I have been appointed as the Sunday School Teacher for my church, and the ESV Bible is the preferred Bible version to use for teaching. I need a Bible companion in contemporary English that balances word-for-word accuracy with modern, readable English like the ESV Bible does. I want to use the knowledge I have acquired to build a new reliable Bible companion RAG AI app that is unique and instructs in modern readable English like the ESV Bible, which could also be handy for any Sunday School teacher, Bible scholar, or personal studies.\n",
"\n",
"## Use cases\n",
"\n",
"- Given a Sunday school topic, the companion generates bible verses that are relevant to the topic.\n",
"- Given an excerpt from the Bible, the companion generates bible verses that are relevant to the excerpt\n",
"- At the end of every answer, the companion generates what The bible wants us to do as lessons from the bible verse found.\n",
"\n",
"## Tools\n",
"LangChain, gpt-4.1-nano, Gradio\n",
"\n",
"ESV Bible Markdown can be found here: https://github.com/lguenth/mdbible. We are using the Bible data in the by_books folder. The only difference is separating them into two folders, old_testament and new_testament respectively.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e47088f2",
"metadata": {},
"outputs": [],
"source": [
"#imports\n",
"import gradio as gr\n",
"from dotenv import load_dotenv\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_openai import ChatOpenAI\n",
"from langchain_chroma import Chroma\n",
"from langchain_core.messages import SystemMessage, HumanMessage, convert_to_messages\n",
"from langchain_core.documents import Document\n",
"from dotenv import load_dotenv\n",
"\n",
"\n",
"load_dotenv(override=True)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "81872bd8",
"metadata": {},
"outputs": [],
"source": [
"#constants\n",
"MODEL = \"gpt-4.1-nano\"\n",
"DB_NAME = \"esv_bible_db\"\n",
"\n",
"RETRIEVAL_K = 10\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "acdd0759",
"metadata": {},
"outputs": [],
"source": [
"#embedding model\n",
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "7ac1223f",
"metadata": {},
"outputs": [],
"source": [
"#vector store\n",
"vectorstore = Chroma(\n",
" persist_directory=DB_NAME,\n",
" embedding_function=embeddings\n",
")\n",
"\n",
"retriever = vectorstore.as_retriever(\n",
" search_kwargs={\"k\": RETRIEVAL_K}\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "6e9e0cf1",
"metadata": {},
"outputs": [],
"source": [
"#openai llm\n",
"llm = ChatOpenAI(temperature=0, model_name=MODEL)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "86e0e849",
"metadata": {},
"outputs": [],
"source": [
"#system prompt\n",
"SYSTEM_PROMPT = \"\"\"\n",
"You are a knowledgeable assistant, the English Standard Version (ESV) Bible Companion.\n",
"You are chatting with a user about the Bible.\n",
"Your answer will be evaluated for accuracy, relevance and completeness, so make sure it only answers the question according to the matched context.\n",
"If you don't know the answer, say so.\n",
"Given a Sunday School topic list ESV Bible verses that are relevant to the topic.\n",
"Given an excerpt from the Bible, list ESV bible verses that are relevant to the excerpt\n",
"Finally, list the lessons from the ESV Bible verses matched; on each point quote the associated Bible verse.\n",
"\n",
"For context, here are specific extracts from the ESV Bible that may be directly relevant to the user's question:\n",
"\n",
"{context}\n",
"\n",
"With this context, please answer the user's question. Be accurate, relevant and complete.\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "3a1e0db2",
"metadata": {},
"outputs": [],
"source": [
"#fetch_context\n",
"def fetch_context(question: str) -> list[Document]:\n",
" \"\"\"\n",
" Retrieve relevant context documents for a question.\n",
" \"\"\"\n",
" return retriever.invoke(question)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "dfd2192a",
"metadata": {},
"outputs": [],
"source": [
"# combined previous questions\n",
"def combined_question(question: str, history: list[dict] | None = None) -> str:\n",
" \"\"\"\n",
" Combine all the user's messages into a single string.\n",
" \"\"\"\n",
" history = history or []\n",
" prior = \"\\n\".join(\n",
" m[\"content\"] for m in history if m.get(\"role\") == \"user\"\n",
" )\n",
" return f\"{prior}\\n{question}\" if prior else question"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f1035c0",
"metadata": {},
"outputs": [],
"source": [
"#answer question\n",
"def answer_question(\n",
" question: str,\n",
" history: list[dict] | None = None\n",
") -> tuple[str, list[Document]]:\n",
" \"\"\"\n",
" Answer the given question with RAG.\n",
" Returns:\n",
" - Generated answer\n",
" - Retrieved context documents\n",
" \"\"\"\n",
" \n",
" history = history or []\n",
"\n",
" # Combine question with history for better retrieval\n",
" combined = combined_question(question, history)\n",
" # Retrieve documents\n",
" docs = fetch_context(combined)\n",
" context = \"\\n\\n\".join(doc.page_content for doc in docs)\n",
"\n",
" # Build system prompt with context\n",
" system_prompt = SYSTEM_PROMPT.format(context=context)\n",
"\n",
" # Construct messages\n",
" messages = [SystemMessage(content=system_prompt)]\n",
" messages.extend(convert_to_messages(history))\n",
" messages.append(HumanMessage(content=question))\n",
"\n",
" # Invoke LLM\n",
" response = llm.invoke(messages)\n",
"\n",
" return response.content, docs"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df81fe7d",
"metadata": {},
"outputs": [],
"source": [
"#formats matched context in the UI\n",
"def format_context(context):\n",
" result = \"<h2 style='color: #ff7800;'>Relevant Context</h2>\\n\\n\"\n",
" for doc in context:\n",
" print(doc.metadata)\n",
" result += f\"<span style='color: #ff7800;'>Source: {doc.metadata['source']}</span>\\n\\n\"\n",
" result += doc.page_content + \"\\n\\n\"\n",
" return result\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc6aaf3e",
"metadata": {},
"outputs": [],
"source": [
"# RAG chat UI function\n",
"def rag_chat(history):\n",
" last_message = history[-1][\"content\"]\n",
" prior = history[:-1]\n",
" answer, context = answer_question(last_message, prior)\n",
" history.append({\"role\": \"assistant\", \"content\": answer})\n",
" return history, format_context(context)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "57630334",
"metadata": {},
"outputs": [],
"source": [
"def put_message_in_chatbot(message, history):\n",
" return \"\", history + [{\"role\": \"user\", \"content\": message}]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4546eaf2",
"metadata": {},
"outputs": [],
"source": [
"theme = gr.themes.Soft(font=[\"Inter\", \"system-ui\", \"sans-serif\"])\n",
"\n",
"with gr.Blocks(title=\"English Standard Version (ESV) Bible Companion\", theme=theme) as ui:\n",
" gr.Markdown(\"# 📖 ESV Bible Companion\\nAsk me anything about the Bible!\")\n",
"\n",
" with gr.Row():\n",
" with gr.Column(scale=1):\n",
" chatbot = gr.Chatbot(\n",
" label=\"💬 Conversation\", height=600, type=\"messages\", show_copy_button=True\n",
" )\n",
" message = gr.Textbox(\n",
" label=\"Your Question\",\n",
" placeholder=\"Ask anything about the Bible...\",\n",
" show_label=False,\n",
" )\n",
"\n",
" with gr.Column(scale=1):\n",
" context_markdown = gr.Markdown(\n",
" label=\"📚 Retrieved Context\",\n",
" value=\"*Retrieved context will appear here*\",\n",
" container=True,\n",
" height=600,\n",
" )\n",
"\n",
" message.submit(\n",
" put_message_in_chatbot, inputs=[message, chatbot], outputs=[message, chatbot]\n",
" ).then(rag_chat, inputs=chatbot, outputs=[chatbot, context_markdown])\n",
"\n",
" ui.launch(inbrowser=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "908bcc43",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import os
import glob
from pathlib import Path
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings


from dotenv import load_dotenv


DB_NAME = "esv_bible_db"
KNOWLEDGE_BASE = str(Path(__file__).parent.parent / "ESV_Bible")

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

load_dotenv(override=True)


def fetch_books():
print(f"Fetching books from {KNOWLEDGE_BASE}")
folders = glob.glob(str(Path(KNOWLEDGE_BASE) / "*"))
books = []
for folder in folders:
book = os.path.basename(folder)
loader = DirectoryLoader(
folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs={"encoding": "utf-8"}
)
folder_docs = loader.load()
for doc in folder_docs:
doc.metadata["book"] = book
books.append(doc)
return books


def create_chunks(books):
print(f"Creating chunks from {len(books)} books")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
chunks = text_splitter.split_documents(books)
return chunks


def create_embeddings(chunks):
print(f"Creating embeddings from {len(chunks)} chunks")
if os.path.exists(DB_NAME):
Chroma(persist_directory=DB_NAME, embedding_function=embeddings).delete_collection()

vectorstore = Chroma.from_documents(
documents=chunks, embedding=embeddings, persist_directory=DB_NAME
)

collection = vectorstore._collection
count = collection.count()

sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
dimensions = len(sample_embedding)
print(f"There are {count:,} vectors with {dimensions:,} dimensions in the vector store")
return vectorstore


if __name__ == "__main__":
print("Starting Bible Ingestion")
books = fetch_books()
chunks = create_chunks(books)
create_embeddings(chunks)
print("Ingestion complete")