Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,314 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "4ef491ed",
"metadata": {},
"source": [
"# 📖 English Standard Version (ESV) Bible Companion\n",
"\n",
"## User Story\n",
"\n",
"I have been appointed as the Sunday School Teacher for my church, and the ESV Bible is the preferred Bible version to use for teaching. I need a Bible companion in contemporary English that balances word-for-word accuracy with modern, readable English like the ESV Bible does. I want to use the knowledge I have acquired to build a new reliable Bible companion RAG AI app that is unique and instructs in modern readable English like the ESV Bible, which could also be handy for any Sunday School teacher, Bible scholar, or personal studies.\n",
"\n",
"## Use cases\n",
"\n",
"- Given a Sunday school topic, the companion generates bible verses that are relevant to the topic.\n",
"- Given an excerpt from the Bible, the companion generates bible verses that are relevant to the excerpt\n",
"- At the end of every answer, the companion generates what The bible wants us to do as lessons from the bible verse found.\n",
"\n",
"## Tools\n",
"LangChain, gpt-4.1-nano, Gradio\n",
"\n",
"ESV Bible Markdown can be found here: https://github.com/lguenth/mdbible. We are using the Bible data in the by_books folder. The only difference is separating them into two folders, old_testament and new_testament respectively.\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e47088f2",
"metadata": {},
"outputs": [],
"source": [
"#imports\n",
"import gradio as gr\n",
"from dotenv import load_dotenv\n",
"from langchain_huggingface import HuggingFaceEmbeddings\n",
"from langchain_openai import ChatOpenAI\n",
"from langchain_chroma import Chroma\n",
"from langchain_core.messages import SystemMessage, HumanMessage, convert_to_messages\n",
"from langchain_core.documents import Document\n",
"from dotenv import load_dotenv\n",
"\n",
"\n",
"load_dotenv(override=True)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "81872bd8",
"metadata": {},
"outputs": [],
"source": [
"#constants\n",
"MODEL = \"gpt-4.1-nano\"\n",
"DB_NAME = \"esv_bible_db\"\n",
"\n",
"RETRIEVAL_K = 10\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "acdd0759",
"metadata": {},
"outputs": [],
"source": [
"#embedding model\n",
"embeddings = HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "7ac1223f",
"metadata": {},
"outputs": [],
"source": [
"#vector store\n",
"vectorstore = Chroma(\n",
" persist_directory=DB_NAME,\n",
" embedding_function=embeddings\n",
")\n",
"\n",
"retriever = vectorstore.as_retriever(\n",
" search_kwargs={\"k\": RETRIEVAL_K}\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "6e9e0cf1",
"metadata": {},
"outputs": [],
"source": [
"#openai llm\n",
"llm = ChatOpenAI(temperature=0, model_name=MODEL)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "86e0e849",
"metadata": {},
"outputs": [],
"source": [
"#system prompt\n",
"SYSTEM_PROMPT = \"\"\"\n",
"You are a knowledgeable assistant, the English Standard Version (ESV) Bible Companion.\n",
"You are chatting with a user about the Bible.\n",
"Your answer will be evaluated for accuracy, relevance and completeness, so make sure it only answers the question according to the matched context.\n",
"If you don't know the answer, say so.\n",
"Given a Sunday School topic list ESV Bible verses that are relevant to the topic.\n",
"Given an excerpt from the Bible, list ESV bible verses that are relevant to the excerpt\n",
"Finally, list the lessons from the ESV Bible verses matched; on each point quote the associated Bible verse.\n",
"\n",
"For context, here are specific extracts from the ESV Bible that may be directly relevant to the user's question:\n",
"\n",
"{context}\n",
"\n",
"With this context, please answer the user's question. Be accurate, relevant and complete.\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 41,
"id": "3a1e0db2",
"metadata": {},
"outputs": [],
"source": [
"#fetch_context\n",
"def fetch_context(question: str) -> list[Document]:\n",
" \"\"\"\n",
" Retrieve relevant context documents for a question.\n",
" \"\"\"\n",
" return retriever.invoke(question)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"id": "dfd2192a",
"metadata": {},
"outputs": [],
"source": [
"# combined previous questions\n",
"def combined_question(question: str, history: list[dict] | None = None) -> str:\n",
" \"\"\"\n",
" Combine all the user's messages into a single string.\n",
" \"\"\"\n",
" history = history or []\n",
" prior = \"\\n\".join(\n",
" m[\"content\"] for m in history if m.get(\"role\") == \"user\"\n",
" )\n",
" return f\"{prior}\\n{question}\" if prior else question"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0f1035c0",
"metadata": {},
"outputs": [],
"source": [
"#answer question\n",
"def answer_question(\n",
" question: str,\n",
" history: list[dict] | None = None\n",
") -> tuple[str, list[Document]]:\n",
" \"\"\"\n",
" Answer the given question with RAG.\n",
" Returns:\n",
" - Generated answer\n",
" - Retrieved context documents\n",
" \"\"\"\n",
" \n",
" history = history or []\n",
"\n",
" # Combine question with history for better retrieval\n",
" combined = combined_question(question, history)\n",
" # Retrieve documents\n",
" docs = fetch_context(combined)\n",
" context = \"\\n\\n\".join(doc.page_content for doc in docs)\n",
"\n",
" # Build system prompt with context\n",
" system_prompt = SYSTEM_PROMPT.format(context=context)\n",
"\n",
" # Construct messages\n",
" messages = [SystemMessage(content=system_prompt)]\n",
" messages.extend(convert_to_messages(history))\n",
" messages.append(HumanMessage(content=question))\n",
"\n",
" # Invoke LLM\n",
" response = llm.invoke(messages)\n",
"\n",
" return response.content, docs"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "df81fe7d",
"metadata": {},
"outputs": [],
"source": [
"#formats matched context in the UI\n",
"def format_context(context):\n",
" result = \"<h2 style='color: #ff7800;'>Relevant Context</h2>\\n\\n\"\n",
" for doc in context:\n",
" print(doc.metadata)\n",
" result += f\"<span style='color: #ff7800;'>Source: {doc.metadata['source']}</span>\\n\\n\"\n",
" result += doc.page_content + \"\\n\\n\"\n",
" return result\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "bc6aaf3e",
"metadata": {},
"outputs": [],
"source": [
"# RAG chat UI function\n",
"def rag_chat(history):\n",
" last_message = history[-1][\"content\"]\n",
" prior = history[:-1]\n",
" answer, context = answer_question(last_message, prior)\n",
" history.append({\"role\": \"assistant\", \"content\": answer})\n",
" return history, format_context(context)"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "57630334",
"metadata": {},
"outputs": [],
"source": [
"def put_message_in_chatbot(message, history):\n",
" return \"\", history + [{\"role\": \"user\", \"content\": message}]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4546eaf2",
"metadata": {},
"outputs": [],
"source": [
"theme = gr.themes.Soft(font=[\"Inter\", \"system-ui\", \"sans-serif\"])\n",
"\n",
"with gr.Blocks(title=\"English Standard Version (ESV) Bible Companion\", theme=theme) as ui:\n",
" gr.Markdown(\"# 📖 ESV Bible Companion\\nAsk me anything about the Bible!\")\n",
"\n",
" with gr.Row():\n",
" with gr.Column(scale=1):\n",
" chatbot = gr.Chatbot(\n",
" label=\"💬 Conversation\", height=600, type=\"messages\", show_copy_button=True\n",
" )\n",
" message = gr.Textbox(\n",
" label=\"Your Question\",\n",
" placeholder=\"Ask anything about the Bible...\",\n",
" show_label=False,\n",
" )\n",
"\n",
" with gr.Column(scale=1):\n",
" context_markdown = gr.Markdown(\n",
" label=\"📚 Retrieved Context\",\n",
" value=\"*Retrieved context will appear here*\",\n",
" container=True,\n",
" height=600,\n",
" )\n",
"\n",
" message.submit(\n",
" put_message_in_chatbot, inputs=[message, chatbot], outputs=[message, chatbot]\n",
" ).then(rag_chat, inputs=chatbot, outputs=[chatbot, context_markdown])\n",
"\n",
" ui.launch(inbrowser=False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "908bcc43",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import os
import glob
from pathlib import Path
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import OpenAIEmbeddings


from dotenv import load_dotenv


DB_NAME = "esv_bible_db"
KNOWLEDGE_BASE = str(Path(__file__).parent.parent / "ESV_Bible")

embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

load_dotenv(override=True)


def fetch_books():
print(f"Fetching books from {KNOWLEDGE_BASE}")
folders = glob.glob(str(Path(KNOWLEDGE_BASE) / "*"))
books = []
for folder in folders:
book = os.path.basename(folder)
loader = DirectoryLoader(
folder, glob="**/*.md", loader_cls=TextLoader, loader_kwargs={"encoding": "utf-8"}
)
folder_docs = loader.load()
for doc in folder_docs:
doc.metadata["book"] = book
books.append(doc)
return books


def create_chunks(books):
print(f"Creating chunks from {len(books)} books")
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=200)
chunks = text_splitter.split_documents(books)
return chunks


def create_embeddings(chunks):
print(f"Creating embeddings from {len(chunks)} chunks")
if os.path.exists(DB_NAME):
Chroma(persist_directory=DB_NAME, embedding_function=embeddings).delete_collection()

vectorstore = Chroma.from_documents(
documents=chunks, embedding=embeddings, persist_directory=DB_NAME
)

collection = vectorstore._collection
count = collection.count()

sample_embedding = collection.get(limit=1, include=["embeddings"])["embeddings"][0]
dimensions = len(sample_embedding)
print(f"There are {count:,} vectors with {dimensions:,} dimensions in the vector store")
return vectorstore


if __name__ == "__main__":
print("Starting Bible Ingestion")
books = fetch_books()
chunks = create_chunks(books)
create_embeddings(chunks)
print("Ingestion complete")