2222 spacing_size = "lg" ,
2323)
2424
25- def generate (
25+ def convert_pdf_to_descriptive_markdown (
2626 pdf_file_obj : Optional [gr .File ],
2727 ollama_endpoint : str ,
2828 ui_vlm_model : str ,
@@ -33,22 +33,31 @@ def generate(
3333 progress : gr .Progress = gr .Progress (track_tqdm = True )
3434) -> Tuple [str , gr .update , Optional [str ]]:
3535 """
36- Wrapper function to call the core conversion process and handle the Gradio UI for Ollama.
36+ Convert a PDF file to detailed page-by-page Markdown descriptions using local Ollama Vision-Language Models.
37+
38+ This function processes the uploaded PDF, analyzing the visual and textual content of each page
39+ using locally hosted Vision-Language Models (VLMs) through Ollama. It generates rich, contextual
40+ descriptions in Markdown format that capture both the visual elements and text content of the document,
41+ making the PDF accessible and searchable in contexts where traditional text extraction would fail.
42+
43+ Unlike the OpenRouter version, this function utilizes local models running through Ollama,
44+ providing privacy and eliminating the need for API keys, but potentially with different model options
45+ and performance characteristics.
3746
3847 Args:
3948 pdf_file_obj: Gradio File object for the uploaded PDF
40- ollama_endpoint: Ollama server endpoint URL
41- ui_vlm_model: VLM model name from UI
42- ui_lang: Output language from UI
43- ui_use_md: Whether to use Markitdown from UI
44- ui_use_sum: Whether to generate a summary from UI
45- ui_sum_model: Summary model name from UI
49+ ollama_endpoint: Ollama server endpoint URL (e.g., http://localhost:11434)
50+ ui_vlm_model: VLM model name from UI (e.g., llama3.2-vision)
51+ ui_lang: Output language for descriptions (e.g., English, Spanish)
52+ ui_use_md: Whether to use Markitdown for enhanced text extraction
53+ ui_use_sum: Whether to generate a document summary for context
54+ ui_sum_model: Summary model name from UI (e.g., qwen2.5)
4655 progress: Gradio progress tracker
4756
4857 Returns:
4958 Tuple containing:
50- - str: Status message
51- - gr.update: Download button update
59+ - str: Status message indicating success or failure
60+ - gr.update: Download button update with the result file
5261 - Optional[str]: Markdown result content
5362 """
5463 # Validate input file
@@ -249,7 +258,7 @@ def create_ui() -> gr.Blocks:
249258 progress_output , download_button , markdown_output
250259 ]
251260 convert_button .click (
252- fn = generate ,
261+ fn = convert_pdf_to_descriptive_markdown ,
253262 inputs = conversion_inputs ,
254263 outputs = conversion_outputs
255264 )
0 commit comments