@@ -1534,14 +1534,15 @@ def _run_inference(self, prompt: str) -> None:
         )

     def _run_cli_inference(self, prompt: str, model: str) -> None:
-        """Run inference via Ollama CLI (direct, no HTTP)."""
+        """Run inference via Ollama CLI with tool support."""
         log = self.query_one("#inference-log", InferenceLog)
+        breaker = self.query_one("#circuit-breaker", CircuitBreakerPanel)

         try:
             import subprocess
             import shutil

-            # Find ollama binary - check common locations
+            # Find ollama binary
             ollama_path = shutil.which("ollama")
             if not ollama_path:
                 for candidate in [
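Note: the candidate-path list and the check that closes this loop fall in the elided lines (1548-1556) between the two hunks. The pattern is a PATH lookup via shutil.which with a probe over fixed install locations as a fallback; a self-contained sketch, where the candidate paths are assumptions rather than the commit's actual (elided) list:

import os
import shutil

def find_ollama() -> str | None:
    # PATH first; shutil.which also verifies the file is executable.
    path = shutil.which("ollama")
    if path:
        return path
    # Hypothetical fallback locations - the commit's real list is elided.
    for candidate in ("/usr/local/bin/ollama", "/opt/homebrew/bin/ollama"):
        if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
            return candidate
    return None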
@@ -1557,33 +1558,107 @@ def _run_cli_inference(self, prompt: str, model: str) -> None:
                 self.app.call_from_thread(log.write_error, "Ollama binary not found in PATH or common locations")
                 return

-            cmd = [ollama_path, "run", model, prompt]
-            self.app.call_from_thread(log.write_system, f"CLI: {ollama_path} run {model}", Colors.MUTED)
+            # Build full prompt with system context and conversation history
+            full_prompt = f"{SYSTEM_PROMPT}\n\n"
+
+            if self.chat_mode and self.conversation_history:
+                for msg in self.conversation_history[-10:]:  # Last 5 exchanges
+                    role = msg.get("role", "user")
+                    content = msg.get("content", "")
+                    if role == "user":
+                        full_prompt += f"User: {content}\n"
+                    elif role == "assistant":
+                        full_prompt += f"Assistant: {content}\n"

+            full_prompt += f"User: {prompt}\nAssistant:"
+
+            self.app.call_from_thread(log.write_system, f"CLI: {ollama_path} run {model}", Colors.MUTED)
             self.app.call_from_thread(log.write_response_start)

-            result = subprocess.run(cmd, capture_output=True, text=True, timeout=120.0)
+            # Agent loop - max 3 turns for tool calls
+            MAX_TURNS = 3
+            current_turn = 0
+            current_prompt = full_prompt
+
+            while current_turn < MAX_TURNS:
+                current_turn += 1
+
+                cmd = [ollama_path, "run", model, current_prompt]
+                result = subprocess.run(cmd, capture_output=True, text=True, timeout=120.0)
+
+                if result.returncode != 0:
+                    self.app.call_from_thread(log.write_error, f"Ollama error: {result.stderr.strip()}")
+                    return

-            if result.returncode == 0:
                 response = result.stdout.strip()
                 if not response:
                     self.app.call_from_thread(log.write_error, "CLI returned empty response")
                     return

-                # Write the full response as a single block, not word-by-word
+                # Check for tool call
+                tool_match = re.search(r"<tool_call>(.*?)</tool_call>", response, re.DOTALL)
+
+                if tool_match:
+                    # Show the response containing the tool call
+                    self.app.call_from_thread(log.write_token, response)
+
+                    try:
+                        tool_json = tool_match.group(1)
+                        tool_data = json.loads(tool_json)
+                        tool_name = tool_data.get("name")
+                        tool_query = tool_data.get("query", "")
+
+                        self.app.call_from_thread(
+                            log.write_system,
+                            f"🛠️ Tool: {tool_name} ('{tool_query}')",
+                            f"bold {Colors.CYAN}",
+                        )
+
+                        if tool_name == "search_vault":
+                            # Execute vault search
+                            results, files_searched = self._get_vault_search_results(tool_query)
+
+                            if results:
+                                tool_output = f"Found {len(results)} results:\n"
+                                for r in results[:5]:
+                                    tool_output += f"- [{r['domain']}] {r['content'][:150]}...\n"
+                            else:
+                                tool_output = "No results found in vault."
+
+                            self.app.call_from_thread(
+                                log.write_system,
+                                f"🔍 {len(results)} results from {files_searched} files",
+                                Colors.MUTED,
+                            )
+
+                            # Feed the tool output back and run another turn
+                            current_prompt = f"{current_prompt}\n{response}\n\nTool Output:\n{tool_output}\n\nAssistant:"
+                            continue
+                        else:
+                            self.app.call_from_thread(log.write_system, f"Unknown tool: {tool_name}", Colors.MUTED)
+
+                    except json.JSONDecodeError:
+                        self.app.call_from_thread(log.write_system, "Failed to parse tool call", Colors.MUTED)
+
+                # No tool call (or the call failed) - treat this as the final response
                 self.app.call_from_thread(log.write_token, response)
+                break

-                token_count = len(response.split())
-                self.app.call_from_thread(log.write_response_end, token_count, 0.5)
+            token_count = len(response.split())
+            self.app.call_from_thread(log.write_response_end, token_count, 0.5)

-                # Record in conversation history if chat mode
-                if self.chat_mode:
-                    self.conversation_history.extend(
-                        [
-                            {"role": "user", "content": prompt},
-                            {"role": "assistant", "content": response},
-                        ]
-                    )
+            # Check for glyphs that trip the breaker
+            glyphs_found = detect_glyphs(response)
+            for glyph, meaning, color, trips in glyphs_found:
+                if trips and not breaker.tripped:
+                    self.app.call_from_thread(breaker.trip, glyph)
+
+            # Record in conversation history if chat mode
+            if self.chat_mode:
+                self.conversation_history.extend(
+                    [
+                        {"role": "user", "content": prompt},
+                        {"role": "assistant", "content": response},
+                    ]
+                )
-        else:
-            self.app.call_from_thread(log.write_error, f"Ollama error: {result.stderr.strip()}")

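The agent loop assumes SYSTEM_PROMPT (defined elsewhere in the file) instructs the model to request tools as a JSON envelope inside <tool_call> tags; the loop extracts it with the regex above, dispatches on "name", and splices the tool output back into the prompt text, since `ollama run` is a one-shot CLI call with no message API. A standalone sketch of the parsing contract - parse_tool_call is a hypothetical helper, and only the envelope shape is taken from the diff:

import json
import re

TOOL_CALL_RE = re.compile(r"<tool_call>(.*?)</tool_call>", re.DOTALL)

def parse_tool_call(response: str) -> dict | None:
    # Returns e.g. {"name": "search_vault", "query": "..."} or None
    # when the response contains no (valid) tool call.
    match = TOOL_CALL_RE.search(response)
    if not match:
        return None
    try:
        return json.loads(match.group(1))
    except json.JSONDecodeError:
        return None

# A tool turn followed by a plain final answer:
turn = 'On it.\n<tool_call>{"name": "search_vault", "query": "breaker"}</tool_call>'
assert parse_tool_call(turn) == {"name": "search_vault", "query": "breaker"}
assert parse_tool_call("Nothing relevant in the vault.") is None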
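detect_glyphs and CircuitBreakerPanel are defined elsewhere; the glyph check above relies only on detect_glyphs returning (glyph, meaning, color, trips) tuples, where trips marks a glyph that should trip the breaker. A minimal sketch of that assumed contract - the glyph table here is hypothetical:

GLYPHS: list[tuple[str, str, str, bool]] = [
    # (glyph, meaning, color, trips_breaker) - example entries only
    ("⚠", "caution", "yellow", False),
    ("⛔", "hard stop", "red", True),
]

def detect_glyphs(text: str) -> list[tuple[str, str, str, bool]]:
    # Return the table entries whose glyph appears in the response text.
    return [entry for entry in GLYPHS if entry[0] in text]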