Merge pull request #264 from editor-code-assistant/fix-gemini-openai-chat-tool-calling-2

ericdallo · web-flow · commit dbdf726fd611 · 2026-01-14T15:19:40.000-03:00
Fix gemini OpenAI chat tool calling 2
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,8 @@
 
 ## Unreleased
 
+- Fix tool calling for Gemini (OpenAI compatible). #247
+
 ## 0.91.2
 
 - Fix `eca_shell_command` to include stderr output even when exit 0.
diff --git a/src/eca/llm_providers/openai_chat.clj b/src/eca/llm_providers/openai_chat.clj
@@ -25,13 +25,12 @@
 
 (defn ^:private extract-content
   "Extract text content from various message content formats.
-   Handles: strings (legacy eca), nested arrays from chat.clj, and fallback."
+   Returns a string for text-only content, and an array only when images are present (multimodal)."
   [content supports-image?]
   (cond
     ;; Legacy/fallback: handles system messages, error strings, or unexpected simple text content
     (string? content)
-    [{:type "text"
-      :text (string/trim content)}]
+    (string/trim content)
 
     (and (sequential? content)
          (every? #(= "text" (name (:type %))) content))
@@ -166,8 +165,7 @@
                 :reasoning_content (:text content)}
                ;; Fallback: wrap in thinking tags for models that use text-based reasoning
                {:role "assistant"
-                :content [{:type "text"
-                           :text (str think-tag-start (:text content) think-tag-end)}]})
+                :content (str think-tag-start (:text content) think-tag-end)})
     "assistant" {:role "assistant"
                  :content (extract-content content supports-image?)}
     "system" {:role "system"
@@ -301,13 +299,12 @@
                                           (assoc tool-call :arguments {} :parse-error error-msg)))))))
         ;; Filter out tool calls with parse errors to prevent execution with invalid data
         valid-tools (remove :parse-error completed-tools)]
-    (if (seq valid-tools)
+    (when (seq valid-tools)
       ;; We have valid tools to execute, continue the conversation
       (let [tool-turn-id (str (random-uuid))
             tools-with-turn-id (mapv #(assoc % :tool-turn-id tool-turn-id) valid-tools)]
-        (on-tools-called-wrapper tools-with-turn-id on-tools-called handle-response))
-      ;; No valid tools (all had parse errors or none accumulated) - don't loop
-      nil)))
+        (on-tools-called-wrapper tools-with-turn-id on-tools-called handle-response)
+        tools-with-turn-id))))
 
 (defn ^:private process-text-think-aware
   "Incremental parser that splits streamed content into user text and thinking blocks.
@@ -495,11 +492,15 @@
 
         handle-response (fn handle-response [event data tool-calls*]
                           (if (= event "stream-end")
-                            (do
+                            (let [had-tool-calls? (seq @tool-calls*)]
                               ;; Flush any leftover buffered content and finish reasoning if needed
                               (flush-content-buffer)
                               (finish-reasoning! reasoning-state* on-reason)
-                              (execute-accumulated-tools! tool-calls* on-tools-called-wrapper on-tools-called handle-response))
+                              (when (and had-tool-calls?
+                                         (nil? (execute-accumulated-tools! tool-calls* on-tools-called-wrapper on-tools-called handle-response)))
+                                ;; The stream ended with tool_calls, but none were executable (e.g. invalid JSON args).
+                                ;; Emit :finish so the UI does not hang waiting for the next turn.
+                                (on-message-received {:type :finish :finish-reason "stop"})))
                             (when (seq (:choices data))
                               (doseq [choice (:choices data)]
                                 (let [delta (:delta choice)
@@ -578,7 +579,11 @@
                                     ;; Handle reasoning completion
                                     (finish-reasoning! reasoning-state* on-reason)
                                     ;; Handle regular finish
-                                    (when (not= finish-reason "tool_calls")
+                                    ;; Some OpenAI-compatible providers (e.g. Google's) may emit finish_reason "stop"
+                                    ;; even when the turn contains tool_calls. In that case, defer :finish until after
+                                    ;; the tool loop completes, otherwise chat-level side effects may run too early.
+                                    (when (and (not= finish-reason "tool_calls")
+                                               (empty? @tool-calls*))
                                       (on-message-received {:type :finish :finish-reason finish-reason})))))))
                           (when-let [usage (:usage data)]
                             (on-usage-updated (parse-usage usage))))
diff --git a/test/eca/llm_providers/openai_chat_test.clj b/test/eca/llm_providers/openai_chat_test.clj
@@ -53,17 +53,17 @@
 (deftest normalize-messages-test
   (testing "With tool_call history - assistant text and tool calls are merged"
     (is (match?
-         [{:role "user" :content [{:type "text" :text "List the files"}]}
+         [{:role "user" :content "List the files"}
           {:role "assistant"
-           :content [{:type "text" :text "I'll list the files for you"}]
+           :content "I'll list the files for you"
            :tool_calls [{:id "call-1"
                          :type "function"
                          :function {:name "eca__list_files"
                                     :arguments "{}"}}]}
           {:role "tool"
            :tool_call_id "call-1"
            :content "file1.txt\nfile2.txt\n"}
-          {:role "assistant" :content [{:type "text" :text "I found 2 files"}]}]
+          {:role "assistant" :content "I found 2 files"}]
          (#'llm-providers.openai-chat/normalize-messages
           [{:role "user" :content "List the files"}
            {:role "assistant" :content "I'll list the files for you"}
@@ -81,9 +81,8 @@
 
   (testing "Reason messages without reasoning-content use think tags, merged with following assistant"
     (is (match?
-         [{:role "user" :content [{:type "text" :text "Hello"}]}
-          {:role "assistant" :content [{:type "text" :text "<think>Thinking...</think>"}
-                                       {:type "text" :text "Hi"}]}]
+         [{:role "user" :content "Hello"}
+          {:role "assistant" :content "<think>Thinking...</think>\nHi"}]
          (#'llm-providers.openai-chat/normalize-messages
           [{:role "user" :content "Hello"}
            {:role "reason" :content {:text "Thinking..."}}
@@ -93,8 +92,8 @@
           thinking-end-tag)))))
 
 (deftest extract-content-test
-  (testing "String input"
-    (is (= [{:type "text" :text "Hello world"}]
+  (testing "String input - returns string for text-only content"
+    (is (= "Hello world"
            (#'llm-providers.openai-chat/extract-content "  Hello world  " true))))
 
   (testing "Sequential messages with actual format"
@@ -161,10 +160,10 @@
           thinking-end-tag))))
 
   (testing "Reason messages - use reasoning_content if :delta-reasoning?, otherwise tags"
-    ;; Without :delta-reasoning?, uses think tags
+    ;; Without :delta-reasoning?, uses think tags (string, not array - for Gemini compatibility)
     (is (match?
          {:role "assistant"
-          :content [{:type "text" :text "<think>Reasoning...</think>"}]}
+          :content "<think>Reasoning...</think>"}
          (#'llm-providers.openai-chat/transform-message
           {:role "reason"
            :content {:text "Reasoning..."}}
@@ -372,13 +371,13 @@
       ;; After normalization, all tool_calls should be merged into one assistant message
       ;; followed by all tool outputs, then the final assistant message
       (is (match?
-           [{:role "user" :content [{:type "text" :text "Read two files"}]}
+           [{:role "user" :content "Read two files"}
             {:role "assistant"
              :tool_calls [{:id "call-1" :function {:name "eca__read_file"}}
                           {:id "call-2" :function {:name "eca__read_file"}}]}
             {:role "tool" :tool_call_id "call-1" :content "content1\n"}
             {:role "tool" :tool_call_id "call-2" :content "content2\n"}
-            {:role "assistant" :content [{:type "text" :text "I read both files"}]}]
+            {:role "assistant" :content "I read both files"}]
            normalized)
           "Tool calls must be grouped together before their outputs")))
 
@@ -421,11 +420,11 @@
            [{:role "user"}
             {:role "assistant" :tool_calls [{:id "call-1"}]}
             {:role "tool" :tool_call_id "call-1"}
-            {:role "assistant" :content [{:type "text" :text "First response"}]}
+            {:role "assistant" :content "First response"}
             {:role "user"}
             {:role "assistant" :tool_calls [{:id "call-2"}]}
             {:role "tool" :tool_call_id "call-2"}
-            {:role "assistant" :content [{:type "text" :text "Second response"}]}]
+            {:role "assistant" :content "Second response"}]
            normalized)))))
 
 (deftest gemini-thought-signature-test
@@ -527,7 +526,7 @@
             {:role "assistant"
              :tool_calls [{:id "call-2" :function {:name "tool2"}}]}
             {:role "tool" :tool_call_id "call-2" :content "r2\n"}
-            {:role "assistant" :content [{:type "text" :text "Done"}]}]
+            {:role "assistant" :content "Done"}]
            normalized)))))
 
 (deftest tool-call-order-by-index-test
@@ -768,7 +767,7 @@
       (is (= "think more" (:reason-text result)))
       (is (some? (:reasoning-content result)) "reasoning-content should be present in non-streaming result")
       (is (match?
-           [{:role "user" :content [{:type "text" :text "Q"}]}
+           [{:role "user" :content "Q"}
             {:role "assistant"
              :reasoning_content "think more"}]
            normalized)))))