
Commit 25eb0d4

andreibogdan, claude, danielchalef, and prasmussen15 authored and committed
Fix Azure OpenAI integration for v1 API compatibility (getzep#1192)
* Fix MCP server documentation to reference correct entry point

  Update all documentation references from graphiti_mcp_server.py to main.py.
  The old filename was causing "No such file or directory" errors when users
  tried to run the commands as documented. The actual entry point is main.py
  in the mcp_server directory.

  Changes:
  - Update 7 command examples in README.md
  - Update example configuration file with correct path

* @andreibogdan has signed the CLA in getzep#1179

* Add extracted edge facts to entity summaries (getzep#1182)

  * Add extracted edge facts to entity summaries

    Update _extract_entity_summary to include facts from edges connected to
    each node. Edge facts are appended to the existing summary, and LLM
    summarization is only triggered if the combined content exceeds the
    character limit.

    - Add edges parameter to extract_attributes_from_nodes and related functions
    - Filter edges per node before passing to attribute extraction
    - Append edge facts (newline-separated) to node summary
    - Skip LLM call when combined summary is within length limits

  * Remove unused reflexion prompts and invalidate_edges v1

    - Remove reflexion prompts from extract_nodes.py and extract_edges.py
    - Remove extract_nodes_reflexion function from node_operations.py
    - Remove unused v1 function from invalidate_edges.py

  * Filter out None/empty edge facts when building summary

  * Remove unused MissedEntities import

  * Optimize edge filtering with pre-built lookup dictionary

    Replace O(N * E) per-node edge filtering with O(E + N) pre-built dictionary
    lookup. Edges are now indexed by node UUID once before the gather operation
    (a sketch of this pattern follows the revert note below).

  * Handle empty summary edge case

    Return early if summary_with_edges is empty after stripping, avoiding
    storing empty summaries when node.summary and all edge facts are empty.

  * Update tests to reflect summary optimization behavior

    Tests now expect that short summaries are kept as-is without LLM calls.
    Added a new test to verify the LLM is called when the summary exceeds the
    character limit due to edge facts.

  * format

  * Bump version to 0.27.0

  * lock

  * change version

* Fix dependabot security vulnerabilities (getzep#1184)

  Update lock files to address multiple security alerts:
  - pyasn1: 0.6.1 → 0.6.2 (CVE-2026-23490)
  - langchain-core: 0.3.74 → 0.3.83 (CVE-2025-68664)
  - mcp: 1.9.4 → 1.26.0 (DNS rebinding, DoS)
  - azure-core: 1.34.0 → 1.38.0 (deserialization)
  - starlette: 0.46.2/0.47.1 → 0.50.0/0.52.1 (DoS vulnerabilities)
  - python-multipart: 0.0.20 → 0.0.22 (arbitrary file write)
  - fastapi: 0.115.14 → 0.128.0 (for starlette compatibility)
  - nbconvert: 7.16.6 → 7.17.0
  - orjson: 3.11.5 → 3.11.6
  - protobuf: 6.33.4 → 6.33.5

* Revert "Fix dependabot security vulnerabilities" (getzep#1185)

  This reverts commit 30cd907.
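The O(E + N) indexing described in the optimization commit above, as a minimal sketch. This is not the actual graphiti-core code; the attribute names (source_node_uuid, target_node_uuid, node.uuid) are assumptions for illustration:

from collections import defaultdict

def build_edge_lookup(edges):
    """Index edges by node UUID once: O(E) to build, O(1) lookup per node."""
    edges_by_node: dict[str, list] = defaultdict(list)
    for edge in edges:
        # Register each edge under both endpoints so either node can find it.
        edges_by_node[edge.source_node_uuid].append(edge)  # attribute names assumed
        edges_by_node[edge.target_node_uuid].append(edge)
    return edges_by_node

# Per-node filtering then replaces an O(E) scan with a dict lookup:
#     node_edges = edges_by_node.get(node.uuid, [])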
* Pin mcp_server to graphiti-core 0.26.3 (getzep#1186)

  * Fix dependabot security vulnerabilities in dependencies

    Update lock files to address multiple security alerts (the same dependency
    updates as in getzep#1184, listed above).

  * Pin mcp_server to graphiti-core 0.26.3 from PyPI

    - Change dependency from >=0.23.1 to ==0.26.3
    - Remove editable source override to use published package
    - Addresses code review feedback about RC version usage

  * Fix remaining security vulnerabilities in mcp_server

    Update vulnerable transitive dependencies:
    - aiohttp: 3.12.15 → 3.13.3 (High: zip bomb, DoS)
    - urllib3: 2.5.0 → 2.6.3 (High: decompression bomb bypass)
    - filelock: 3.19.1 → 3.20.3 (Medium: TOCTOU symlink)

* Update code review workflows to use claude-opus-4-5-20251101 (getzep#1189)

  * Update manual code review workflow to use claude-opus-4-5-20251101
  * Update auto code review workflow to use claude-opus-4-5-20251101

* Fix Azure OpenAI integration for v1 API compatibility

  This commit addresses several issues with Azure OpenAI integration:

  1. Azure OpenAI Client (graphiti_core/llm_client/azure_openai_client.py):
     - Use AsyncOpenAI with v1 endpoint instead of AsyncAzureOpenAI
     - Implement separate handling for reasoning models (responses.parse) vs
       non-reasoning models (beta.chat.completions.parse)
     - Add custom response handler to parse both response formats correctly
     - Fix RefusalError import path from llm_client.errors

  2. MCP Server Factories (mcp_server/src/services/factories.py):
     - Update Azure OpenAI factory to use v1 compatibility endpoint
     - Use same deployment for both main and small models in Azure
     - Add support for custom embedder endpoints (Ollama compatibility)
     - Add support for custom embedding dimensions
     - Remove unused Azure AD authentication code (TODO for future)
     - Add reasoning model detection for OpenAI provider

  3. MCP Server Configuration (mcp_server/pyproject.toml):
     - Add local graphiti-core source dependency for development

  4. Tests (tests/llm_client/test_azure_openai_client.py):
     - Update test mocks to support beta.chat.completions.parse
     - Update test expectations for non-reasoning model path

  These changes enable Azure OpenAI to work correctly with both reasoning and
  non-reasoning models, support custom embedder endpoints like Ollama, and
  maintain compatibility with the OpenAI v1 API specification.
* fix: address code review feedback

  - Remove local development dependency from mcp_server/pyproject.toml that
    would break PyPI installations
  - Move json import to top of azure_openai_client.py
  - Add comments explaining why non-reasoning models use
    beta.chat.completions.parse instead of responses.parse (Azure v1
    compatibility limitation)

* fix: address minor code review issues

  - Add noqa comment for unused response_model parameter (inherited from the
    abstract method interface)
  - Fix misleading comment in factories.py that referenced Azure OpenAI in the
    regular OpenAI case

Co-authored-by: Claude (us.anthropic.claude-sonnet-4-5-20250929-v1:0) <noreply@anthropic.com>
Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
Co-authored-by: Claude Haiku 4.5 <noreply@anthropic.com>
Co-authored-by: Daniel Chalef <131175+danielchalef@users.noreply.github.com>
Co-authored-by: Preston Rasmussen <109292228+prasmussen15@users.noreply.github.com>
Co-authored-by: prestonrasmussen <prasmuss15@gmail.com>
1 parent 5d5a3f3 commit 25eb0d4
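The heart of item 1 in the message above is constructing a plain AsyncOpenAI client against Azure's v1 compatibility surface. A minimal sketch of that construction, assuming Microsoft's documented /openai/v1/ endpoint shape; the resource name and environment variable are placeholders, not graphiti's actual configuration:

import os

from openai import AsyncOpenAI  # note: not AsyncAzureOpenAI

# Azure's OpenAI v1 compatibility endpoint lives under /openai/v1/ on the
# resource host; a plain OpenAI client pointed there speaks the v1 API.
client = AsyncOpenAI(
    base_url='https://my-resource.openai.azure.com/openai/v1/',  # placeholder resource
    api_key=os.environ['AZURE_OPENAI_API_KEY'],  # placeholder variable name
)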

File tree

6 files changed: +260 additions, -681 deletions


graphiti_core/llm_client/azure_openai_client.py

Lines changed: 72 additions & 20 deletions
@@ -14,8 +14,9 @@
 limitations under the License.
 """
 
+import json
 import logging
-from typing import ClassVar
+from typing import Any, ClassVar
 
 from openai import AsyncAzureOpenAI, AsyncOpenAI
 from openai.types.chat import ChatCompletionMessageParam
@@ -63,34 +64,52 @@ async def _create_structured_completion(
         reasoning: str | None,
         verbosity: str | None,
     ):
-        """Create a structured completion using Azure OpenAI's responses.parse API."""
-        supports_reasoning = self._supports_reasoning_features(model)
-        request_kwargs = {
-            'model': model,
-            'input': messages,
-            'max_output_tokens': max_tokens,
-            'text_format': response_model,  # type: ignore
-        }
+        """Create a structured completion using Azure OpenAI.
 
-        temperature_value = temperature if not supports_reasoning else None
-        if temperature_value is not None:
-            request_kwargs['temperature'] = temperature_value
-
-        if supports_reasoning and reasoning:
-            request_kwargs['reasoning'] = {'effort': reasoning}  # type: ignore
-
-        if supports_reasoning and verbosity:
-            request_kwargs['text'] = {'verbosity': verbosity}  # type: ignore
+        For reasoning models (GPT-5, o1, o3): uses responses.parse API
+        For regular models (GPT-4o, etc): uses chat.completions with response_format
+        """
+        supports_reasoning = self._supports_reasoning_features(model)
 
-        return await self.client.responses.parse(**request_kwargs)
+        if supports_reasoning:
+            # Use responses.parse for reasoning models (o1, o3, gpt-5)
+            request_kwargs = {
+                'model': model,
+                'input': messages,
+                'max_output_tokens': max_tokens,
+                'text_format': response_model,  # type: ignore
+            }
+
+            if reasoning:
+                request_kwargs['reasoning'] = {'effort': reasoning}  # type: ignore
+
+            if verbosity:
+                request_kwargs['text'] = {'verbosity': verbosity}  # type: ignore
+
+            return await self.client.responses.parse(**request_kwargs)
+        else:
+            # Use beta.chat.completions.parse for non-reasoning models (gpt-4o, etc.)
+            # Azure's v1 compatibility endpoint doesn't fully support responses.parse
+            # for non-reasoning models, so we use the structured output API instead
+            request_kwargs = {
+                'model': model,
+                'messages': messages,
+                'max_tokens': max_tokens,
+                'response_format': response_model,  # Structured output
+            }
+
+            if temperature is not None:
+                request_kwargs['temperature'] = temperature
+
+            return await self.client.beta.chat.completions.parse(**request_kwargs)
 
     async def _create_completion(
         self,
         model: str,
         messages: list[ChatCompletionMessageParam],
         temperature: float | None,
         max_tokens: int,
-        response_model: type[BaseModel] | None = None,
+        response_model: type[BaseModel] | None = None,  # noqa: ARG002 - inherited from abstract method
     ):
         """Create a regular completion with JSON format using Azure OpenAI."""
         supports_reasoning = self._supports_reasoning_features(model)
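Condensed out of the hunk above, the two-API dispatch works as in the standalone sketch below. The prefix check is a stand-in for graphiti's _supports_reasoning_features (which may differ), and the Entity model is illustrative:

from pydantic import BaseModel

class Entity(BaseModel):
    name: str

async def structured_parse(client, model: str, messages, schema: type[BaseModel]):
    # Stand-in for _supports_reasoning_features; the real check lives in
    # azure_openai_client.py and may use different rules.
    if model.startswith(('o1', 'o3', 'gpt-5')):
        # Reasoning models: Responses API takes input= and text_format=
        return await client.responses.parse(
            model=model, input=messages, text_format=schema
        )
    # Non-reasoning models: structured output via beta.chat.completions.parse,
    # which takes messages= and response_format=
    return await client.beta.chat.completions.parse(
        model=model, messages=messages, response_format=schema
    )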
@@ -108,6 +127,39 @@ async def _create_completion(
 
         return await self.client.chat.completions.create(**request_kwargs)
 
+    def _handle_structured_response(self, response: Any) -> dict[str, Any]:
+        """Handle structured response parsing for both reasoning and non-reasoning models.
+
+        For reasoning models (responses.parse): uses response.output_text
+        For regular models (beta.chat.completions.parse): uses response.choices[0].message.parsed
+        """
+        # Check if this is a ParsedChatCompletion (from beta.chat.completions.parse)
+        if hasattr(response, 'choices') and response.choices:
+            # Standard ParsedChatCompletion format
+            message = response.choices[0].message
+            if hasattr(message, 'parsed') and message.parsed:
+                # The parsed object is already a Pydantic model, convert to dict
+                return message.parsed.model_dump()
+            elif hasattr(message, 'refusal') and message.refusal:
+                from graphiti_core.llm_client.errors import RefusalError
+
+                raise RefusalError(message.refusal)
+            else:
+                raise Exception(f'Invalid response from LLM: {response.model_dump()}')
+        elif hasattr(response, 'output_text'):
+            # Reasoning model response format (responses.parse)
+            response_object = response.output_text
+            if response_object:
+                return json.loads(response_object)
+            elif hasattr(response, 'refusal') and response.refusal:
+                from graphiti_core.llm_client.errors import RefusalError
+
+                raise RefusalError(response.refusal)
+            else:
+                raise Exception(f'Invalid response from LLM: {response.model_dump()}')
+        else:
+            raise Exception(f'Unknown response format: {type(response)}')
+
     @staticmethod
     def _supports_reasoning_features(model: str) -> bool:
         """Return True when the Azure model supports reasoning/verbosity options."""
