From 5176da5f455b83b71d9d2dbde508ba6d6d2029d2 Mon Sep 17 00:00:00 2001
From: Stephen Toub <stoub@microsoft.com>
Date: Tue, 10 Feb 2026 18:24:16 -0500
Subject: [PATCH] Update to M.E.AI.Abstractions 10.3.0

This also lets us map ChatOptions.Reasoning.
---
 .../AnthropicClientBetaExtensionsTests.cs     |  65 +++
 .../AnthropicClientExtensionsTests.cs         |  65 +++
 .../AnthropicClientExtensionsTestsBase.cs     | 462 ++++++++++++++++++
 src/Anthropic/Anthropic.csproj                |   4 +-
 src/Anthropic/AnthropicClientExtensions.cs    |  50 ++
 .../Messages/AnthropicBetaClientExtensions.cs |  51 +-
 6 files changed, 694 insertions(+), 3 deletions(-)

diff --git a/src/Anthropic.Tests/AnthropicClientBetaExtensionsTests.cs b/src/Anthropic.Tests/AnthropicClientBetaExtensionsTests.cs
index 83671810..0f3b7268 100644
--- a/src/Anthropic.Tests/AnthropicClientBetaExtensionsTests.cs
+++ b/src/Anthropic.Tests/AnthropicClientBetaExtensionsTests.cs
@@ -2493,4 +2493,69 @@ public async Task GetResponseAsync_MeaiUserAgentHeader_PresentAlongsideDefaultHe
             "Default AnthropicClient user-agent header should be present"
         );
     }
+
+    [Fact]
+    public async Task GetResponseAsync_WithReasoningEffort_IgnoredWhenThinkingAlreadyConfigured()
+    {
+        // Thinking supplied through RawRepresentationFactory takes precedence over options.Reasoning.
+        var verbatimHandler = new VerbatimHttpHandler(
+            expectedRequest: """
+            {
+                "max_tokens": 50000,
+                "model": "claude-haiku-4-5",
+                "messages": [{
+                    "role": "user",
+                    "content": [{
+                        "type": "text",
+                        "text": "Think carefully"
+                    }]
+                }],
+                "thinking": {
+                    "type": "enabled",
+                    "budget_tokens": 5000
+                }
+            }
+            """,
+            actualResponse: """
+            {
+                "id": "msg_reasoning_preconfigured",
+                "type": "message",
+                "role": "assistant",
+                "model": "claude-haiku-4-5",
+                "content": [{
+                    "type": "text",
+                    "text": "Response"
+                }],
+                "stop_reason": "end_turn",
+                "usage": {
+                    "input_tokens": 10,
+                    "output_tokens": 15
+                }
+            }
+            """
+        );
+
+        IChatClient client = CreateChatClient(verbatimHandler, "claude-haiku-4-5");
+
+        var chatOptions = new ChatOptions
+        {
+            // The factory already enables Thinking with a 5000-token budget, so the
+            // ExtraHigh effort below must not replace it (the expected request keeps 5000).
+            RawRepresentationFactory = _ => new MessageCreateParams()
+            {
+                MaxTokens = 50000,
+                Model = "claude-haiku-4-5",
+                Messages = [],
+                Thinking = new BetaThinkingConfigParam(new BetaThinkingConfigEnabled(5000)),
+            },
+            Reasoning = new() { Effort = ReasoningEffort.ExtraHigh },
+        };
+
+        ChatResponse reply = await client.GetResponseAsync(
+            "Think carefully",
+            chatOptions,
+            TestContext.Current.CancellationToken
+        );
+        Assert.NotNull(reply);
+    }
 }
diff --git a/src/Anthropic.Tests/AnthropicClientExtensionsTests.cs b/src/Anthropic.Tests/AnthropicClientExtensionsTests.cs
index 7d41d832..654e83f8 100644
--- a/src/Anthropic.Tests/AnthropicClientExtensionsTests.cs
+++ b/src/Anthropic.Tests/AnthropicClientExtensionsTests.cs
@@ -750,4 +750,69 @@ public async Task GetResponseAsync_MeaiUserAgentHeader_PresentAlongsideDefaultHe
             "Default AnthropicClient user-agent header should be present"
         );
     }
+
+    [Fact]
+    public async Task GetResponseAsync_WithReasoningEffort_IgnoredWhenThinkingAlreadyConfigured()
+    {
+        // Thinking supplied through RawRepresentationFactory takes precedence over options.Reasoning.
+        var verbatimHandler = new VerbatimHttpHandler(
+            expectedRequest: """
+            {
+                "max_tokens": 50000,
+                "model": "claude-haiku-4-5",
+                "messages": [{
+                    "role": "user",
+                    "content": [{
+                        "type": "text",
+                        "text": "Think carefully"
+                    }]
+                }],
+                "thinking": {
+                    "type": "enabled",
+                    "budget_tokens": 5000
+                }
+            }
+            """,
+            actualResponse: """
+            {
+                "id": "msg_reasoning_preconfigured",
+                "type": "message",
+                "role": "assistant",
+                "model": "claude-haiku-4-5",
+                "content": [{
+                    "type": "text",
+                    "text": "Response"
+                }],
+                "stop_reason": "end_turn",
+                "usage": {
+                    "input_tokens": 10,
+                    "output_tokens": 15
+                }
+            }
+            """
+        );
+
+        IChatClient client = CreateChatClient(verbatimHandler, "claude-haiku-4-5");
+
+        var chatOptions = new ChatOptions
+        {
+            // The factory already enables Thinking with a 5000-token budget, so the
+            // ExtraHigh effort below must not replace it (the expected request keeps 5000).
+            RawRepresentationFactory = _ => new MessageCreateParams()
+            {
+                MaxTokens = 50000,
+                Model = "claude-haiku-4-5",
+                Messages = [],
+                Thinking = new ThinkingConfigParam(new ThinkingConfigEnabled(5000)),
+            },
+            Reasoning = new() { Effort = ReasoningEffort.ExtraHigh },
+        };
+
+        ChatResponse reply = await client.GetResponseAsync(
+            "Think carefully",
+            chatOptions,
+            TestContext.Current.CancellationToken
+        );
+        Assert.NotNull(reply);
+    }
 }
diff --git a/src/Anthropic.Tests/AnthropicClientExtensionsTestsBase.cs b/src/Anthropic.Tests/AnthropicClientExtensionsTestsBase.cs
index b645e1f6..12437db2 100644
--- a/src/Anthropic.Tests/AnthropicClientExtensionsTestsBase.cs
+++ b/src/Anthropic.Tests/AnthropicClientExtensionsTestsBase.cs
@@ -2704,6 +2704,468 @@ public async Task GetResponseAsync_WithNullFinishReason()
         Assert.Null(response.FinishReason);
     }
 
+    [Theory]
+    [InlineData(ReasoningEffort.Low, 1024)]
+    [InlineData(ReasoningEffort.Medium, 10024)]
+    [InlineData(ReasoningEffort.High, 16000)]
+    [InlineData(ReasoningEffort.ExtraHigh, 32000)]
+    public async Task GetResponseAsync_WithReasoningEffort_SetsThinkingEnabled(
+        ReasoningEffort effort,
+        int expectedBudgetTokens
+    )
+    {
+        var verbatimHandler = new VerbatimHttpHandler(
+            expectedRequest: $$"""
+            {
+                "model": "claude-haiku-4-5",
+                "messages": [{
+                    "role": "user",
+                    "content": [{
+                        "type": "text",
+                        "text": "Think carefully"
+                    }]
+                }],
+                "max_tokens": 100000,
+                "thinking": {
+                    "type": "enabled",
+                    "budget_tokens": {{expectedBudgetTokens}}
+                }
+            }
+            """,
+            actualResponse: """
+            {
+                "id": "msg_reasoning_01",
+                "type": "message",
+                "role": "assistant",
+                "model": "claude-haiku-4-5",
+                "content": [{
+                    "type": "text",
+                    "text": "Here is my response"
+                }],
+                "stop_reason": "end_turn",
+                "usage": {
+                    "input_tokens": 10,
+                    "output_tokens": 20
+                }
+            }
+            """
+        );
+
+        IChatClient client = CreateChatClient(verbatimHandler, "claude-haiku-4-5");
+
+        var chatOptions = new ChatOptions
+        {
+            MaxOutputTokens = 100000, // above every mapped budget, so no clamping is expected
+            Reasoning = new() { Effort = effort },
+        };
+
+        ChatResponse reply = await client.GetResponseAsync(
+            "Think carefully",
+            chatOptions,
+            TestContext.Current.CancellationToken
+        );
+        Assert.NotNull(reply);
+    }
+
+    [Fact]
+    public async Task GetResponseAsync_WithReasoningEffortNone_SetsThinkingDisabled()
+    {
+        var verbatimHandler = new VerbatimHttpHandler(
+            expectedRequest: """
+            {
+                "model": "claude-haiku-4-5",
+                "messages": [{
+                    "role": "user",
+                    "content": [{
+                        "type": "text",
+                        "text": "Simple question"
+                    }]
+                }],
+                "max_tokens": 1024,
+                "thinking": {
+                    "type": "disabled"
+                }
+            }
+            """,
+            actualResponse: """
+            {
+                "id": "msg_reasoning_02",
+                "type": "message",
+                "role": "assistant",
+                "model": "claude-haiku-4-5",
+                "content": [{
+                    "type": "text",
+                    "text": "Quick answer"
+                }],
+                "stop_reason": "end_turn",
+                "usage": {
+                    "input_tokens": 10,
+                    "output_tokens": 5
+                }
+            }
+            """
+        );
+
+        IChatClient client = CreateChatClient(verbatimHandler, "claude-haiku-4-5");
+
+        var chatOptions = new ChatOptions
+        {
+            Reasoning = new() { Effort = ReasoningEffort.None }, // expected to serialize as "disabled"
+        };
+
+        ChatResponse reply = await client.GetResponseAsync(
+            "Simple question",
+            chatOptions,
+            TestContext.Current.CancellationToken
+        );
+        Assert.NotNull(reply);
+    }
+
+    [Fact]
+    public async Task GetResponseAsync_WithReasoningEffort_ClampsBudgetToExplicitMaxTokens()
+    {
+        // An explicit MaxOutputTokens of 5000 is below the 16000-token High budget,
+        // so the budget is expected to shrink to max_tokens - 1 = 4999.
+        var verbatimHandler = new VerbatimHttpHandler(
+            expectedRequest: """
+            {
+                "model": "claude-haiku-4-5",
+                "messages": [{
+                    "role": "user",
+                    "content": [{
+                        "type": "text",
+                        "text": "Think carefully"
+                    }]
+                }],
+                "max_tokens": 5000,
+                "thinking": {
+                    "type": "enabled",
+                    "budget_tokens": 4999
+                }
+            }
+            """,
+            actualResponse: """
+            {
+                "id": "msg_reasoning_03",
+                "type": "message",
+                "role": "assistant",
+                "model": "claude-haiku-4-5",
+                "content": [{
+                    "type": "text",
+                    "text": "Response"
+                }],
+                "stop_reason": "end_turn",
+                "usage": {
+                    "input_tokens": 10,
+                    "output_tokens": 15
+                }
+            }
+            """
+        );
+
+        IChatClient client = CreateChatClient(verbatimHandler, "claude-haiku-4-5");
+
+        var chatOptions = new ChatOptions
+        {
+            MaxOutputTokens = 5000,
+            Reasoning = new() { Effort = ReasoningEffort.High },
+        };
+
+        ChatResponse reply = await client.GetResponseAsync(
+            "Think carefully",
+            chatOptions,
+            TestContext.Current.CancellationToken
+        );
+        Assert.NotNull(reply);
+    }
+
+    [Fact]
+    public async Task GetResponseAsync_WithReasoningEffort_SkipsThinkingWhenExplicitMaxTokensTooSmall()
+    {
+        // With MaxOutputTokens pinned to 1024, the Medium budget (10024) would clamp to 1023,
+        // which is under the 1024 minimum, so the expected request carries no thinking block.
+        var verbatimHandler = new VerbatimHttpHandler(
+            expectedRequest: """
+            {
+                "model": "claude-haiku-4-5",
+                "messages": [{
+                    "role": "user",
+                    "content": [{
+                        "type": "text",
+                        "text": "Think carefully"
+                    }]
+                }],
+                "max_tokens": 1024
+            }
+            """,
+            actualResponse: """
+            {
+                "id": "msg_reasoning_04",
+                "type": "message",
+                "role": "assistant",
+                "model": "claude-haiku-4-5",
+                "content": [{
+                    "type": "text",
+                    "text": "Response"
+                }],
+                "stop_reason": "end_turn",
+                "usage": {
+                    "input_tokens": 10,
+                    "output_tokens": 15
+                }
+            }
+            """
+        );
+
+        IChatClient client = CreateChatClient(verbatimHandler, "claude-haiku-4-5");
+
+        var chatOptions = new ChatOptions
+        {
+            MaxOutputTokens = 1024,
+            Reasoning = new() { Effort = ReasoningEffort.Medium },
+        };
+
+        ChatResponse reply = await client.GetResponseAsync(
+            "Think carefully",
+            chatOptions,
+            TestContext.Current.CancellationToken
+        );
+        Assert.NotNull(reply);
+    }
+
+    [Fact]
+    public async Task GetResponseAsync_WithReasoningEffort_AutoIncreasesMaxTokensFromDefault()
+    {
+        // No MaxOutputTokens is given and the default (1024) cannot hold the Medium budget
+        // (10024), so max_tokens is expected to grow to 10024 + 1024 = 11048.
+        var verbatimHandler = new VerbatimHttpHandler(
+            expectedRequest: """
+            {
+                "model": "claude-haiku-4-5",
+                "messages": [{
+                    "role": "user",
+                    "content": [{
+                        "type": "text",
+                        "text": "Think carefully"
+                    }]
+                }],
+                "max_tokens": 11048,
+                "thinking": {
+                    "type": "enabled",
+                    "budget_tokens": 10024
+                }
+            }
+            """,
+            actualResponse: """
+            {
+                "id": "msg_reasoning_05",
+                "type": "message",
+                "role": "assistant",
+                "model": "claude-haiku-4-5",
+                "content": [{
+                    "type": "text",
+                    "text": "Response"
+                }],
+                "stop_reason": "end_turn",
+                "usage": {
+                    "input_tokens": 10,
+                    "output_tokens": 15
+                }
+            }
+            """
+        );
+
+        IChatClient client = CreateChatClient(verbatimHandler, "claude-haiku-4-5");
+
+        var chatOptions = new ChatOptions
+        {
+            Reasoning = new() { Effort = ReasoningEffort.Medium },
+        };
+
+        ChatResponse reply = await client.GetResponseAsync(
+            "Think carefully",
+            chatOptions,
+            TestContext.Current.CancellationToken
+        );
+        Assert.NotNull(reply);
+    }
+
+    [Fact]
+    public async Task GetResponseAsync_WithReasoningEffortLow_AutoIncreasesFromDefaultMaxTokens()
+    {
+        // The Low budget (1024) equals the default max_tokens (1024); the budget must be strictly
+        // smaller, so max_tokens is expected to grow to 1024 + 1024 = 2048.
+        var verbatimHandler = new VerbatimHttpHandler(
+            expectedRequest: """
+            {
+                "model": "claude-haiku-4-5",
+                "messages": [{
+                    "role": "user",
+                    "content": [{
+                        "type": "text",
+                        "text": "Think a little"
+                    }]
+                }],
+                "max_tokens": 2048,
+                "thinking": {
+                    "type": "enabled",
+                    "budget_tokens": 1024
+                }
+            }
+            """,
+            actualResponse: """
+            {
+                "id": "msg_reasoning_06",
+                "type": "message",
+                "role": "assistant",
+                "model": "claude-haiku-4-5",
+                "content": [{
+                    "type": "text",
+                    "text": "Response"
+                }],
+                "stop_reason": "end_turn",
+                "usage": {
+                    "input_tokens": 10,
+                    "output_tokens": 15
+                }
+            }
+            """
+        );
+
+        IChatClient client = CreateChatClient(verbatimHandler, "claude-haiku-4-5");
+
+        var chatOptions = new ChatOptions
+        {
+            Reasoning = new() { Effort = ReasoningEffort.Low },
+        };
+
+        ChatResponse reply = await client.GetResponseAsync(
+            "Think a little",
+            chatOptions,
+            TestContext.Current.CancellationToken
+        );
+        Assert.NotNull(reply);
+    }
+
+    [Fact]
+    public async Task GetResponseAsync_WithReasoningEffort_ExactFitMaxTokensOneBeyondBudget()
+    {
+        // MaxOutputTokens = 1025 is the smallest value that still exceeds the Low budget (1024),
+        // so neither clamping nor auto-increase is expected.
+        var verbatimHandler = new VerbatimHttpHandler(
+            expectedRequest: """
+            {
+                "model": "claude-haiku-4-5",
+                "messages": [{
+                    "role": "user",
+                    "content": [{
+                        "type": "text",
+                        "text": "Think a little"
+                    }]
+                }],
+                "max_tokens": 1025,
+                "thinking": {
+                    "type": "enabled",
+                    "budget_tokens": 1024
+                }
+            }
+            """,
+            actualResponse: """
+            {
+                "id": "msg_reasoning_07",
+                "type": "message",
+                "role": "assistant",
+                "model": "claude-haiku-4-5",
+                "content": [{
+                    "type": "text",
+                    "text": "Response"
+                }],
+                "stop_reason": "end_turn",
+                "usage": {
+                    "input_tokens": 10,
+                    "output_tokens": 15
+                }
+            }
+            """
+        );
+
+        IChatClient client = CreateChatClient(verbatimHandler, "claude-haiku-4-5");
+
+        var chatOptions = new ChatOptions
+        {
+            MaxOutputTokens = 1025,
+            Reasoning = new() { Effort = ReasoningEffort.Low },
+        };
+
+        ChatResponse reply = await client.GetResponseAsync(
+            "Think a little",
+            chatOptions,
+            TestContext.Current.CancellationToken
+        );
+        Assert.NotNull(reply);
+    }
+
+    [Fact]
+    public async Task GetResponseAsync_WithReasoningEffort_NoAutoIncreaseWhenDefaultMaxTokensSufficient()
+    {
+        // The client is created with a default of 5000 max tokens, which already exceeds the
+        // Low budget (1024), so max_tokens is expected to stay at 5000.
+        var verbatimHandler = new VerbatimHttpHandler(
+            expectedRequest: """
+            {
+                "model": "claude-haiku-4-5",
+                "messages": [{
+                    "role": "user",
+                    "content": [{
+                        "type": "text",
+                        "text": "Think a little"
+                    }]
+                }],
+                "max_tokens": 5000,
+                "thinking": {
+                    "type": "enabled",
+                    "budget_tokens": 1024
+                }
+            }
+            """,
+            actualResponse: """
+            {
+                "id": "msg_reasoning_08",
+                "type": "message",
+                "role": "assistant",
+                "model": "claude-haiku-4-5",
+                "content": [{
+                    "type": "text",
+                    "text": "Response"
+                }],
+                "stop_reason": "end_turn",
+                "usage": {
+                    "input_tokens": 10,
+                    "output_tokens": 15
+                }
+            }
+            """
+        );
+
+        IChatClient client = CreateChatClient(
+            verbatimHandler,
+            "claude-haiku-4-5",
+            defaultMaxOutputTokens: 5000
+        );
+
+        var chatOptions = new ChatOptions
+        {
+            Reasoning = new() { Effort = ReasoningEffort.Low },
+        };
+
+        ChatResponse reply = await client.GetResponseAsync(
+            "Think a little",
+            chatOptions,
+            TestContext.Current.CancellationToken
+        );
+        Assert.NotNull(reply);
+    }
+
     [Fact]
     public async Task GetResponseAsync_SendsTextReasoningAsThinkingBlock()
     {
diff --git a/src/Anthropic/Anthropic.csproj b/src/Anthropic/Anthropic.csproj
index 82e43428..5bc663fe 100644
--- a/src/Anthropic/Anthropic.csproj
+++ b/src/Anthropic/Anthropic.csproj
@@ -15,9 +15,9 @@
     <InternalsVisibleTo Include="Anthropic.Tests,PublicKey=002400000480000094000000060200000024000052534131000400000100010063a3c9395e3d40be5b18dc9c4104236f7a42b63f7fee034f73dcfdb09a677d5bb552f6ffe35d580da1195fd5098ee99affcd842ceeca7973611c3eae10bafa96159ecf8bdd0252ac670b49ffaefd8986a272897cf68b9b1b3e0372dcabff785d5c6c90eab31633416d812ad7f899cc70a2ffc6d869580694017cab7e8c97f4b6"/>
     <None Include="..\logo.png" Pack="true" PackagePath="\"/>
     <None Include="..\..\README.md" Pack="true" PackagePath="\"/>
-    <PackageReference Include="System.Text.Json" Version="10.0.2"/>
+    <PackageReference Include="System.Text.Json" Version="10.0.3"/>
     <PackageReference Include="System.Net.ServerSentEvents" Version="10.0.1"/>
-    <PackageReference Include="Microsoft.Extensions.AI.Abstractions" Version="10.2.0"/>
+    <PackageReference Include="Microsoft.Extensions.AI.Abstractions" Version="10.3.0"/>
     <PackageReference Include="Microsoft.SourceLink.GitHub" Version="8.0.0" PrivateAssets="All"/>
   </ItemGroup>
   <ItemGroup Condition="'$(TargetFramework)' == 'netstandard2.0'">
diff --git a/src/Anthropic/AnthropicClientExtensions.cs b/src/Anthropic/AnthropicClientExtensions.cs
index a5f96cce..49de56da 100644
--- a/src/Anthropic/AnthropicClientExtensions.cs
+++ b/src/Anthropic/AnthropicClientExtensions.cs
@@ -917,6 +917,56 @@ toolMode is AutoChatToolMode
                         createParams = createParams with { ToolChoice = toolChoice };
                     }
                 }
+
+                // Map ChatOptions.Reasoning onto Anthropic thinking unless Thinking is already set.
+                if (createParams.Thinking is null && options.Reasoning is { } reasoning)
+                {
+                    ThinkingConfigParam? thinkingConfig = null;
+                    if (reasoning.Effort is ReasoningEffort.None)
+                    {
+                        thinkingConfig = new(new ThinkingConfigDisabled());
+                    }
+                    else
+                    {
+                        long? budgetTokens = reasoning.Effort switch
+                        {
+                            ReasoningEffort.Low => 1024,
+                            ReasoningEffort.Medium => 10024,
+                            ReasoningEffort.High => 16000,
+                            ReasoningEffort.ExtraHigh => 32000,
+                            _ => null, // unset or unrecognized effort: leave Thinking unconfigured
+                        };
+
+                        if (budgetTokens is { } budget)
+                        {
+                            // Anthropic requires budget_tokens >= 1024 and budget_tokens < max_tokens.
+                            if (createParams.MaxTokens <= budget)
+                            {
+                                if (options.MaxOutputTokens is not null)
+                                {
+                                    // Caller explicitly set MaxOutputTokens: clamp the budget below it.
+                                    budget = createParams.MaxTokens - 1;
+                                }
+                                else
+                                {
+                                    // Caller didn't set MaxOutputTokens. Auto-increase max_tokens
+                                    // to accommodate the thinking budget plus room for output.
+                                    createParams = createParams with
+                                    {
+                                        MaxTokens = budget + _defaultMaxTokens,
+                                    };
+                                }
+                            }
+                            // A budget clamped below the 1024-token minimum is dropped, leaving thinking unset.
+                            thinkingConfig = budget >= 1024 ? new(new ThinkingConfigEnabled(budget)) : null;
+                        }
+                    }
+
+                    if (thinkingConfig is not null)
+                    {
+                        createParams = createParams with { Thinking = thinkingConfig };
+                    }
+                }
             }
 
             if (systemMessages is not null)
diff --git a/src/Anthropic/Services/Beta/Messages/AnthropicBetaClientExtensions.cs b/src/Anthropic/Services/Beta/Messages/AnthropicBetaClientExtensions.cs
index 1736674e..1154d3b1 100644
--- a/src/Anthropic/Services/Beta/Messages/AnthropicBetaClientExtensions.cs
+++ b/src/Anthropic/Services/Beta/Messages/AnthropicBetaClientExtensions.cs
@@ -11,7 +11,6 @@
 using System.Threading;
 using System.Threading.Tasks;
 using Anthropic.Core;
-using Anthropic.Models.Beta;
 using Anthropic.Models.Beta.Messages;
 using Anthropic.Services.Beta;
 
@@ -1156,6 +1155,56 @@ toolMode is AutoChatToolMode
                         createParams = createParams with { ToolChoice = toolChoice };
                     }
                 }
+
+                // Map ChatOptions.Reasoning onto Anthropic thinking unless Thinking is already set.
+                if (createParams.Thinking is null && options.Reasoning is { } reasoning)
+                {
+                    BetaThinkingConfigParam? thinkingConfig = null;
+                    if (reasoning.Effort is ReasoningEffort.None)
+                    {
+                        thinkingConfig = new(new BetaThinkingConfigDisabled());
+                    }
+                    else
+                    {
+                        long? budgetTokens = reasoning.Effort switch
+                        {
+                            ReasoningEffort.Low => 1024,
+                            ReasoningEffort.Medium => 10024,
+                            ReasoningEffort.High => 16000,
+                            ReasoningEffort.ExtraHigh => 32000,
+                            _ => null, // unset or unrecognized effort: leave Thinking unconfigured
+                        };
+
+                        if (budgetTokens is { } budget)
+                        {
+                            // Anthropic requires budget_tokens >= 1024 and budget_tokens < max_tokens.
+                            if (createParams.MaxTokens <= budget)
+                            {
+                                if (options.MaxOutputTokens is not null)
+                                {
+                                    // Caller explicitly set MaxOutputTokens: clamp the budget below it.
+                                    budget = createParams.MaxTokens - 1;
+                                }
+                                else
+                                {
+                                    // Caller didn't set MaxOutputTokens. Auto-increase max_tokens
+                                    // to accommodate the thinking budget plus room for output.
+                                    createParams = createParams with
+                                    {
+                                        MaxTokens = budget + _defaultMaxTokens,
+                                    };
+                                }
+                            }
+                            // A budget clamped below the 1024-token minimum is dropped, leaving thinking unset.
+                            thinkingConfig = budget >= 1024 ? new(new BetaThinkingConfigEnabled(budget)) : null;
+                        }
+                    }
+
+                    if (thinkingConfig is not null)
+                    {
+                        createParams = createParams with { Thinking = thinkingConfig };
+                    }
+                }
             }
 
             if (systemMessages is not null)