diff --git a/docs/guides/multimodal-content.md b/docs/guides/multimodal-content.md
index 65100420d..2b8e32bd2 100644
--- a/docs/guides/multimodal-content.md
+++ b/docs/guides/multimodal-content.md
@@ -26,12 +26,13 @@ const textPart: TextPart = {
content: 'What do you see in this image?'
}
-// Image from base64 data
+// Image from base64 data (mimeType is required for data sources)
const imagePart: ImagePart = {
type: 'image',
source: {
type: 'data',
- value: 'base64EncodedImageData...'
+ value: 'base64EncodedImageData...',
+ mimeType: 'image/jpeg' // Required for data sources
},
metadata: {
// Provider-specific metadata
@@ -39,12 +40,13 @@ const imagePart: ImagePart = {
}
}
-// Image from URL
+// Image from URL (mimeType is optional for URL sources)
const imageUrlPart: ImagePart = {
type: 'image',
source: {
type: 'url',
- value: 'https://example.com/image.jpg'
+ value: 'https://example.com/image.jpg',
+ mimeType: 'image/jpeg' // Optional hint for URL sources
}
}
```
@@ -95,7 +97,7 @@ const message = {
{ type: 'text', content: 'Describe this image' },
{
type: 'image',
- source: { type: 'data' , value: imageBase64 },
+ source: { type: 'data' , value: imageBase64, mimeType: 'image/jpeg' },
metadata: { detail: 'high' } // 'auto' | 'low' | 'high'
}
]
@@ -115,15 +117,14 @@ import { anthropicText } from '@tanstack/ai-anthropic'
const adapter = anthropicText()
-// Image with media type
+// Image with mimeType in source
const imageMessage = {
role: 'user',
content: [
{ type: 'text', content: 'What do you see?' },
{
type: 'image',
- source: { type: 'data' , value: imageBase64 },
- metadata: { media_type: 'image/jpeg' }
+ source: { type: 'data' , value: imageBase64, mimeType: 'image/jpeg' }
}
]
}
@@ -135,7 +136,7 @@ const docMessage = {
{ type: 'text', content: 'Summarize this document' },
{
type: 'document',
- source: { type: 'data', value: pdfBase64 }
+ source: { type: 'data', value: pdfBase64, mimeType: 'application/pdf' }
}
]
}
@@ -154,15 +155,14 @@ import { geminiText } from '@tanstack/ai-gemini'
const adapter = geminiText()
-// Image with mimeType
+// Image with mimeType in source
const message = {
role: 'user',
content: [
{ type: 'text', content: 'Analyze this image' },
{
type: 'image',
- source: { type: 'data', value: imageBase64 },
- metadata: { mimeType: 'image/png' }
+ source: { type: 'data', value: imageBase64, mimeType: 'image/png' }
}
]
}
@@ -188,7 +188,7 @@ const message = {
{ type: 'text', content: 'What is in this image?' },
{
type: 'image',
- source: { type: 'data', value: imageBase64 }
+ source: { type: 'data', value: imageBase64, mimeType: 'image/jpeg' }
}
]
}
@@ -202,28 +202,39 @@ Content can be provided as either inline data or a URL:
### Data (Base64)
-Use `type: 'data'` for inline base64-encoded content:
+Use `type: 'data'` for inline base64-encoded content. **The `mimeType` field is required** to ensure providers receive proper content type information:
```typescript
const imagePart = {
type: 'image',
source: {
type: 'data',
- value: 'iVBORw0KGgoAAAANSUhEUgAAAAUA...' // Base64 string
+ value: 'iVBORw0KGgoAAAANSUhEUgAAAAUA...', // Base64 string
+ mimeType: 'image/png' // Required for data sources
+ }
+}
+
+const audioPart = {
+ type: 'audio',
+ source: {
+ type: 'data',
+ value: 'base64AudioData...',
+ mimeType: 'audio/mp3' // Required for data sources
}
}
```
### URL
-Use `type: 'url'` for content hosted at a URL:
+Use `type: 'url'` for content hosted at a URL. The `mimeType` field is **optional** as providers can often infer it from the URL or response headers:
```typescript
const imagePart = {
type: 'image',
source: {
type: 'url',
- value: 'https://example.com/image.jpg'
+ value: 'https://example.com/image.jpg',
+ mimeType: 'image/jpeg' // Optional hint
}
}
```
@@ -315,3 +326,163 @@ const stream = chat({
3. **Check model support**: Not all models support all modalities. Verify the model you're using supports the content types you want to send.
4. **Handle errors gracefully**: When a model doesn't support a particular modality, it may throw an error. Handle these cases in your application.
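+
+As a sketch of that last point (hypothetical handling; the exact error shape depends on the provider adapter):
+
+```typescript
+try {
+  const stream = chat({
+    // ... model, adapter, and multimodal messages as shown above
+  })
+  for await (const chunk of stream) {
+    // process chunks as usual
+  }
+} catch (error) {
+  // e.g. the model rejected an unsupported modality
+  console.error('Generation failed:', error)
+}
+```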
+
+## Client-Side Multimodal Messages
+
+When using the `ChatClient` from `@tanstack/ai-client`, you can send multimodal messages directly from your UI using the `sendMessage` method.
+
+### Basic Usage
+
+The `sendMessage` method accepts either a simple string or a `MultimodalContent` object:
+
+```typescript
+import { ChatClient, fetchServerSentEvents } from '@tanstack/ai-client'
+
+const client = new ChatClient({
+ connection: fetchServerSentEvents('/api/chat'),
+})
+
+// Simple text message
+await client.sendMessage('Hello!')
+
+// Multimodal message with image
+await client.sendMessage({
+ content: [
+ { type: 'text', content: 'What is in this image?' },
+ {
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/photo.jpg' }
+ }
+ ]
+})
+```
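+
+Inline `data` sources work the same way from the client; just remember that `mimeType` is required for them. A sketch, assuming `pdfBase64` already holds a base64-encoded PDF:
+
+```typescript
+// Multimodal message with an inline document
+await client.sendMessage({
+  content: [
+    { type: 'text', content: 'Summarize this document' },
+    {
+      type: 'document',
+      source: { type: 'data', value: pdfBase64, mimeType: 'application/pdf' }
+    }
+  ]
+})
+```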
+
+### Custom Message ID
+
+You can provide a custom ID for the message:
+
+```typescript
+await client.sendMessage({
+ content: 'Hello!',
+ id: 'custom-message-id-123'
+})
+```
+
+### Per-Message Body Parameters
+
+The second parameter allows you to pass additional body parameters for that specific request. These are shallow-merged with the client's base body configuration, with per-message parameters taking priority:
+
+```typescript
+const client = new ChatClient({
+ connection: fetchServerSentEvents('/api/chat'),
+ body: { model: 'gpt-5' }, // Base body params
+})
+
+// Override model for this specific message
+await client.sendMessage('Analyze this complex problem', {
+ model: 'gpt-5-mini',
+ temperature: 0.2,
+})
+```
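+
+For the override example above, the connection receives the shallow-merged body. Roughly (a sketch; `conversationId` comes from the client's ID):
+
+```typescript
+// Effective request body (per-message keys win over base keys)
+const effectiveBody = {
+  model: 'gpt-5-mini', // per-message override of the base 'gpt-5'
+  temperature: 0.2, // from the per-message body
+  conversationId: 'my-conversation', // added automatically by the client
+}
+```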
+
+### React Example
+
+Here's how to use multimodal messages in a React component:
+
+```tsx
+import { useChat } from '@tanstack/ai-react'
+import { fetchServerSentEvents } from '@tanstack/ai-client'
+import { useState } from 'react'
+
+function ChatWithImages() {
+ const [imageUrl, setImageUrl] = useState('')
+ const { sendMessage, messages } = useChat({
+ connection: fetchServerSentEvents('/api/chat'),
+ })
+
+ const handleSendWithImage = () => {
+ if (imageUrl) {
+ sendMessage({
+ content: [
+ { type: 'text', content: 'What do you see in this image?' },
+ { type: 'image', source: { type: 'url', value: imageUrl } }
+ ]
+ })
+ }
+ }
+
+ return (
+ <div>
+ <input
+ value={imageUrl}
+ onChange={(e) => setImageUrl(e.target.value)}
+ placeholder="Image URL"
+ />
+ <button onClick={handleSendWithImage}>Send with image</button>
+ </div>
+ )
+}
+```
+
+### File Upload Example
+
+Here's how to handle file uploads and send them as multimodal content:
+
+```tsx
+import { useChat } from '@tanstack/ai-react'
+import { fetchServerSentEvents } from '@tanstack/ai-client'
+
+function ChatWithFileUpload() {
+ const { sendMessage } = useChat({
+ connection: fetchServerSentEvents('/api/chat'),
+ })
+
+ const handleFileUpload = async (file: File) => {
+ // Convert file to base64
+ const base64 = await new Promise<string>((resolve) => {
+ const reader = new FileReader()
+ reader.onload = () => {
+ const result = reader.result as string
+ // Remove data URL prefix (e.g., "data:image/png;base64,")
+ resolve(result.split(',')[1])
+ }
+ reader.readAsDataURL(file)
+ })
+
+ // Determine content type based on file type
+ const type = file.type.startsWith('image/')
+ ? 'image'
+ : file.type.startsWith('audio/')
+ ? 'audio'
+ : file.type.startsWith('video/')
+ ? 'video'
+ : 'document'
+
+ await sendMessage({
+ content: [
+ { type: 'text', content: `Please analyze this ${type}` },
+ {
+ type,
+ source: { type: 'data', value: base64, mimeType: file.type }
+ }
+ ]
+ })
+ }
+
+ return (
+ <input
+ type="file"
+ onChange={(e) => {
+ const file = e.target.files?.[0]
+ if (file) handleFileUpload(file)
+ }}
+ />
+ )
+}
+```
+
diff --git a/examples/ts-react-chat/src/routes/index.tsx b/examples/ts-react-chat/src/routes/index.tsx
index c9436c7a9..3463c6610 100644
--- a/examples/ts-react-chat/src/routes/index.tsx
+++ b/examples/ts-react-chat/src/routes/index.tsx
@@ -1,6 +1,6 @@
import { useEffect, useMemo, useRef, useState } from 'react'
import { createFileRoute } from '@tanstack/react-router'
-import { Send, Square } from 'lucide-react'
+import { ImagePlus, Send, Square, X } from 'lucide-react'
import ReactMarkdown from 'react-markdown'
import rehypeRaw from 'rehype-raw'
import rehypeSanitize from 'rehype-sanitize'
@@ -10,6 +10,7 @@ import { fetchServerSentEvents, useChat } from '@tanstack/ai-react'
import { clientTools } from '@tanstack/ai-client'
import { ThinkingPart } from '@tanstack/ai-react-ui'
import type { UIMessage } from '@tanstack/ai-react'
+import type { ContentPart } from '@tanstack/ai'
import type { ModelOption } from '@/lib/model-selection'
import GuitarRecommendation from '@/components/example-GuitarRecommendation'
import {
@@ -20,6 +21,13 @@ import {
} from '@/lib/guitar-tools'
import { DEFAULT_MODEL_OPTION, MODEL_OPTIONS } from '@/lib/model-selection'
+/**
+ * Generate a random message ID
+ */
+function generateMessageId(): string {
+ return `msg-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`
+}
+
const getPersonalGuitarPreferenceToolClient =
getPersonalGuitarPreferenceToolDef.client(() => ({ preference: 'acoustic' }))
@@ -148,6 +156,23 @@ function Messages({
)
}
+ // Render image parts
+ if (part.type === 'image') {
+ const imageUrl =
+ part.source.type === 'url'
+ ? part.source.value
+ : `data:${part.source.mimeType ?? 'image/png'};base64,${part.source.value}`
+ return (
+ <img src={imageUrl} alt="attached content" />
+ )
+ }
+
// Approval UI
if (
part.type === 'tool-call' &&
@@ -226,6 +251,10 @@ function Messages({
function ChatPage() {
const [selectedModel, setSelectedModel] =
useState<ModelOption>(DEFAULT_MODEL_OPTION)
+ const [attachedImages, setAttachedImages] = useState<
+ Array<{ id: string; base64: string; mimeType: string; preview: string }>
+ >([])
+ const fileInputRef = useRef<HTMLInputElement>(null)
const body = useMemo(
() => ({
@@ -243,6 +272,103 @@ function ChatPage() {
})
const [input, setInput] = useState('')
+ /**
+ * Handle file selection for image attachment
+ */
+ const handleFileSelect = async (e: React.ChangeEvent<HTMLInputElement>) => {
+ const files = e.target.files
+ if (!files || files.length === 0) return
+
+ const newImages: Array<{
+ id: string
+ base64: string
+ mimeType: string
+ preview: string
+ }> = []
+
+ for (const file of Array.from(files)) {
+ if (!file.type.startsWith('image/')) continue
+
+ const base64 = await new Promise<string>((resolve) => {
+ const reader = new FileReader()
+ reader.onload = () => {
+ const result = reader.result as string
+ // Remove data URL prefix (e.g., "data:image/png;base64,")
+ resolve(result.split(',')[1])
+ }
+ reader.readAsDataURL(file)
+ })
+
+ const preview = URL.createObjectURL(file)
+ newImages.push({
+ id: generateMessageId(),
+ base64,
+ mimeType: file.type, // Capture the actual mime type
+ preview,
+ })
+ }
+
+ setAttachedImages((prev) => [...prev, ...newImages])
+
+ // Reset the file input
+ if (fileInputRef.current) {
+ fileInputRef.current.value = ''
+ }
+ }
+
+ /**
+ * Remove an attached image
+ */
+ const removeImage = (id: string) => {
+ setAttachedImages((prev) => {
+ const image = prev.find((img) => img.id === id)
+ if (image) {
+ URL.revokeObjectURL(image.preview)
+ }
+ return prev.filter((img) => img.id !== id)
+ })
+ }
+
+ /**
+ * Send message with optional image attachments
+ */
+ const handleSendMessage = () => {
+ if (!input.trim() && attachedImages.length === 0) return
+
+ if (attachedImages.length > 0) {
+ // Build multimodal content array
+ const contentParts: Array<ContentPart> = []
+
+ // Add text if present
+ if (input.trim()) {
+ contentParts.push({ type: 'text', content: input.trim() })
+ }
+
+ // Add images with the mimeType carried on the source
+ for (const img of attachedImages) {
+ contentParts.push({
+ type: 'image',
+ source: { type: 'data', value: img.base64, mimeType: img.mimeType },
+ })
+ }
+
+ // Send with custom message ID
+ sendMessage({
+ content: contentParts,
+ id: generateMessageId(),
+ })
+
+ // Clean up image previews
+ attachedImages.forEach((img) => URL.revokeObjectURL(img.preview))
+ setAttachedImages([])
+ } else {
+ // Simple text message
+ sendMessage(input.trim())
+ }
+
+ setInput('')
+ }
+
return (
{/* Chat */}
@@ -295,41 +421,89 @@ function ChatPage() {
)}
-
diff --git a/packages/typescript/ai-anthropic/src/adapters/text.ts b/packages/typescript/ai-anthropic/src/adapters/text.ts
index 5b1896b2d..a1dca82c0 100644
--- a/packages/typescript/ai-anthropic/src/adapters/text.ts
+++ b/packages/typescript/ai-anthropic/src/adapters/text.ts
@@ -313,17 +313,20 @@ export class AnthropicTextAdapter<
? {
type: 'base64',
data: part.source.value,
- media_type: metadata?.mediaType ?? 'image/jpeg',
+ media_type: part.source.mimeType as
+ | 'image/jpeg'
+ | 'image/png'
+ | 'image/gif'
+ | 'image/webp',
}
: {
type: 'url',
url: part.source.value,
}
- const { mediaType: _mediaType, ...meta } = metadata || {}
return {
type: 'image',
source: imageSource,
- ...meta,
+ ...metadata,
}
}
case 'document': {
@@ -333,7 +336,7 @@ export class AnthropicTextAdapter<
? {
type: 'base64',
data: part.source.value,
- media_type: 'application/pdf',
+ media_type: part.source.mimeType as 'application/pdf',
}
: {
type: 'url',
diff --git a/packages/typescript/ai-anthropic/src/index.ts b/packages/typescript/ai-anthropic/src/index.ts
index 4bca2e4b3..b0ff0750b 100644
--- a/packages/typescript/ai-anthropic/src/index.ts
+++ b/packages/typescript/ai-anthropic/src/index.ts
@@ -34,8 +34,6 @@ export type {
AnthropicDocumentMetadata,
AnthropicAudioMetadata,
AnthropicVideoMetadata,
- AnthropicImageMediaType,
- AnthropicDocumentMediaType,
AnthropicMessageMetadataByModality,
} from './message-types'
diff --git a/packages/typescript/ai-client/src/chat-client.ts b/packages/typescript/ai-client/src/chat-client.ts
index 27604beb5..f6ead390d 100644
--- a/packages/typescript/ai-client/src/chat-client.ts
+++ b/packages/typescript/ai-client/src/chat-client.ts
@@ -4,13 +4,19 @@ import {
normalizeToUIMessage,
} from '@tanstack/ai'
import { DefaultChatClientEventEmitter } from './events'
-import type { AnyClientTool, ModelMessage, StreamChunk } from '@tanstack/ai'
+import type {
+ AnyClientTool,
+ ContentPart,
+ ModelMessage,
+ StreamChunk,
+} from '@tanstack/ai'
import type { ConnectionAdapter } from './connection-adapters'
import type { ChatClientEventEmitter } from './events'
import type {
ChatClientOptions,
ChatClientState,
MessagePart,
+ MultimodalContent,
ToolCallPart,
UIMessage,
} from './types'
@@ -20,6 +26,7 @@ export class ChatClient {
private connection: ConnectionAdapter
private uniqueId: string
private body: Record<string, unknown> = {}
+ private pendingMessageBody: Record<string, unknown> | undefined = undefined
private isLoading = false
private error: Error | undefined = undefined
private status: ChatClientState = 'ready'
@@ -262,20 +269,82 @@ export class ChatClient {
}
/**
- * Send a message and stream the response
+ * Send a message and stream the response.
+ * Supports both simple string content and multimodal content (images, audio, video, documents).
+ *
+ * @param content - The message content. Can be:
+ * - A simple string for text-only messages
+ * - A MultimodalContent object with content array and optional custom ID
+ * @param body - Optional body parameters to merge with the client's base body for this request.
+ * Uses shallow merge with per-message body taking priority.
+ *
+ * @example
+ * ```ts
+ * // Simple text message
+ * await client.sendMessage('Hello!')
+ *
+ * // Text message with custom body params
+ * await client.sendMessage('Hello!', { temperature: 0.7 })
+ *
+ * // Multimodal message with image
+ * await client.sendMessage({
+ * content: [
+ * { type: 'text', content: 'What is in this image?' },
+ * { type: 'image', source: { type: 'url', value: 'https://example.com/photo.jpg' } }
+ * ]
+ * })
+ *
+ * // Multimodal message with custom ID and body params
+ * await client.sendMessage(
+ * {
+ * content: [
+ * { type: 'text', content: 'Describe this audio' },
+ * { type: 'audio', source: { type: 'data', value: 'base64...' } }
+ * ],
+ * id: 'custom-message-id'
+ * },
+ * { model: 'gpt-4-audio' }
+ * )
+ * ```
*/
- async sendMessage(content: string): Promise<void> {
- if (!content.trim() || this.isLoading) {
+ async sendMessage(
+ content: string | MultimodalContent,
+ body?: Record<string, unknown>,
+ ): Promise<void> {
+ const emptyMessage = typeof content === 'string' && !content.trim()
+ if (emptyMessage || this.isLoading) {
return
}
+ // Normalize input to extract content, id, and validate
+ const normalizedContent = this.normalizeMessageInput(content)
+
+ // Store the per-message body for use in streamResponse
+ this.pendingMessageBody = body
// Add user message via processor
- const userMessage = this.processor.addUserMessage(content.trim())
- this.events.messageSent(userMessage.id, content.trim())
+ const userMessage = this.processor.addUserMessage(
+ normalizedContent.content,
+ normalizedContent.id,
+ )
+ this.events.messageSent(userMessage.id, normalizedContent.content)
await this.streamResponse()
}
+ /**
+ * Normalize the message input to extract content and optional id.
+ * Trims string content automatically.
+ */
+ private normalizeMessageInput(input: string | MultimodalContent): {
+ content: string | Array<ContentPart>
+ id?: string
+ } {
+ if (typeof input === 'string') {
+ return { content: input.trim() }
+ }
+ return { content: input.content, id: input.id }
+ }
+
/**
* Append a message and stream the response
*/
@@ -317,16 +386,21 @@ export class ChatClient {
// Call onResponse callback
await this.callbacksRef.current.onResponse()
- // Include conversationId in the body for server-side event correlation
- const bodyWithConversationId = {
+ // Merge body: base body + per-message body (per-message takes priority)
+ // Include conversationId for server-side event correlation
+ const mergedBody = {
...this.body,
+ ...this.pendingMessageBody,
conversationId: this.uniqueId,
}
+ // Clear the pending message body after use
+ this.pendingMessageBody = undefined
+
// Connect and stream
const stream = this.connection.connect(
modelMessages,
- bodyWithConversationId,
+ mergedBody,
this.abortController.signal,
)
@@ -343,6 +417,7 @@ export class ChatClient {
} finally {
this.abortController = null
this.setIsLoading(false)
+ this.pendingMessageBody = undefined // Ensure it's cleared even on error
// Drain any actions that were queued while the stream was in progress
await this.drainPostStreamActions()
diff --git a/packages/typescript/ai-client/src/events.ts b/packages/typescript/ai-client/src/events.ts
index 33532f411..0713625bf 100644
--- a/packages/typescript/ai-client/src/events.ts
+++ b/packages/typescript/ai-client/src/events.ts
@@ -1,4 +1,5 @@
import { aiEventClient } from '@tanstack/ai/event-client'
+import type { ContentPart } from '@tanstack/ai'
import type { UIMessage } from './types'
/**
@@ -137,19 +138,36 @@ export abstract class ChatClientEventEmitter {
}
/**
- * Emit message sent event
+ * Emit message sent event.
+ * Supports both simple string content and multimodal content arrays.
+ *
+ * @param messageId - The ID of the sent message
+ * @param content - The message content (string or array of ContentPart for multimodal)
*/
- messageSent(messageId: string, content: string): void {
+ messageSent(messageId: string, content: string | Array<ContentPart>): void {
+ // For text content, extract it; for multimodal, provide the array
+ const textContent =
+ typeof content === 'string'
+ ? content
+ : content
+ .filter((part) => part.type === 'text')
+ .map((part) => (part as { type: 'text'; content: string }).content)
+ .join('')
+
this.emitEvent('text:message:created', {
messageId,
role: 'user',
- content,
+ content: textContent,
+ // Include full content for multimodal messages
+ ...(Array.isArray(content) && { parts: content }),
})
this.emitEvent('text:message:user', {
messageId,
role: 'user',
- content,
+ content: textContent,
+ // Include full content for multimodal messages
+ ...(Array.isArray(content) && { parts: content }),
})
}
@@ -161,7 +179,6 @@ export abstract class ChatClientEventEmitter {
fromMessageIndex,
})
}
-
/**
* Emit stopped event
*/
diff --git a/packages/typescript/ai-client/src/index.ts b/packages/typescript/ai-client/src/index.ts
index b12c7c2ab..b279605d1 100644
--- a/packages/typescript/ai-client/src/index.ts
+++ b/packages/typescript/ai-client/src/index.ts
@@ -12,6 +12,8 @@ export type {
ChatRequestBody,
InferChatMessages,
ChatClientState,
+ // Multimodal content input type
+ MultimodalContent,
} from './types'
export { clientTools, createChatClientOptions } from './types'
export type {
diff --git a/packages/typescript/ai-client/src/types.ts b/packages/typescript/ai-client/src/types.ts
index 8aff9e4d0..985725481 100644
--- a/packages/typescript/ai-client/src/types.ts
+++ b/packages/typescript/ai-client/src/types.ts
@@ -1,10 +1,15 @@
import type {
AnyClientTool,
+ AudioPart,
ChunkStrategy,
+ ContentPart,
+ DocumentPart,
+ ImagePart,
InferToolInput,
InferToolOutput,
ModelMessage,
StreamChunk,
+ VideoPart,
} from '@tanstack/ai'
import type { ConnectionAdapter } from './connection-adapters'
@@ -31,6 +36,35 @@ export type ToolResultState =
*/
export type ChatClientState = 'ready' | 'submitted' | 'streaming' | 'error'
+/**
+ * Multimodal content input for sending messages with rich media.
+ * Allows sending text, images, audio, video, and documents to the LLM.
+ *
+ * @example
+ * ```ts
+ * // Send an image with a question
+ * client.sendMessage({
+ * content: [
+ * { type: 'text', content: 'What is in this image?' },
+ * { type: 'image', source: { type: 'url', value: 'https://example.com/photo.jpg' } }
+ * ],
+ * id: 'custom-message-id' // optional
+ * })
+ * ```
+ */
+export interface MultimodalContent {
+ /**
+ * The content of the message.
+ * Can be a simple string or an array of content parts for multimodal messages.
+ */
+ content: string | Array<ContentPart>
+ /**
+ * Optional custom ID for the message.
+ * If not provided, a unique ID will be generated.
+ */
+ id?: string
+}
+
/**
* Message parts - building blocks of UIMessage
*/
@@ -121,6 +155,10 @@ export interface ThinkingPart {
export type MessagePart<TTools extends Record<string, AnyClientTool> = any> =
| TextPart
+ | ImagePart
+ | AudioPart
+ | VideoPart
+ | DocumentPart
| ToolCallPart
| ToolResultPart
| ThinkingPart
diff --git a/packages/typescript/ai-client/tests/chat-client.test.ts b/packages/typescript/ai-client/tests/chat-client.test.ts
index d632becbf..279603783 100644
--- a/packages/typescript/ai-client/tests/chat-client.test.ts
+++ b/packages/typescript/ai-client/tests/chat-client.test.ts
@@ -1,12 +1,12 @@
import { describe, expect, it, vi } from 'vitest'
import { ChatClient } from '../src/chat-client'
-import type { UIMessage } from '../src/types'
import {
createMockConnectionAdapter,
createTextChunks,
createThinkingChunks,
createToolCallChunks,
} from './test-utils'
+import type { UIMessage } from '../src/types'
describe('ChatClient', () => {
describe('constructor', () => {
@@ -618,4 +618,287 @@ describe('ChatClient', () => {
expect(thinkingCalls.length).toBeGreaterThan(0)
})
})
+
+ describe('multimodal sendMessage', () => {
+ it('should send a multimodal message with image content', async () => {
+ const chunks = createTextChunks('I see a cat in the image')
+ const adapter = createMockConnectionAdapter({ chunks })
+
+ const client = new ChatClient({ connection: adapter })
+
+ await client.sendMessage({
+ content: [
+ { type: 'text', content: 'What is in this image?' },
+ {
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/cat.jpg' },
+ },
+ ],
+ })
+
+ const messages = client.getMessages()
+ expect(messages.length).toBeGreaterThan(0)
+ expect(messages[0]?.role).toBe('user')
+ expect(messages[0]?.parts.length).toBe(2)
+ expect(messages[0]?.parts[0]).toEqual({
+ type: 'text',
+ content: 'What is in this image?',
+ })
+ expect(messages[0]?.parts[1]).toEqual({
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/cat.jpg' },
+ })
+ })
+
+ it('should send a multimodal message with audio content', async () => {
+ const chunks = createTextChunks('The audio says hello')
+ const adapter = createMockConnectionAdapter({ chunks })
+
+ const client = new ChatClient({ connection: adapter })
+
+ await client.sendMessage({
+ content: [
+ { type: 'text', content: 'Transcribe this audio' },
+ {
+ type: 'audio',
+ source: {
+ type: 'data',
+ value: 'base64AudioData',
+ mimeType: 'audio/mp3',
+ },
+ },
+ ],
+ })
+
+ const messages = client.getMessages()
+ expect(messages[0]?.parts[1]).toEqual({
+ type: 'audio',
+ source: {
+ type: 'data',
+ value: 'base64AudioData',
+ mimeType: 'audio/mp3',
+ },
+ })
+ })
+
+ it('should send a multimodal message with video content', async () => {
+ const chunks = createTextChunks('The video shows a sunset')
+ const adapter = createMockConnectionAdapter({ chunks })
+
+ const client = new ChatClient({ connection: adapter })
+
+ await client.sendMessage({
+ content: [
+ { type: 'text', content: 'Describe this video' },
+ {
+ type: 'video',
+ source: { type: 'url', value: 'https://example.com/video.mp4' },
+ },
+ ],
+ })
+
+ const messages = client.getMessages()
+ expect(messages[0]?.parts[1]).toEqual({
+ type: 'video',
+ source: { type: 'url', value: 'https://example.com/video.mp4' },
+ })
+ })
+
+ it('should send a multimodal message with document content', async () => {
+ const chunks = createTextChunks('The document discusses AI')
+ const adapter = createMockConnectionAdapter({ chunks })
+
+ const client = new ChatClient({ connection: adapter })
+
+ await client.sendMessage({
+ content: [
+ { type: 'text', content: 'Summarize this PDF' },
+ {
+ type: 'document',
+ source: {
+ type: 'data',
+ value: 'base64PdfData',
+ mimeType: 'application/pdf',
+ },
+ },
+ ],
+ })
+
+ const messages = client.getMessages()
+ expect(messages[0]?.parts[1]).toEqual({
+ type: 'document',
+ source: {
+ type: 'data',
+ value: 'base64PdfData',
+ mimeType: 'application/pdf',
+ },
+ })
+ })
+
+ it('should use custom message id when provided', async () => {
+ const chunks = createTextChunks('Response')
+ const adapter = createMockConnectionAdapter({ chunks })
+
+ const client = new ChatClient({ connection: adapter })
+
+ await client.sendMessage({
+ content: 'Hello',
+ id: 'custom-message-id-123',
+ })
+
+ const messages = client.getMessages()
+ expect(messages[0]?.id).toBe('custom-message-id-123')
+ })
+
+ it('should generate message id when not provided', async () => {
+ const chunks = createTextChunks('Response')
+ const adapter = createMockConnectionAdapter({ chunks })
+
+ const client = new ChatClient({ connection: adapter })
+
+ await client.sendMessage({
+ content: 'Hello',
+ })
+
+ const messages = client.getMessages()
+ expect(messages[0]?.id).toMatch(/^msg-/)
+ })
+
+ it('should allow empty content array', async () => {
+ const chunks = createTextChunks('Response')
+ const adapter = createMockConnectionAdapter({ chunks })
+
+ const client = new ChatClient({ connection: adapter })
+
+ await client.sendMessage({
+ content: [],
+ })
+
+ const messages = client.getMessages()
+ expect(messages.length).toBeGreaterThan(0)
+ expect(messages[0]?.parts).toEqual([])
+ })
+
+ it('should send string content as simple text message', async () => {
+ const chunks = createTextChunks('Response')
+ const adapter = createMockConnectionAdapter({ chunks })
+
+ const client = new ChatClient({ connection: adapter })
+
+ await client.sendMessage({
+ content: 'Hello world',
+ })
+
+ const messages = client.getMessages()
+ expect(messages[0]?.parts).toEqual([
+ { type: 'text', content: 'Hello world' },
+ ])
+ })
+
+ it('should merge per-message body with base body', async () => {
+ const chunks = createTextChunks('Response')
+ let capturedData: Record<string, unknown> | undefined
+ const adapter = createMockConnectionAdapter({
+ chunks,
+ onConnect: (_messages, data) => {
+ capturedData = data
+ },
+ })
+
+ const client = new ChatClient({
+ connection: adapter,
+ body: { model: 'gpt-4', temperature: 0.7 },
+ })
+
+ await client.sendMessage('Hello', {
+ model: 'gpt-4-turbo',
+ maxTokens: 100,
+ })
+
+ // Per-message body should override base body
+ expect(capturedData?.model).toBe('gpt-4-turbo')
+ expect(capturedData?.temperature).toBe(0.7) // From base body
+ expect(capturedData?.maxTokens).toBe(100) // From per-message body
+ })
+
+ it('should include conversationId in merged body', async () => {
+ const chunks = createTextChunks('Response')
+ let capturedData: Record<string, unknown> | undefined
+ const adapter = createMockConnectionAdapter({
+ chunks,
+ onConnect: (_messages, data) => {
+ capturedData = data
+ },
+ })
+
+ const client = new ChatClient({
+ connection: adapter,
+ id: 'my-conversation',
+ })
+
+ await client.sendMessage('Hello')
+
+ expect(capturedData?.conversationId).toBe('my-conversation')
+ })
+
+ it('should clear per-message body after request', async () => {
+ const chunks = createTextChunks('Response')
+ let capturedData: Record<string, unknown> | undefined
+ const adapter = createMockConnectionAdapter({
+ chunks,
+ onConnect: (_messages, data) => {
+ capturedData = data
+ },
+ })
+
+ const client = new ChatClient({
+ connection: adapter,
+ body: { model: 'gpt-4' },
+ })
+
+ // First message with per-message body
+ await client.sendMessage('First', { temperature: 0.9 })
+ expect(capturedData?.temperature).toBe(0.9)
+
+ // Second message without per-message body should not have temperature
+ await client.sendMessage('Second')
+ expect(capturedData?.temperature).toBeUndefined()
+ expect(capturedData?.model).toBe('gpt-4')
+ })
+
+ it('should emit events with multimodal content', async () => {
+ const chunks = createTextChunks('Response')
+ const adapter = createMockConnectionAdapter({ chunks })
+
+ const { aiEventClient } = await import('@tanstack/ai/event-client')
+ const emitSpy = vi.spyOn(aiEventClient, 'emit')
+ emitSpy.mockClear() // Clear any previous calls
+
+ const client = new ChatClient({ connection: adapter })
+
+ await client.sendMessage({
+ content: [
+ { type: 'text', content: 'What is this?' },
+ {
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/img.jpg' },
+ },
+ ],
+ })
+
+ // Find message created events for user role
+ const userMessageCreatedCalls = emitSpy.mock.calls.filter(
+ ([eventName, data]) =>
+ eventName === 'text:message:created' &&
+ (data as any)?.role === 'user',
+ )
+
+ // Should have at least one user message created event
+ expect(userMessageCreatedCalls.length).toBeGreaterThan(0)
+
+ // The event should include the text content extracted from multimodal content
+ const userMessageEvent = userMessageCreatedCalls[0]
+ expect((userMessageEvent?.[1] as any)?.content).toBe('What is this?')
+ })
+ })
})
diff --git a/packages/typescript/ai-devtools/src/store/ai-context.tsx b/packages/typescript/ai-devtools/src/store/ai-context.tsx
index bc43d7f32..4b852da15 100644
--- a/packages/typescript/ai-devtools/src/store/ai-context.tsx
+++ b/packages/typescript/ai-devtools/src/store/ai-context.tsx
@@ -1,10 +1,19 @@
import { batch, createContext, onCleanup, onMount, useContext } from 'solid-js'
import { createStore, produce } from 'solid-js/store'
import { aiEventClient } from '@tanstack/ai/event-client'
+import type { ContentPartSource } from '@tanstack/ai'
import type { ParentComponent } from 'solid-js'
interface MessagePart {
- type: 'text' | 'tool-call' | 'tool-result' | 'thinking'
+ type:
+ | 'text'
+ | 'tool-call'
+ | 'tool-result'
+ | 'thinking'
+ | 'image'
+ | 'audio'
+ | 'video'
+ | 'document'
content?: string
toolCallId?: string
toolName?: string
@@ -12,6 +21,9 @@ interface MessagePart {
state?: string
output?: unknown
error?: string
+ // Multimodal content fields
+ source?: ContentPartSource
+ metadata?: unknown
}
export interface ToolCall {
@@ -685,37 +697,58 @@ export const AIProvider: ParentComponent = (props) => {
(message) => message.id === messageId,
)
- const parts = e.payload.parts?.map((part) => {
- if (part.type === 'text') {
- return { type: 'text', content: part.content } satisfies MessagePart
- }
- if (part.type === 'tool-call') {
- return {
- type: 'tool-call',
- toolCallId: part.id,
- toolName: part.name,
- arguments: part.arguments,
- state: part.state,
- output: part.output,
- content: part.approval
- ? JSON.stringify(part.approval)
- : undefined,
- } satisfies MessagePart
- }
- if (part.type === 'tool-result') {
- return {
- type: 'tool-result',
- toolCallId: part.toolCallId,
- content: part.content,
- state: part.state,
- error: part.error,
- } satisfies MessagePart
- }
- return {
- type: 'thinking',
- content: part.content,
- } satisfies MessagePart
- })
+ const parts = e.payload.parts
+ ?.map((part): MessagePart | null => {
+ if (part.type === 'text') {
+ return { type: 'text', content: part.content }
+ }
+ if (part.type === 'tool-call') {
+ return {
+ type: 'tool-call',
+ toolCallId: part.id,
+ toolName: part.name,
+ arguments: part.arguments,
+ state: part.state,
+ output: part.output,
+ content: part.approval
+ ? JSON.stringify(part.approval)
+ : undefined,
+ }
+ }
+ if (part.type === 'tool-result') {
+ return {
+ type: 'tool-result',
+ toolCallId: part.toolCallId,
+ content: part.content,
+ state: part.state,
+ error: part.error,
+ }
+ }
+ if (part.type === 'thinking') {
+ return {
+ type: 'thinking',
+ content: part.content,
+ }
+ }
+ // Handle multimodal parts (image, audio, video, document)
+ // These have a source property instead of content
+ if (
+ part.type === 'image' ||
+ part.type === 'audio' ||
+ part.type === 'video' ||
+ // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
+ part.type === 'document'
+ ) {
+ return {
+ type: part.type,
+ source: part.source,
+ metadata: part.metadata,
+ }
+ }
+ // Fallback for any unknown part types - skip them
+ return null
+ })
+ .filter((part): part is MessagePart => part !== null)
const toolCalls = e.payload.toolCalls?.map((toolCall) => ({
id: toolCall.id,
diff --git a/packages/typescript/ai-gemini/src/adapters/text.ts b/packages/typescript/ai-gemini/src/adapters/text.ts
index 6210dadc4..031298c18 100644
--- a/packages/typescript/ai-gemini/src/adapters/text.ts
+++ b/packages/typescript/ai-gemini/src/adapters/text.ts
@@ -30,13 +30,7 @@ import type {
TextOptions,
} from '@tanstack/ai'
import type { ExternalTextProviderOptions } from '../text/text-provider-options'
-import type {
- GeminiAudioMetadata,
- GeminiDocumentMetadata,
- GeminiImageMetadata,
- GeminiMessageMetadataByModality,
- GeminiVideoMetadata,
-} from '../message-types'
+import type { GeminiMessageMetadataByModality } from '../message-types'
import type { GeminiClientConfig } from '../utils'
/**
@@ -483,20 +477,6 @@ export class GeminiTextAdapter<
}
private convertContentPartToGemini(part: ContentPart): Part {
- const getDefaultFileType = (
- part: 'image' | 'audio' | 'video' | 'document',
- ) => {
- switch (part) {
- case 'image':
- return 'image/jpeg'
- case 'audio':
- return 'audio/mp3'
- case 'video':
- return 'video/mp4'
- case 'document':
- return 'application/pdf'
- }
- }
switch (part.type) {
case 'text':
return { text: part.content }
@@ -504,24 +484,26 @@ export class GeminiTextAdapter<
case 'audio':
case 'video':
case 'document': {
- const metadata = part.metadata as
- | GeminiDocumentMetadata
- | GeminiImageMetadata
- | GeminiVideoMetadata
- | GeminiAudioMetadata
- | undefined
if (part.source.type === 'data') {
return {
inlineData: {
data: part.source.value,
- mimeType: metadata?.mimeType ?? getDefaultFileType(part.type),
+ mimeType: part.source.mimeType,
},
}
} else {
+ // For URL sources, use provided mimeType or fall back to reasonable defaults
+ const defaultMimeType = {
+ image: 'image/jpeg',
+ audio: 'audio/mp3',
+ video: 'video/mp4',
+ document: 'application/pdf',
+ }[part.type]
+
return {
fileData: {
fileUri: part.source.value,
- mimeType: metadata?.mimeType ?? getDefaultFileType(part.type),
+ mimeType: part.source.mimeType ?? defaultMimeType,
},
}
}
diff --git a/packages/typescript/ai-gemini/src/index.ts b/packages/typescript/ai-gemini/src/index.ts
index c60ce0756..ffea4c31c 100644
--- a/packages/typescript/ai-gemini/src/index.ts
+++ b/packages/typescript/ai-gemini/src/index.ts
@@ -79,9 +79,5 @@ export type {
GeminiAudioMetadata,
GeminiVideoMetadata,
GeminiDocumentMetadata,
- GeminiImageMimeType,
- GeminiAudioMimeType,
- GeminiVideoMimeType,
- GeminiDocumentMimeType,
GeminiMessageMetadataByModality,
} from './message-types'
diff --git a/packages/typescript/ai-grok/src/adapters/text.ts b/packages/typescript/ai-grok/src/adapters/text.ts
index f8703f7fc..c0204ab53 100644
--- a/packages/typescript/ai-grok/src/adapters/text.ts
+++ b/packages/typescript/ai-grok/src/adapters/text.ts
@@ -502,10 +502,16 @@ export class GrokTextAdapter<
parts.push({ type: 'text', text: part.content })
} else if (part.type === 'image') {
const imageMetadata = part.metadata as GrokImageMetadata | undefined
+ // For base64 data, construct a data URI using the mimeType from source
+ const imageValue = part.source.value
+ const imageUrl =
+ part.source.type === 'data' && !imageValue.startsWith('data:')
+ ? `data:${part.source.mimeType};base64,${imageValue}`
+ : imageValue
parts.push({
type: 'image_url',
image_url: {
- url: part.source.value,
+ url: imageUrl,
detail: imageMetadata?.detail || 'auto',
},
})
diff --git a/packages/typescript/ai-openai/src/adapters/text.ts b/packages/typescript/ai-openai/src/adapters/text.ts
index b367afcc0..1747ce4ec 100644
--- a/packages/typescript/ai-openai/src/adapters/text.ts
+++ b/packages/typescript/ai-openai/src/adapters/text.ts
@@ -813,10 +813,14 @@ export class OpenAITextAdapter<
detail: imageMetadata?.detail || 'auto',
}
}
- // For base64 data, construct a data URI
+ // For base64 data, construct a data URI using the mimeType from source
+ const imageValue = part.source.value
+ const imageUrl = imageValue.startsWith('data:')
+ ? imageValue
+ : `data:${part.source.mimeType};base64,${imageValue}`
return {
type: 'input_image',
- image_url: part.source.value,
+ image_url: imageUrl,
detail: imageMetadata?.detail || 'auto',
}
}
diff --git a/packages/typescript/ai-openrouter/src/adapters/text.ts b/packages/typescript/ai-openrouter/src/adapters/text.ts
index d3a7e6a53..387110b8b 100644
--- a/packages/typescript/ai-openrouter/src/adapters/text.ts
+++ b/packages/typescript/ai-openrouter/src/adapters/text.ts
@@ -590,10 +590,16 @@ export class OpenRouterTextAdapter<
break
case 'image': {
const meta = part.metadata as OpenRouterImageMetadata | undefined
+ // For base64 data, construct a data URI using the mimeType from source
+ const imageValue = part.source.value
+ const imageUrl =
+ part.source.type === 'data' && !imageValue.startsWith('data:')
+ ? `data:${part.source.mimeType};base64,${imageValue}`
+ : imageValue
parts.push({
type: 'image_url',
imageUrl: {
- url: part.source.value,
+ url: imageUrl,
detail: meta?.detail || 'auto',
},
})
diff --git a/packages/typescript/ai-preact/src/types.ts b/packages/typescript/ai-preact/src/types.ts
index 7679e08af..3b1a4a041 100644
--- a/packages/typescript/ai-preact/src/types.ts
+++ b/packages/typescript/ai-preact/src/types.ts
@@ -3,11 +3,12 @@ import type {
ChatClientOptions,
ChatClientState,
ChatRequestBody,
+ MultimodalContent,
UIMessage,
} from '@tanstack/ai-client'
// Re-export types from ai-client
-export type { ChatRequestBody, UIMessage }
+export type { ChatRequestBody, MultimodalContent, UIMessage }
/**
* Options for the useChat hook.
@@ -40,9 +41,10 @@ export interface UseChatReturn<
messages: Array<UIMessage<TTools>>
/**
- * Send a message and get a response
+ * Send a message and get a response.
+ * Can be a simple string or multimodal content with images, audio, etc.
*/
- sendMessage: (content: string) => Promise<void>
+ sendMessage: (content: string | MultimodalContent) => Promise<void>
/**
* Append a message to the conversation
diff --git a/packages/typescript/ai-preact/src/use-chat.ts b/packages/typescript/ai-preact/src/use-chat.ts
index 336eb7cb8..cfa9340f4 100644
--- a/packages/typescript/ai-preact/src/use-chat.ts
+++ b/packages/typescript/ai-preact/src/use-chat.ts
@@ -10,7 +10,12 @@ import {
import type { ChatClientState } from '@tanstack/ai-client'
import type { AnyClientTool, ModelMessage } from '@tanstack/ai'
-import type { UIMessage, UseChatOptions, UseChatReturn } from './types'
+import type {
+ MultimodalContent,
+ UIMessage,
+ UseChatOptions,
+ UseChatReturn,
+} from './types'
export function useChat<TTools extends Record<string, AnyClientTool> = any>(
options: UseChatOptions<TTools>,
@@ -109,7 +114,7 @@ export function useChat<TTools extends Record<string, AnyClientTool> = any>(
// are captured at client creation time. Changes to these callbacks require
// remounting the component or changing the connection to recreate the client.
const sendMessage = useCallback(
- async (content: string) => {
+ async (content: string | MultimodalContent) => {
await client.sendMessage(content)
},
[client],
diff --git a/packages/typescript/ai-react/src/types.ts b/packages/typescript/ai-react/src/types.ts
index 0bca98831..a960a1b8f 100644
--- a/packages/typescript/ai-react/src/types.ts
+++ b/packages/typescript/ai-react/src/types.ts
@@ -3,11 +3,12 @@ import type {
ChatClientOptions,
ChatClientState,
ChatRequestBody,
+ MultimodalContent,
UIMessage,
} from '@tanstack/ai-client'
// Re-export types from ai-client
-export type { ChatRequestBody, UIMessage }
+export type { ChatRequestBody, MultimodalContent, UIMessage }
/**
* Options for the useChat hook.
@@ -40,9 +41,10 @@ export interface UseChatReturn<
messages: Array<UIMessage<TTools>>
/**
- * Send a message and get a response
+ * Send a message and get a response.
+ * Can be a simple string or multimodal content with images, audio, etc.
*/
- sendMessage: (content: string) => Promise<void>
+ sendMessage: (content: string | MultimodalContent) => Promise<void>
/**
* Append a message to the conversation
diff --git a/packages/typescript/ai-react/src/use-chat.ts b/packages/typescript/ai-react/src/use-chat.ts
index 509fd64f1..2cdc02d11 100644
--- a/packages/typescript/ai-react/src/use-chat.ts
+++ b/packages/typescript/ai-react/src/use-chat.ts
@@ -3,7 +3,12 @@ import { useCallback, useEffect, useId, useMemo, useRef, useState } from 'react'
import type { AnyClientTool, ModelMessage } from '@tanstack/ai'
import type { ChatClientState } from '@tanstack/ai-client'
-import type { UIMessage, UseChatOptions, UseChatReturn } from './types'
+import type {
+ MultimodalContent,
+ UIMessage,
+ UseChatOptions,
+ UseChatReturn,
+} from './types'
export function useChat<TTools extends Record<string, AnyClientTool> = any>(
options: UseChatOptions<TTools>,
@@ -109,7 +114,7 @@ export function useChat<TTools extends Record<string, AnyClientTool> = any>(
// remounting the component or changing the connection to recreate the client.
const sendMessage = useCallback(
- async (content: string) => {
+ async (content: string | MultimodalContent) => {
await client.sendMessage(content)
},
[client],
diff --git a/packages/typescript/ai-react/tests/use-chat.test.ts b/packages/typescript/ai-react/tests/use-chat.test.ts
index 1eba78a7d..282c2ae5d 100644
--- a/packages/typescript/ai-react/tests/use-chat.test.ts
+++ b/packages/typescript/ai-react/tests/use-chat.test.ts
@@ -1242,4 +1242,326 @@ describe('useChat', () => {
})
})
})
+
+ describe('multimodal sendMessage', () => {
+ it('should send a multimodal message with image URL', async () => {
+ const chunks = createTextChunks('I see a cat in the image')
+ const adapter = createMockConnectionAdapter({ chunks })
+ const { result } = renderUseChat({ connection: adapter })
+
+ await result.current.sendMessage({
+ content: [
+ { type: 'text', content: 'What is in this image?' },
+ {
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/cat.jpg' },
+ },
+ ],
+ })
+
+ await waitFor(() => {
+ expect(result.current.messages.length).toBeGreaterThan(0)
+ })
+
+ const userMessage = result.current.messages.find((m) => m.role === 'user')
+ expect(userMessage).toBeDefined()
+ expect(userMessage?.parts.length).toBe(2)
+ expect(userMessage?.parts[0]).toEqual({
+ type: 'text',
+ content: 'What is in this image?',
+ })
+ expect(userMessage?.parts[1]).toEqual({
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/cat.jpg' },
+ })
+ })
+
+ it('should send a multimodal message with image data and required mimeType', async () => {
+ const chunks = createTextChunks('I see a cat in the image')
+ const adapter = createMockConnectionAdapter({ chunks })
+ const { result } = renderUseChat({ connection: adapter })
+
+ await result.current.sendMessage({
+ content: [
+ { type: 'text', content: 'What is in this image?' },
+ {
+ type: 'image',
+ source: {
+ type: 'data',
+ value: 'base64ImageData',
+ mimeType: 'image/png',
+ },
+ },
+ ],
+ })
+
+ await waitFor(() => {
+ expect(result.current.messages.length).toBeGreaterThan(0)
+ })
+
+ const userMessage = result.current.messages.find((m) => m.role === 'user')
+ expect(userMessage?.parts[1]).toEqual({
+ type: 'image',
+ source: {
+ type: 'data',
+ value: 'base64ImageData',
+ mimeType: 'image/png',
+ },
+ })
+ })
+
+ it('should send a multimodal message with audio data and required mimeType', async () => {
+ const chunks = createTextChunks('The audio says hello')
+ const adapter = createMockConnectionAdapter({ chunks })
+ const { result } = renderUseChat({ connection: adapter })
+
+ await result.current.sendMessage({
+ content: [
+ { type: 'text', content: 'Transcribe this audio' },
+ {
+ type: 'audio',
+ source: {
+ type: 'data',
+ value: 'base64AudioData',
+ mimeType: 'audio/mp3',
+ },
+ },
+ ],
+ })
+
+ await waitFor(() => {
+ expect(result.current.messages.length).toBeGreaterThan(0)
+ })
+
+ const userMessage = result.current.messages.find((m) => m.role === 'user')
+ expect(userMessage?.parts[1]).toEqual({
+ type: 'audio',
+ source: {
+ type: 'data',
+ value: 'base64AudioData',
+ mimeType: 'audio/mp3',
+ },
+ })
+ })
+
+ it('should send a multimodal message with video URL', async () => {
+ const chunks = createTextChunks('The video shows a sunset')
+ const adapter = createMockConnectionAdapter({ chunks })
+ const { result } = renderUseChat({ connection: adapter })
+
+ await result.current.sendMessage({
+ content: [
+ { type: 'text', content: 'Describe this video' },
+ {
+ type: 'video',
+ source: { type: 'url', value: 'https://example.com/video.mp4' },
+ },
+ ],
+ })
+
+ await waitFor(() => {
+ expect(result.current.messages.length).toBeGreaterThan(0)
+ })
+
+ const userMessage = result.current.messages.find((m) => m.role === 'user')
+ expect(userMessage?.parts[1]).toEqual({
+ type: 'video',
+ source: { type: 'url', value: 'https://example.com/video.mp4' },
+ })
+ })
+
+ it('should send a multimodal message with video data and required mimeType', async () => {
+ const chunks = createTextChunks('The video shows a sunset')
+ const adapter = createMockConnectionAdapter({ chunks })
+ const { result } = renderUseChat({ connection: adapter })
+
+ await result.current.sendMessage({
+ content: [
+ { type: 'text', content: 'Describe this video' },
+ {
+ type: 'video',
+ source: {
+ type: 'data',
+ value: 'base64VideoData',
+ mimeType: 'video/mp4',
+ },
+ },
+ ],
+ })
+
+ await waitFor(() => {
+ expect(result.current.messages.length).toBeGreaterThan(0)
+ })
+
+ const userMessage = result.current.messages.find((m) => m.role === 'user')
+ expect(userMessage?.parts[1]).toEqual({
+ type: 'video',
+ source: {
+ type: 'data',
+ value: 'base64VideoData',
+ mimeType: 'video/mp4',
+ },
+ })
+ })
+
+ it('should send a multimodal message with document data and required mimeType', async () => {
+ const chunks = createTextChunks('The document discusses AI')
+ const adapter = createMockConnectionAdapter({ chunks })
+ const { result } = renderUseChat({ connection: adapter })
+
+ await result.current.sendMessage({
+ content: [
+ { type: 'text', content: 'Summarize this PDF' },
+ {
+ type: 'document',
+ source: {
+ type: 'data',
+ value: 'base64PdfData',
+ mimeType: 'application/pdf',
+ },
+ },
+ ],
+ })
+
+ await waitFor(() => {
+ expect(result.current.messages.length).toBeGreaterThan(0)
+ })
+
+ const userMessage = result.current.messages.find((m) => m.role === 'user')
+ expect(userMessage?.parts[1]).toEqual({
+ type: 'document',
+ source: {
+ type: 'data',
+ value: 'base64PdfData',
+ mimeType: 'application/pdf',
+ },
+ })
+ })
+
+ it('should send a multimodal message with document URL', async () => {
+ const chunks = createTextChunks('The document discusses AI')
+ const adapter = createMockConnectionAdapter({ chunks })
+ const { result } = renderUseChat({ connection: adapter })
+
+ await result.current.sendMessage({
+ content: [
+ { type: 'text', content: 'Summarize this document' },
+ {
+ type: 'document',
+ source: {
+ type: 'url',
+ value: 'https://example.com/doc.pdf',
+ },
+ },
+ ],
+ })
+
+ await waitFor(() => {
+ expect(result.current.messages.length).toBeGreaterThan(0)
+ })
+
+ const userMessage = result.current.messages.find((m) => m.role === 'user')
+ expect(userMessage?.parts[1]).toEqual({
+ type: 'document',
+ source: {
+ type: 'url',
+ value: 'https://example.com/doc.pdf',
+ },
+ })
+ })
+
+ it('should send complex multimodal message with multiple content parts', async () => {
+ const chunks = createTextChunks('I see multiple items')
+ const adapter = createMockConnectionAdapter({ chunks })
+ const { result } = renderUseChat({ connection: adapter })
+
+ await result.current.sendMessage({
+ content: [
+ { type: 'text', content: 'Compare these items' },
+ {
+ type: 'image',
+ source: {
+ type: 'data',
+ value: 'base64Image1',
+ mimeType: 'image/jpeg',
+ },
+ },
+ {
+ type: 'image',
+ source: {
+ type: 'url',
+ value: 'https://example.com/image2.png',
+ },
+ },
+ { type: 'text', content: 'Which one is better?' },
+ ],
+ })
+
+ await waitFor(() => {
+ expect(result.current.messages.length).toBeGreaterThan(0)
+ })
+
+ const userMessage = result.current.messages.find((m) => m.role === 'user')
+ expect(userMessage?.parts.length).toBe(4)
+ expect(userMessage?.parts[0]).toEqual({
+ type: 'text',
+ content: 'Compare these items',
+ })
+ expect(userMessage?.parts[1]).toEqual({
+ type: 'image',
+ source: {
+ type: 'data',
+ value: 'base64Image1',
+ mimeType: 'image/jpeg',
+ },
+ })
+ expect(userMessage?.parts[2]).toEqual({
+ type: 'image',
+ source: {
+ type: 'url',
+ value: 'https://example.com/image2.png',
+ },
+ })
+ expect(userMessage?.parts[3]).toEqual({
+ type: 'text',
+ content: 'Which one is better?',
+ })
+ })
+
+ it('should use custom message id when provided in multimodal message', async () => {
+ const chunks = createTextChunks('Response')
+ const adapter = createMockConnectionAdapter({ chunks })
+ const { result } = renderUseChat({ connection: adapter })
+
+ await result.current.sendMessage({
+ content: [{ type: 'text', content: 'Hello' }],
+ id: 'custom-multimodal-id-123',
+ })
+
+ await waitFor(() => {
+ expect(result.current.messages.length).toBeGreaterThan(0)
+ })
+
+ expect(result.current.messages[0]?.id).toBe('custom-multimodal-id-123')
+ })
+
+ it('should send string content as simple text message via multimodal interface', async () => {
+ const chunks = createTextChunks('Response')
+ const adapter = createMockConnectionAdapter({ chunks })
+ const { result } = renderUseChat({ connection: adapter })
+
+ await result.current.sendMessage({
+ content: 'Hello world',
+ })
+
+ await waitFor(() => {
+ expect(result.current.messages.length).toBeGreaterThan(0)
+ })
+
+ const userMessage = result.current.messages.find((m) => m.role === 'user')
+ expect(userMessage?.parts).toEqual([
+ { type: 'text', content: 'Hello world' },
+ ])
+ })
+ })
})
diff --git a/packages/typescript/ai-solid/src/types.ts b/packages/typescript/ai-solid/src/types.ts
index 050fb8010..4d75ae531 100644
--- a/packages/typescript/ai-solid/src/types.ts
+++ b/packages/typescript/ai-solid/src/types.ts
@@ -3,12 +3,13 @@ import type {
ChatClientOptions,
ChatClientState,
ChatRequestBody,
+ MultimodalContent,
UIMessage,
} from '@tanstack/ai-client'
import type { Accessor } from 'solid-js'
// Re-export types from ai-client
-export type { ChatRequestBody, UIMessage }
+export type { ChatRequestBody, MultimodalContent, UIMessage }
/**
* Options for the useChat hook.
@@ -41,9 +42,10 @@ export interface UseChatReturn<
messages: Accessor<Array<UIMessage<TTools>>>
/**
- * Send a message and get a response
+ * Send a message and get a response.
+ * Can be a simple string or multimodal content with images, audio, etc.
*/
- sendMessage: (content: string) => Promise<void>
+ sendMessage: (content: string | MultimodalContent) => Promise<void>
/**
* Append a message to the conversation
diff --git a/packages/typescript/ai-solid/src/use-chat.ts b/packages/typescript/ai-solid/src/use-chat.ts
index e178a2c5e..77d0edf96 100644
--- a/packages/typescript/ai-solid/src/use-chat.ts
+++ b/packages/typescript/ai-solid/src/use-chat.ts
@@ -8,7 +8,12 @@ import {
import { ChatClient } from '@tanstack/ai-client'
import type { ChatClientState } from '@tanstack/ai-client'
import type { AnyClientTool, ModelMessage } from '@tanstack/ai'
-import type { UIMessage, UseChatOptions, UseChatReturn } from './types'
+import type {
+ MultimodalContent,
+ UIMessage,
+ UseChatOptions,
+ UseChatReturn,
+} from './types'
export function useChat<TTools extends Record<string, AnyClientTool> = any>(
options: UseChatOptions<TTools> = {} as UseChatOptions<TTools>,
@@ -93,7 +98,7 @@ export function useChat<TTools extends Record<string, AnyClientTool> = any>(
// are captured at client creation time. Changes to these callbacks require
// remounting the component or changing the connection to recreate the client.
- const sendMessage = async (content: string) => {
+ const sendMessage = async (content: string | MultimodalContent) => {
await client().sendMessage(content)
}
diff --git a/packages/typescript/ai-svelte/src/create-chat.svelte.ts b/packages/typescript/ai-svelte/src/create-chat.svelte.ts
index 07242d018..5354ae113 100644
--- a/packages/typescript/ai-svelte/src/create-chat.svelte.ts
+++ b/packages/typescript/ai-svelte/src/create-chat.svelte.ts
@@ -1,7 +1,12 @@
import { ChatClient } from '@tanstack/ai-client'
import type { ChatClientState } from '@tanstack/ai-client'
import type { AnyClientTool, ModelMessage } from '@tanstack/ai'
-import type { CreateChatOptions, CreateChatReturn, UIMessage } from './types'
+import type {
+ CreateChatOptions,
+ CreateChatReturn,
+ MultimodalContent,
+ UIMessage,
+} from './types'
/**
* Creates a reactive chat instance for Svelte 5.
@@ -83,7 +88,7 @@ export function createChat<TTools extends Record<string, AnyClientTool> = any>(
// Users should call chat.stop() in their component's cleanup if needed.
// Define methods
- const sendMessage = async (content: string) => {
+ const sendMessage = async (content: string | MultimodalContent) => {
await client.sendMessage(content)
}
diff --git a/packages/typescript/ai-svelte/src/types.ts b/packages/typescript/ai-svelte/src/types.ts
index e18e87721..df13ed0a0 100644
--- a/packages/typescript/ai-svelte/src/types.ts
+++ b/packages/typescript/ai-svelte/src/types.ts
@@ -3,11 +3,12 @@ import type {
ChatClientOptions,
ChatClientState,
ChatRequestBody,
+ MultimodalContent,
UIMessage,
} from '@tanstack/ai-client'
// Re-export types from ai-client
-export type { ChatRequestBody, UIMessage }
+export type { ChatRequestBody, MultimodalContent, UIMessage }
/**
* Options for the createChat function.
@@ -41,9 +42,10 @@ export interface CreateChatReturn<
readonly messages: Array<UIMessage<TTools>>
/**
- * Send a message and get a response
+ * Send a message and get a response.
+ * Can be a simple string or multimodal content with images, audio, etc.
*/
- sendMessage: (content: string) => Promise<void>
+ sendMessage: (content: string | MultimodalContent) => Promise<void>
/**
* Append a message to the conversation
diff --git a/packages/typescript/ai-vue/src/types.ts b/packages/typescript/ai-vue/src/types.ts
index 7c09f103a..31c948503 100644
--- a/packages/typescript/ai-vue/src/types.ts
+++ b/packages/typescript/ai-vue/src/types.ts
@@ -3,12 +3,13 @@ import type {
ChatClientOptions,
ChatClientState,
ChatRequestBody,
+ MultimodalContent,
UIMessage,
} from '@tanstack/ai-client'
import type { DeepReadonly, ShallowRef } from 'vue'
// Re-export types from ai-client
-export type { ChatRequestBody, UIMessage }
+export type { ChatRequestBody, MultimodalContent, UIMessage }
/**
* Options for the useChat composable.
@@ -41,9 +42,10 @@ export interface UseChatReturn<
messages: DeepReadonly<ShallowRef<Array<UIMessage<TTools>>>>
/**
- * Send a message and get a response
+ * Send a message and get a response.
+ * Can be a simple string or multimodal content with images, audio, etc.
*/
- sendMessage: (content: string) => Promise<void>
+ sendMessage: (content: string | MultimodalContent) => Promise<void>
/**
* Append a message to the conversation
diff --git a/packages/typescript/ai-vue/src/use-chat.ts b/packages/typescript/ai-vue/src/use-chat.ts
index 684d3f800..6042fc535 100644
--- a/packages/typescript/ai-vue/src/use-chat.ts
+++ b/packages/typescript/ai-vue/src/use-chat.ts
@@ -2,7 +2,12 @@ import { ChatClient } from '@tanstack/ai-client'
import { onScopeDispose, readonly, shallowRef, useId, watch } from 'vue'
import type { AnyClientTool, ModelMessage } from '@tanstack/ai'
import type { ChatClientState } from '@tanstack/ai-client'
-import type { UIMessage, UseChatOptions, UseChatReturn } from './types'
+import type {
+ MultimodalContent,
+ UIMessage,
+ UseChatOptions,
+ UseChatReturn,
+} from './types'
export function useChat<TTools extends Record<string, AnyClientTool> = any>(
options: UseChatOptions<TTools> = {} as UseChatOptions<TTools>,
@@ -66,7 +71,7 @@ export function useChat<TTools extends Record<string, AnyClientTool> = any>(
// are captured at client creation time. Changes to these callbacks require
// remounting the component or changing the connection to recreate the client.
- const sendMessage = async (content: string) => {
+ const sendMessage = async (content: string | MultimodalContent) => {
await client.sendMessage(content)
}
diff --git a/packages/typescript/ai/src/activities/chat/messages.ts b/packages/typescript/ai/src/activities/chat/messages.ts
index 14c8dc621..8c9511ec7 100644
--- a/packages/typescript/ai/src/activities/chat/messages.ts
+++ b/packages/typescript/ai/src/activities/chat/messages.ts
@@ -1,16 +1,34 @@
import type {
+ AudioPart,
ContentPart,
+ DocumentPart,
+ ImagePart,
MessagePart,
ModelMessage,
TextPart,
ToolCallPart,
ToolResultPart,
UIMessage,
+ VideoPart,
} from '../../types'
// ===========================
// Message Converters
// ===========================
+/**
+ * Helper to check if a part is a multimodal content part (image, audio, video, document)
+ */
+function isMultimodalPart(
+ part: MessagePart,
+): part is ImagePart | AudioPart | VideoPart | DocumentPart {
+ return (
+ part.type === 'image' ||
+ part.type === 'audio' ||
+ part.type === 'video' ||
+ part.type === 'document'
+ )
+}
+
/**
* Helper to extract text content from string or ContentPart array
* For multimodal content, this extracts only the text parts
@@ -52,7 +70,8 @@ export function convertMessagesToModelMessages(
* Convert a UIMessage to ModelMessage(s)
*
* This conversion handles the parts-based structure:
- * - Text parts → content field
+ * - Text parts → content field (string or as part of ContentPart array)
+ * - Multimodal parts (image, audio, video, document) → ContentPart array
* - ToolCall parts → toolCalls array
* - ToolResult parts → separate role="tool" messages
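+ *
+ * A sketch of the multimodal path (mirrors the message-converter tests added in this change):
+ *
+ * ```ts
+ * const [modelMsg] = uiMessageToModelMessages({
+ * id: 'msg-1',
+ * role: 'user',
+ * parts: [
+ * { type: 'text', content: 'Describe this' },
+ * { type: 'image', source: { type: 'url', value: 'https://example.com/a.jpg' } },
+ * ],
+ * })
+ * // modelMsg.content is a ContentPart array that preserves part order
+ * ```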
*
@@ -72,12 +91,17 @@ export function uiMessageToModelMessages(
// Separate parts by type
// Note: thinking parts are UI-only and not included in ModelMessages
const textParts: Array<TextPart> = []
+ const multimodalParts: Array<
+ ImagePart | AudioPart | VideoPart | DocumentPart
+ > = []
const toolCallParts: Array<ToolCallPart> = []
const toolResultParts: Array<ToolResultPart> = []
for (const part of uiMessage.parts) {
if (part.type === 'text') {
textParts.push(part)
+ } else if (isMultimodalPart(part)) {
+ multimodalParts.push(part)
} else if (part.type === 'tool-call') {
toolCallParts.push(part)
} else if (part.type === 'tool-result') {
@@ -86,8 +110,26 @@ export function uiMessageToModelMessages(
// thinking parts are skipped - they're UI-only
}
- // Build the main message (user or assistant)
- const content = textParts.map((p) => p.content).join('') || null
+ // Build the content field
+ // If we have multimodal parts, use ContentPart array format
+ // Otherwise, use simple string format for backward compatibility
+ let content: string | null | Array<ContentPart>
+ if (multimodalParts.length > 0) {
+ // Build ContentPart array preserving the order of text and multimodal parts
+ const contentParts: Array<ContentPart> = []
+ for (const part of uiMessage.parts) {
+ if (part.type === 'text') {
+ contentParts.push(part)
+ } else if (isMultimodalPart(part)) {
+ contentParts.push(part)
+ }
+ }
+ content = contentParts
+ } else {
+ // Simple string content for text-only messages
+ content = textParts.map((p) => p.content).join('') || null
+ }
+
const toolCalls =
toolCallParts.length > 0
? toolCallParts
@@ -108,7 +150,9 @@ export function uiMessageToModelMessages(
: undefined
// Create the main message
- if (uiMessage.role !== 'assistant' || content || !toolCalls) {
+ // For multimodal content, we always create a message even if content is an empty array
+ const hasContent = Array.isArray(content) ? true : content !== null
+ if (uiMessage.role !== 'assistant' || hasContent || !toolCalls) {
messageList.push({
role: uiMessage.role,
content,
diff --git a/packages/typescript/ai/src/activities/chat/stream/processor.ts b/packages/typescript/ai/src/activities/chat/stream/processor.ts
index 0f480f9d3..0ce10309f 100644
--- a/packages/typescript/ai/src/activities/chat/stream/processor.ts
+++ b/packages/typescript/ai/src/activities/chat/stream/processor.ts
@@ -35,6 +35,8 @@ import type {
ToolResultState,
} from './types'
import type {
+ ContentPart,
+ MessagePart,
ModelMessage,
StreamChunk,
ToolCall,
@@ -164,13 +166,42 @@ export class StreamProcessor {
}
/**
- * Add a user message to the conversation
- */
- addUserMessage(content: string): UIMessage {
+ * Add a user message to the conversation.
+ * Supports both simple string content and multimodal content arrays.
+ *
+ * @param content - The message content (string or array of content parts)
+ * @param id - Optional custom message ID (generated if not provided)
+ * @returns The created UIMessage
+ *
+ * @example
+ * ```ts
+ * // Simple text message
+ * processor.addUserMessage('Hello!')
+ *
+ * // Multimodal message with image
+ * processor.addUserMessage([
+ * { type: 'text', content: 'What is in this image?' },
+ * { type: 'image', source: { type: 'url', value: 'https://example.com/photo.jpg' } }
+ * ])
+ *
+ * // With custom ID
+ * processor.addUserMessage('Hello!', 'custom-id-123')
+ * ```
+ */
+ addUserMessage(content: string | Array<ContentPart>, id?: string): UIMessage {
+ // Convert content to message parts
+ const parts: Array<MessagePart> =
+ typeof content === 'string'
+ ? [{ type: 'text', content }]
+ : content.map((part) => {
+ // ContentPart types (text, image, audio, video, document) are compatible with MessagePart
+ return part as MessagePart
+ })
+
const userMessage: UIMessage = {
- id: generateMessageId(),
+ id: id ?? generateMessageId(),
role: 'user',
- parts: [{ type: 'text', content }],
+ parts,
createdAt: new Date(),
}
diff --git a/packages/typescript/ai/src/index.ts b/packages/typescript/ai/src/index.ts
index 48ae1164e..92263e37e 100644
--- a/packages/typescript/ai/src/index.ts
+++ b/packages/typescript/ai/src/index.ts
@@ -73,6 +73,9 @@ export {
// All types
export * from './types'
+// Utility functions
+export { detectImageMimeType } from './utils'
+
// Event client + event types
export * from './event-client'
diff --git a/packages/typescript/ai/src/types.ts b/packages/typescript/ai/src/types.ts
index 7bd3c52d9..344034f34 100644
--- a/packages/typescript/ai/src/types.ts
+++ b/packages/typescript/ai/src/types.ts
@@ -108,24 +108,52 @@ export interface ToolCall {
export type Modality = 'text' | 'image' | 'audio' | 'video' | 'document'
/**
- * Source specification for multimodal content.
- * Supports both inline data (base64) and URL-based content.
+ * Source specification for inline data content (base64).
+ * Requires a mimeType to ensure providers receive proper content type information.
+ */
+export interface ContentPartDataSource {
+ /**
+ * Indicates this is inline data content.
+ */
+ type: 'data'
+ /**
+ * The base64-encoded content value.
+ */
+ value: string
+ /**
+ * The MIME type of the content (e.g., 'image/png', 'audio/wav').
+ * Required for data sources to ensure proper handling by providers.
+ */
+ mimeType: string
+}
+
+/**
+ * Source specification for URL-based content.
+ * mimeType is optional as it can often be inferred from the URL or response headers.
*/
-export interface ContentPartSource {
+export interface ContentPartUrlSource {
/**
- * The type of source:
- * - 'data': Inline data (typically base64 encoded)
- * - 'url': URL reference to the content
+ * Indicates this is URL-referenced content.
*/
- type: 'data' | 'url'
+ type: 'url'
/**
- * The actual content value:
- * - For 'data': base64-encoded string
- * - For 'url': HTTP(S) URL or data URI
+ * HTTP(S) URL or data URI pointing to the content.
*/
value: string
+ /**
+ * Optional MIME type hint for cases where providers can't infer it from the URL.
+ */
+ mimeType?: string
}
+/**
+ * Source specification for multimodal content.
+ * Discriminated union supporting both inline data (base64) and URL-based content.
+ * - For 'data' sources: mimeType is required
+ * - For 'url' sources: mimeType is optional
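+ *
+ * A small illustration of both variants (base64Png is an assumed variable
+ * holding a base64 string):
+ *
+ * ```ts
+ * const inline: ContentPartSource = {
+ * type: 'data',
+ * value: base64Png,
+ * mimeType: 'image/png', // required for 'data'
+ * }
+ * const remote: ContentPartSource = {
+ * type: 'url',
+ * value: 'https://example.com/photo.jpg', // mimeType may be omitted
+ * }
+ * ```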
+ */
+export type ContentPartSource = ContentPartDataSource | ContentPartUrlSource
+
/**
* Image content part for multimodal messages.
* @template TMetadata - Provider-specific metadata type (e.g., OpenAI's detail level)
@@ -282,6 +310,10 @@ export interface ThinkingPart {
export type MessagePart =
| TextPart
+ | ImagePart
+ | AudioPart
+ | VideoPart
+ | DocumentPart
| ToolCallPart
| ToolResultPart
| ThinkingPart
diff --git a/packages/typescript/ai/src/utils.ts b/packages/typescript/ai/src/utils.ts
new file mode 100644
index 000000000..e89eb1db1
--- /dev/null
+++ b/packages/typescript/ai/src/utils.ts
@@ -0,0 +1,41 @@
+/**
+ * Detect image mime type from base64 data using magic bytes.
+ * Returns undefined if the format cannot be detected.
+ *
+ * This function analyzes the first few bytes of base64-encoded image data
+ * to determine the image format based on file signature (magic bytes).
+ *
+ * @param base64Data - The base64-encoded image data
+ * @returns The detected mime type, or undefined if unrecognized
+ *
+ * @example
+ * ```ts
+ * const mimeType = detectImageMimeType(imageBase64)
+ * // Returns 'image/jpeg', 'image/png', 'image/gif', 'image/webp', or undefined
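+ *
+ * // A fallback pattern (sketch) for building a data source when detection fails:
+ * // const mimeType = detectImageMimeType(imageBase64) ?? 'image/jpeg'
+ * // const source = { type: 'data', value: imageBase64, mimeType }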
+ * ```
+ */
+export function detectImageMimeType(
+ base64Data: string,
+): 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp' | undefined {
+ // Get first few bytes (base64 encoded)
+ const prefix = base64Data.substring(0, 20)
+
+ // JPEG: magic bytes FF D8 FF encode to the base64 prefix '/9j/'
+ if (prefix.startsWith('/9j/')) {
+ return 'image/jpeg'
+ }
+ // PNG: the signature 89 50 4E 47 0D 0A 1A 0A encodes to 'iVBORw0KGgo'
+ if (prefix.startsWith('iVBORw0KGgo')) {
+ return 'image/png'
+ }
+ // GIF: the 'GIF87a' and 'GIF89a' headers both encode to the prefix 'R0lGOD'
+ if (prefix.startsWith('R0lGOD')) {
+ return 'image/gif'
+ }
+ // WebP: the RIFF header 52 49 46 46 ('RIFF') encodes to 'UklGR'. This is a
+ // heuristic: the 'WEBP' fourCC at byte offset 8 is not checked, so other
+ // RIFF-based formats would also match.
+ if (prefix.startsWith('UklGR')) {
+ return 'image/webp'
+ }
+
+ return undefined
+}
diff --git a/packages/typescript/ai/tests/message-converters.test.ts b/packages/typescript/ai/tests/message-converters.test.ts
new file mode 100644
index 000000000..60df58ec0
--- /dev/null
+++ b/packages/typescript/ai/tests/message-converters.test.ts
@@ -0,0 +1,397 @@
+import { describe, expect, it } from 'vitest'
+import {
+ modelMessageToUIMessage,
+ uiMessageToModelMessages,
+} from '../src/activities/chat/messages'
+import type { ContentPart, ModelMessage, UIMessage } from '../src/types'
+
+describe('Message Converters', () => {
+ describe('uiMessageToModelMessages', () => {
+ it('should convert simple text message', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'user',
+ parts: [{ type: 'text', content: 'Hello' }],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ expect(result).toEqual([
+ {
+ role: 'user',
+ content: 'Hello',
+ },
+ ])
+ })
+
+ it('should convert multiple text parts to single string', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'user',
+ parts: [
+ { type: 'text', content: 'Hello ' },
+ { type: 'text', content: 'world!' },
+ ],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ expect(result).toEqual([
+ {
+ role: 'user',
+ content: 'Hello world!',
+ },
+ ])
+ })
+
+ it('should convert multimodal message with image to ContentPart array', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'user',
+ parts: [
+ { type: 'text', content: 'What is in this image?' },
+ {
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/cat.jpg' },
+ },
+ ],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ expect(result.length).toBe(1)
+ expect(result[0]?.role).toBe('user')
+ expect(Array.isArray(result[0]?.content)).toBe(true)
+
+ const contentParts = result[0]?.content as Array<ContentPart>
+ expect(contentParts.length).toBe(2)
+ expect(contentParts[0]).toEqual({
+ type: 'text',
+ content: 'What is in this image?',
+ })
+ expect(contentParts[1]).toEqual({
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/cat.jpg' },
+ })
+ })
+
+ it('should convert multimodal message with audio', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'user',
+ parts: [
+ { type: 'text', content: 'Transcribe this' },
+ {
+ type: 'audio',
+ source: {
+ type: 'data',
+ value: 'base64audio',
+ mimeType: 'audio/mp3',
+ },
+ },
+ ],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ const contentParts = result[0]?.content as Array<ContentPart>
+ expect(contentParts[1]).toEqual({
+ type: 'audio',
+ source: { type: 'data', value: 'base64audio', mimeType: 'audio/mp3' },
+ })
+ })
+
+ it('should convert multimodal message with video', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'user',
+ parts: [
+ { type: 'text', content: 'Describe this video' },
+ {
+ type: 'video',
+ source: { type: 'url', value: 'https://example.com/video.mp4' },
+ },
+ ],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ const contentParts = result[0]?.content as Array<ContentPart>
+ expect(contentParts[1]).toEqual({
+ type: 'video',
+ source: { type: 'url', value: 'https://example.com/video.mp4' },
+ })
+ })
+
+ it('should convert multimodal message with document', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'user',
+ parts: [
+ { type: 'text', content: 'Summarize this document' },
+ {
+ type: 'document',
+ source: {
+ type: 'data',
+ value: 'base64pdf',
+ mimeType: 'application/pdf',
+ },
+ },
+ ],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ const contentParts = result[0]?.content as Array<ContentPart>
+ expect(contentParts[1]).toEqual({
+ type: 'document',
+ source: {
+ type: 'data',
+ value: 'base64pdf',
+ mimeType: 'application/pdf',
+ },
+ })
+ })
+
+ it('should preserve order of text and multimodal parts', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'user',
+ parts: [
+ {
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/img1.jpg' },
+ },
+ { type: 'text', content: 'First image above' },
+ {
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/img2.jpg' },
+ },
+ { type: 'text', content: 'Second image above' },
+ ],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ const contentParts = result[0]?.content as Array<ContentPart>
+ expect(contentParts.length).toBe(4)
+ expect(contentParts[0]?.type).toBe('image')
+ expect(contentParts[1]?.type).toBe('text')
+ expect(contentParts[2]?.type).toBe('image')
+ expect(contentParts[3]?.type).toBe('text')
+ })
+
+ it('should skip thinking parts in conversion', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'assistant',
+ parts: [
+ { type: 'thinking', content: 'Let me think...' },
+ { type: 'text', content: 'Here is my answer' },
+ ],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ expect(result.length).toBe(1)
+ expect(result[0]?.content).toBe('Here is my answer')
+ })
+
+ it('should skip system messages', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'system',
+ parts: [{ type: 'text', content: 'You are a helpful assistant' }],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ expect(result).toEqual([])
+ })
+
+ it('should handle text-only message without multimodal parts as string content', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'user',
+ parts: [{ type: 'text', content: 'Just text' }],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ // Should be string, not array
+ expect(typeof result[0]?.content).toBe('string')
+ expect(result[0]?.content).toBe('Just text')
+ })
+
+ it('should handle empty parts array', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'user',
+ parts: [],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ expect(result.length).toBe(1)
+ expect(result[0]?.content).toBe(null)
+ })
+
+ it('should handle multimodal message with only image (no text)', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'user',
+ parts: [
+ {
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/cat.jpg' },
+ },
+ ],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ expect(Array.isArray(result[0]?.content)).toBe(true)
+ const contentParts = result[0]?.content as Array<ContentPart>
+ expect(contentParts.length).toBe(1)
+ expect(contentParts[0]?.type).toBe('image')
+ })
+
+ it('should include metadata in multimodal parts', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'user',
+ parts: [
+ { type: 'text', content: 'Analyze' },
+ {
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/cat.jpg' },
+ metadata: { detail: 'high' },
+ },
+ ],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ const contentParts = result[0]?.content as Array<ContentPart>
+ expect(contentParts[1]).toEqual({
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/cat.jpg' },
+ metadata: { detail: 'high' },
+ })
+ })
+
+ it('should handle tool call parts', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'assistant',
+ parts: [
+ {
+ type: 'tool-call',
+ id: 'tool-1',
+ name: 'getWeather',
+ arguments: '{"city": "NYC"}',
+ state: 'input-complete',
+ },
+ ],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ expect(result[0]?.toolCalls).toBeDefined()
+ expect(result[0]?.toolCalls?.length).toBe(1)
+ expect(result[0]?.toolCalls?.[0]).toEqual({
+ id: 'tool-1',
+ type: 'function',
+ function: {
+ name: 'getWeather',
+ arguments: '{"city": "NYC"}',
+ },
+ })
+ })
+
+ it('should handle tool result parts', () => {
+ const uiMessage: UIMessage = {
+ id: 'msg-1',
+ role: 'assistant',
+ parts: [
+ {
+ type: 'tool-result',
+ toolCallId: 'tool-1',
+ content: '{"temp": 72}',
+ state: 'complete',
+ },
+ ],
+ }
+
+ const result = uiMessageToModelMessages(uiMessage)
+
+ // Should have assistant message + tool message
+ expect(result.length).toBe(2)
+ expect(result[1]?.role).toBe('tool')
+ expect(result[1]?.toolCallId).toBe('tool-1')
+ expect(result[1]?.content).toBe('{"temp": 72}')
+ })
+ })
+
+ describe('modelMessageToUIMessage', () => {
+ it('should convert simple text ModelMessage', () => {
+ const modelMessage: ModelMessage = {
+ role: 'user',
+ content: 'Hello',
+ }
+
+ const result = modelMessageToUIMessage(modelMessage)
+
+ expect(result.role).toBe('user')
+ expect(result.parts).toEqual([{ type: 'text', content: 'Hello' }])
+ expect(result.id).toBeTruthy()
+ })
+
+ it('should use provided id', () => {
+ const modelMessage: ModelMessage = {
+ role: 'user',
+ content: 'Hello',
+ }
+
+ const result = modelMessageToUIMessage(modelMessage, 'custom-id')
+
+ expect(result.id).toBe('custom-id')
+ })
+
+ it('should convert multimodal content to text', () => {
+ const modelMessage: ModelMessage = {
+ role: 'user',
+ content: [
+ { type: 'text', content: 'What is this?' },
+ {
+ type: 'image',
+ source: { type: 'url', value: 'https://example.com/img.jpg' },
+ },
+ ],
+ }
+
+ const result = modelMessageToUIMessage(modelMessage)
+
+ // Currently, modelMessageToUIMessage only extracts text content
+ expect(result.parts).toEqual([{ type: 'text', content: 'What is this?' }])
+ })
+
+ it('should handle tool message', () => {
+ const modelMessage: ModelMessage = {
+ role: 'tool',
+ content: '{"result": "success"}',
+ toolCallId: 'tool-1',
+ }
+
+ const result = modelMessageToUIMessage(modelMessage)
+
+ expect(result.role).toBe('assistant') // Tool messages become assistant
+ expect(result.parts).toContainEqual({
+ type: 'tool-result',
+ toolCallId: 'tool-1',
+ content: '{"result": "success"}',
+ state: 'complete',
+ })
+ })
+ })
+})
diff --git a/packages/typescript/smoke-tests/adapters/fixtures/jpgfixture.jpg b/packages/typescript/smoke-tests/adapters/fixtures/jpgfixture.jpg
new file mode 100644
index 000000000..0a24df99d
Binary files /dev/null and b/packages/typescript/smoke-tests/adapters/fixtures/jpgfixture.jpg differ
diff --git a/packages/typescript/smoke-tests/adapters/fixtures/pngfixture.png b/packages/typescript/smoke-tests/adapters/fixtures/pngfixture.png
new file mode 100644
index 000000000..b82a6a355
Binary files /dev/null and b/packages/typescript/smoke-tests/adapters/fixtures/pngfixture.png differ
diff --git a/packages/typescript/smoke-tests/adapters/src/adapters/index.ts b/packages/typescript/smoke-tests/adapters/src/adapters/index.ts
index 532b3ed0e..25fb6a8a3 100644
--- a/packages/typescript/smoke-tests/adapters/src/adapters/index.ts
+++ b/packages/typescript/smoke-tests/adapters/src/adapters/index.ts
@@ -83,7 +83,7 @@ const GEMINI_TTS_MODEL =
const OLLAMA_MODEL = process.env.OLLAMA_MODEL || 'mistral:7b'
const OLLAMA_SUMMARY_MODEL = process.env.OLLAMA_SUMMARY_MODEL || OLLAMA_MODEL
-const GROK_MODEL = process.env.GROK_MODEL || 'grok-3'
+const GROK_MODEL = process.env.GROK_MODEL || 'grok-4'
const GROK_SUMMARY_MODEL = process.env.GROK_SUMMARY_MODEL || GROK_MODEL
const GROK_IMAGE_MODEL = process.env.GROK_IMAGE_MODEL || 'grok-2-image-1212'
diff --git a/packages/typescript/smoke-tests/adapters/src/tests/index.ts b/packages/typescript/smoke-tests/adapters/src/tests/index.ts
index e3b24cb60..8c4f8bee9 100644
--- a/packages/typescript/smoke-tests/adapters/src/tests/index.ts
+++ b/packages/typescript/smoke-tests/adapters/src/tests/index.ts
@@ -1,5 +1,3 @@
-import type { AdapterContext, TestOutcome } from '../harness'
-
// Import all test runners
import { runCST } from './cst-chat-stream'
import { runOST } from './ost-one-shot-text'
@@ -12,6 +10,9 @@ import { runSMS } from './sms-summarize-stream'
import { runIMG } from './img-image-generation'
import { runTTS } from './tts-text-to-speech'
import { runTRN } from './trn-transcription'
+import { runMMJ, runMMP } from './mmi-multimodal-image'
+import { runMMS, runMMT } from './mms-multimodal-structured'
+import type { AdapterContext, TestOutcome } from '../harness'
/**
* Adapter capability types
@@ -36,7 +37,7 @@ export interface TestDefinition {
/** Function to run the test */
run: (ctx: AdapterContext) => Promise<TestOutcome>
/** Required adapter capabilities (defaults to ['text']) */
- requires: AdapterCapability[]
+ requires: Array<AdapterCapability>
/** If true, test is skipped unless explicitly requested */
skipByDefault?: boolean
}
@@ -44,7 +45,7 @@ export interface TestDefinition {
/**
* Registry of all available tests
*/
-export const TESTS: TestDefinition[] = [
+export const TESTS: Array<TestDefinition> = [
{
id: 'CST',
name: 'Chat Stream',
@@ -125,6 +126,35 @@ export const TESTS: TestDefinition[] = [
requires: ['transcription'],
skipByDefault: true, // Skip unless explicitly requested
},
+ {
+ id: 'MMJ',
+ name: 'Multimodal JPEG',
+ description:
+ 'Describe a JPEG image (meme with man, React icon, code/email text)',
+ run: runMMJ,
+ requires: ['text'],
+ },
+ {
+ id: 'MMP',
+ name: 'Multimodal PNG',
+ description: 'Describe a PNG image (beach scene with AG UI text)',
+ run: runMMP,
+ requires: ['text'],
+ },
+ {
+ id: 'MMS',
+ name: 'Multimodal Structured JPEG',
+ description: 'Describe a JPEG image with structured JSON output',
+ run: runMMS,
+ requires: ['text'],
+ },
+ {
+ id: 'MMT',
+ name: 'Multimodal Structured PNG',
+ description: 'Describe a PNG image with structured JSON output',
+ run: runMMT,
+ requires: ['text'],
+ },
]
/**
@@ -137,13 +167,13 @@ export function getTest(id: string): TestDefinition | undefined {
/**
* Get all test IDs
*/
-export function getTestIds(): string[] {
+export function getTestIds(): Array<string> {
return TESTS.map((t) => t.id)
}
/**
* Get tests that run by default (excluding skipByDefault tests)
*/
-export function getDefaultTests(): TestDefinition[] {
+export function getDefaultTests(): Array<TestDefinition> {
return TESTS.filter((t) => !t.skipByDefault)
}
diff --git a/packages/typescript/smoke-tests/adapters/src/tests/mmi-multimodal-image.ts b/packages/typescript/smoke-tests/adapters/src/tests/mmi-multimodal-image.ts
new file mode 100644
index 000000000..15bdcac92
--- /dev/null
+++ b/packages/typescript/smoke-tests/adapters/src/tests/mmi-multimodal-image.ts
@@ -0,0 +1,193 @@
+import { readFile } from 'node:fs/promises'
+import { join } from 'node:path'
+import { runTestCase } from '../harness'
+import type { AdapterContext, TestOutcome } from '../harness'
+import type { ContentPart } from '@tanstack/ai'
+
+/**
+ * Detect image mime type from file extension
+ */
+function getMimeType(filename: string): string {
+ const ext = filename.toLowerCase().split('.').pop()
+ switch (ext) {
+ case 'jpg':
+ case 'jpeg':
+ return 'image/jpeg'
+ case 'png':
+ return 'image/png'
+ case 'gif':
+ return 'image/gif'
+ case 'webp':
+ return 'image/webp'
+ default:
+ return 'image/jpeg'
+ }
+}
+
+/**
+ * MMJ: Multimodal Image JPEG Test
+ *
+ * Tests multimodal image support by sending a JPEG image
+ * and asking the model to describe it.
+ * The image shows a man pointing towards a React icon with text
+ * "MY CODE" and "IS THIS AN EMAIL?" (meme format).
+ */
+export async function runMMJ(
+ adapterContext: AdapterContext,
+): Promise<TestOutcome> {
+ const testName = 'mmj-multimodal-jpeg'
+ const adapterName = adapterContext.adapterName
+ const fixtureFile = 'jpgfixture.jpg'
+ const fixturePath = join(process.cwd(), 'fixtures', fixtureFile)
+
+ // Try to load the image file
+ let imageBase64: string
+ try {
+ const imageBuffer = await readFile(fixturePath)
+ imageBase64 = imageBuffer.toString('base64')
+ } catch {
+ console.log(
+ `[${adapterName}] — ${testName}: Ignored (no fixture file at fixtures/${fixtureFile})`,
+ )
+ return { passed: true, ignored: true }
+ }
+
+ const mimeType = getMimeType(fixtureFile)
+
+ // Build multimodal content
+ const contentParts: Array<ContentPart> = [
+ { type: 'text', content: 'Describe this image' },
+ {
+ type: 'image',
+ source: { type: 'data', value: imageBase64, mimeType },
+ },
+ ]
+
+ return runTestCase({
+ adapterContext,
+ testName,
+ description:
'JPEG image description mentions at least one of: man/person, React icon, or meme text',
+ messages: [{ role: 'user' as const, content: contentParts }],
+ validate: (run) => {
+ const response = run.fullResponse.toLowerCase()
+
+ // Check for person/man/character
+ const hasPerson =
+ response.includes('man') ||
+ response.includes('person') ||
+ response.includes('guy') ||
+ response.includes('someone') ||
+ response.includes('character') ||
+ response.includes('hand') ||
+ response.includes('figure')
+
+ // Check for React icon/logo
+ const hasReact =
+ response.includes('react') ||
+ response.includes('logo') ||
+ response.includes('icon') ||
+ response.includes('atom')
+
+ // Check for meme text content
+ const hasCodeText =
+ response.includes('code') || response.includes('my code')
+ const hasEmailText =
+ response.includes('email') || response.includes('is this an email')
+
+ const passed =
+ hasPerson ||
+ hasReact ||
+ hasCodeText ||
+ hasEmailText ||
+ response.includes('image')
+
+ return {
+ passed,
+ error: passed
+ ? undefined
: `Response mentioned none of the expected elements (person, React icon, meme text, or the word "image"). hasPerson=${hasPerson}, hasReact=${hasReact}, hasCodeText=${hasCodeText}, hasEmailText=${hasEmailText}`,
+ meta: {
+ hasPerson,
+ hasReact,
+ hasCodeText,
+ hasEmailText,
+ responseLength: response.length,
+ },
+ }
+ },
+ })
+}
+
+/**
+ * MMP: Multimodal Image PNG Test
+ *
+ * Tests multimodal image support by sending a PNG image
+ * and asking the model to describe it.
+ * The image shows a beach scene with "AG UI READY" text.
+ * Expects the response to mention at least one of: beach, sea/ocean, or AG UI text.
+ */
+export async function runMMP(
+ adapterContext: AdapterContext,
+): Promise<TestOutcome> {
+ const testName = 'mmp-multimodal-png'
+ const adapterName = adapterContext.adapterName
+ const fixtureFile = 'pngfixture.png'
+ const fixturePath = join(process.cwd(), 'fixtures', fixtureFile)
+
+ // Try to load the image file
+ let imageBase64: string
+ try {
+ const imageBuffer = await readFile(fixturePath)
+ imageBase64 = imageBuffer.toString('base64')
+ } catch {
+ console.log(
+ `[${adapterName}] — ${testName}: Ignored (no fixture file at fixtures/${fixtureFile})`,
+ )
+ return { passed: true, ignored: true }
+ }
+
+ const mimeType = getMimeType(fixtureFile)
+
+ // Build multimodal content
+ const contentParts: Array<ContentPart> = [
+ { type: 'text', content: 'Describe this image' },
+ {
+ type: 'image',
+ source: { type: 'data', value: imageBase64, mimeType },
+ },
+ ]
+
+ return runTestCase({
+ adapterContext,
+ testName,
+ description:
+ 'PNG image description mentions beach, sea, or AG UI text (at least one)',
+ messages: [{ role: 'user' as const, content: contentParts }],
+ validate: (run) => {
+ const response = run.fullResponse.toLowerCase()
+
+ const hasBeach = response.includes('beach')
+ const hasSea =
+ response.includes('sea') ||
+ response.includes('ocean') ||
+ response.includes('water')
+ const hasAgUi =
+ response.includes('ag ui') ||
+ response.includes('ag-ui') ||
+ response.includes('agui') ||
+ response.includes('ready')
+
+ // Pass if at least one of the expected elements is mentioned
+ const passed = hasBeach || hasSea || hasAgUi || response.includes('image')
+
+ return {
+ passed,
+ error: passed
+ ? undefined
+ : `Response missing expected content. Need at least one of: hasBeach=${hasBeach}, hasSea=${hasSea}, hasAgUi=${hasAgUi}, or mentions "image"`,
+ meta: { hasBeach, hasSea, hasAgUi, responseLength: response.length },
+ }
+ },
+ })
+}
diff --git a/packages/typescript/smoke-tests/adapters/src/tests/mms-multimodal-structured.ts b/packages/typescript/smoke-tests/adapters/src/tests/mms-multimodal-structured.ts
new file mode 100644
index 000000000..12bc9a0b5
--- /dev/null
+++ b/packages/typescript/smoke-tests/adapters/src/tests/mms-multimodal-structured.ts
@@ -0,0 +1,248 @@
+import { readFile } from 'node:fs/promises'
+import { join } from 'node:path'
+import { runTestCase } from '../harness'
+import type { AdapterContext, TestOutcome } from '../harness'
+import type { ContentPart } from '@tanstack/ai'
+
+/**
+ * Detect image mime type from file extension
+ */
+function getMimeType(filename: string): string {
+ const ext = filename.toLowerCase().split('.').pop()
+ switch (ext) {
+ case 'jpg':
+ case 'jpeg':
+ return 'image/jpeg'
+ case 'png':
+ return 'image/png'
+ case 'gif':
+ return 'image/gif'
+ case 'webp':
+ return 'image/webp'
+ default:
+ return 'image/jpeg'
+ }
+}
+
+/**
+ * JSON Schema prompt for structured image description
+ */
+const STRUCTURED_PROMPT = `Analyze this image and provide a structured description. Return ONLY valid JSON (no markdown code blocks) matching this schema:
+{
+ "description": "A brief description of what the image shows",
+ "hasText": true/false,
+ "textContent": "The text content visible in the image, if any",
+ "mainSubject": "The main subject or focal point of the image",
+ "colors": ["array", "of", "primary", "colors"]
+}`
+
+interface ImageDescription {
+ description: string
+ hasText: boolean
+ textContent?: string
+ mainSubject: string
+ colors: Array<string>
+}
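+
+// For reference, a response matching the schema might look like this
+// (illustrative values for the JPEG fixture):
+// {
+//   "description": "A meme of a man pointing at a React logo",
+//   "hasText": true,
+//   "textContent": "MY CODE / IS THIS AN EMAIL?",
+//   "mainSubject": "Man pointing at a React icon",
+//   "colors": ["blue", "white", "black"]
+// }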
+
+/**
+ * MMS: Multimodal Structured JPEG Test
+ *
+ * Tests multimodal image support with structured output by sending a JPEG image
+ * and asking the model to describe it using a JSON schema.
+ * The image shows a man pointing towards a React icon with text
+ * "MY CODE" and "IS THIS AN EMAIL?" (meme format).
+ */
+export async function runMMS(
+ adapterContext: AdapterContext,
+): Promise<TestOutcome> {
+ const testName = 'mms-multimodal-structured-jpeg'
+ const adapterName = adapterContext.adapterName
+ const fixtureFile = 'jpgfixture.jpg'
+ const fixturePath = join(process.cwd(), 'fixtures', fixtureFile)
+
+ // Try to load the image file
+ let imageBase64: string
+ try {
+ const imageBuffer = await readFile(fixturePath)
+ imageBase64 = imageBuffer.toString('base64')
+ } catch {
+ console.log(
+ `[${adapterName}] — ${testName}: Ignored (no fixture file at fixtures/${fixtureFile})`,
+ )
+ return { passed: true, ignored: true }
+ }
+
+ const mimeType = getMimeType(fixtureFile)
+
+ // Build multimodal content with structured output request
+ const contentParts: Array<ContentPart> = [
+ {
+ type: 'text',
+ content: STRUCTURED_PROMPT,
+ },
+ {
+ type: 'image',
+ source: { type: 'data', value: imageBase64, mimeType },
+ },
+ ]
+
+ return runTestCase({
+ adapterContext,
+ testName,
+ description:
+ 'JPEG image with structured output returns valid JSON with description, hasText, mainSubject, colors',
+ messages: [{ role: 'user' as const, content: contentParts }],
+ validate: (run) => {
+ const response = run.fullResponse
+
+ // Try to parse as JSON
+ let parsed: ImageDescription | null = null
+ try {
+ // Try to extract JSON from response (might be wrapped in markdown code blocks)
+ const jsonMatch = response.match(/```(?:json)?\s*([\s\S]*?)```/)
+ const jsonStr =
+ jsonMatch && jsonMatch[1] ? jsonMatch[1].trim() : response.trim()
+ parsed = JSON.parse(jsonStr)
+ } catch {
+ // If direct parse fails, try the raw response
+ try {
+ parsed = JSON.parse(response)
+ } catch {
+ return {
+ passed: false,
+ error: `Failed to parse response as JSON: ${response.substring(0, 200)}`,
+ meta: { responseLength: response.length },
+ }
+ }
+ }
+
+ // Validate structure
+ const hasDescription =
+ typeof parsed?.description === 'string' && parsed.description.length > 0
+ const hasMainSubject =
+ typeof parsed?.mainSubject === 'string' && parsed.mainSubject.length > 0
+ const hasColors =
+ Array.isArray(parsed?.colors) && parsed.colors.length > 0
+ const hasTextBoolean = typeof parsed?.hasText === 'boolean'
+
+ const passed =
+ hasDescription && hasMainSubject && hasColors && hasTextBoolean
+
+ return {
+ passed,
+ error: passed
+ ? undefined
+ : `Structured output missing required fields. hasDescription=${hasDescription}, hasMainSubject=${hasMainSubject}, hasColors=${hasColors}, hasTextBoolean=${hasTextBoolean}`,
+ meta: {
+ hasDescription,
+ hasMainSubject,
+ hasColors,
+ hasTextBoolean,
+ parsed,
+ responseLength: response.length,
+ },
+ }
+ },
+ })
+}
+
+/**
+ * MMT: Multimodal Structured PNG Test
+ *
+ * Tests multimodal image support with structured output by sending a PNG image
+ * and asking the model to describe it using a JSON schema.
+ * The image shows a beach scene with "AG UI READY" text.
+ */
+export async function runMMT(
+ adapterContext: AdapterContext,
+): Promise<TestOutcome> {
+ const testName = 'mmt-multimodal-structured-png'
+ const adapterName = adapterContext.adapterName
+ const fixtureFile = 'pngfixture.png'
+ const fixturePath = join(process.cwd(), 'fixtures', fixtureFile)
+
+ // Try to load the image file
+ let imageBase64: string
+ try {
+ const imageBuffer = await readFile(fixturePath)
+ imageBase64 = imageBuffer.toString('base64')
+ } catch {
+ console.log(
+ `[${adapterName}] — ${testName}: Ignored (no fixture file at fixtures/${fixtureFile})`,
+ )
+ return { passed: true, ignored: true }
+ }
+
+ const mimeType = getMimeType(fixtureFile)
+
+ // Build multimodal content with structured output request
+ const contentParts: Array<ContentPart> = [
+ {
+ type: 'text',
+ content: STRUCTURED_PROMPT,
+ },
+ {
+ type: 'image',
+ source: { type: 'data', value: imageBase64, mimeType },
+ },
+ ]
+
+ return runTestCase({
+ adapterContext,
+ testName,
+ description:
+ 'PNG image with structured output returns valid JSON with description, hasText, mainSubject, colors',
+ messages: [{ role: 'user' as const, content: contentParts }],
+ validate: (run) => {
+ const response = run.fullResponse
+
+ // Try to parse as JSON
+ let parsed: ImageDescription | null = null
+ try {
+ // Try to extract JSON from response (might be wrapped in markdown code blocks)
+ const jsonMatch = response.match(/```(?:json)?\s*([\s\S]*?)```/)
+ const jsonStr =
+ jsonMatch && jsonMatch[1] ? jsonMatch[1].trim() : response.trim()
+ parsed = JSON.parse(jsonStr)
+ } catch {
+ // If direct parse fails, try the raw response
+ try {
+ parsed = JSON.parse(response)
+ } catch {
+ return {
+ passed: false,
+ error: `Failed to parse response as JSON: ${response.substring(0, 200)}`,
+ meta: { responseLength: response.length },
+ }
+ }
+ }
+
+ // Validate structure
+ const hasDescription =
+ typeof parsed?.description === 'string' && parsed.description.length > 0
+ const hasMainSubject =
+ typeof parsed?.mainSubject === 'string' && parsed.mainSubject.length > 0
+ const hasColors =
+ Array.isArray(parsed?.colors) && parsed.colors.length > 0
+ const hasTextBoolean = typeof parsed?.hasText === 'boolean'
+
+ const passed =
+ hasDescription && hasMainSubject && hasColors && hasTextBoolean
+
+ return {
+ passed,
+ error: passed
+ ? undefined
+ : `Structured output missing required fields. hasDescription=${hasDescription}, hasMainSubject=${hasMainSubject}, hasColors=${hasColors}, hasTextBoolean=${hasTextBoolean}`,
+ meta: {
+ hasDescription,
+ hasMainSubject,
+ hasColors,
+ hasTextBoolean,
+ parsed,
+ responseLength: response.length,
+ },
+ }
+ },
+ })
+}