diff --git a/docs/guides/multimodal-content.md b/docs/guides/multimodal-content.md index 65100420d..2b8e32bd2 100644 --- a/docs/guides/multimodal-content.md +++ b/docs/guides/multimodal-content.md @@ -26,12 +26,13 @@ const textPart: TextPart = { content: 'What do you see in this image?' } -// Image from base64 data +// Image from base64 data (mimeType is required for data sources) const imagePart: ImagePart = { type: 'image', source: { type: 'data', - value: 'base64EncodedImageData...' + value: 'base64EncodedImageData...', + mimeType: 'image/jpeg' // Required for data sources }, metadata: { // Provider-specific metadata @@ -39,12 +40,13 @@ const imagePart: ImagePart = { } } -// Image from URL +// Image from URL (mimeType is optional for URL sources) const imageUrlPart: ImagePart = { type: 'image', source: { type: 'url', - value: 'https://example.com/image.jpg' + value: 'https://example.com/image.jpg', + mimeType: 'image/jpeg' // Optional hint for URL sources } } ``` @@ -95,7 +97,7 @@ const message = { { type: 'text' , content: 'Describe this image' }, { type: 'image' , - source: { type: 'data' , value: imageBase64 }, + source: { type: 'data' , value: imageBase64, mimeType: 'image/jpeg' }, metadata: { detail: 'high' } // 'auto' | 'low' | 'high' } ] @@ -115,15 +117,14 @@ import { anthropicText } from '@tanstack/ai-anthropic' const adapter = anthropicText() -// Image with media type +// Image with mimeType in source const imageMessage = { role: 'user' , content: [ { type: 'text' , content: 'What do you see?' }, { type: 'image' , - source: { type: 'data' , value: imageBase64 }, - metadata: { media_type: 'image/jpeg' } + source: { type: 'data' , value: imageBase64, mimeType: 'image/jpeg' } } ] } @@ -135,7 +136,7 @@ const docMessage = { { type: 'text', content: 'Summarize this document' }, { type: 'document', - source: { type: 'data', value: pdfBase64 } + source: { type: 'data', value: pdfBase64, mimeType: 'application/pdf' } } ] } @@ -154,15 +155,14 @@ import { geminiText } from '@tanstack/ai-gemini' const adapter = geminiText() -// Image with mimeType +// Image with mimeType in source const message = { role: 'user', content: [ { type: 'text', content: 'Analyze this image' }, { type: 'image', - source: { type: 'data', value: imageBase64 }, - metadata: { mimeType: 'image/png' } + source: { type: 'data', value: imageBase64, mimeType: 'image/png' } } ] } @@ -188,7 +188,7 @@ const message = { { type: 'text', content: 'What is in this image?' }, { type: 'image', - source: { type: 'data', value: imageBase64 } + source: { type: 'data', value: imageBase64, mimeType: 'image/jpeg' } } ] } @@ -202,28 +202,39 @@ Content can be provided as either inline data or a URL: ### Data (Base64) -Use `type: 'data'` for inline base64-encoded content: +Use `type: 'data'` for inline base64-encoded content. **The `mimeType` field is required** to ensure providers receive proper content type information: ```typescript const imagePart = { type: 'image', source: { type: 'data', - value: 'iVBORw0KGgoAAAANSUhEUgAAAAUA...' // Base64 string + value: 'iVBORw0KGgoAAAANSUhEUgAAAAUA...', // Base64 string + mimeType: 'image/png' // Required for data sources + } +} + +const audioPart = { + type: 'audio', + source: { + type: 'data', + value: 'base64AudioData...', + mimeType: 'audio/mp3' // Required for data sources } } ``` ### URL -Use `type: 'url'` for content hosted at a URL: +Use `type: 'url'` for content hosted at a URL. 
+The `mimeType` field is **optional**, as providers can often infer it from the URL or response headers:
 
 ```typescript
 const imagePart = {
   type: 'image' ,
   source: {
     type: 'url' ,
-    value: 'https://example.com/image.jpg'
+    value: 'https://example.com/image.jpg',
+    mimeType: 'image/jpeg' // Optional hint
   }
 }
 ```
@@ -315,3 +326,163 @@ const stream = chat({
 3. **Check model support**: Not all models support all modalities. Verify the model you're using supports the content types you want to send.
 
 4. **Handle errors gracefully**: When a model doesn't support a particular modality, it may throw an error. Handle these cases in your application.
+
+## Client-Side Multimodal Messages
+
+When using the `ChatClient` from `@tanstack/ai-client`, you can send multimodal messages directly from your UI using the `sendMessage` method.
+
+### Basic Usage
+
+The `sendMessage` method accepts either a simple string or a `MultimodalContent` object:
+
+```typescript
+import { ChatClient, fetchServerSentEvents } from '@tanstack/ai-client'
+
+const client = new ChatClient({
+  connection: fetchServerSentEvents('/api/chat'),
+})
+
+// Simple text message
+await client.sendMessage('Hello!')
+
+// Multimodal message with image
+await client.sendMessage({
+  content: [
+    { type: 'text', content: 'What is in this image?' },
+    {
+      type: 'image',
+      source: { type: 'url', value: 'https://example.com/photo.jpg' }
+    }
+  ]
+})
+```
+
+### Custom Message ID
+
+You can provide a custom ID for the message:
+
+```typescript
+await client.sendMessage({
+  content: 'Hello!',
+  id: 'custom-message-id-123'
+})
+```
+
+### Per-Message Body Parameters
+
+The second parameter allows you to pass additional body parameters for that specific request. These are shallow-merged with the client's base body configuration, with per-message parameters taking priority:
+
+```typescript
+const client = new ChatClient({
+  connection: fetchServerSentEvents('/api/chat'),
+  body: { model: 'gpt-5' }, // Base body params
+})
+
+// Add to or override the base body for this specific message
+await client.sendMessage('Analyze this complex problem', {
+  model: 'gpt-5',
+  temperature: 0.2,
+})
+```
+
+### React Example
+
+Here's how to use multimodal messages in a React component:
+
+```tsx
+import { useChat } from '@tanstack/ai-react'
+import { fetchServerSentEvents } from '@tanstack/ai-client'
+import { useState } from 'react'
+
+function ChatWithImages() {
+  const [imageUrl, setImageUrl] = useState('')
+  const { sendMessage, messages } = useChat({
+    connection: fetchServerSentEvents('/api/chat'),
+  })
+
+  const handleSendWithImage = () => {
+    if (imageUrl) {
+      sendMessage({
+        content: [
+          { type: 'text', content: 'What do you see in this image?' },
+          { type: 'image', source: { type: 'url', value: imageUrl } }
+        ]
+      })
+    }
+  }
+
+  return (
+    <div>
+      <input
+        type="text"
+        value={imageUrl}
+        onChange={(e) => setImageUrl(e.target.value)}
+      />
+      <button onClick={handleSendWithImage}>Send with Image</button>
+    </div>
+  )
+}
+```
+
+### File Upload Example
+
+Here's how to handle file uploads and send them as multimodal content:
+
+```tsx
+import { useChat } from '@tanstack/ai-react'
+import { fetchServerSentEvents } from '@tanstack/ai-client'
+
+function ChatWithFileUpload() {
+  const { sendMessage } = useChat({
+    connection: fetchServerSentEvents('/api/chat'),
+  })
+
+  const handleFileUpload = async (file: File) => {
+    // Convert file to base64
+    const base64 = await new Promise<string>((resolve) => {
+      const reader = new FileReader()
+      reader.onload = () => {
+        const result = reader.result as string
+        // Remove data URL prefix (e.g., "data:image/png;base64,")
+        resolve(result.split(',')[1])
+      }
+      reader.readAsDataURL(file)
+    })
+
+    // Determine content type based on file type
+    const type = file.type.startsWith('image/')
+      ? 'image'
+      : file.type.startsWith('audio/')
+        ? 'audio'
+        : file.type.startsWith('video/')
+          ? 'video'
+          : 'document'
+
+    await sendMessage({
+      content: [
+        { type: 'text', content: `Please analyze this ${type}` },
+        {
+          type,
+          source: { type: 'data', value: base64, mimeType: file.type }
+        }
+      ]
+    })
+  }
+
+  return (
+    <input
+      type="file"
+      onChange={(e) => {
+        const file = e.target.files?.[0]
+        if (file) handleFileUpload(file)
+      }}
+    />
+  )
+}
+```
+
diff --git a/examples/ts-react-chat/src/routes/index.tsx b/examples/ts-react-chat/src/routes/index.tsx
index c9436c7a9..3463c6610 100644
--- a/examples/ts-react-chat/src/routes/index.tsx
+++ b/examples/ts-react-chat/src/routes/index.tsx
@@ -1,6 +1,6 @@
 import { useEffect, useMemo, useRef, useState } from 'react'
 import { createFileRoute } from '@tanstack/react-router'
-import { Send, Square } from 'lucide-react'
+import { ImagePlus, Send, Square, X } from 'lucide-react'
 import ReactMarkdown from 'react-markdown'
 import rehypeRaw from 'rehype-raw'
 import rehypeSanitize from 'rehype-sanitize'
@@ -10,6 +10,7 @@ import { fetchServerSentEvents, useChat } from '@tanstack/ai-react'
 import { clientTools } from '@tanstack/ai-client'
 import { ThinkingPart } from '@tanstack/ai-react-ui'
 import type { UIMessage } from '@tanstack/ai-react'
+import type { ContentPart } from '@tanstack/ai'
 import type { ModelOption } from '@/lib/model-selection'
 import GuitarRecommendation from '@/components/example-GuitarRecommendation'
 import {
@@ -20,6 +21,13 @@ import {
 } from '@/lib/guitar-tools'
 import { DEFAULT_MODEL_OPTION, MODEL_OPTIONS } from '@/lib/model-selection'
 
+/**
+ * Generate a random message ID
+ */
+function generateMessageId(): string {
+  return `msg-${Date.now()}-${Math.random().toString(36).substring(2, 9)}`
+}
+
 const getPersonalGuitarPreferenceToolClient =
   getPersonalGuitarPreferenceToolDef.client(() => ({ preference: 'acoustic' }))
 
@@ -148,6 +156,23 @@ function Messages({
       )
     }
 
+    // Render image parts
+    if (part.type === 'image') {
+      const imageUrl =
+        part.source.type === 'url'
+          ? part.source.value
+          : `data:image/png;base64,${part.source.value}`
+      return (
+        <div>
+          <img src={imageUrl} alt="Attached image" />
+        </div>
+      )
+    }
+
     // Approval UI
     if (
       part.type === 'tool-call' &&
@@ -226,6 +251,10 @@ function Messages({
 
 function ChatPage() {
   const [selectedModel, setSelectedModel] = useState(DEFAULT_MODEL_OPTION)
+  const [attachedImages, setAttachedImages] = useState<
+    Array<{ id: string; base64: string; mimeType: string; preview: string }>
+  >([])
+  const fileInputRef = useRef<HTMLInputElement>(null)
 
   const body = useMemo(
     () => ({
@@ -243,6 +272,103 @@ function ChatPage() {
   })
 
   const [input, setInput] = useState('')
+  /**
+   * Handle file selection for image attachment
+   */
+  const handleFileSelect = async (e: React.ChangeEvent<HTMLInputElement>) => {
+    const files = e.target.files
+    if (!files || files.length === 0) return
+
+    const newImages: Array<{
+      id: string
+      base64: string
+      mimeType: string
+      preview: string
+    }> = []
+
+    for (const file of Array.from(files)) {
+      if (!file.type.startsWith('image/')) continue
+
+      const base64 = await new Promise<string>((resolve) => {
+        const reader = new FileReader()
+        reader.onload = () => {
+          const result = reader.result as string
+          // Remove data URL prefix (e.g., "data:image/png;base64,")
+          resolve(result.split(',')[1])
+        }
+        reader.readAsDataURL(file)
+      })
+
+      const preview = URL.createObjectURL(file)
+      newImages.push({
+        id: generateMessageId(),
+        base64,
+        mimeType: file.type, // Capture the actual mime type
+        preview,
+      })
+    }
+
+    setAttachedImages((prev) => [...prev, ...newImages])
+
+    // Reset the file input
+    if (fileInputRef.current) {
+      fileInputRef.current.value = ''
+    }
+  }
+
+  /**
+   * Remove an attached image
+   */
+  const removeImage = (id: string) => {
+    setAttachedImages((prev) => {
+      const image = prev.find((img) => img.id === id)
+      if (image) {
+        URL.revokeObjectURL(image.preview)
+      }
+      return prev.filter((img) => img.id !== id)
+    })
+  }
+
+  /**
+   * Send message with optional image attachments
+   */
+  const handleSendMessage = () => {
+    if (!input.trim() && attachedImages.length === 0) return
+
+    if (attachedImages.length > 0) {
+      // Build multimodal content array
+      const contentParts: Array<ContentPart> = []
+
+      // Add text if present
+      if (input.trim()) {
+        contentParts.push({ type: 'text', content: input.trim() })
+      }
+
+      // Add images with their mime types
+      for (const img of attachedImages) {
+        contentParts.push({
+          type: 'image',
+          source: { type: 'data', value: img.base64, mimeType: img.mimeType },
+        })
+      }
+
+      // Send with custom message ID
+      sendMessage({
+        content: contentParts,
+        id: generateMessageId(),
+      })
+
+      // Clean up image previews
+      attachedImages.forEach((img) => URL.revokeObjectURL(img.preview))
+      setAttachedImages([])
+    } else {
+      // Simple text message
+      sendMessage(input.trim())
+    }
+
+    setInput('')
+  }
+
   return (
     
{/* Chat */} @@ -295,41 +421,89 @@ function ChatPage() {
)} -
-