Commit 0158d14

feat: multiple modalities from the client (#263)
* feat: multiple modalities from the client
* extend the client side with multimodality and add mimeTypes
* ci: apply automated fixes
* changeset
* changeset

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
1 parent 735a17e commit 0158d14

40 files changed (+2381, −190 lines)


.changeset/beige-mangos-act.md (9 additions, 0 deletions)

````diff
@@ -0,0 +1,9 @@
+---
+'@tanstack/ai-openrouter': patch
+'@tanstack/ai-anthropic': patch
+'@tanstack/ai-gemini': patch
+'@tanstack/ai-openai': patch
+'@tanstack/ai-grok': patch
+---
+
+re-release adapter packages
````

.changeset/six-numbers-bathe.md (18 additions, 0 deletions)

````diff
@@ -0,0 +1,18 @@
+---
+'@tanstack/ai-devtools-core': minor
+'@tanstack/ai-preact': minor
+'@tanstack/ai-svelte': minor
+'@tanstack/ai-react': minor
+'@tanstack/ai-solid': minor
+'@tanstack/ai-vue': minor
+'@tanstack/ai': minor
+'@tanstack/tests-adapters': patch
+'@tanstack/ai-openrouter': patch
+'@tanstack/ai-anthropic': patch
+'@tanstack/ai-client': patch
+'@tanstack/ai-gemini': patch
+'@tanstack/ai-openai': patch
+'@tanstack/ai-grok': patch
+---
+
+add multiple modalities support to the client
````

docs/guides/multimodal-content.md (188 additions, 17 deletions)
````diff
@@ -26,25 +26,27 @@ const textPart: TextPart = {
   content: 'What do you see in this image?'
 }
 
-// Image from base64 data
+// Image from base64 data (mimeType is required for data sources)
 const imagePart: ImagePart = {
   type: 'image',
   source: {
     type: 'data',
-    value: 'base64EncodedImageData...'
+    value: 'base64EncodedImageData...',
+    mimeType: 'image/jpeg' // Required for data sources
   },
   metadata: {
     // Provider-specific metadata
     detail: 'high' // OpenAI detail level
   }
 }
 
-// Image from URL
+// Image from URL (mimeType is optional for URL sources)
 const imageUrlPart: ImagePart = {
   type: 'image',
   source: {
     type: 'url',
-    value: 'https://example.com/image.jpg'
+    value: 'https://example.com/image.jpg',
+    mimeType: 'image/jpeg' // Optional hint for URL sources
   }
 }
 ```
````
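
For reference, the examples above imply a source shape roughly like the following. This is a sketch: only `TextPart` and `ImagePart` are named in the guide, so the source type names below are illustrative rather than actual library exports.

```typescript
// Illustrative sketch of the source union implied by the examples above;
// these type names are assumptions, not @tanstack/ai exports.
type DataSource = {
  type: 'data'
  value: string // base64-encoded content
  mimeType: string // required: providers need the content type for inline data
}

type UrlSource = {
  type: 'url'
  value: string // URL the provider can fetch
  mimeType?: string // optional: often inferable from the URL or response headers
}

type ContentSource = DataSource | UrlSource
```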
````diff
@@ -95,7 +97,7 @@ const message = {
     { type: 'text', content: 'Describe this image' },
     {
       type: 'image',
-      source: { type: 'data', value: imageBase64 },
+      source: { type: 'data', value: imageBase64, mimeType: 'image/jpeg' },
       metadata: { detail: 'high' } // 'auto' | 'low' | 'high'
     }
   ]
````
````diff
@@ -115,15 +117,14 @@ import { anthropicText } from '@tanstack/ai-anthropic'
 
 const adapter = anthropicText()
 
-// Image with media type
+// Image with mimeType in source
 const imageMessage = {
   role: 'user',
   content: [
     { type: 'text', content: 'What do you see?' },
     {
       type: 'image',
-      source: { type: 'data', value: imageBase64 },
-      metadata: { media_type: 'image/jpeg' }
+      source: { type: 'data', value: imageBase64, mimeType: 'image/jpeg' }
     }
   ]
 }
````
````diff
@@ -135,7 +136,7 @@ const docMessage = {
     { type: 'text', content: 'Summarize this document' },
     {
       type: 'document',
-      source: { type: 'data', value: pdfBase64 }
+      source: { type: 'data', value: pdfBase64, mimeType: 'application/pdf' }
     }
   ]
 }
````
````diff
@@ -154,15 +155,14 @@ import { geminiText } from '@tanstack/ai-gemini'
 
 const adapter = geminiText()
 
-// Image with mimeType
+// Image with mimeType in source
 const message = {
   role: 'user',
   content: [
     { type: 'text', content: 'Analyze this image' },
     {
       type: 'image',
-      source: { type: 'data', value: imageBase64 },
-      metadata: { mimeType: 'image/png' }
+      source: { type: 'data', value: imageBase64, mimeType: 'image/png' }
     }
   ]
 }
````
````diff
@@ -188,7 +188,7 @@ const message = {
     { type: 'text', content: 'What is in this image?' },
     {
       type: 'image',
-      source: { type: 'data', value: imageBase64 }
+      source: { type: 'data', value: imageBase64, mimeType: 'image/jpeg' }
     }
   ]
 }
````
````diff
@@ -202,28 +202,39 @@ Content can be provided as either inline data or a URL:
 
 ### Data (Base64)
 
-Use `type: 'data'` for inline base64-encoded content:
+Use `type: 'data'` for inline base64-encoded content. **The `mimeType` field is required** to ensure providers receive proper content type information:
 
 ```typescript
 const imagePart = {
   type: 'image',
   source: {
     type: 'data',
-    value: 'iVBORw0KGgoAAAANSUhEUgAAAAUA...' // Base64 string
+    value: 'iVBORw0KGgoAAAANSUhEUgAAAAUA...', // Base64 string
+    mimeType: 'image/png' // Required for data sources
+  }
+}
+
+const audioPart = {
+  type: 'audio',
+  source: {
+    type: 'data',
+    value: 'base64AudioData...',
+    mimeType: 'audio/mp3' // Required for data sources
   }
 }
 ```
 
 ### URL
 
-Use `type: 'url'` for content hosted at a URL:
+Use `type: 'url'` for content hosted at a URL. The `mimeType` field is **optional**, as providers can often infer it from the URL or response headers:
 
 ```typescript
 const imagePart = {
   type: 'image',
   source: {
     type: 'url',
-    value: 'https://example.com/image.jpg'
+    value: 'https://example.com/image.jpg',
+    mimeType: 'image/jpeg' // Optional hint
   }
 }
 ```
````
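
Because data sources require a `mimeType`, code that fetches remote content and inlines it as base64 has to carry the content type along. Here is a minimal sketch using only standard Web APIs; the helper name and the `application/octet-stream` fallback are assumptions, not part of the library.

```typescript
// Sketch: fetch a resource and build a data source with the required mimeType.
// The helper name and the octet-stream fallback are illustrative assumptions.
async function toDataSource(url: string) {
  const res = await fetch(url)
  const mimeType = res.headers.get('content-type') ?? 'application/octet-stream'
  const bytes = new Uint8Array(await res.arrayBuffer())

  // Base64-encode in chunks to avoid argument-length limits on large payloads
  let binary = ''
  const chunkSize = 0x8000
  for (let i = 0; i < bytes.length; i += chunkSize) {
    binary += String.fromCharCode(...bytes.subarray(i, i + chunkSize))
  }

  return { type: 'data' as const, value: btoa(binary), mimeType }
}
```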
````diff
@@ -315,3 +326,163 @@ const stream = chat({
 3. **Check model support**: Not all models support all modalities. Verify the model you're using supports the content types you want to send.
 
 4. **Handle errors gracefully**: When a model doesn't support a particular modality, it may throw an error. Handle these cases in your application.
+
+## Client-Side Multimodal Messages
+
+When using the `ChatClient` from `@tanstack/ai-client`, you can send multimodal messages directly from your UI using the `sendMessage` method.
+
+### Basic Usage
+
+The `sendMessage` method accepts either a simple string or a `MultimodalContent` object:
+
+```typescript
+import { ChatClient, fetchServerSentEvents } from '@tanstack/ai-client'
+
+const client = new ChatClient({
+  connection: fetchServerSentEvents('/api/chat'),
+})
+
+// Simple text message
+await client.sendMessage('Hello!')
+
+// Multimodal message with image
+await client.sendMessage({
+  content: [
+    { type: 'text', content: 'What is in this image?' },
+    {
+      type: 'image',
+      source: { type: 'url', value: 'https://example.com/photo.jpg' }
+    }
+  ]
+})
+```
+
+### Custom Message ID
+
+You can provide a custom ID for the message:
+
+```typescript
+await client.sendMessage({
+  content: 'Hello!',
+  id: 'custom-message-id-123'
+})
+```
+
+### Per-Message Body Parameters
+
+The second parameter allows you to pass additional body parameters for that specific request. These are shallow-merged with the client's base body configuration, with per-message parameters taking priority:
+
+```typescript
+const client = new ChatClient({
+  connection: fetchServerSentEvents('/api/chat'),
+  body: { model: 'gpt-5' }, // Base body params
+})
+
+// Per-message params take priority over the base body
+await client.sendMessage('Analyze this complex problem', {
+  model: 'gpt-5',
+  temperature: 0.2,
+})
+```
+
+### React Example
+
+Here's how to use multimodal messages in a React component:
+
+```tsx
+import { useChat } from '@tanstack/ai-react'
+import { fetchServerSentEvents } from '@tanstack/ai-client'
+import { useState } from 'react'
+
+function ChatWithImages() {
+  const [imageUrl, setImageUrl] = useState('')
+  const { sendMessage, messages } = useChat({
+    connection: fetchServerSentEvents('/api/chat'),
+  })
+
+  const handleSendWithImage = () => {
+    if (imageUrl) {
+      sendMessage({
+        content: [
+          { type: 'text', content: 'What do you see in this image?' },
+          { type: 'image', source: { type: 'url', value: imageUrl } }
+        ]
+      })
+    }
+  }
+
+  return (
+    <div>
+      <input
+        type="url"
+        placeholder="Image URL"
+        value={imageUrl}
+        onChange={(e) => setImageUrl(e.target.value)}
+      />
+      <button onClick={handleSendWithImage}>Send with Image</button>
+    </div>
+  )
+}
+```
+
+### File Upload Example
+
+Here's how to handle file uploads and send them as multimodal content:
+
+```tsx
+import { useChat } from '@tanstack/ai-react'
+import { fetchServerSentEvents } from '@tanstack/ai-client'
+
+function ChatWithFileUpload() {
+  const { sendMessage } = useChat({
+    connection: fetchServerSentEvents('/api/chat'),
+  })
+
+  const handleFileUpload = async (file: File) => {
+    // Convert file to base64
+    const base64 = await new Promise<string>((resolve) => {
+      const reader = new FileReader()
+      reader.onload = () => {
+        const result = reader.result as string
+        // Remove data URL prefix (e.g., "data:image/png;base64,")
+        resolve(result.split(',')[1])
+      }
+      reader.readAsDataURL(file)
+    })
+
+    // Determine content type based on file type
+    const type = file.type.startsWith('image/')
+      ? 'image'
+      : file.type.startsWith('audio/')
+        ? 'audio'
+        : file.type.startsWith('video/')
+          ? 'video'
+          : 'document'
+
+    await sendMessage({
+      content: [
+        { type: 'text', content: `Please analyze this ${type}` },
+        {
+          type,
+          source: { type: 'data', value: base64, mimeType: file.type }
+        }
+      ]
+    })
+  }
+
+  return (
+    <input
+      type="file"
+      accept="image/*,audio/*,video/*,.pdf"
+      onChange={(e) => {
+        const file = e.target.files?.[0]
+        if (file) handleFileUpload(file)
+      }}
+    />
+  )
+}
+```
````
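
The shallow merge described under "Per-Message Body Parameters" amounts to an object spread, with per-message values winning on key collisions and nested objects replaced rather than deep-merged. A sketch of the documented semantics, not the `ChatClient` internals:

```typescript
// Illustrative sketch of the documented shallow-merge semantics,
// not the actual ChatClient implementation.
const baseBody = { model: 'gpt-5', temperature: 1 } // from the ChatClient config
const perMessage = { temperature: 0.2 } // second argument to sendMessage

// Per-message params take priority; nested objects would be replaced, not deep-merged.
const requestBody = { ...baseBody, ...perMessage }
// => { model: 'gpt-5', temperature: 0.2 }
```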
