Skip to content

Commit b23a4af

Browse files
committed
chore: ark llm new
1 parent a66cca6 commit b23a4af

File tree

3 files changed

+329
-87
lines changed

3 files changed

+329
-87
lines changed

veadk/agent.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@
5454
from veadk.tracing.base_tracer import BaseTracer
5555
from veadk.utils.logger import get_logger
5656
from veadk.utils.patches import patch_asyncio, patch_tracer
57-
from veadk.utils.misc import check_litellm_version
5857
from veadk.version import VERSION
5958

6059
patch_tracer()
@@ -171,10 +170,7 @@ def model_post_init(self, __context: Any) -> None:
171170

172171
if not self.model:
173172
if self.enable_responses:
174-
min_version = "1.79.3"
175-
check_litellm_version(min_version)
176-
177-
from veadk.models.ark_llm import ArkLlm
173+
from veadk.models.ark_llm_new import ArkLlm
178174

179175
self.model = ArkLlm(
180176
model=f"{self.model_provider}/{self.model_name}",

veadk/models/ark_llm_new.py

Lines changed: 96 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -19,30 +19,31 @@
1919
from typing import Any, Dict, Union, AsyncGenerator, Tuple, List, Optional, Literal
2020

2121
import openai
22-
from openai.types.responses import Response as OpenAITypeResponse, ResponseStreamEvent
23-
from volcenginesdkarkruntime.types import ResponseFormatJSONObject
24-
from volcenginesdkarkruntime.types.responses import FunctionToolParam, ResponseTextConfigParam
22+
from openai.types.responses import ResponseStreamEvent
23+
from volcenginesdkarkruntime.types.responses import Response as ArkTypeResponse
24+
from volcenginesdkarkruntime.types.responses import (
25+
FunctionToolParam,
26+
ResponseTextConfigParam,
27+
)
2528

2629
from volcenginesdkarkruntime.types.responses.response_input_param import (
2730
ResponseInputItemParam,
2831
ResponseFunctionToolCallParam,
2932
EasyInputMessageParam,
30-
FunctionCallOutput
33+
FunctionCallOutput,
3134
)
3235
from volcenginesdkarkruntime.types.responses.response_input_message_content_list_param import (
3336
ResponseInputTextParam,
3437
ResponseInputImageParam,
3538
ResponseInputVideoParam,
36-
ResponseInputAudioParam,
3739
ResponseInputFileParam,
38-
ResponseInputContentParam
40+
ResponseInputContentParam,
3941
)
4042

4143

4244
from google.adk.models import LlmRequest, LlmResponse
4345
from google.adk.models.lite_llm import (
4446
LiteLlm,
45-
_get_completion_inputs,
4647
FunctionChunk,
4748
TextChunk,
4849
_message_to_generate_content_response,
@@ -59,8 +60,9 @@
5960

6061
from veadk.config import settings
6162
from veadk.consts import DEFAULT_VIDEO_MODEL_API_BASE
62-
from veadk.models.ark_transform import (
63-
CompletionToResponsesAPIHandler,
63+
from veadk.models.ark_transform_new import (
64+
request_reorganization_by_ark,
65+
ark_response_to_generate_content_response,
6466
)
6567
from veadk.utils.logger import get_logger
6668

@@ -69,6 +71,7 @@
6971

7072
_LITELLM_STRUCTURED_TYPES = {"json_object", "json_schema"}
7173

74+
7275
def _to_responses_api_role(role: Optional[str]) -> Literal["user", "assistant"]:
7376
if role in ["model", "assistant"]:
7477
return "assistant"
@@ -127,15 +130,17 @@ def _file_data_to_content_param(
127130
# file
128131
param = {"file_id": value} if is_file_id else {"file_url": value}
129132
if display_name:
130-
param['filename'] = display_name
133+
param["filename"] = display_name
131134
return ResponseInputFileParam(
132135
type="input_file",
133136
**param,
134137
)
135138

136139

137140
def _inline_data_to_content_param(part: types.Part) -> ResponseInputContentParam:
138-
mime_type = (part.inline_data.mime_type if part.inline_data else None) or "application/octet-stream"
141+
mime_type = (
142+
part.inline_data.mime_type if part.inline_data else None
143+
) or "application/octet-stream"
139144
base64_string = base64.b64encode(part.inline_data.data).decode("utf-8")
140145
data_uri = f"data:{mime_type};base64,{base64_string}"
141146

@@ -170,8 +175,8 @@ def _inline_data_to_content_param(part: types.Part) -> ResponseInputContentParam
170175

171176

172177
def _get_content(
173-
parts: List[types.Part],
174-
role: Literal["user", "system", "developer", "assistant"],
178+
parts: List[types.Part],
179+
role: Literal["user", "system", "developer", "assistant"],
175180
) -> Optional[EasyInputMessageParam]:
176181
content = []
177182
for part in parts:
@@ -186,15 +191,12 @@ def _get_content(
186191
content.append(_inline_data_to_content_param(part))
187192
elif part.file_data: # 有两种,file_id和file_url
188193
content.append(_file_data_to_content_param(part))
189-
if len(content)>0:
190-
return EasyInputMessageParam(
191-
type="message",
192-
role=role,
193-
content=content
194-
)
194+
if len(content) > 0:
195+
return EasyInputMessageParam(type="message", role=role, content=content)
195196
else:
196197
return None
197198

199+
198200
def _content_to_input_item(
199201
content: types.Content,
200202
) -> Union[ResponseInputItemParam, List[ResponseInputItemParam]]:
@@ -223,7 +225,6 @@ def _content_to_input_item(
223225
return input_content
224226
else: # model
225227
# 3. 处理model的消息
226-
content_present = False
227228
for part in content.parts:
228229
if part.function_call:
229230
input_list.append(
@@ -242,19 +243,14 @@ def _content_to_input_item(
242243

243244

244245
def _function_declarations_to_tool_param(
245-
function_declaration: types.FunctionDeclaration
246+
function_declaration: types.FunctionDeclaration,
246247
) -> FunctionToolParam:
247248
from google.adk.models.lite_llm import _schema_to_dict
249+
248250
assert function_declaration.name
249251

250-
parameters = {
251-
"type": "object",
252-
"properties" : {}
253-
}
254-
if (
255-
function_declaration.parameters
256-
and function_declaration.parameters.properties
257-
):
252+
parameters = {"type": "object", "properties": {}}
253+
if function_declaration.parameters and function_declaration.parameters.properties:
258254
properties = {}
259255
for key, value in function_declaration.parameters.properties.items():
260256
properties[key] = _schema_to_dict(value)
@@ -268,53 +264,80 @@ def _function_declarations_to_tool_param(
268264

269265
tool_params = FunctionToolParam(
270266
name=function_declaration.name,
271-
parameters=parameters, # todo:here
267+
parameters=parameters, # todo:here
272268
type="function",
273269
description=function_declaration.description,
274270
)
275271

276272
return tool_params
277273

278274

279-
def _responses_schema_to_text(response_schema:types.SchemaUnion) -> Optional[ResponseTextConfigParam]:
280-
from google.adk.models.lite_llm import _to_litellm_response_format
281-
format_value = _to_litellm_response_format(
282-
response_schema
283-
)
284-
if format_value:
285-
return ResponseTextConfigParam(
286-
format=format_value
287-
)
275+
def _responses_schema_to_text(
276+
response_schema: types.SchemaUnion,
277+
) -> Optional[ResponseTextConfigParam | dict]:
278+
schema_name = ""
279+
if isinstance(response_schema, dict):
280+
schema_type = response_schema.get("type")
281+
if (
282+
isinstance(schema_type, str)
283+
and schema_type.lower() in _LITELLM_STRUCTURED_TYPES
284+
):
285+
return response_schema
286+
schema_dict = dict(response_schema)
287+
elif isinstance(response_schema, type) and issubclass(response_schema, BaseModel):
288+
schema_name = response_schema.__name__
289+
schema_dict = response_schema.model_json_schema()
290+
elif isinstance(response_schema, BaseModel):
291+
if isinstance(response_schema, types.Schema):
292+
# GenAI Schema instances already represent JSON schema definitions.
293+
schema_name = response_schema.__name__
294+
schema_dict = response_schema.model_dump(exclude_none=True, mode="json")
295+
else:
296+
schema_name = response_schema.__name__
297+
schema_dict = response_schema.__class__.model_json_schema()
298+
elif hasattr(response_schema, "model_dump"):
299+
schema_name = response_schema.__name__
300+
schema_dict = response_schema.model_dump(exclude_none=True, mode="json")
288301
else:
302+
logger.warning(
303+
"Unsupported response_schema type %s for LiteLLM structured outputs.",
304+
type(response_schema),
305+
)
289306
return None
290307

308+
return ResponseTextConfigParam(
309+
format={
310+
"type": "json_schema",
311+
"name": schema_name,
312+
"schema": schema_dict,
313+
"strict": True,
314+
}
315+
)
316+
291317

292-
async def _get_responses_inputs(
318+
def _get_responses_inputs(
293319
llm_request: LlmRequest,
294-
) -> Tuple:
295-
input_params: List[ResponseInputItemParam] = []
320+
) -> Tuple[
321+
Optional[str],
322+
Optional[List[ResponseInputItemParam]],
323+
Optional[List[FunctionToolParam]],
324+
Optional[ResponseTextConfigParam],
325+
Optional[Dict],
326+
]:
327+
# 0. instruction(system prompt)
328+
instruction: Optional[str] = None
329+
if llm_request.config and llm_request.config.system_instruction:
330+
instruction = llm_request.config.system_instruction
331+
# 1. input
332+
input_params: Optional[List[ResponseInputItemParam]] = []
296333
for content in llm_request.contents or []:
297334
# 每个content,代表`一次对话`,这`一次对话`可能有`多个内容`,但不可能有`多个对话`
298335
input_item_or_list = _content_to_input_item(content)
299-
if isinstance(input_item_or_list,list):
336+
if isinstance(input_item_or_list, list):
300337
input_params.extend(input_item_or_list)
301338
elif input_item_or_list:
302339
input_params.append(input_item_or_list)
303340

304-
# 将system_prompt插入到开头
305-
if llm_request.config.system_instruction:
306-
input_params.insert(
307-
0,
308-
EasyInputMessageParam(
309-
role="system",
310-
type="message",
311-
content=[ResponseInputTextParam(
312-
type="input_text",
313-
text=llm_request.config.system_instruction,
314-
)]
315-
)
316-
)
317-
318341
# 2. Convert tool declarations
319342
tools: Optional[List[FunctionToolParam]] = None
320343
if (
@@ -330,9 +353,7 @@ async def _get_responses_inputs(
330353
# 3. Handle output-schema -> `text`
331354
text: Optional[ResponseTextConfigParam] = None
332355
if llm_request.config and llm_request.config.response_schema:
333-
text = _responses_schema_to_text(
334-
llm_request.config.response_schema
335-
)
356+
text = _responses_schema_to_text(llm_request.config.response_schema)
336357

337358
# 4. Extract generation parameters
338359
generation_params: Optional[Dict] = None
@@ -344,28 +365,27 @@ async def _get_responses_inputs(
344365
"stop_sequences": "stop",
345366
}
346367
for key in (
347-
"temperature",
348-
"max_output_tokens",
349-
"top_p",
350-
"top_k",
351-
"stop_sequences",
352-
"presence_penalty",
353-
"frequency_penalty",
368+
"temperature",
369+
"max_output_tokens",
370+
"top_p",
371+
"top_k",
372+
"stop_sequences",
373+
"presence_penalty",
374+
"frequency_penalty",
354375
):
355376
if key in config_dict:
356377
mapped_key = param_mapping.get(key, key)
357378
generation_params[mapped_key] = config_dict[key]
358379

359380
if not generation_params:
360381
generation_params = None
361-
return input_params, tools, text, generation_params
362-
382+
return instruction, input_params, tools, text, generation_params
363383

364384

365385
class ArkLlmClient:
366386
async def aresponse(
367387
self, **kwargs
368-
) -> Union[OpenAITypeResponse, openai.AsyncStream[ResponseStreamEvent]]:
388+
) -> Union[ArkTypeResponse, openai.AsyncStream[ResponseStreamEvent]]:
369389
# 1. Get request params
370390
api_base = kwargs.pop("api_base", DEFAULT_VIDEO_MODEL_API_BASE)
371391
api_key = kwargs.pop("api_key", settings.model.api_key)
@@ -383,9 +403,6 @@ async def aresponse(
383403
class ArkLlm(LiteLlm):
384404
llm_client: ArkLlmClient = Field(default_factory=ArkLlmClient)
385405
_additional_args: Dict[str, Any] = None
386-
transform_handler: CompletionToResponsesAPIHandler = Field(
387-
default_factory=CompletionToResponsesAPIHandler
388-
)
389406

390407
def __init__(self, **kwargs):
391408
super().__init__(**kwargs)
@@ -405,8 +422,8 @@ async def generate_content_async(
405422
self._maybe_append_user_content(llm_request)
406423
# logger.debug(_build_request_log(llm_request))
407424

408-
input_param, tools, text_format, generation_params = _get_responses_inputs(
409-
llm_request
425+
instruction, input_param, tools, text_format, generation_params = (
426+
_get_responses_inputs(llm_request)
410427
)
411428

412429
if "functions" in self._additional_args:
@@ -419,6 +436,7 @@ async def generate_content_async(
419436
previous_response_id = llm_request.cache_metadata.cache_name
420437
responses_args = {
421438
"model": self.model,
439+
"instruction": instruction,
422440
"input": input_param,
423441
"tools": tools,
424442
"text": text_format,
@@ -429,7 +447,7 @@ async def generate_content_async(
429447

430448
if generation_params:
431449
responses_args.update(generation_params)
432-
responses_args = self.transform_handler.transform_request(**responses_args)
450+
responses_args = request_reorganization_by_ark(responses_args)
433451

434452
if stream:
435453
text_format = ""
@@ -558,9 +576,5 @@ async def generate_content_async(
558576

559577
else:
560578
raw_response = await self.llm_client.aresponse(**responses_args)
561-
for (
562-
llm_response
563-
) in self.transform_handler.openai_response_to_generate_content_response(
564-
llm_request, raw_response
565-
):
566-
yield llm_response
579+
llm_response = ark_response_to_generate_content_response(raw_response)
580+
yield llm_response

0 commit comments

Comments
 (0)