Skip to content

Commit 8b34d18

Browse files
committed
Merge branch 'main' into feat/skills
2 parents 61649d8 + 918c0f4 commit 8b34d18

File tree

5 files changed

+169
-4
lines changed

5 files changed

+169
-4
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "veadk-python"
3-
version = "0.5.19"
3+
version = "0.5.20"
44
description = "Volcengine agent development kit, integrations with Volcengine cloud services."
55
readme = "README.md"
66
requires-python = ">=3.10"

veadk/agent.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ class Agent(LlmAgent):
154154

155155
enable_ghostchar: bool = False
156156

157+
enable_dataset_gen: bool = False
158+
157159
def model_post_init(self, __context: Any) -> None:
158160
super().model_post_init(None) # for sub_agents init
159161

@@ -312,6 +314,22 @@ def model_post_init(self, __context: Any) -> None:
312314

313315
self.instruction += "Please add a character `< at the beginning of you each text-based response."
314316

317+
if self.enable_dataset_gen:
318+
from veadk.toolkits.dataset_auto_gen_callback import (
319+
dataset_auto_gen_callback,
320+
)
321+
322+
if self.after_agent_callback:
323+
if isinstance(self.after_agent_callback, list):
324+
self.after_agent_callback.append(dataset_auto_gen_callback)
325+
else:
326+
self.after_agent_callback = [
327+
self.after_agent_callback,
328+
dataset_auto_gen_callback,
329+
]
330+
else:
331+
self.after_agent_callback = dataset_auto_gen_callback
332+
315333
logger.info(f"VeADK version: {VERSION}")
316334

317335
logger.info(f"{self.__class__.__name__} `{self.name}` init done.")

veadk/memory/long_term_memory_backends/vikingdb_memory_backend.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,8 @@ def _get_client(self) -> VikingDBMemoryClient:
152152
ak, sk, sts_token = self._get_ak_sk_sts()
153153
if self.cloud_provider.lower() == "byteplus":
154154
host = f"api-knowledgebase.mlp.{self.region}.bytepluses.com"
155-
155+
else:
156+
host = f"api-knowledgebase.mlp.{self.region}.volces.com"
156157
logger.info(f"Cloud provider: {self.cloud_provider.lower()}")
157158
logger.info(f"VikingDBLTMBackend: region={self.region}, host={host}")
158159

@@ -168,7 +169,8 @@ def _get_sdk_client(self) -> VikingMem:
168169
ak, sk, sts_token = self._get_ak_sk_sts()
169170
if self.cloud_provider.lower() == "byteplus":
170171
host = f"api-knowledgebase.mlp.{self.region}.bytepluses.com"
171-
172+
else:
173+
host = f"api-knowledgebase.mlp.{self.region}.volces.com"
172174
logger.info(f"Cloud provider: {self.cloud_provider.lower()}")
173175
logger.info(f"VikingDBLTMBackend: region={self.region}, host={host}")
174176

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import json
16+
import os
17+
import re
18+
from pathlib import Path
19+
from typing import Optional
20+
21+
from google.adk.agents.callback_context import CallbackContext
22+
from google.genai import types
23+
24+
from veadk.utils.logger import get_logger
25+
26+
logger = get_logger(__name__)
27+
28+
# Prompt template for the LLM judge that classifies an agent interaction trace.
# It is rendered with `JUDGE_PROMPT.format(trace=...)`: `{trace}` is the only
# placeholder, and the doubled braces on the final line are str.format escapes
# that produce the literal `{` / `}` of the requested JSON output.
JUDGE_PROMPT = """You are an AI quality evaluator. Analyze the agent interaction trace and classify it.
29+
30+
## Trace Data
31+
{trace}
32+
33+
## Evaluation Dimensions
34+
35+
### 1. Task Completion
36+
- Did the agent understand the user's intent correctly?
37+
- Was the user's request fully addressed?
38+
- Did the agent provide the expected output?
39+
40+
### 2. Tool Usage (if applicable)
41+
- Were the correct tools/functions selected for the task?
42+
- Were the function arguments accurate and complete?
43+
- Was the function response handled properly?
44+
- Did the agent interpret tool results correctly?
45+
46+
### 3. Response Quality
47+
- Is the response accurate and factually correct?
48+
- Is the response complete without missing information?
49+
- Is the response clear and well-structured?
50+
- Does it match the tool/function output when applicable?
51+
52+
### 4. Error Handling
53+
- Were there any errors or exceptions in the trace?
54+
- Did the agent handle edge cases appropriately?
55+
- Were error messages helpful if errors occurred?
56+
57+
### 5. Conversation Flow
58+
- Is the dialogue natural and coherent?
59+
- Did the agent maintain context across turns?
60+
- Were there any unnecessary or redundant steps?
61+
62+
## Classification Criteria
63+
- **good (1)**: Task completed successfully with correct tool usage, accurate response, and smooth conversation flow
64+
- **general (0)**: Normal interaction without notable issues or achievements, routine responses
65+
- **bad (-1)**: Contains errors, incorrect tool usage, wrong/incomplete response, or failed to address user needs
66+
67+
## Output Format (JSON only, no other text)
68+
{{"type": <-1|0|1>, "reason": "<brief explanation covering key evaluation points>"}}"""
69+
70+
71+
async def dataset_auto_gen_callback(
    callback_context: CallbackContext,
) -> Optional[types.Content]:
    """After-agent callback that files the finished session trace into a dataset.

    The session events are serialized to JSON, an LLM judge (prompted with
    ``JUDGE_PROMPT``) classifies the trace as good (1), general (0) or bad (-1),
    and the trace plus the judge's reason is appended as one JSON line to
    ``<cwd>/dataset/<agent name>/{good,general,bad}_case.jsonl``.

    The callback is best-effort: a failure while serializing the trace, calling
    the judge, or writing the file is logged and swallowed so that it never
    propagates into the agent run that triggered it.

    Args:
        callback_context: Callback context of the finished agent run. The agent
            and session are read from its invocation context.

    Returns:
        Always ``None``; the callback never supplies replacement content.
    """
    ctx = callback_context._invocation_context
    agent = ctx.agent
    session = ctx.session

    if not session or not session.events:
        return None

    # Build trace json. `mode="json"` makes pydantic emit JSON-compatible values
    # (e.g. bytes payloads inside content parts), which a plain `model_dump()`
    # leaves as raw `bytes` that `json.dumps` rejects with a TypeError.
    try:
        trace_data = {
            "session_id": session.id,
            "events": [
                {
                    "author": event.author,
                    "content": (
                        event.content.model_dump(mode="json")
                        if event.content
                        else None
                    ),
                }
                for event in session.events
            ],
        }
        trace_json = json.dumps(trace_data, ensure_ascii=False)
    except Exception as exc:
        logger.warning(f"Dataset auto gen failed: {exc}")
        return None

    # Judge using LLM
    try:
        from litellm import acompletion

        model_name = getattr(agent.model, "model", "openai/gpt-4o-mini")
        api_key = getattr(agent, "model_api_key", None) or getattr(
            agent.model, "api_key", None
        )
        api_base = getattr(agent, "model_api_base", None) or getattr(
            agent.model, "api_base", None
        )

        response = await acompletion(
            model=model_name,
            messages=[
                {"role": "user", "content": JUDGE_PROMPT.format(trace=trace_json)}
            ],
            api_key=api_key,
            api_base=api_base,
        )
        # The message content may be None; treat that as "no JSON found".
        raw_content = response.choices[0].message.content or ""

        # Extract the flat JSON object containing the "type" key from the response
        json_match = re.search(r'\{[^{}]*"type"[^{}]*\}', raw_content)
        if not json_match:
            logger.debug("No valid JSON found in LLM response")
            return None
        result = json.loads(json_match.group())
    except Exception as exc:
        logger.warning(f"Dataset auto gen failed: {exc}")
        return None

    # The judge may answer with a quoted number ("1" / "-1"); normalize it so
    # such a verdict is not silently filed as a general case.
    try:
        case_type = int(result.get("type", 0))
    except (TypeError, ValueError):
        case_type = 0

    # Save to file based on type
    if case_type == 1:
        file_name = "good_case.jsonl"
    elif case_type == -1:
        file_name = "bad_case.jsonl"
    else:
        file_name = "general_case.jsonl"
    record = {"trace": trace_data, "reason": result.get("reason", "")}

    output_dir = Path(os.getcwd()) / "dataset" / agent.name
    output_path = output_dir / file_name
    try:
        output_dir.mkdir(parents=True, exist_ok=True)
        with open(output_path, "a", encoding="utf-8") as f:
            f.write(json.dumps(record, ensure_ascii=False) + "\n")
    except OSError as exc:
        # A read-only or full working directory must not break the agent run.
        logger.warning(f"Dataset auto gen failed: {exc}")
        return None

    logger.info(f"Dataset case saved to {output_path}")
    return None

veadk/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,4 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
VERSION = "0.5.19"
15+
VERSION = "0.5.20"

0 commit comments

Comments
 (0)