Skip to content

Commit 4390779

Browse files
committed
update: api format
1 parent c7801ac commit 4390779

File tree

1 file changed

+10
-32
lines changed

1 file changed

+10
-32
lines changed

src/flask_server_llm.py

Lines changed: 10 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -219,7 +219,7 @@ def chat_completions():
219219
global_text = []
220220
global_state = -1
221221

222-
# 构建提示词 - 简化的聊天格式
222+
# 构建提示词
223223
prompt = ""
224224
for msg in messages:
225225
if msg['role'] == 'system':
@@ -243,47 +243,23 @@ def generate_response():
243243
model_thread.start()
244244

245245
if stream:
246-
# 流式响应
246+
# 流式响应 - 直接返回纯文本
247247
model_thread_finished = False
248248
while not model_thread_finished:
249249
if global_text:
250250
chunk = global_text.pop(0)
251-
response_chunk = {
252-
"id": "chatcmpl-123",
253-
"object": "chat.completion.chunk",
254-
"created": int(time.time()),
255-
"model": "rkllm-model",
256-
"choices": [{
257-
"index": 0,
258-
"delta": {"content": chunk},
259-
"finish_reason": None
260-
}]
261-
}
262-
yield f"data: {json.dumps(response_chunk, ensure_ascii=False)}\n\n"
251+
yield chunk # 直接返回文本内容,不包装为JSON
263252

264253
model_thread.join(timeout=0.01)
265254
model_thread_finished = not model_thread.is_alive()
266255

267256
if global_state == LLMCallState.RKLLM_RUN_FINISH:
268-
# 发送结束标记
269-
response_chunk = {
270-
"id": "chatcmpl-123",
271-
"object": "chat.completion.chunk",
272-
"created": int(time.time()),
273-
"model": "rkllm-model",
274-
"choices": [{
275-
"index": 0,
276-
"delta": {},
277-
"finish_reason": "stop"
278-
}]
279-
}
280-
yield f"data: {json.dumps(response_chunk, ensure_ascii=False)}\n\n"
281257
break
282258

283-
# 发送结束标记
284-
yield "data: [DONE]\n\n"
259+
# 添加结束标记
260+
yield "[DONE]"
285261
else:
286-
# 非流式响应
262+
# 非流式响应 - 返回JSON
287263
model_thread_finished = False
288264
full_response = ""
289265
while not model_thread_finished:
@@ -318,10 +294,12 @@ def generate_response():
318294
return json.dumps(response, ensure_ascii=False)
319295

320296
if stream:
321-
return Response(generate_response(), content_type='text/event-stream')
297+
# 对于流式响应,返回纯文本流
298+
return Response(generate_response(), content_type='text/plain; charset=utf-8')
322299
else:
300+
# 对于非流式响应,返回JSON
323301
response_data = generate_response()
324-
return Response(response_data, content_type='application/json')
302+
return Response(response_data, content_type='application/json; charset=utf-8')
325303

326304
finally:
327305
lock.release()

0 commit comments

Comments
 (0)