@@ -219,7 +219,7 @@ def chat_completions():
219219 global_text = []
220220 global_state = - 1
221221
222- # 构建提示词 - 简化的聊天格式
222+ # 构建提示词
223223 prompt = ""
224224 for msg in messages :
225225 if msg ['role' ] == 'system' :
@@ -243,47 +243,23 @@ def generate_response():
243243 model_thread .start ()
244244
245245 if stream :
246- # 流式响应
246+ # 流式响应 - 直接返回纯文本
247247 model_thread_finished = False
248248 while not model_thread_finished :
249249 if global_text :
250250 chunk = global_text .pop (0 )
251- response_chunk = {
252- "id" : "chatcmpl-123" ,
253- "object" : "chat.completion.chunk" ,
254- "created" : int (time .time ()),
255- "model" : "rkllm-model" ,
256- "choices" : [{
257- "index" : 0 ,
258- "delta" : {"content" : chunk },
259- "finish_reason" : None
260- }]
261- }
262- yield f"data: { json .dumps (response_chunk , ensure_ascii = False )} \n \n "
251+ yield chunk # 直接返回文本内容,不包装为JSON
263252
264253 model_thread .join (timeout = 0.01 )
265254 model_thread_finished = not model_thread .is_alive ()
266255
267256 if global_state == LLMCallState .RKLLM_RUN_FINISH :
268- # 发送结束标记
269- response_chunk = {
270- "id" : "chatcmpl-123" ,
271- "object" : "chat.completion.chunk" ,
272- "created" : int (time .time ()),
273- "model" : "rkllm-model" ,
274- "choices" : [{
275- "index" : 0 ,
276- "delta" : {},
277- "finish_reason" : "stop"
278- }]
279- }
280- yield f"data: { json .dumps (response_chunk , ensure_ascii = False )} \n \n "
281257 break
282258
283- # 发送结束标记
284- yield "data: [DONE]\n \n "
259+ # 添加结束标记
260+ yield "[DONE]"
285261 else :
286- # 非流式响应
262+ # 非流式响应 - 返回JSON
287263 model_thread_finished = False
288264 full_response = ""
289265 while not model_thread_finished :
@@ -318,10 +294,12 @@ def generate_response():
318294 return json .dumps (response , ensure_ascii = False )
319295
320296 if stream :
321- return Response (generate_response (), content_type = 'text/event-stream' )
297+ # 对于流式响应,返回纯文本流
298+ return Response (generate_response (), content_type = 'text/plain; charset=utf-8' )
322299 else :
300+ # 对于非流式响应,返回JSON
323301 response_data = generate_response ()
324- return Response (response_data , content_type = 'application/json' )
302+ return Response (response_data , content_type = 'application/json; charset=utf-8 ' )
325303
326304 finally :
327305 lock .release ()
0 commit comments