@@ -529,7 +529,7 @@ def __init__(self):
         self.default_temperature = 0.7
         self.default_top_p = 1.0
         self.default_top_k = 1
-        self.default_max_tokens = 50
+        self.default_max_tokens = 512
         self.max_concurrent_requests = 1
         self.timeout_seconds = 300
         self.rknn_core_num = 3
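For context, here is a minimal sketch of how this class-level default and the --default_max_tokens CLI flag (changed in the hunks below) presumably interact. The wiring is hypothetical, and ServerConfig is a stand-in name; only the flag name, the attribute name, and the 512 default come from the diff itself.

import argparse

# Hedged sketch: ServerConfig is a hypothetical stand-in for the class whose
# __init__ is patched above; the real plumbing in rkllm_vision_server.py may differ.
class ServerConfig:
    def __init__(self):
        self.default_max_tokens = 512  # new in-code default, was 50

parser = argparse.ArgumentParser()
parser.add_argument('--default_max_tokens', type=int, default=512,
                    help='Default maximum tokens to generate (default: 512)')
args = parser.parse_args()

config = ServerConfig()
config.default_max_tokens = args.default_max_tokens  # CLI flag wins over the class default

Keeping the class default and the argparse default equal, as this commit does, means the server behaves the same whether or not the flag is passed.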
@@ -1026,7 +1026,7 @@ def process_inference():
     python rkllm_vision_server.py \\
         --encoder_model ../model/vision.rknn \\
         --llm_model ../model/llm.rkllm \\
-        --port 8080 --max_concurrent 1 --default_max_tokens 50
+        --port 8080 --max_concurrent 1 --default_max_tokens 512
     """
     )
 
@@ -1048,8 +1048,8 @@ def process_inference():
                         help='Default top_p parameter (default: 1.0)')
     parser.add_argument('--default_top_k', type=int, default=1,
                         help='Default top_k parameter (default: 1)')
-    parser.add_argument('--default_max_tokens', type=int, default=50,
-                        help='Default maximum tokens to generate (default: 50)')
+    parser.add_argument('--default_max_tokens', type=int, default=512,
+                        help='Default maximum tokens to generate (default: 512)')
 
     parser.add_argument('--max_concurrent', type=int, default=1,
                         help='Maximum concurrent requests (default: 1)')
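Callers that need a different token budget for a single request should not have to restart the server with a new default. Assuming the inference endpoint accepts a max_tokens field in its JSON body (both the /inference route and the field name are hypothetical; neither appears in this diff, only the port and the 512-token default do), a per-request override could look like:

import json
import urllib.request

# Hypothetical request: '/inference' and 'max_tokens' are illustrative
# assumptions, not confirmed API details from this commit.
payload = json.dumps({
    "prompt": "Describe this image.",
    "max_tokens": 1024,  # per-request override; omit to fall back to the 512 default
}).encode("utf-8")

req = urllib.request.Request(
    "http://localhost:8080/inference",
    data=payload,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req, timeout=300) as resp:  # 300 s matches timeout_seconds
    print(resp.read().decode("utf-8"))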
@@ -1134,4 +1134,4 @@ def process_inference():
         print("\n👋 Server interrupted by user")
     except Exception as e:
         print(f"❌ Server error: {e}")
-        sys.exit(1)
+        sys.exit(1)