Skip to content

Commit efac057

Browse files
authored
Update fastapi_server_vlm.py
1 parent 8ce9daa commit efac057

File tree

1 file changed

+5
-5
lines changed

1 file changed

+5
-5
lines changed

src/vlm/fastapi_server_vlm.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -529,7 +529,7 @@ def __init__(self):
529529
self.default_temperature = 0.7
530530
self.default_top_p = 1.0
531531
self.default_top_k = 1
532-
self.default_max_tokens = 50
532+
self.default_max_tokens = 512
533533
self.max_concurrent_requests = 1
534534
self.timeout_seconds = 300
535535
self.rknn_core_num = 3
@@ -1026,7 +1026,7 @@ def process_inference():
10261026
python rkllm_vision_server.py \\
10271027
--encoder_model ../model/vision.rknn \\
10281028
--llm_model ../model/llm.rkllm \\
1029-
--port 8080 --max_concurrent 1 --default_max_tokens 50
1029+
--port 8080 --max_concurrent 1 --default_max_tokens 512
10301030
"""
10311031
)
10321032

@@ -1048,8 +1048,8 @@ def process_inference():
10481048
help='Default top_p parameter (default: 1.0)')
10491049
parser.add_argument('--default_top_k', type=int, default=1,
10501050
help='Default top_k parameter (default: 1)')
1051-
parser.add_argument('--default_max_tokens', type=int, default=50,
1052-
help='Default maximum tokens to generate (default: 50)')
1051+
parser.add_argument('--default_max_tokens', type=int, default=512,
1052+
help='Default maximum tokens to generate (default: 512)')
10531053

10541054
parser.add_argument('--max_concurrent', type=int, default=1,
10551055
help='Maximum concurrent requests (default: 1)')
@@ -1134,4 +1134,4 @@ def process_inference():
11341134
print("\n👋 Server interrupted by user")
11351135
except Exception as e:
11361136
print(f"❌ Server error: {e}")
1137-
sys.exit(1)
1137+
sys.exit(1)

0 commit comments

Comments (0)