Skip to content

Commit 7c8d16f

Browse files
authored
Merge branch 'main' into support_ds_eager
2 parents 1d3325e + 5a23c09 commit 7c8d16f

File tree

239 files changed

+10257
-9714
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the searchbox below for content that may be hidden.

239 files changed

+10257
-9714
lines changed

.github/workflows/cuda12.8-whl-release.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
linux-build:
1414
strategy:
1515
matrix:
16-
pyver: [py39, py310, py311, py312, py313]
16+
pyver: [py310, py311, py312, py313]
1717
runs-on: ubuntu-latest
1818
env:
1919
PYTHON_VERSION: ${{ matrix.pyver }}
@@ -56,7 +56,7 @@ jobs:
5656
windows-build:
5757
strategy:
5858
matrix:
59-
pyver: ['3.9', '3.10', '3.11', '3.12', '3.13']
59+
pyver: ['3.10', '3.11', '3.12', '3.13']
6060
runs-on: windows-latest
6161
steps:
6262
- name: Set git for windows

.github/workflows/unit-test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
timeout-minutes: 4320 # 72hours
3737
container:
3838
image: openmmlab/lmdeploy:dev-cu12.8
39-
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e CUDA_VISIBLE_DEVICES=2,3 --pull never"
39+
options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e CUDA_VISIBLE_DEVICES=2,3 -e HF_HOME=/root/.cache/huggingface --pull never"
4040
volumes:
4141
- /nvme/share_data/github-actions/pip-cache:/root/.cache/pip
4242
- /nvme/share_data/github-actions/hf_home:/root/.cache/huggingface

CMakeLists.txt

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,11 +71,9 @@ FetchContent_MakeAvailable(repo-cutlass)
7171
FetchContent_Declare(
7272
yaml-cpp
7373
GIT_REPOSITORY https://github.com/jbeder/yaml-cpp.git
74-
GIT_TAG 0.8.0
74+
GIT_TAG 65c1c270dbe7eec37b2df2531d7497c4eea79aee
7575
GIT_PROGRESS TRUE
7676
USES_TERMINAL_DOWNLOAD TRUE
77-
PATCH_COMMAND git apply ${CMAKE_CURRENT_SOURCE_DIR}/cmake/yaml-cpp_cmake_policy.patch
78-
UPDATE_DISCONNECTED 1
7977
)
8078
set(YAML_BUILD_SHARED_LIBS OFF CACHE BOOL "Build static library of yaml-cpp")
8179
FetchContent_MakeAvailable(yaml-cpp)
@@ -87,7 +85,6 @@ FetchContent_Declare(
8785
GIT_SUBMODULES "3rdparty/dlpack"
8886
GIT_PROGRESS TRUE
8987
USES_TERMINAL_DOWNLOAD TRUE
90-
UPDATE_DISCONNECTED 1
9188
)
9289

9390
FetchContent_GetProperties(xgrammar)
@@ -110,6 +107,7 @@ endif()
110107

111108
# the environment variable
112109
# ASAN_OPTIONS=protect_shadow_gap=0,intercept_tls_get_addr=0
110+
# LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libasan.so.6:/usr/lib/x86_64-linux-gnu/libstdc++.so.6
113111
# must be set at runtime
114112
# https://github.com/google/sanitizers/issues/1322
115113
if (LMDEPLOY_ASAN_ENABLE)
@@ -333,6 +331,8 @@ if (MSVC)
333331
CMAKE_CUDA_FLAGS_RELWITHDEBINFO)
334332
string(REGEX REPLACE "-Wall" " /W0 " ${flag_var} "${${flag_var}}")
335333
endforeach()
334+
# avoid min/max macro in "windows.h" conflict with std::min/std::max
335+
add_definitions(-DNOMINMAX=1)
336336
endif()
337337

338338
include_directories(

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ They differ in the types of supported models and the inference data type. Please
203203

204204
## Installation
205205

206-
It is recommended installing lmdeploy using pip in a conda environment (python 3.9 - 3.13):
206+
It is recommended to install lmdeploy using pip in a conda environment (python 3.10 - 3.13):
207207

208208
```shell
209209
conda create -n lmdeploy python=3.10 -y

README_ja.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ LMDeployは、[TurboMind](./docs/en/inference/turbomind.md)および[PyTorch](./
189189

190190
## インストール
191191

192-
クリーンなconda環境(Python 3.9 - 3.12)でlmdeployをインストールすることをお勧めします。
192+
クリーンなconda環境(Python 3.10 - 3.13)でlmdeployをインストールすることをお勧めします。
193193

194194
```shell
195195
conda create -n lmdeploy python=3.10 -y

autotest/config.yaml

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,6 @@ pytorch_chat_model:
117117
- meta-llama/Llama-4-Scout-17B-16E-Instruct
118118
- meta-llama/Llama-3.2-1B-Instruct
119119
- meta-llama/Llama-3.2-3B-Instruct
120-
- meta-llama/Llama-3.2-11B-Vision-Instruct
121120
- meta-llama/Meta-Llama-3-1-8B-Instruct
122121
- meta-llama/Meta-Llama-3-1-70B-Instruct
123122
- meta-llama/Meta-Llama-3-8B-Instruct
@@ -219,7 +218,6 @@ turbomind_vl_model:
219218

220219
pytorch_vl_model:
221220
tp:
222-
- meta-llama/Llama-3.2-11B-Vision-Instruct
223221
- internlm/Intern-S1
224222
- internlm/Intern-S1-mini
225223
- OpenGVLab/InternVL2_5-26B-MPO
@@ -244,7 +242,7 @@ pytorch_vl_model:
244242
- Qwen/Qwen2.5-VL-7B-Instruct
245243
- Qwen/Qwen2.5-VL-32B-Instruct
246244
- THUDM/cogvlm-chat-hf
247-
- THUDM/cogvlm2-llama3-chinese-chat-19B
245+
# - THUDM/cogvlm2-llama3-chinese-chat-19B # 'HFChatTemplate' object has no attribute 'eoa'
248246
- THUDM/glm-4v-9b
249247
- microsoft/Phi-3-vision-128k-instruct
250248
- microsoft/Phi-3.5-vision-instruct

benchmark/profile_pipeline_api.py

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ def parse_args():
275275
ArgumentHelper.num_tokens_per_iter(tb_group)
276276
ArgumentHelper.max_prefill_iters(tb_group)
277277
ArgumentHelper.communicator(tb_group)
278+
ArgumentHelper.async_(tb_group)
278279

279280
args = parser.parse_args()
280281
return args
@@ -285,19 +286,19 @@ def main():
285286
random.seed(args.seed)
286287
os.environ['TM_LOG_LEVEL'] = args.log_level
287288
if args.backend == 'turbomind':
288-
engine_config = TurbomindEngineConfig(
289-
max_batch_size=args.concurrency,
290-
tp=args.tp,
291-
cache_max_entry_count=args.cache_max_entry_count,
292-
session_len=args.session_len,
293-
cache_block_seq_len=args.cache_block_seq_len,
294-
model_format=args.model_format,
295-
quant_policy=args.quant_policy,
296-
num_tokens_per_iter=args.num_tokens_per_iter,
297-
max_prefill_iters=args.max_prefill_iters,
298-
enable_prefix_caching=args.enable_prefix_caching,
299-
communicator=args.communicator,
300-
)
289+
engine_config = TurbomindEngineConfig(max_batch_size=args.concurrency,
290+
tp=args.tp,
291+
cache_max_entry_count=args.cache_max_entry_count,
292+
session_len=args.session_len,
293+
cache_block_seq_len=args.cache_block_seq_len,
294+
model_format=args.model_format,
295+
quant_policy=args.quant_policy,
296+
num_tokens_per_iter=args.num_tokens_per_iter,
297+
max_prefill_iters=args.max_prefill_iters,
298+
enable_prefix_caching=args.enable_prefix_caching,
299+
communicator=args.communicator,
300+
enable_metrics=False,
301+
async_=args.async_)
301302
elif args.backend == 'pytorch':
302303
engine_config = PytorchEngineConfig(
303304
cache_max_entry_count=args.cache_max_entry_count,

benchmark/profile_throughput.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -327,6 +327,7 @@ def parse_args():
327327
ArgumentHelper.model_format(tb_group, default='hf')
328328
ArgumentHelper.num_tokens_per_iter(tb_group)
329329
ArgumentHelper.max_prefill_iters(tb_group)
330+
ArgumentHelper.async_(tb_group)
330331
ArgumentHelper.communicator(tb_group)
331332

332333
args = parser.parse_args()
@@ -348,6 +349,7 @@ def main():
348349
quant_policy=args.quant_policy,
349350
num_tokens_per_iter=args.num_tokens_per_iter,
350351
max_prefill_iters=args.max_prefill_iters,
352+
async_=args.async_,
351353
enable_prefix_caching=args.enable_prefix_caching,
352354
dtype=args.dtype,
353355
communicator=args.communicator,

builder/manywheel/Dockerfile_2014

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ RUN bash /tmp/install_conda.sh && rm /tmp/install_conda.sh
2929
RUN /opt/conda/bin/conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/main && \
3030
/opt/conda/bin/conda tos accept --override-channels --channel https://repo.anaconda.com/pkgs/r
3131

32-
RUN PY_VERSIONS=(3.9 3.10 3.11 3.12 3.13) && \
32+
RUN PY_VERSIONS=(3.10 3.11 3.12 3.13) && \
3333
for pyver in "${PY_VERSIONS[@]}"; do \
3434
/opt/conda/bin/conda create -n py${pyver//./} python=${pyver} -yq && \
3535
/opt/conda/envs/py${pyver//./}/bin/pip install -i 'https://mirrors.aliyun.com/pypi/simple/' --no-cache-dir pybind11; \

builder/manywheel/build_all_wheel.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ PLAT_NAME=manylinux2014_x86_64
1010
for cuver in ${CUDA_VER}; do
1111
DOCKER_TAG=cuda${cuver}
1212
OUTPUT_FOLDER=cuda${cuver}_dist
13-
for pyver in py39 py310 py311 py312 py313; do
13+
for pyver in py310 py311 py312 py313; do
1414
bash ${TOPDIR}/manywheel/build_wheel.sh ${pyver} ${PLAT_NAME} ${DOCKER_TAG} ${OUTPUT_FOLDER} \
1515
|& tee ${PLAT_NAME}.${pyver}.cuda${cuver}.log.txt
1616
done

0 commit comments

Comments
 (0)