Commit b4a0516

add gpu vs npu case

1 parent 1daa44c commit b4a0516

5 files changed: +153 -2 lines

.github/workflows/e2e_test_npu.yaml

Lines changed: 1 addition & 1 deletion
@@ -37,7 +37,7 @@ jobs:
        run: |
          source activate npuci
          unset HTTP_PROXY;unset HTTPS_PROXY;unset http_proxy;unset https_proxy;
-         export DEVICE=npu && pytest autotest/test_all.py::test_all[npu-qwen3-sft-ep8] -m all -n 1 -vv --run_id ${{ github.run_id }}
+         export DEVICE=npu && pytest autotest/test_all.py -m all -n 1 -vv --run_id ${{ github.run_id }}

      - name: Upload Artifacts
        if: ${{ !cancelled() }}
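
This change widens the NPU end-to-end job from a single parametrized node (npu-qwen3-sft-ep8) to every case in autotest/test_all.py carrying the all marker, presumably so the new npu-qwen3-sft-celoss-vs-gpu case below is picked up as well. As a rough illustration of how such a marker and the --run_id option are typically wired in pytest (a sketch only, not the repository's actual conftest.py):

# Illustrative conftest.py sketch; the real test harness may differ.
import pytest

def pytest_addoption(parser):
    # Custom option consumed by the CI command: --run_id ${{ github.run_id }}
    parser.addoption("--run_id", action="store", default=None,
                     help="CI run id used to locate test outputs")

def pytest_configure(config):
    # Register the marker so `-m all` selects the full e2e suite.
    config.addinivalue_line("markers", "all: cases executed in the full e2e run")

@pytest.fixture
def run_id(request):
    return request.config.getoption("--run_id")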

autotest/config-npu.yaml

Lines changed: 23 additions & 1 deletion
@@ -101,7 +101,6 @@ case:
          loss/reduced_llm_loss: 0.000001
          lr: 0
          memory/max_memory_GB: 0.2
-         runtime_info/tgs: 0.05
          runtime_info/text_tokens: 0
        timeout: 10800

@@ -181,3 +180,26 @@ case:
          loss/reduced_llm_loss: 0.000001
          lr: 0
        timeout: 10800
+
+  npu-qwen3-sft-celoss-vs-gpu:
+    -
+      type: sft
+      parameters:
+        config: autotest/config/npu_qwen3_sft_celoss.py
+        output_path: /mnt/hwfile/vc-intern-delivery/qa-llm-cicd/test_output
+      resource:
+        num_nodes: 1
+      envs:
+        - QWEN3_MOE_PATH=/mnt/hwfile/vc-intern-delivery/qa-llm-cicd/qa_test_models/Qwen3-30B-A3B
+        - ALPACA_PATH=/mnt/hwfile/vc-intern-delivery/qa-llm-cicd/xtuner_resource/datasets/alpaca
+        - XTUNER_DETERMINISTIC=true
+        - TORCH_NPU_USE_HCCL=1
+        - PIP_INDEX_URL=http://pkg.pjlab.org.cn/repository/pypi-tsinghua/simple
+        - PIP_TRUSTED_HOST=pkg.pjlab.org.cn
+      assert_info:
+        base_metric: npu-qwen3-sft-celoss-vs-gpu/812c1021/tracker.jsonl
+        check_metrics:
+          grad_norm: 0.02
+          loss/reduced_llm_loss: 0.02
+          lr: 0
+      timeout: 10800
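
The new NPU case differs from the existing self-comparison cases in two ways: its baseline tracker.jsonl (npu-qwen3-sft-celoss-vs-gpu/812c1021/tracker.jsonl) is, judging by the case name and commit message, produced by the matching GPU run, and the thresholds on grad_norm and loss/reduced_llm_loss are relaxed to 0.02, since bit-identical results cannot be expected across different accelerators even with XTUNER_DETERMINISTIC=true. The sketch below shows the kind of check these thresholds imply; it is an illustration only, not the repository's autotest implementation, and the relative-difference formula is an assumption.

# Hypothetical tracker.jsonl comparison under per-metric tolerances.
# Assumes each line of tracker.jsonl is a JSON object of step metrics;
# the actual autotest harness may aggregate or compare differently.
import json

def check_against_baseline(run_path, base_path, tolerances):
    with open(run_path) as f_run, open(base_path) as f_base:
        for run_line, base_line in zip(f_run, f_base):
            run, base = json.loads(run_line), json.loads(base_line)
            for metric, tol in tolerances.items():
                if metric not in run or metric not in base:
                    continue
                ref = abs(base[metric]) or 1.0  # avoid division by zero
                rel_diff = abs(run[metric] - base[metric]) / ref
                assert rel_diff <= tol, f"{metric}: {rel_diff:.6f} > {tol}"

# Thresholds taken from the npu-qwen3-sft-celoss-vs-gpu case above.
tolerances = {"grad_norm": 0.02, "loss/reduced_llm_loss": 0.02, "lr": 0}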

autotest/config.yaml

Lines changed: 21 additions & 0 deletions
@@ -186,5 +186,26 @@ case:
          grad_norm: 1
          loss/reduced_llm_loss: 0.02
          lr: 0
+       timeout: 10800
+
+  qwen3-sft-celoss:
+    -
+      type: sft
+      parameters:
+        config: autotest/config/qwen3_sft_celoss.py
+        output_path: /mnt/shared-storage-user/llmrazor-share/qa-llm-cicd/test_output
+      resource:
+      envs:
+        - QWEN3_MOE_PATH=/mnt/shared-storage-user/llmrazor-share/model/Qwen3-30B-A3B
+        - ALPACA_PATH=/mnt/shared-storage-user/llmrazor-share/data/alpaca
+        - XTUNER_DETERMINISTIC=true
+      assert_info:
+        base_metric: qwen3-sft-celoss/812c1021/tracker.jsonl
+        check_metrics:
+          grad_norm: 0.000001
+          loss/reduced_llm_loss: 0.000001
+          lr: 0
+          memory/max_memory_GB: 0.2
          runtime_info/tgs: 0.05
+          runtime_info/text_tokens: 0
        timeout: 10800
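
The companion GPU case qwen3-sft-celoss runs the identical training recipe and is checked against its own GPU baseline with much tighter thresholds (1e-6 on grad_norm and loss) plus memory, tgs, and token-count checks. Both cases set XTUNER_DETERMINISTIC=true; the sketch below shows the kind of reproducibility setup such a flag is commonly expected to gate in PyTorch. It is an assumption for illustration, not XTuner's actual implementation.

# Illustrative only: what a flag like XTUNER_DETERMINISTIC=true typically
# enables. XTuner's real behaviour may differ.
import os
import random

import numpy as np
import torch

if os.environ.get("XTUNER_DETERMINISTIC", "false").lower() == "true":
    seed = 0  # matches seed=0 in both TrainerConfig files below
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.use_deterministic_algorithms(True, warn_only=True)
    # Required by some CUDA ops when deterministic algorithms are enforced.
    os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")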

autotest/config/npu_qwen3_sft_celoss.py

Lines changed: 55 additions & 0 deletions
@@ -0,0 +1,55 @@
import os

from xtuner.v1.config import (
    AdamWConfig,
    FSDPConfig,
    LRConfig,
)
from xtuner.v1.datasets import FTDPTokenizeFnConfig
from xtuner.v1.datasets.config import DataloaderConfig, DatasetConfig
from xtuner.v1.loss.ce_loss import CELossConfig
from xtuner.v1.model.moe.qwen3 import Qwen3MoE30BA3Config
from xtuner.v1.train import TrainerConfig


QWEN3_MOE_PATH = os.environ["QWEN3_MOE_PATH"]
ALPACA_PATH = os.environ["ALPACA_PATH"]


moe_cfg = Qwen3MoE30BA3Config()
optim_cfg = AdamWConfig(lr=6e-05)
lr_cfg = LRConfig(lr_type="cosine", lr_min=1e-6)
fsdp_cfg = FSDPConfig(
    torch_compile=False,
    cpu_offload=False,
    ep_size=moe_cfg.ep_size,
)

dataset_config = [
    {
        "dataset": DatasetConfig(name="alpaca", anno_path=ALPACA_PATH, sample_ratio=1.0),
        "tokenize_fn": FTDPTokenizeFnConfig(max_length=16384),
    },
]

dataloader_config = DataloaderConfig(pack_max_length=16384)

loss_cfg = CELossConfig(mode="chunk", chunk_size=1024)  # CELossConfig()


trainer = TrainerConfig(
    load_from=QWEN3_MOE_PATH,
    model_cfg=moe_cfg,
    optim_cfg=optim_cfg,
    fsdp_cfg=fsdp_cfg,
    dataset_cfg=dataset_config,
    dataloader_cfg=dataloader_config,
    lr_cfg=lr_cfg,
    loss_cfg=loss_cfg,
    tokenizer_path=QWEN3_MOE_PATH,
    global_batch_size=16,
    total_epoch=1,
    work_dir=f"/mnt/hwfile/vc-intern-delivery/qa-llm-cicd/test_output/{os.environ['GITHUB_RUN_ID']}/npu-qwen3-sft-celoss-vs-gpu/sft",
    seed=0,
    dist_backend="npu:hccl",
)
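
This NPU config mirrors the GPU config that follows; the only intended differences are the work_dir and dist_backend="npu:hccl" (the GPU file omits dist_backend, so the trainer's default backend applies there). A hypothetical helper, not part of this commit, showing how the same choice could be driven by the DEVICE variable the workflow exports:

# Hypothetical helper: derive the dist backend from the DEVICE variable
# exported by the CI workflow (DEVICE=npu). Not part of the commit, which
# hard-codes dist_backend="npu:hccl" in the NPU config instead.
import os

def pick_dist_backend():
    device = os.environ.get("DEVICE", "gpu").lower()
    if device == "npu":
        return "npu:hccl"  # HCCL collectives for Ascend NPUs, as in the config above
    return None            # unset -> assume the trainer falls back to its default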

autotest/config/qwen3_sft_celoss.py

Lines changed: 53 additions & 0 deletions
@@ -0,0 +1,53 @@
import os

from xtuner.v1.config import (
    AdamWConfig,
    FSDPConfig,
    LRConfig,
)
from xtuner.v1.datasets import FTDPTokenizeFnConfig
from xtuner.v1.datasets.config import DataloaderConfig, DatasetConfig
from xtuner.v1.loss.ce_loss import CELossConfig
from xtuner.v1.model.moe.qwen3 import Qwen3MoE30BA3Config
from xtuner.v1.train import TrainerConfig


QWEN3_MOE_PATH = os.environ["QWEN3_MOE_PATH"]
ALPACA_PATH = os.environ["ALPACA_PATH"]


moe_cfg = Qwen3MoE30BA3Config()
optim_cfg = AdamWConfig(lr=6e-05)
lr_cfg = LRConfig(lr_type="cosine", lr_min=1e-6)
fsdp_cfg = FSDPConfig(
    torch_compile=False,
    cpu_offload=False,
    ep_size=moe_cfg.ep_size,
)

dataset_config = [
    {
        "dataset": DatasetConfig(name="alpaca", anno_path=ALPACA_PATH, sample_ratio=1.0),
        "tokenize_fn": FTDPTokenizeFnConfig(max_length=16384),
    },
]

dataloader_config = DataloaderConfig(pack_max_length=16384)

loss_cfg = CELossConfig(mode="chunk", chunk_size=1024)

trainer = TrainerConfig(
    load_from=QWEN3_MOE_PATH,
    model_cfg=moe_cfg,
    optim_cfg=optim_cfg,
    fsdp_cfg=fsdp_cfg,
    dataset_cfg=dataset_config,
    dataloader_cfg=dataloader_config,
    lr_cfg=lr_cfg,
    loss_cfg=loss_cfg,
    tokenizer_path=QWEN3_MOE_PATH,
    global_batch_size=16,
    total_epoch=1,
    work_dir=f"/mnt/shared-storage-user/llmrazor-share/qa-llm-cicd/test_output/{os.environ['GITHUB_RUN_ID']}/qwen3-sft-celoss/sft",
    seed=0,
)
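
The GPU file uses the same recipe as the NPU one (same model, optimizer, LR schedule, packing length, chunked CE loss, global batch size, and seed), which is what makes the relaxed 0.02 cross-device thresholds meaningful. A quick, purely illustrative way to verify the two files only diverge where intended (assumes it is run from the repository root):

# Hypothetical check, not part of the test suite: print only the lines on
# which the GPU and NPU config files differ.
import difflib
from pathlib import Path

gpu_cfg = Path("autotest/config/qwen3_sft_celoss.py").read_text().splitlines()
npu_cfg = Path("autotest/config/npu_qwen3_sft_celoss.py").read_text().splitlines()

for line in difflib.unified_diff(gpu_cfg, npu_cfg, "gpu", "npu", lineterm=""):
    if line.startswith(("+", "-")) and not line.startswith(("+++", "---")):
        print(line)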
