Changes from all commits
107 commits
3f86837
update
zhulinJulia24 Jan 5, 2026
566347a
update
WillowsZhu Jan 5, 2026
cf6b616
update
WillowsZhu Jan 5, 2026
30de5d1
update
zhulinJulia24 Jan 6, 2026
008fcf2
Merge branch 'InternLM:main' into refactor_all_configs
zhulinJulia24 Jan 12, 2026
ac67846
update
zhulinJulia24 Jan 12, 2026
a713434
fix quantization
zhulinJulia24 Jan 12, 2026
01600c0
update
zhulinJulia24 Jan 12, 2026
053eb9a
update
zhulinJulia24 Jan 12, 2026
e9c34a9
update
zhulinJulia24 Jan 12, 2026
9c49e17
update
zhulinJulia24 Jan 12, 2026
b28334c
updaet
zhulinJulia24 Jan 12, 2026
f886566
updaste
zhulinJulia24 Jan 13, 2026
b8881f0
update
zhulinJulia24 Jan 13, 2026
c738007
update
zhulinJulia24 Jan 13, 2026
f9fdf60
update
zhulinJulia24 Jan 13, 2026
089f775
update
zhulinJulia24 Jan 13, 2026
2233197
update
zhulinJulia24 Jan 13, 2026
896a039
update
zhulinJulia24 Jan 14, 2026
4e2c256
updsate
zhulinJulia24 Jan 14, 2026
d77db4f
Merge branch 'InternLM:main' into refactor_all_configs
zhulinJulia24 Jan 14, 2026
331f65f
update
zhulinJulia24 Jan 15, 2026
4a574f5
update
zhulinJulia24 Jan 15, 2026
533acab
updaste
zhulinJulia24 Jan 15, 2026
246f7e8
update
zhulinJulia24 Jan 15, 2026
234b3ff
update
zhulinJulia24 Jan 15, 2026
2a84752
updaste
zhulinJulia24 Jan 15, 2026
9e7bd3c
update gpus
zhulinJulia24 Jan 15, 2026
8ee06a2
update
zhulinJulia24 Jan 15, 2026
e95c683
update
zhulinJulia24 Jan 15, 2026
fcd8b7c
updaste
zhulinJulia24 Jan 15, 2026
afa1829
update
WillowsZhu Jan 15, 2026
cf42e17
update
zhulinJulia24 Jan 16, 2026
ff4c4ae
update
zhulinJulia24 Jan 16, 2026
d909d99
update
zhulinJulia24 Jan 16, 2026
ceb6ec1
update
zhulinJulia24 Jan 16, 2026
1763e57
update
zhulinJulia24 Jan 16, 2026
51b8e32
update
zhulinJulia24 Jan 16, 2026
1b47878
update
zhulinJulia24 Jan 16, 2026
3781977
update
zhulinJulia24 Jan 16, 2026
083f1db
update
zhulinJulia24 Jan 16, 2026
8fae59e
update
zhulinJulia24 Jan 16, 2026
72861b8
Merge branch 'main' into refactor_all_configs
zhulinJulia24 Jan 19, 2026
e9a9690
update
zhulinJulia24 Jan 19, 2026
980621f
merge main
zhulinJulia24 Jan 19, 2026
20d5b53
update
zhulinJulia24 Jan 19, 2026
dd03163
update
zhulinJulia24 Jan 19, 2026
4712636
update
zhulinJulia24 Jan 19, 2026
4cea370
update
zhulinJulia24 Jan 19, 2026
9939ff4
update
zhulinJulia24 Jan 19, 2026
2e197d3
update
zhulinJulia24 Jan 19, 2026
0054608
update
zhulinJulia24 Jan 19, 2026
808d8f0
updaste
zhulinJulia24 Jan 19, 2026
bee3710
update
zhulinJulia24 Jan 20, 2026
8308f01
update
zhulinJulia24 Jan 20, 2026
1643f27
update
zhulinJulia24 Jan 20, 2026
923c3cb
update
zhulinJulia24 Jan 20, 2026
dd520af
update
zhulinJulia24 Jan 20, 2026
30fa489
update
zhulinJulia24 Jan 20, 2026
101c9d3
fix model path
zhulinJulia24 Jan 20, 2026
0361fe7
update
zhulinJulia24 Jan 21, 2026
f984093
update
zhulinJulia24 Jan 21, 2026
6efc392
update
zhulinJulia24 Jan 21, 2026
36d3fe5
update
zhulinJulia24 Jan 21, 2026
a89f5a5
update
zhulinJulia24 Jan 21, 2026
016e9a3
Merge branch 'InternLM:main' into refactor_all_configs
zhulinJulia24 Jan 21, 2026
b984a0e
update
zhulinJulia24 Jan 21, 2026
8826d83
update
zhulinJulia24 Jan 21, 2026
077590a
merge main
zhulinJulia24 Jan 21, 2026
6ed5c52
fix default port
zhulinJulia24 Jan 21, 2026
fcf7e32
fix default port
zhulinJulia24 Jan 21, 2026
e1f3992
update
zhulinJulia24 Jan 21, 2026
73c3388
update
zhulinJulia24 Jan 21, 2026
91bbfcf
update
zhulinJulia24 Jan 22, 2026
6d8bf46
update max worker for oc
zhulinJulia24 Jan 22, 2026
2ecb60e
update
zhulinJulia24 Jan 23, 2026
08d5a32
fix default value
zhulinJulia24 Jan 23, 2026
6de2cf5
update
zhulinJulia24 Jan 23, 2026
ff7b1db
Merge branch 'InternLM:main' into refactor_all_configs
zhulinJulia24 Jan 23, 2026
ea2d145
update
zhulinJulia24 Jan 26, 2026
2eb25d7
merge
zhulinJulia24 Jan 26, 2026
a804471
update
zhulinJulia24 Jan 26, 2026
99d95b9
add timestap in log
zhulinJulia24 Jan 26, 2026
64478ab
fix cv2 not in docker issue
zhulinJulia24 Jan 26, 2026
6b535ca
fix error
zhulinJulia24 Jan 26, 2026
a237c53
update
zhulinJulia24 Jan 27, 2026
0a06dc3
update
zhulinJulia24 Jan 27, 2026
78ec35c
update name
zhulinJulia24 Jan 27, 2026
794306b
Merge branch 'InternLM:main' into refactor_all_configs
zhulinJulia24 Jan 27, 2026
df5d448
update
zhulinJulia24 Jan 28, 2026
e5dc921
merge main
zhulinJulia24 Jan 28, 2026
eac686f
update testcase
zhulinJulia24 Jan 28, 2026
a7a7ffe
Merge branch 'InternLM:main' into refactor_all_configs
zhulinJulia24 Jan 28, 2026
ac229c1
Merge branch 'InternLM:main' into refactor_all_configs
zhulinJulia24 Jan 29, 2026
add1300
update
zhulinJulia24 Jan 29, 2026
ca727fc
merge main
zhulinJulia24 Jan 29, 2026
b3e8abd
update
zhulinJulia24 Jan 29, 2026
978ee90
fix fail case and name typo
zhulinJulia24 Jan 29, 2026
bec9f8d
update
zhulinJulia24 Jan 30, 2026
b49c960
Merge branch 'InternLM:main' into refactor_all_configs
zhulinJulia24 Jan 31, 2026
1e5c5c9
Merge branch 'InternLM:main' into refactor_all_configs
zhulinJulia24 Feb 2, 2026
907a3f5
update benchmark name
zhulinJulia24 Feb 2, 2026
b8b4fe6
change numprompts
zhulinJulia24 Feb 2, 2026
84c0938
Merge branch 'InternLM:main' into refactor_all_configs
zhulinJulia24 Feb 3, 2026
15524e4
updaste
zhulinJulia24 Feb 3, 2026
a813a36
update fail case
zhulinJulia24 Feb 4, 2026
5cb1421
update
zhulinJulia24 Feb 5, 2026
24 changes: 8 additions & 16 deletions .github/workflows/api_eval.yml
@@ -38,10 +38,10 @@ env:
   HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
   HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
   ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
-  REPORT_DIR: /nvme/qa_test_models/evaluation-reports/allure_report/${{ github.run_id }}
+  REPORT_DIR: /nvme/qa_test_models/evaluation_report/allure_report/${{ inputs.repo_ref }}_${{ github.run_id }}
   COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
   FAIL_CONFIG: '--lf'
-  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ github.run_id }}
+  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref }}_${{ github.run_id }}
   OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
   OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
   DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL
@@ -50,6 +50,7 @@ env:
   HF_DATASETS_CACHE: /nvme/qa_test_models/hf_datasets
   HF_HUB_OFFLINE: 1
   HF_EVALUATE_OFFLINE: 1
+  RUN_ID: ${{ inputs.repo_ref }}_${{ github.run_id }}
 
 jobs:
   linux-build:
@@ -146,30 +147,20 @@ jobs:
   test_evaluation:
     needs: download_pkgs
     if: ${{ !cancelled() }}
-    runs-on: [self-hosted, test-140]
-    timeout-minutes: 2400
+    runs-on: [self-hosted, linux-a100]
+    timeout-minutes: 7200
     strategy:
       fail-fast: false
       matrix:
         backend: ${{ fromJSON(inputs.backend || '["turbomind", "pytorch"]')}}
         gpu_num: ['gpu_num_1', 'gpu_num_2', 'gpu_num_4', 'gpu_num_8']
-        include:
-          - n: 8
-            gpu_num: gpu_num_1
-          - n: 4
-            gpu_num: gpu_num_2
-          - n: 2
-            gpu_num: gpu_num_4
-          - n: 1
-            gpu_num: gpu_num_8
     container:
       image: openmmlab/lmdeploy:latest-cu12.8
       options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
       volumes:
         - /nvme/github-actions/pip-cache:/root/.cache/pip
         - /nvme/github-actions/packages:/root/packages
         - /nvme/github-actions/resources:/root/resources
-        - /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
         - /nvme/qa_test_models:/nvme/qa_test_models
         - /nvme/huggingface_hub:/nvme/huggingface_hub
         - /mnt/121:/mnt/121
@@ -208,11 +199,12 @@ jobs:
           ln -s /mnt/104/opencompass-data/data ./data
           ln -s /nvme/qa_test_models/resource/nltk_data /usr/share/nltk_data
           execution_mode="${{ github.event.inputs.execution_mode || 'both' }}"
+          ulimit -n 65535
           if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "infer" ]; then
-            pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and infer" -n ${{matrix.n}} --run_id ${{ github.event.inputs.run_id || github.run_id }} --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
+            pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and infer" --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
           fi
           if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "eval" ]; then
-            pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and eval" -n 4 --run_id ${{ github.event.inputs.run_id || github.run_id }} --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
+            pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.gpu_num}} and ${{matrix.backend}} and eval" -n 4 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
           fi
           exit $overall_exit
       - name: Clear workspace
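Note on the dropped matrix `include`: the old mapping paired each `gpu_num_*` marker with an xdist worker count `n` so that workers × GPUs-per-case filled one 8-GPU node; with `-n ${{matrix.n}}` removed from the infer invocation, each matrix cell now runs its infer cases sequentially. A minimal sketch of what one expanded cell executes after this change (hypothetical `backend`/`gpu_num` values; the marker names are assumed to be registered in autotest's pytest configuration):

```yaml
# Sketch only: the (backend=turbomind, gpu_num=gpu_num_2) matrix cell.
- name: Run infer cases for one matrix cell
  run: |
    ulimit -n 65535   # raise the open-file limit, as the new step does
    pytest autotest/evaluate/test_api_evaluate.py \
      -m "gpu_num_2 and turbomind and infer" \
      --alluredir=${{ env.REPORT_DIR }} || overall_exit=$?
    exit ${overall_exit:-0}
```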
18 changes: 9 additions & 9 deletions .github/workflows/api_eval_h800.yml
@@ -39,10 +39,10 @@ env:
   HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
   OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
   ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
-  REPORT_DIR: /nvme/qa_test_models/evaluation-reports/allure_report/${{ github.run_id }}
+  REPORT_DIR: /nvme/qa_test_models/evaluation_report/allure_report/${{ inputs.repo_ref }}_${{ github.run_id }}
   COV_PARAM: --cov /opt/py3/lib/python3.10/site-packages/lmdeploy
   FAIL_CONFIG: '--lf'
-  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ github.run_id }}
+  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref }}_${{ github.run_id }}
   OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
   OFFLINE_REQUIREMENTS: /nvme/qa_test_models/offline_pkg/requirements.txt
   DEEPSEEK_VL: /nvme/qa_test_models/offline_pkg/DeepSeek-VL
@@ -51,6 +51,8 @@ env:
   HF_DATASETS_CACHE: /nvme/qa_test_models/hf_datasets
   HF_HUB_OFFLINE: 1
   HF_EVALUATE_OFFLINE: 1
+  RUN_ID: ${{ inputs.repo_ref }}_${{ github.run_id }}
+  TEST_ENV: h800
 
 jobs:
   linux-build:
@@ -105,7 +107,6 @@ jobs:
         - /nvme/github-actions/packages:/root/packages
         - /nvme/github-actions/resources:/root/resources
         - /nvme/github-actions/opencompass-data:/root/opencompass-data
-        - /nvme/qa_test_models/evaluation-reports:/root/evaluation-reports
         - /nvme/qa_test_models:/nvme/qa_test_models
         - /nvme1/qa_test_models:/nvme1/qa_test_models
         - /nvme2/share:/nvme2/share
@@ -133,7 +134,6 @@ jobs:
         run: |
           python3 -m pip install lmdeploy-*.whl --no-deps
           python3 -m pip install -r requirements/test.txt
-          mv autotest/config-h800.yaml autotest/config.yaml
       - name: Install opencompass
         run: |
           python3 -m pip install opencompass
@@ -152,13 +152,13 @@
           ln -s /nvme/qa_test_models/resource/nltk_data /usr/share/nltk_data
           execution_mode="${{ github.event.inputs.execution_mode || 'both' }}"
           if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "infer" ]; then
-            pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_1 and ${{matrix.backend}} and infer" -n 8 --run_id ${{ github.event.inputs.run_id || github.run_id }} --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
-            pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_2 and ${{matrix.backend}} and infer" -n 4 --run_id ${{ github.event.inputs.run_id || github.run_id }} --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
-            pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_4 and ${{matrix.backend}} and infer" -n 2 --run_id ${{ github.event.inputs.run_id || github.run_id }} --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
-            pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_8 and ${{matrix.backend}} and infer" -n 1 --run_id ${{ github.event.inputs.run_id || github.run_id }} --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
+            pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_1 and ${{matrix.backend}} and infer" -n 8 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
+            pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_2 and ${{matrix.backend}} and infer" -n 4 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
+            pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_4 and ${{matrix.backend}} and infer" -n 2 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
+            pytest autotest/evaluate/test_api_evaluate.py -m "gpu_num_8 and ${{matrix.backend}} and infer" -n 1 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
           fi
           if [ "$execution_mode" = "both" ] || [ "$execution_mode" = "eval" ]; then
-            pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.backend}} and eval" -n 4 --run_id ${{ github.event.inputs.run_id || github.run_id }} --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
+            pytest autotest/evaluate/test_api_evaluate.py -m "${{matrix.backend}} and eval" -n 4 --alluredir=${{env.REPORT_DIR}} || overall_exit=$?
           fi
           exit $overall_exit
       - name: Clear workspace
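The tiered infer calls above keep total GPU demand roughly constant on an 8-GPU H800 node: cases tagged `gpu_num_k` each occupy k GPUs, and the xdist worker count is chosen as 8/k. An equivalent loop form of the same tiering (a sketch only; the workflow deliberately spells out the four calls):

```yaml
- name: Run infer cases tiered by GPU count (loop sketch)
  run: |
    overall_exit=0
    # workers * GPUs-per-case = 8, so each tier fills the node without oversubscribing
    for pair in "gpu_num_1 8" "gpu_num_2 4" "gpu_num_4 2" "gpu_num_8 1"; do
      set -- $pair
      pytest autotest/evaluate/test_api_evaluate.py \
        -m "$1 and ${{ matrix.backend }} and infer" -n $2 \
        --alluredir=${{ env.REPORT_DIR }} || overall_exit=$?
    done
    exit $overall_exit
```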
51 changes: 21 additions & 30 deletions .github/workflows/benchmark.yml
@@ -17,7 +17,12 @@ on:
       required: true
       description: 'Set benchmark type. Default is "["longtext", "throughput", "api_server", "prefixcache"]"'
       type: string
-      default: "['apiserver', 'throughput', 'longtext', 'prefixcache']"
+      default: "['apiserver', 'mllm_apiserver', 'throughput', 'longtext', 'prefixcache']"
+    backend:
+      required: true
+      description: 'Set backend filter. Default is "["turbomind", "pytorch"]"'
+      type: string
+      default: "['turbomind', 'pytorch']"
     offline_mode:
       required: true
       description: 'Whether start a offline mode, if true, you should prepare code and whl package by yourself'
@@ -28,11 +33,12 @@ env:
   HOST_PIP_CACHE_DIR: /nvme/github-actions/pip-cache
   HOST_LOCALTIME: /usr/share/zoneinfo/Asia/Shanghai
   OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
-  REPORT_DIR: /nvme/qa_test_models/benchmark-reports/${{ github.run_id }}
-  ALLURE_REPORT_DIR: /nvme/qa_test_models/benchmark-reports/allure_report/${{ github.run_id }}
-  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ github.run_id }}
+  REPORT_DIR: /nvme/qa_test_models/benchmark_report/${{ inputs.repo_ref }}_${{ github.run_id }}
+  ALLURE_REPORT_DIR: /nvme/qa_test_models/benchmark_report/allure_report/${{ inputs.repo_ref }}_${{ github.run_id }}
+  TEST_CODE_PATH: /nvme/qa_test_models/test_pkg/lmdeploy/${{ inputs.repo_ref }}_${{ github.run_id }}
   OFFLINE_CODE_PATH: /nvme/qa_test_models/offline_pkg/lmdeploy
   ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
+  RUN_ID: ${{ inputs.repo_ref }}_${{ github.run_id }}
 
 jobs:
   linux-build:
@@ -172,9 +178,18 @@ jobs:
         run: |
           python3 -m pip list
           lmdeploy check_env
-      - name: Run other benchmark
+      - name: Run other benchmark - all
+        if: contains(fromJson(github.event.inputs.backend), 'turbomind') && contains(fromJson(github.event.inputs.backend), 'pytorch')
         run: |
+          pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n ${{matrix.n}} -m '${{matrix.gpu_num}} and not pr_test and not function' --alluredir=${{env.ALLURE_REPORT_DIR}}
+      - name: Run other benchmark - turbomind
+        if: contains(fromJson(github.event.inputs.backend), 'turbomind') && !contains(fromJson(github.event.inputs.backend), 'pytorch')
+        run: |
-          pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n ${{matrix.n}} --run_id ${{ github.run_id }} -m '${{matrix.gpu_num}} and not pr_test' --alluredir=${{env.ALLURE_REPORT_DIR}}
+          pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n ${{matrix.n}} -m '${{matrix.gpu_num}} and not pr_test and not function and turbomind' --alluredir=${{env.ALLURE_REPORT_DIR}}
+      - name: Run other benchmark - pytorch
+        if: contains(fromJson(github.event.inputs.backend), 'pytorch') && !contains(fromJson(github.event.inputs.backend), 'turbomind')
+        run: |
+          pytest autotest/benchmark/test_${{matrix.benchmark_type}}_performance.py -n ${{matrix.n}} -m '${{matrix.gpu_num}} and not pr_test and not function and pytorch' --alluredir=${{env.ALLURE_REPORT_DIR}}
       - name: Clear workfile
         if: always()
         run: |
@@ -185,27 +200,3 @@ jobs:
           rm -rf $workdir
           mkdir $workdir
           chmod -R 777 $workdir
-
-
-  get_result_overview:
-    if: always() && !cancelled()
-    needs: [benchmark]
-    timeout-minutes: 5
-    runs-on: [self-hosted, linux-a100]
-    container:
-      image: openmmlab/lmdeploy:latest-cu12.8
-      options: "--gpus=all --ipc=host --user root -e PIP_CACHE_DIR=/root/.cache/pip -e NVIDIA_DISABLE_REQUIRE=1 --pull never"
-      volumes:
-        - /nvme/qa_test_models:/nvme/qa_test_models
-        - /usr/share/zoneinfo/Asia/Shanghai:/etc/localtime:ro
-    steps:
-      - name: Clone repository
-        uses: actions/checkout@v2
-        with:
-          repository: ${{ github.event.inputs.repo_org || 'InternLM/lmdeploy' }}
-          ref: ${{github.event.inputs.repo_ref || 'main'}}
-      - name: Get overview
-        run: |
-          echo "status=done" >> ${{env.REPORT_DIR}}/status.txt
-          pip install pandas fire mmengine
-          python3 .github/scripts/action_tools.py generate_benchmark_report $REPORT_DIR
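The three `Run other benchmark` variants are gated by mutually exclusive expressions: `fromJson` parses the JSON-style `backend` input into an array and `contains` tests membership, so exactly one step executes per job. A minimal illustration of how one guard evaluates (hypothetical step, assuming the input arrives as "['turbomind']"):

```yaml
- name: Gate on backend input (illustration only)
  # With backend == "['turbomind']":
  #   contains(fromJson(github.event.inputs.backend), 'turbomind') -> true
  #   contains(fromJson(github.event.inputs.backend), 'pytorch')   -> false
  # so only this turbomind-only guard is satisfied.
  if: contains(fromJson(github.event.inputs.backend), 'turbomind') && !contains(fromJson(github.event.inputs.backend), 'pytorch')
  run: echo "turbomind-only benchmark path selected"
```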