# llama3-chat/ci.yml (codesphere-cloud/llama3-chat, main branch)
# Prepare stage: fetch llama.cpp, install CMake, build the binaries and
# download the model file used by the run stage.
prepare:
  steps:
    # Update the checkout when it already exists, otherwise clone it.
    # The two git commands are alternatives and must not be joined by a
    # pipe: a pipe starts both at once, and `cd ... ; git pull` would
    # still pull in the parent directory when `cd` fails.
    - name: Clone repository
      command: >-
        if [ -d llama.cpp/.git ]; then
        (cd llama.cpp && git pull);
        else
        git clone https://github.com/ggerganov/llama.cpp.git;
        fi
    - name: Install Cmake
      command: nix-env -iA nixpkgs.cmakeMinimal
    # A set NV_LIBCUBLAS_VERSION selects the CUDA build; otherwise a CPU
    # build is made (presumably the variable is exported by CUDA-enabled
    # images - confirm against the workspace setup).
    # libcurl support is switched off in both variants: the model is
    # fetched by the "Download model" step, not by llama-server.
    - name: Build Llama Cpp
      command: >-
        cd llama.cpp &&
        if [ -v NV_LIBCUBLAS_VERSION ]; then
        echo 'Make for gpu' &&
        cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=OFF &&
        cmake --build build --config Release;
        else
        echo 'Make for cpu' &&
        cmake -B build -DLLAMA_CURL=OFF &&
        cmake --build build --config Release -j 8;
        fi
    # Skip the download when the model file is already present.
    # The file is written to a ".part" name (resumable via -c) and renamed
    # only after wget succeeds, so an interrupted download is never
    # mistaken for a complete model on the next run.
    - name: Download model
      command: >-
        [ -f /home/user/app/llama.cpp/models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf ]
        || ( wget -c
        -O /home/user/app/llama.cpp/models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf.part
        https://huggingface.co/lmstudio-community/Meta-Llama-3.1-8B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
        && mv
        /home/user/app/llama.cpp/models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf.part
        /home/user/app/llama.cpp/models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf )
# Test stage: intentionally empty - no test steps are defined.
test:
  steps: []
# Run stage: start llama-server from the llama.cpp checkout on
# 0.0.0.0:3000 with the Meta-Llama-3.1-8B-Instruct Q4_K_M model.
# When NV_LIBCUBLAS_VERSION is set the server is started with -ngl 35,
# otherwise with -t 8.
run:
  steps:
    - name: Run
      command: >-
        cd llama.cpp &&
        if [ -v NV_LIBCUBLAS_VERSION ]; then
        echo 'Starting gpu server' &&
        ./build/bin/llama-server
        -m ./models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
        -c 30000 --port 3000 --host 0.0.0.0 -ngl 35;
        else
        echo 'Starting cpu server' &&  ./build/bin/llama-server
        -m ./models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf
        -c 30000 --port 3000 --host 0.0.0.0 -t 8;
        fi