Skip to content

Commit 0d34ca6

Browse files
[feat] upgrade torchrec to 1.0.0 (#19)
1 parent 55d2874 commit 0d34ca6

File tree

14 files changed

+74
-35
lines changed

14 files changed

+74
-35
lines changed

.github/workflows/codestyle_ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
ci-test:
1010
runs-on: tzrec-codestyle-runner
1111
container:
12-
image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:0.5
12+
image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:0.6
1313
steps:
1414
- name: FetchCommit ${{ github.event.pull_request.head.sha }}
1515
uses: actions/checkout@v2

.github/workflows/pytyping_ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
ci-test:
1010
runs-on: tzrec-codestyle-runner
1111
container:
12-
image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:0.5
12+
image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:0.6
1313
steps:
1414
- name: FetchCommit ${{ github.event.pull_request.head.sha }}
1515
uses: actions/checkout@v2

.github/workflows/unittest_ci.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
ci-test:
1010
runs-on: tzrec-runner
1111
container:
12-
image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:0.5
12+
image: mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:0.6
1313
options: --gpus all --ipc host
1414
steps:
1515
- name: FetchCommit ${{ github.event.pull_request.head.sha }}

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,13 @@ repos:
66
files: \.py$
77
args: ["--license-filepath", "data/.license_header.txt", "--use-current-year"]
88
- repo: https://github.com/astral-sh/ruff-pre-commit
9-
rev: v0.4.10
9+
rev: v0.7.1
1010
hooks:
1111
- id: ruff
1212
args: [ --fix ]
1313
- id: ruff-format
1414
- repo: https://github.com/pre-commit/pre-commit-hooks
15-
rev: v4.6.0
15+
rev: v5.0.0
1616
hooks:
1717
- id: trailing-whitespace
1818
- id: check-yaml
@@ -29,7 +29,7 @@ repos:
2929
- id: codespell
3030
args: ["--skip", "*.json"]
3131
- repo: https://github.com/executablebooks/mdformat
32-
rev: 0.7.17
32+
rev: 0.7.18
3333
hooks:
3434
- id: mdformat
3535
additional_dependencies:

docker/Dockerfile

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,44 @@
1-
FROM pytorch/pytorch:2.4.0-cuda12.1-cudnn9-devel
1+
FROM ubuntu:22.04
22

33
RUN sed -i "s@http://archive.ubuntu.com@http://mirrors.aliyun.com@g" /etc/apt/sources.list && \
44
sed -i "s@http://security.ubuntu.com@http://mirrors.aliyun.com@g" /etc/apt/sources.list && \
55
sed -i "s@http://ports.ubuntu.com@http://mirrors.aliyun.com@g" /etc/apt/sources.list && \
66
apt-get update && \
77
apt-get upgrade -y && \
8-
apt-get install -y git vim watchman wget
8+
apt-get install -y --no-install-recommends \
9+
build-essential ca-certificates \
10+
ccache cmake gcc git vim watchman wget curl && \
11+
rm -rf /var/lib/apt/lists/*
912

1013
RUN wget https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/libidn11_1.33-2.2ubuntu2_amd64.deb && \
11-
apt-get install ./libidn11_1.33-2.2ubuntu2_amd64.deb
14+
apt-get install ./libidn11_1.33-2.2ubuntu2_amd64.deb && rm libidn11_1.33-2.2ubuntu2_amd64.deb
1215

1316
ADD pip.conf /root/.config/pip/pip.conf
17+
RUN curl -fsSL -v -o ~/miniconda.sh -O "https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/Miniforge3-Linux-x86_64.sh" && \
18+
chmod +x ~/miniconda.sh && \
19+
bash ~/miniconda.sh -b -p /opt/conda && \
20+
rm ~/miniconda.sh && \
21+
/opt/conda/bin/conda update -y -n base -c defaults conda && \
22+
/opt/conda/bin/conda install -y python=3.11 && \
23+
/opt/conda/bin/conda clean -ya
24+
ENV PATH /opt/conda/bin:$PATH
1425

15-
RUN pip install fbgemm-gpu==0.8.0 --index-url https://download.pytorch.org/whl/cu121 && \
16-
pip install torchmetrics==1.0.3 && \
17-
pip install torchrec==0.8.0 --index-url https://download.pytorch.org/whl/cu121 && \
18-
pip install torch_tensorrt==2.4.0
26+
ARG DEVICE
27+
RUN case ${DEVICE} in \
28+
"cu121") pip install torch==2.5.0 fbgemm-gpu==1.0.0 --index-url https://download.pytorch.org/whl/cu121 && \
29+
pip install torchmetrics==1.0.3 torch_tensorrt==2.5.0 && \
30+
pip install torchrec==1.0.0 --index-url https://download.pytorch.org/whl/cu121 ;; \
31+
* ) pip install torch==2.5.0 fbgemm-gpu==1.0.0 --index-url https://download.pytorch.org/whl/cpu && \
32+
pip install torchmetrics==1.0.3 && \
33+
pip install torchrec==1.0.0 --index-url https://download.pytorch.org/whl/cpu ;; \
34+
esac && \
35+
/opt/conda/bin/conda clean -ya
36+
37+
ENV NVIDIA_VISIBLE_DEVICES all
38+
ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
39+
ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64
40+
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
1941

2042
ADD requirements.txt /root/requirements.txt
2143
ADD requirements /root/requirements
22-
RUN cd /root && pip install -r requirements.txt
44+
RUN cd /root && pip install -r requirements.txt && rm requirements.txt

docs/source/quick_start/local_tutorial.md

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,10 +15,10 @@ pip index versions tzrec -f http://tzrec.oss-cn-beijing.aliyuncs.com/release/nig
1515
```bash
1616
conda create -n tzrec python=3.11
1717
conda activate tzrec
18-
pip install torch==2.4.0 --index-url https://download.pytorch.org/whl/cu121
19-
pip install fbgemm-gpu==0.8.0 --index-url https://download.pytorch.org/whl/cu121
18+
pip install torch==2.5.0 --index-url https://download.pytorch.org/whl/cu121
19+
pip install fbgemm-gpu==1.0.0 --index-url https://download.pytorch.org/whl/cu121
2020
pip install torchmetrics==1.0.3
21-
pip install torchrec==0.8.0 --index-url https://download.pytorch.org/whl/cu121
21+
pip install torchrec==1.0.0 --index-url https://download.pytorch.org/whl/cu121
2222
pip install tzrec==${TZREC_NIGHTLY_VERSION} -f http://tzrec.oss-cn-beijing.aliyuncs.com/release/nightly/repo.html --trusted-host tzrec.oss-cn-beijing.aliyuncs.com
2323
```
2424

@@ -30,6 +30,15 @@ docker exec -it <CONTAINER_ID> bash
3030
pip install tzrec==${TZREC_NIGHTLY_VERSION} -f http://tzrec.oss-cn-beijing.aliyuncs.com/release/nightly/repo.html --trusted-host tzrec.oss-cn-beijing.aliyuncs.com
3131
```
3232

33+
注:
34+
35+
```
36+
GPU版本(CUDA 12.1) 镜像地址:
37+
mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:${TZREC_DOCKER_VERSION}-cu121
38+
CPU版本 镜像地址:
39+
mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec/tzrec-devel:${TZREC_DOCKER_VERSION}-cpu
40+
```
41+
3342
## 前置准备
3443

3544
### 数据

requirements/runtime.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ alibabacloud_credentials
22
anytree
33
common_io @ https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/common_io-0.4.1%2Btunnel-py2.py3-none-any.whl
44
faiss-cpu
5-
fbgemm-gpu==0.8.0
5+
fbgemm-gpu==1.0.0
66
graphlearn @ https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/graphlearn-1.3.0-cp311-cp311-linux_x86_64.whl ; python_version=="3.11"
77
graphlearn @ https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/graphlearn-1.3.0-cp310-cp310-linux_x86_64.whl ; python_version=="3.10"
88
grpcio-tools<1.63.0
@@ -11,6 +11,6 @@ pyfg @ https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/pyfg-0.3.2-cp311-cp
1111
pyfg @ https://tzrec.oss-cn-beijing.aliyuncs.com/third_party/pyfg-0.3.2-cp310-cp310-linux_x86_64.whl ; python_version=="3.10"
1212
scikit-learn
1313
tensorboard
14-
torch==2.4.0
14+
torch==2.5.0
1515
torchmetrics==1.0.3
16-
torchrec==0.8.0
16+
torchrec==1.0.0

scripts/build_docker.sh

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
#!/usr/bin/env bash
22

33
REGISTRY=mybigpai-public-registry.cn-beijing.cr.aliyuncs.com/easyrec
4-
DOCKER_TAG=0.5
4+
DOCKER_TAG=0.6
55

66
cp requirements.txt docker/
77
rm -rf docker/requirements
88
cp -r requirements/ docker/requirements
99
cd docker
1010

11-
docker build -t ${REGISTRY}/tzrec-devel:latest .
12-
docker images -q ${REGISTRY}/tzrec-devel:latest | xargs -I {} docker tag {} ${REGISTRY}/tzrec-devel:${DOCKER_TAG}
13-
docker push ${REGISTRY}/tzrec-devel:latest
11+
for DEVICE in cu121 cpu
12+
do
13+
docker build --network host -t ${REGISTRY}/tzrec-devel:${DOCKER_TAG}-${DEVICE} --build-arg DEVICE=${DEVICE} .
14+
docker push ${REGISTRY}/tzrec-devel:${DOCKER_TAG}-${DEVICE}
15+
done
16+
17+
docker images -q ${REGISTRY}/tzrec-devel:${DOCKER_TAG}-cu121 | xargs -I {} docker tag {} ${REGISTRY}/tzrec-devel:${DOCKER_TAG}
18+
docker images -q ${REGISTRY}/tzrec-devel:${DOCKER_TAG}-cu121 | xargs -I {} docker tag {} ${REGISTRY}/tzrec-devel:latest
1419
docker push ${REGISTRY}/tzrec-devel:${DOCKER_TAG}
20+
docker push ${REGISTRY}/tzrec-devel:latest

scripts/ci_test.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,4 @@
33
pip install -r requirements.txt
44
bash scripts/gen_proto.sh
55

6-
MKL_THREADING_LAYER=GNU PYTHONPATH=. python tzrec/tests/run.py
6+
MKL_THREADING_LAYER=GNU TORCH_DEVICE_BACKEND_AUTOLOAD=0 PYTHONPATH=. python tzrec/tests/run.py

tzrec/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111

1212
import os as _os
1313

14+
import torch as _torch # NOQA
15+
1416
if "OMP_NUM_THREADS" not in _os.environ:
1517
_os.environ["OMP_NUM_THREADS"] = "1"
1618

0 commit comments

Comments
 (0)