Skip to content

Commit 129f38e

Browse files
committed
[dist] Add build variants: full, minimal, legacy
* full: SASS for all architectures, PTX for 10.0
* minimal: SASS/PTX for 5.0 only
* legacy: SASS/PTX for 3.5 only
1 parent 1235241 commit 129f38e

File tree

4 files changed

+59
-34
lines changed

4 files changed

+59
-34
lines changed

.claude/settings.local.json

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77
"Bash(gh run view:*)",
88
"Bash(gh workflow:*)",
99
"Bash(unzip:*)",
10-
"Bash(ls:*)"
10+
"Bash(ls:*)",
11+
"Bash(gh run list:*)"
1112
]
1213
}
1314
}

.github/workflows/build_wheels_windows.yml

Lines changed: 47 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -38,37 +38,47 @@ jobs:
3838
strategy:
3939
fail-fast: false
4040
matrix:
41+
# Build variants control binary size vs JIT compilation tradeoff:
42+
# - full: CUDA 12, native SASS for all GPUs (5.0-10.0), largest binary, no JIT needed
43+
# - legacy: CUDA 11, PTX/SASS 3.5, for older GPUs (Kepler+) and systems with older drivers
44+
build-variant: ['full', 'legacy']
4145
python-version: ['3.13']
4246
platform: [x64]
43-
cuda-version: ['12']
4447
include:
45-
# CUDA 12: Supports Maxwell (5.0) through Blackwell (10.0)
46-
# TODO(@Breakthrough): Expand PTX legacy support back to 5.0, it doesn't make the final
47-
# distribution that much larger and may help users with older GPUs.
48-
- cuda-version: '12'
48+
# CUDA 12 - Full variant: All SASS targets for maximum performance
49+
- build-variant: 'full'
4950
runs-on: 'windows-2025'
51+
cuda-version: '12'
52+
cmake-toolset: 'v143'
53+
cuda-nvcc-flags: '-Xcompiler /Zc:preprocessor'
5054
cuda-installer: 'cuda_12.9.1_windows_network.exe'
5155
cuda-path-version: 'v12.9'
5256
cudnn-archive: 'cudnn-windows-x86_64-9.18.1.3_cuda12-archive.zip'
5357
cudnn-folder: 'cudnn-windows-x86_64-9.18.1.3_cuda12-archive'
58+
cudnn-dll-path: 'bin/x64'
5459
video-codec-sdk-archive: 'Video_Codec_SDK_13.0.37.zip'
5560
video-codec-sdk-folder: 'Video_Codec_SDK_13.0.37'
5661
cuda-arch-bin: '5.0;5.2;6.0;6.1;7.0;7.5;8.0;8.6;8.9;9.0;10.0'
5762
cuda-arch-ptx: '10.0'
5863
cache-key: 'nvidia-deps-cuda-12.9.1-cudnn-9.18.1.3'
59-
# TODO: Re-enable CUDA 13 after updating to OpenCV 4.13+
60-
# CUDA 13: Supports Turing (7.5) through Blackwell (12.0)
61-
# - cuda-version: '13'
62-
# runs-on: 'windows-2025'
63-
# cuda-installer: 'cuda_13.1.1_windows_network.exe'
64-
# cuda-path-version: 'v13.1'
65-
# cudnn-archive: 'cudnn-windows-x86_64-9.18.1.3_cuda13-archive.zip'
66-
# cudnn-folder: 'cudnn-windows-x86_64-9.18.1.3_cuda13-archive'
67-
# video-codec-sdk-archive: 'Video_Codec_SDK_13.0.37.zip'
68-
# video-codec-sdk-folder: 'Video_Codec_SDK_13.0.37'
69-
# cuda-arch-bin: '7.5;8.0;8.6;8.9;9.0;10.0;12.0'
70-
# cuda-arch-ptx: '12.0'
71-
# cache-key: 'nvidia-deps-cuda-13.1.1-cudnn-9.18.1.3'
64+
# CUDA 11 - Legacy variant: For older GPUs (Kepler+) and systems with older drivers
65+
# TODO: Figure out how to set up a build environment so we don't need --allow-unsupported-compiler.
66+
# CUDA 11.8's host_config.h rejects newer _MSC_VER values even though they're compatible.
67+
- build-variant: 'legacy'
68+
runs-on: 'windows-2025'
69+
cuda-version: '11'
70+
cmake-toolset: 'v143'
71+
cuda-nvcc-flags: '--allow-unsupported-compiler'
72+
cuda-installer: 'cuda_11.8.0_windows_network.exe'
73+
cuda-path-version: 'v11.8'
74+
cudnn-archive: 'cudnn-windows-x86_64-8.9.7.29_cuda11-archive.zip'
75+
cudnn-folder: 'cudnn-windows-x86_64-8.9.7.29_cuda11-archive'
76+
cudnn-dll-path: 'bin'
77+
video-codec-sdk-archive: 'Video_Codec_SDK_13.0.37.zip'
78+
video-codec-sdk-folder: 'Video_Codec_SDK_13.0.37'
79+
cuda-arch-bin: '3.5'
80+
cuda-arch-ptx: '3.5'
81+
cache-key: 'nvidia-deps-cuda-11.8.0-cudnn-8.9.7.29'
7282
env:
7383
ACTIONS_ALLOW_UNSECURE_COMMANDS: true
7484
SDIST: 0
@@ -139,7 +149,7 @@ jobs:
139149
$CUDNN_PATH = "D:/a/opencv-python-cuda/opencv-python-cuda/${{ matrix.cudnn-folder }}"
140150
echo "CUDNN_LIBRARY=$CUDNN_PATH/lib/x64/cudnn.lib" | Out-File -FilePath $env:GITHUB_ENV -Append
141151
echo "CUDNN_INCLUDE_DIR=$CUDNN_PATH/include" | Out-File -FilePath $env:GITHUB_ENV -Append
142-
Copy-Item -Path "$CUDNN_PATH/bin/x64/*" -Destination . -Include "*.dll"
152+
Copy-Item -Path "$CUDNN_PATH/${{ matrix.cudnn-dll-path }}/*" -Destination . -Include "*.dll"
143153
shell: pwsh
144154
- name: 🔧 Install NVIDIA Video Codec SDK
145155
run: |
@@ -160,9 +170,9 @@ jobs:
160170
if: ${{ inputs.restore_build_cache && !inputs.rolling_build }}
161171
with:
162172
path: _skbuild
163-
key: ${{ runner.os }}-${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-${{ hashFiles('**/CMakeLists.txt') }}
173+
key: ${{ runner.os }}-${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-${{ matrix.build-variant }}-${{ hashFiles('**/CMakeLists.txt') }}
164174
restore-keys: |
165-
${{ runner.os }}-${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-
175+
${{ runner.os }}-${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-${{ matrix.build-variant }}-
166176
167177
- name: Build a package
168178
# CMake 3.25 regression fix. See https://stackoverflow.com/questions/74162633/problem-compiling-from-source-opencv-with-mvsc2019-in-64-bit-version
@@ -171,11 +181,22 @@ jobs:
171181
python -m pip install --upgrade pip
172182
python -m pip install --upgrade setuptools
173183
python -m pip install cmake==3.24.2
174-
python -m pip install toml && python -c "import toml; c = toml.load('pyproject.toml'); print('\n'.join(c['build-system']['requires']))" >> requirements.txt | python -m pip install -r requirements.txt
184+
python -m pip install toml
185+
python -c "import toml; c = toml.load('pyproject.toml'); print('\n'.join(c['build-system']['requires']))" > requirements.txt
186+
python -m pip install -r requirements.txt
175187
$CI_BUILD = 1
176188
if (Test-Path _skbuild) {
177189
$SKIP_CMAKE = "--skip-cmake"
178190
}
191+
# Set CMake generator and toolset (e.g., v142 for CUDA 11, v143 for CUDA 12)
192+
# Note: Toolset (-T) requires Visual Studio generator, not Ninja
193+
# Using environment variables avoids quoting issues with CMAKE_ARGS
194+
$env:CMAKE_GENERATOR = "Visual Studio 17 2022"
195+
$env:CMAKE_GENERATOR_PLATFORM = "x64"
196+
$env:CMAKE_GENERATOR_TOOLSET = "${{ matrix.cmake-toolset }}"
197+
$env:CUDA_NVCC_FLAGS = "${{ matrix.cuda-nvcc-flags }}"
198+
echo "Using CMake generator: Visual Studio 17 2022, platform: x64, toolset: ${{ matrix.cmake-toolset }}"
199+
echo "Using CUDA NVCC flags: ${{ matrix.cuda-nvcc-flags }}"
179200
python setup.py $SKIP_CMAKE bdist_wheel --py-limited-api=cp37 --dist-dir="$PWD\wheelhouse" -v
180201
shell: pwsh
181202

@@ -184,11 +205,11 @@ jobs:
184205
if: ${{ inputs.save_build_cache && !inputs.rolling_build }}
185206
with:
186207
path: _skbuild
187-
key: ${{ runner.os }}-${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-${{ hashFiles('**/CMakeLists.txt') }}
208+
key: ${{ runner.os }}-${{ matrix.python-version }}-cuda${{ matrix.cuda-version }}-${{ matrix.build-variant }}-${{ hashFiles('**/CMakeLists.txt') }}
188209
- name: Saving all wheels
189210
uses: actions/upload-artifact@v4
190211
with:
191-
name: wheel-cuda${{ matrix.cuda-version }}-${{ matrix.platform }}
212+
name: opencv-python-cuda-${{ matrix.build-variant }}-${{ matrix.platform }}
192213
path: wheelhouse/opencv*
193214

194215
Test:
@@ -202,11 +223,10 @@ jobs:
202223
matrix:
203224
python-version: ['3.12']
204225
platform: [x64]
205-
cuda-version: ['12'] # TODO: Re-enable '13' after updating to OpenCV 4.13+
226+
build-variant: ['full', 'legacy']
206227
env:
207228
ACTIONS_ALLOW_UNSECURE_COMMANDS: true
208229
OPENCV_TEST_DATA_PATH: ${{ github.workspace }}\opencv_extra\testdata
209-
PlatformToolset: v143
210230
steps:
211231
- name: Cleanup
212232
shell: bash
@@ -227,7 +247,7 @@ jobs:
227247
- name: Download a wheel accordingly to matrix
228248
uses: actions/download-artifact@v4
229249
with:
230-
name: wheel-cuda${{ matrix.cuda-version }}-${{ matrix.platform }}
250+
name: opencv-python-cuda-${{ matrix.build-variant }}-${{ matrix.platform }}
231251
path: wheelhouse/
232252
- name: Package installation
233253
run: |

docs/cuda-compatibility.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
PTX is forward-compatible only (e.g. PTX 10.0 works on GPUs ≥10.0, not older). SASS is therefore required for any GPU whose compute capability is lower than the PTX version. In other words, a GPU needs either matching SASS or PTX built for an equal or lower compute capability, which it can JIT-compile.
3434

3535

36-
3736
## References
3837

3938
- [CUDA Toolkit Archive](https://developer.nvidia.com/cuda-toolkit-archive)

setup.py

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -153,8 +153,11 @@ def main():
153153
# Raw paths relative to sourcetree root.
154154
files_outside_package_dir = {"cv2": ["LICENSE.txt", "LICENSE-3RD-PARTY.txt","*.dll"]}
155155

156+
# CMake generator and toolset configuration
157+
# The toolset can be overridden via CMAKE_GENERATOR_TOOLSET env var (e.g., v142 for CUDA 11.x)
158+
cmake_toolset = os.environ.get("CMAKE_GENERATOR_TOOLSET", "v143")
156159
ci_cmake_generator = (
157-
["-G", "Visual Studio 17 2022", "-T", "v143"]
160+
["-G", "Visual Studio 17 2022", "-T", cmake_toolset]
158161
if os.name == "nt"
159162
else ["-G", "Unix Makefiles"]
160163
)
@@ -214,10 +217,12 @@ def main():
214217
else []
215218
)
216219
+ (
217-
# CUDA 12.9+ requires MSVC's conformant preprocessor for CCCL headers.
218-
# -Xcompiler passes the flag from nvcc to the host compiler (MSVC).
219-
# See: https://github.com/NVIDIA/cccl/issues/5166
220-
["-DCUDA_NVCC_FLAGS=-Xcompiler /Zc:preprocessor"]
220+
# NVCC flags for Windows builds:
221+
# - CUDA 12.9+ requires MSVC's conformant preprocessor (-Xcompiler /Zc:preprocessor)
222+
# See: https://github.com/NVIDIA/cccl/issues/5166
223+
# - CUDA 11.x needs --allow-unsupported-compiler for newer MSVC versions
224+
# where _MSC_VER exceeds what host_config.h expects
225+
["-DCUDA_NVCC_FLAGS=" + os.environ.get("CUDA_NVCC_FLAGS", "-Xcompiler /Zc:preprocessor")]
221226
if sys.platform == "win32"
222227
else []
223228
)

0 commit comments

Comments
 (0)