@@ -33,6 +33,47 @@ set(CUDA_USE_STATIC_CUDA_RUNTIME OFF)
3333
3434find_package (CUDAToolkit REQUIRED)
3535
# Try to enable the CUDA language when a working CUDA compiler toolchain is
# available. Some CI environments (notably Windows packaging jobs) provide
# CUDAToolkit headers/libs but cannot complete CUDA compiler identification. In
# those cases, keep configuration working and skip CUDA-only sources below.
if(NOT CMAKE_CUDA_COMPILER)
  # check_language() probes for a usable compiler without aborting the
  # configure step, unlike a direct enable_language() call.
  include(CheckLanguage)
  check_language(CUDA)
endif()

if(CMAKE_CUDA_COMPILER)
  enable_language(CUDA)
else()
  # Report the fallback so a build that lacks the CUDA-only shims can be traced
  # back to this configure-time decision.
  message(
    STATUS
      "CUDA backend: no working CUDA compiler found; CUDA-only sources (e.g. runtime/shims/int4mm.cu) will be skipped"
  )
endif()
48+
# Toolchain switches shared by the targets defined in this file.

# ON when CMake reports an MSVC-style toolchain.
if(MSVC)
  set(_cuda_is_msvc_toolchain ON)
else()
  set(_cuda_is_msvc_toolchain OFF)
endif()

# ON only when that toolchain is also targeting Windows.
if(WIN32 AND _cuda_is_msvc_toolchain)
  set(_cuda_is_windows_msvc ON)
else()
  set(_cuda_is_windows_msvc OFF)
endif()

# _cuda_cxx_compile_options: common C++ compile options for CUDA backend
# targets. _cuda_export_dynamic_option: linker option for exporting symbols;
# left empty for MSVC toolchains.
if(_cuda_is_msvc_toolchain)
  set(_cuda_cxx_compile_options /EHsc /GR)
  set(_cuda_export_dynamic_option "")
else()
  set(_cuda_cxx_compile_options -fexceptions -frtti -fPIC)
  if(APPLE)
    set(_cuda_export_dynamic_option -Wl,-export_dynamic)
  else()
    set(_cuda_export_dynamic_option -Wl,--export-dynamic)
  endif()
endif()
76+
3677# Use ExecuTorch's standard way to find PyTorch libraries for AOTI
3778include (${EXECUTORCH_ROOT} /tools/cmake/Utils.cmake)
3879find_package_torch()
@@ -47,17 +88,11 @@ target_include_directories(
4788)
# Apply the shared C++ options to C++ sources only; the list is quoted so its
# separators stay inside the generator expression.
target_compile_options(
  cuda_tensor_maker
  PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${_cuda_cxx_compile_options}>"
)

# Add the export-dynamic linker option when one is defined for this toolchain
# (the variable is empty for MSVC).
if(_cuda_export_dynamic_option)
  target_link_options(cuda_tensor_maker PUBLIC ${_cuda_export_dynamic_option})
endif()
6297
6398# Link against ExecuTorch core libraries
@@ -84,8 +119,7 @@ target_include_directories(
84119
# Shared C++ compile options, restricted to C++ sources of cuda_platform and
# propagated to its consumers via PUBLIC.
target_compile_options(
  cuda_platform
  PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${_cuda_cxx_compile_options}>"
)
90124
91125# Link against ExecuTorch core libraries
@@ -98,24 +132,30 @@ install(
98132)
99133
# CUDA-specific AOTI shim symbols (dynamically linked). The C++ sources are
# always built.
set(_aoti_cuda_shim_sources
    runtime/shims/memory.cpp
    runtime/shims/cuda_guard.cpp
)

# The int4mm shim is a .cu file, so add it only when a CUDA compiler was found.
if(CMAKE_CUDA_COMPILER)
  list(APPEND _aoti_cuda_shim_sources runtime/shims/int4mm.cu)
endif()

add_library(aoti_cuda_shims SHARED ${_aoti_cuda_shim_sources})
107145
# Define CUDA_AVAILABLE to use SlimTensor on GPU in common_shims_slim.h
target_compile_definitions(aoti_cuda_shims PRIVATE CUDA_AVAILABLE=1)

# Export macro for the shared library. Keyed on WIN32 rather than MSVC so that
# MinGW cross-compiles also emit dllexport symbols for the AOTI shims.
if(WIN32)
  target_compile_definitions(aoti_cuda_shims PRIVATE EXPORT_AOTI_FUNCTIONS)

  if(_cuda_is_windows_msvc)
    # Turn off CMake's automatic export of every symbol for this DLL.
    set_property(
      TARGET aoti_cuda_shims PROPERTY WINDOWS_EXPORT_ALL_SYMBOLS OFF
    )
  endif()
endif()
120160
121161target_include_directories (
@@ -126,30 +166,35 @@ target_include_directories(
126166
# Shared C++ compile options, applied to C++ sources only.
target_compile_options(
  aoti_cuda_shims
  PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${_cuda_cxx_compile_options}>"
)

# Add the export-dynamic linker option when one is defined for this toolchain
# (the variable is empty for MSVC).
if(_cuda_export_dynamic_option)
  target_link_options(aoti_cuda_shims PUBLIC ${_cuda_export_dynamic_option})
endif()
137176
# Link against CUDA::cudart, the common AOTI shims, cuda_tensor_maker, and
# platform utilities. How the common shims are pulled in depends on the
# toolchain.
if(NOT _cuda_is_msvc_toolchain)
  # Wrap aoti_common_shims_slim in --whole-archive to force shim symbol
  # retention in this shared library.
  target_link_libraries(
    aoti_cuda_shims
    PRIVATE cuda_platform
    PUBLIC -Wl,--whole-archive aoti_common_shims_slim -Wl,--no-whole-archive
           cuda_tensor_maker CUDA::cudart ${CMAKE_DL_LIBS}
  )
  executorch_target_link_options_shared_lib(aoti_cuda_shims)
else()
  target_link_libraries(
    aoti_cuda_shims PRIVATE cuda_platform cuda_tensor_maker CUDA::cudart
                            ${CMAKE_DL_LIBS}
  )
  # Link object library directly so symbols are pulled exactly once while
  # avoiding duplicate static/object inclusion and interface leakage.
  target_link_libraries(aoti_cuda_shims PRIVATE aoti_common_shims_slim_obj)
endif()
155200
@@ -172,14 +217,12 @@ target_include_directories(
172217)
# Shared C++ compile options, applied to C++ sources only.
target_compile_options(
  aoti_cuda_backend
  PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${_cuda_cxx_compile_options}>"
)

# Add the export-dynamic linker option when one is defined for this toolchain
# (the variable is empty for MSVC).
if(_cuda_export_dynamic_option)
  target_link_options(aoti_cuda_backend PUBLIC ${_cuda_export_dynamic_option})
endif()
183226
184227# Link against shims library and other dependencies. On Windows (MSVC), use
185228# PRIVATE linkage for aoti_cuda_shims since the DLL is copied to the executable
@@ -190,8 +233,15 @@ target_link_libraries(
190233 CUDA::cudart ${CMAKE_DL_LIBS}
191234)
192235
if(NOT _cuda_is_msvc_toolchain)
  target_link_libraries(aoti_cuda_backend PUBLIC aoti_cuda_shims)
else()
  # cuda_backend.cpp uses SlimTensor CUDA utilities (e.g. getCurrentCUDAStream)
  # from aoti_common_shims_slim via headers; propagate the static lib so final
  # MSVC links (e.g. parakeet_runner) can resolve those C++ symbols.
  target_link_libraries(
    aoti_cuda_backend
    PRIVATE aoti_cuda_shims
    PUBLIC aoti_common_shims_slim
  )
endif()
0 commit comments