Skip to content

Commit 092e0e8

Browse files
authored
Merge branch 'ICLDisco:master' into update/parsec623
2 parents cdc8984 + 4ad5c19 commit 092e0e8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

56 files changed

+1710
-516
lines changed

CMakeLists.txt

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ set(DPLASMA_VERSION "${DPLASMA_VERSION_MAJOR}.${DPLASMA_VERSION_MINOR}")
1616

1717
############################################################################
1818
# CMake Policies Tuning
19+
if(POLICY CMP0144)
20+
# CMP0144: find_package uses upper-case <PACKAGENAME>_ROOT variables in addition to <PackageName>_ROOT
21+
cmake_policy(SET CMP0144 NEW)
22+
endif(POLICY CMP0144)
1923
set(CMAKE_NO_SYSTEM_FROM_IMPORTED True)
2024

2125
############################################################################
@@ -231,12 +235,30 @@ endif(NOT TARGET PaRSEC::parsec AND NOT TARGET PaRSEC::parsec_ptgpp)
231235

232236
############################################################################
233237
# Resume configuring dplasma
234-
option(DPLASMA_HAVE_CUDA "Use CUDA to accelerate DPLASMA routines" ${PARSEC_HAVE_CUDA})
235-
if(DPLASMA_HAVE_CUDA)
238+
option(DPLASMA_GPU_WITH_CUDA "Use CUDA to accelerate DPLASMA routines" ${PARSEC_HAVE_CUDA})
239+
if(DPLASMA_GPU_WITH_CUDA)
240+
if(NOT PARSEC_HAVE_CUDA)
241+
message(FATAL_ERROR "CUDA support for DPLASMA requested, but detected PaRSEC does not support it")
242+
endif()
236243
message(STATUS "CUDA support for DPLASMA enabled")
237244
if(NOT TARGET CUDA::cusolver)
238245
find_package(CUDAToolkit REQUIRED)
239246
endif(NOT TARGET CUDA::cusolver)
247+
set(DPLASMA_HAVE_CUDA ${PARSEC_HAVE_CUDA} CACHE BOOL "True if DPLASMA provide support for CUDA")
248+
endif()
249+
option(DPLASMA_GPU_WITH_HIP "Use HIP to accelerate DPLASMA routines" ${PARSEC_HAVE_HIP})
250+
if(DPLASMA_GPU_WITH_HIP)
251+
if(NOT PARSEC_HAVE_HIP)
252+
message(FATAL_ERROR "HIP support for DPLASMA requested, but detected PaRSEC does not support it")
253+
endif()
254+
message(STATUS "HIP support for DPLASMA enabled")
255+
# This is kinda ugly but the PATH and HINTS don't get transmitted to sub-dependents
256+
set(CMAKE_SYSTEM_PREFIX_PATH_save ${CMAKE_SYSTEM_PREFIX_PATH})
257+
list(APPEND CMAKE_SYSTEM_PREFIX_PATH /opt/rocm)
258+
find_package(hipblas REQUIRED)
259+
find_package(rocsolver REQUIRED)
260+
set(CMAKE_SYSTEM_PREFIX_PATH ${CMAKE_SYSTEM_PREFIX_PATH_save})
261+
set(DPLASMA_HAVE_HIP ${PARSEC_HAVE_HIP} CACHE BOOL "True if DPLASMA provide support for HIP")
240262
endif()
241263

242264
############################################################################

cmake_modules/FindLAPACKE.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ foreach(_comp ${LAPACKE_FIND_COMPONENTS})
284284
elseif(_comp STREQUAL "BLAS")
285285
if(NOT BLAS_FOUND)
286286
_find_library_with_header(${_comp} "" blas refblas)
287-
set(BLA_VENDOR CACHE "Generic")
287+
set(BLA_VENDOR "Generic" CACHE STRING "BLAS vendor")
288288
else()
289289
set(LAPACKE_BLAS_FOUND 1)
290290
set(LAPACKE_BLAS_LIB_FOUND 1)

configure

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,9 @@ cat <<EOF
132132
compile kernels optimized for the CUDA SM model x, y and z
133133
where x,y,z are two digit numbers representing a valid CUDA architecture (e.g. 35,37,60) (default=autodetect)
134134
135+
--with-hip[=DIR]
136+
use the AMD ROCm accelerator library [installed in DIR] (default=autodetect)
137+
135138
136139
Some influential environment variables:
137140
CC C compiler command
@@ -296,6 +299,11 @@ while [ "x$1" != x ]; do
296299
--with-cuda-sm-targets) with_cuda_sm_targets=yes; shift;;
297300
--without-cuda-sm-targets) with_cuda_sm_targets=no; shift;;
298301

302+
# ROCm options
303+
--with-hip=*) with_hip="${1#*=}"; shift;;
304+
--with-hip) with_hip=yes; shift;;
305+
--without-hip) with_hip=no; shift;;
306+
299307
# Python options
300308
--with-python=*) with_python="${1#*=}"; shift;;
301309
--with-python) with_python=yes; shift;;
@@ -399,12 +407,12 @@ _EOF
399407
mkdir -p "$NATIVE_DIR" && pushd "$NATIVE_DIR"
400408
rm -rf CMakeCache.txt CMakeFiles
401409

402-
# Disable MPI, CUDA, HWLOC when creating the build-tools
410+
# Disable MPI, GPU, HWLOC when creating the build-tools
403411
local NATIVE_MPI="-DPARSEC_DIST_WITH_MPI=OFF"
404-
local NATIVE_CUDA="-DPARSEC_GPU_WITH_CUDA=OFF"
412+
local NATIVE_GPU="-DPARSEC_GPU_WITH_CUDA=OFF -DPARSEC_GPU_WITH_HIP=OFF"
405413
local NATIVE_HWLOC=""
406414
local NATIVE_COMPILERS="-DSUPPORT_FORTRAN=OFF"
407-
local NATIVE_OPTS="-DBUILD_TESTING=OFF -DBUILD_TOOLS=ON -DBUILD_PARSEC=ON -DCMAKE_INSTALL_PREFIX=$NATIVE_PREFIX $NATIVE_MPI $NATIVE_CUDA $NATIVE_HWLOC $NATIVE_COMPILERS"
415+
local NATIVE_OPTS="-DBUILD_TESTING=OFF -DBUILD_TOOLS=ON -DBUILD_PARSEC=ON -DCMAKE_INSTALL_PREFIX=$NATIVE_PREFIX $NATIVE_MPI $NATIVE_GPU $NATIVE_HWLOC $NATIVE_COMPILERS"
408416

409417
set_cmake_executable #may have been changed in the platform file
410418
echo "CC=\"${NATIVE_CC}\" CFLAGS=\"${NATIVE_CFLAGS}\" CXX=\"${NATIVE_CXX}\" CXXFLAGS=\"${NATIVE_CXXFLAGS}\" LDFLAGS=\"${NATIVE_LDFLAGS}\" ${cmake_executable} -G\"${cmake_generator}\" ${NATIVE_OPTS} ${PARSEC_TOOLCHAIN_OPTIONS} $(for i in "$@"; do printf ' %q' "$i"; done) ${srcdir}"
@@ -621,6 +629,12 @@ x) ;;
621629
*) CMAKE_DEFINES+=" -DCUDA_SM_TARGETS='${with_cuda_sm_targets/,/;}'";;
622630
esac
623631
632+
case x$with_hip in
633+
xno) CMAKE_DEFINES+=" -DPARSEC_GPU_WITH_HIP=OFF -DDPLASMA_GPU_WITH_HIP=OFF";;
634+
xyes) CMAKE_DEFINES+=" -DPARSEC_GPU_WITH_HIP=ON -DDPLASMA_GPU_WITH_HIP=ON";;
635+
x) ;;
636+
*) CMAKE_DEFINES+=" -DPARSEC_GPU_WITH_HIP=ON -DHIP_ROOT=$(printf %q "$with_hip") -DDPLASMA_GPU_WITH_HIP=ON";;
637+
esac
624638
625639
case x$with_python in
626640
xno) echo >&2 "Python is required. Please provide a path to the python executable."; exit 3;;

contrib/platforms/macosx

Lines changed: 40 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,27 @@ for this reason the Apple VecLib remains the default option.
1212
EOF
1313
with_blas=Apple
1414

15+
# OpenBLAS on OSX is not in system directories,
16+
# let's make sure it gets auto-detected, if requested with with_blas
17+
if [ -d /opt/homebrew/opt/openblas/lib/cmake/OpenBLAS ]; then # brew (arm)
18+
ENVVARS+=" CMAKE_PREFIX_PATH+=':/opt/homebrew/opt/openblas/lib'"
19+
elif [ -d /usr/local/opt/openblas/lib/cmake/OpenBLAS ]; then # brew (intel)
20+
ENVVARS+=" CMAKE_PREFIX_PATH+=':/usr/local/opt/openblas/lib'"
21+
fi
22+
if [ -d /opt/local/lib/cmake/OpenBLAS ]; then # macports
23+
ENVVARS+=" CMAKE_PREFIX_PATH+=':/opt/local/lib'"
24+
fi
25+
if [ -d /opt/sw/lib/cmake/OpenBLAS ]; then # fink (10.15..)
26+
ENVVARS+=" CMAKE_PREFIX_PATH+=':/opt/sw/lib'"
27+
elif [ -d /sw/lib/cmake/OpenBLAS ]; then # fink (..10.14)
28+
ENVVARS+=" CMAKE_PREFIX_PATH+=':/sw/lib'"
29+
fi
30+
31+
#####################################################################
32+
# Everything below is imported from the parsec contrib platform file
33+
# If you need to modify something below, it probably needs to be modified
34+
# in parsec first, and backported here second
35+
1536
# if icc/CLang is not set, detect the matching gcc/gfortran pair
1637
# OS X has no fortran by default, and flavor varies between fink, port, self installed, etc
1738
# Lets try to guess and use the same version for all 3 compilers
@@ -30,24 +51,25 @@ if [ "x$enable_fortran" != xno -a ! -x "$(command -v "$CC")" ]; then
3051
done
3152
fi
3253

33-
# We have Brew OpenBLAS, lets make sure it gets auto-detected, if requested
34-
if [ -d /usr/local/opt/openblas/lib ]; then
35-
ENVVARS+=" CMAKE_PREFIX_PATH+=':/usr/local/opt/openblas/lib'"
36-
fi
37-
3854
# OS-X 12.2 provides Bison 2.3, we need Bison 3 or better
39-
# Try to get the 'brew' Bison if installed
40-
if [ -d /usr/local/opt/bison ]; then
41-
ENVVARS+=" BISON_ROOT=${BISON_ROOT:-/usr/local/opt/bison}"
55+
local bison_version=$(expr "$(bison --version)" : "bison.*\([0-9]\)\.[0-9]")
56+
# do not override valid version in PATH or user choice
57+
if [ "$bison_version" -lt 3 -a -z "$BISON_ROOT" ]; then
58+
# Try to find Bison if installed with brew, ports, fink
59+
if [ -d /opt/homebrew/opt/bison ]; then # brew (arm)
60+
bison_dir=/opt/homebrew/opt/bison
61+
elif [ -d /usr/local/opt/bison ]; then # brew (intel)
62+
bison_dir=/usr/local/opt/bison
63+
elif [ -x /opt/local/bin/bison ]; then # macports
64+
bison_dir=/opt/local
65+
elif [ -x /opt/sw/bin/bison ]; then # fink (10.15..)
66+
bison_dir=/opt/sw
67+
elif [ -x /sw/bin/bison ]; then # fink (..10.14)
68+
bison_dir=/sw
69+
fi
70+
if [ -n "${bison_dir}" ]; then
71+
echo "Default Bison is too old, will search in ${bison_dir}; override by setting BISON_ROOT"
72+
ENVVARS+=" BISON_ROOT=${bison_dir}"
73+
fi
4274
fi
43-
# Try to get the 'MacPort' Bison if installed
44-
if [ -x /opt/local/bin/bison ]; then
45-
ENVVARS+=" BISON_ROOT=${BISON_ROOT:-/opt/local}"
46-
fi
47-
# Try to get the 'Fink' Bison if installed
48-
if [ -x /sw/bin/bison ]; then
49-
ENVVARS+=" BISON_ROOT=${BISON_ROOT:-/sw}"
50-
fi
51-
# If Bison still not found, please set BISON_ROOT by hand
52-
5375

share/help-dplasma.txt

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
[cu*_alloc_failed]
2-
There was not enough memory available on a CUDA device
1+
[gpu_alloc_failed]
2+
There was not enough memory available on a GPU device
33
while trying to allocate a %s handle to manage tasks on
4-
this device, or another CUDA device on the node. The
4+
this device, or another GPU device on the node. The
55
PaRSEC runtime system may be configured to reserve too
6-
much memory on CUDA devices. Try reducing the amount of
6+
much memory on GPU devices. Try reducing the amount of
77
reserved memory by setting the PaRSEC MCA parameter
8-
'device_cuda_memory_use' to a lower value.
8+
'device_cuda_memory_use' (or similar for the type of
9+
device) to a lower value.

src/CMakeLists.txt

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,27 @@ if( NOT DPLASMA_HAVE_COMPLEX_H )
2828
list(APPEND EXTRA_SOURCES complex.c)
2929
endif()
3030
if( DPLASMA_HAVE_CUDA )
31-
list(APPEND EXTRA_SOURCES dplasmaaux_cuda.c)
31+
list(APPEND EXTRA_SOURCES dplasmaaux_cuda.c cuda/lapack_cuda_stage_in.c)
3232
endif()
33+
if( DPLASMA_HAVE_HIP )
34+
list(APPEND EXTRA_SOURCES dplasmaaux_hip.c)
35+
FILE(GLOB cuda_sources cuda/[^\\.]*.[ch])
36+
find_package(Perl REQUIRED)
37+
find_program(HIPIFY_PERL_COMMAND NAMES hipify-perl HINTS ${HIP_BIN_INSTALL_DIR} REQUIRED)
38+
foreach(cuda_file ${cuda_sources})
39+
file(RELATIVE_PATH cuda_filename ${CMAKE_CURRENT_SOURCE_DIR}/cuda ${cuda_file})
40+
string(REPLACE cuda hip hip_file ${cuda_filename})
41+
string(PREPEND hip_file "${CMAKE_CURRENT_BINARY_DIR}/hip/")
42+
add_custom_command(OUTPUT ${hip_file}
43+
DEPENDS ${cuda_file} # do not use MAIN_DEPENDENCY, that overrides the default .c.o rule
44+
COMMAND ${CMAKE_COMMAND} -E copy "${cuda_file}" "${hip_file}.prehip"
45+
COMMAND ${PERL_EXECUTABLE} ${HIPIFY_PERL_COMMAND} --inplace --print-stats "${hip_file}"
46+
COMMAND ${PERL_EXECUTABLE} -i -pe "s{(cuda)}{ substr uc hip | (uc \$1 ^ \$1), 0, 3 }egi" "${hip_file}" VERBATIM) # Convert all remaining cuda/CUDA
47+
if(${hip_file} MATCHES [^\\.]*.c) # do not add .h to sources
48+
list(APPEND EXTRA_SOURCES ${hip_file})
49+
endif()
50+
endforeach()
51+
endif( DPLASMA_HAVE_HIP )
3352

3453
### Generate .c files from .jdf for all required precisions
3554
set(JDF
@@ -236,7 +255,9 @@ target_link_libraries(dplasma
236255
PaRSEC::parsec
237256
LAPACKE::LAPACKE
238257
$<$<BOOL:${DPLASMA_HAVE_CUDA}>:CUDA::cublas>
239-
$<$<BOOL:${DPLASMA_HAVE_CUDA}>:CUDA::cusolver>)
258+
$<$<BOOL:${DPLASMA_HAVE_CUDA}>:CUDA::cusolver>
259+
$<$<BOOL:${DPLASMA_HAVE_HIP}>:roc::hipblas>
260+
$<$<BOOL:${DPLASMA_HAVE_HIP}>:roc::rocsolver>)
240261
set_target_properties(dplasma PROPERTIES VERSION ${DPLASMA_VERSION_MAJOR}.${DPLASMA_VERSION_MINOR}
241262
SOVERSION ${DPLASMA_VERSION_MAJOR})
242263

src/cuda/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
This directory contains files that are automatically converted from CUDA to HIP using Hipify.
2+
If your file is not automatically convertible, put it somewhere else.
3+

0 commit comments

Comments
 (0)