Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions clang/lib/CodeGen/CGExpr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6979,43 +6979,45 @@ void CodeGenFunction::SetFPAccuracy(llvm::Value *Val, float Accuracy) {

/// Relax the required accuracy of a sqrt result when the language mode does
/// not demand correct rounding.
///
/// Only values whose scalar element type is float or half are considered;
/// any other type is left untouched. When OpenCL (or HIP device code) is
/// being compiled without the corresponding correctly-rounded div/sqrt code
/// generation option, the permitted error in ulp is forwarded to
/// SetFPAccuracy.
///
/// \param Val the sqrt result (scalar or vector) to annotate.
void CodeGenFunction::SetSqrtFPAccuracy(llvm::Value *Val) {
  llvm::Type *EltTy = Val->getType()->getScalarType();
  // Only single and half precision have a relaxed bound handled here.
  if (!EltTy->isFloatTy() && !EltTy->isHalfTy())
    return;

  if ((getLangOpts().OpenCL &&
       !CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
      (getLangOpts().HIP && getLangOpts().CUDAIsDevice &&
       !CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
    // OpenCL v1.1 s7.4: minimum accuracy of single precision sqrt is 3 ulp.
    // OpenCL v3.0 s7.4: minimum accuracy of half precision sqrt is 1.5 ulp.
    //
    // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
    // build option allows an application to specify that single precision
    // floating-point divide (x/y and 1/x) and sqrt used in the program
    // source are correctly rounded.
    //
    // TODO: CUDA has a prec-sqrt flag
    SetFPAccuracy(Val, EltTy->isFloatTy() ? 3.0f : 1.5f);
  }
}

/// Relax the required accuracy of a floating-point division result when the
/// language mode does not demand correct rounding.
///
/// Only values whose scalar element type is float or half are considered;
/// any other type is left untouched. When OpenCL (or HIP device code) is
/// being compiled without the corresponding correctly-rounded div/sqrt code
/// generation option, the permitted error in ulp is forwarded to
/// SetFPAccuracy.
///
/// \param Val the division result (scalar or vector) to annotate.
void CodeGenFunction::SetDivFPAccuracy(llvm::Value *Val) {
  llvm::Type *EltTy = Val->getType()->getScalarType();
  // Only single and half precision have a relaxed bound handled here.
  if (!EltTy->isFloatTy() && !EltTy->isHalfTy())
    return;

  if ((getLangOpts().OpenCL &&
       !CGM.getCodeGenOpts().OpenCLCorrectlyRoundedDivSqrt) ||
      (getLangOpts().HIP && getLangOpts().CUDAIsDevice &&
       !CGM.getCodeGenOpts().HIPCorrectlyRoundedDivSqrt)) {
    // OpenCL v1.1 s7.4: minimum accuracy of single precision / is 2.5 ulp.
    // OpenCL v3.0 s7.4: minimum accuracy of half precision / is 1 ulp.
    //
    // OpenCL v1.2 s5.6.4.2: The -cl-fp32-correctly-rounded-divide-sqrt
    // build option allows an application to specify that single precision
    // floating-point divide (x/y and 1/x) and sqrt used in the program
    // source are correctly rounded.
    //
    // TODO: CUDA has a prec-div flag
    SetFPAccuracy(Val, EltTy->isFloatTy() ? 2.5f : 1.f);
  }
}

Expand Down
44 changes: 41 additions & 3 deletions clang/test/CodeGenOpenCL/fpmath.cl
Original file line number Diff line number Diff line change
@@ -1,8 +1,44 @@
// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown | FileCheck --check-prefix=CHECK --check-prefix=NODIVOPT %s
// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -cl-fp32-correctly-rounded-divide-sqrt | FileCheck --check-prefix=CHECK --check-prefix=DIVOPT %s
// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s
// RUN: %clang_cc1 %s -emit-llvm -o - -DNOFP16 -DNOFP64 -cl-std=CL1.2 -triple r600-unknown-unknown -target-cpu r600 -pedantic | FileCheck --check-prefix=CHECK-FLT %s
// RUN: %clang_cc1 %s -emit-llvm -o - -DFP64 -cl-std=CL1.2 -triple spir-unknown-unknown -pedantic | FileCheck --check-prefix=CHECK-DBL %s

#ifndef NOFP16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
typedef __attribute__(( ext_vector_type(4) )) half half4;

// Scalar half-precision division. Without
// -cl-fp32-correctly-rounded-divide-sqrt the emitted fdiv is expected to carry
// !fpmath metadata (bound to 1 ulp by the metadata checks at the end of this
// file); with the option no !fpmath metadata is expected.
half hpscalardiv(half a, half b) {
// CHECK: @hpscalardiv
// CHECK: fdiv{{.*}},
// NODIVOPT: !fpmath ![[MD_HFDIV:[0-9]+]]
// DIVOPT-NOT: !fpmath !{{[0-9]+}}
return a / b;
}

// Vector (half4) division. The fdiv is expected to reuse the same !fpmath
// metadata node as the scalar half division when correctly rounded divide is
// not requested, and to carry no !fpmath metadata when it is.
half4 hpvectordiv(half4 a, half4 b) {
// CHECK: @hpvectordiv
// CHECK: fdiv{{.*}},
// NODIVOPT: !fpmath ![[MD_HFDIV]]
// DIVOPT-NOT: !fpmath !{{[0-9]+}}
return a / b;
}

// Scalar half sqrt through __builtin_elementwise_sqrt. The llvm.sqrt.f16 call
// is expected to carry !fpmath metadata (bound to 1.5 ulp by the metadata
// checks at the end of this file) unless correctly rounded sqrt is requested,
// in which case the call line must end right after the argument list.
half elementwise_sqrt_f16(half a) {
// CHECK-LABEL: @elementwise_sqrt_f16
// NODIVOPT: call half @llvm.sqrt.f16(half %{{.+}}), !fpmath ![[MD_HSQRT:[0-9]+]]
// DIVOPT: call half @llvm.sqrt.f16(half %{{.+}}){{$}}
return __builtin_elementwise_sqrt(a);
}

// Vector (half4) sqrt through __builtin_elementwise_sqrt. The llvm.sqrt.v4f16
// call is expected to reuse the !fpmath node bound by the scalar half sqrt
// test unless correctly rounded sqrt is requested, in which case no metadata
// may follow the call.
half4 elementwise_sqrt_v4f16(half4 a) {
// CHECK-LABEL: @elementwise_sqrt_v4f16
// NODIVOPT: call <4 x half> @llvm.sqrt.v4f16(<4 x half> %{{.+}}), !fpmath ![[MD_HSQRT]]
// DIVOPT: call <4 x half> @llvm.sqrt.v4f16(<4 x half> %{{.+}}){{$}}
return __builtin_elementwise_sqrt(a);
}

#endif // NOFP16

typedef __attribute__(( ext_vector_type(4) )) float float4;

float spscalardiv(float a, float b) {
Expand Down Expand Up @@ -30,14 +66,14 @@ float spscalarsqrt(float a) {

// Scalar float sqrt through __builtin_elementwise_sqrt. The llvm.sqrt.f32 call
// is expected to carry the single-precision sqrt !fpmath node unless correctly
// rounded sqrt is requested. MD_SQRT is only referenced here, so it is
// presumably bound by an earlier check in this file (not re-captured).
float elementwise_sqrt_f32(float a) {
  // CHECK-LABEL: @elementwise_sqrt_f32
  // NODIVOPT: call float @llvm.sqrt.f32(float %{{.+}}), !fpmath ![[MD_SQRT]]
  // DIVOPT: call float @llvm.sqrt.f32(float %{{.+}}){{$}}
  return __builtin_elementwise_sqrt(a);
}

// Vector (float4) sqrt through __builtin_elementwise_sqrt. The llvm.sqrt.v4f32
// call is expected to carry the single-precision sqrt !fpmath node unless
// correctly rounded sqrt is requested. MD_SQRT is only referenced here, so it
// is presumably bound by an earlier check in this file (not re-captured).
float4 elementwise_sqrt_v4f32(float4 a) {
  // CHECK-LABEL: @elementwise_sqrt_v4f32
  // NODIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}), !fpmath ![[MD_SQRT]]
  // DIVOPT: call <4 x float> @llvm.sqrt.v4f32(<4 x float> %{{.+}}){{$}}
  return __builtin_elementwise_sqrt(a);
}
Expand Down Expand Up @@ -90,5 +126,7 @@ double4 elementwise_sqrt_v4f64(double4 a) {

#endif

// NODIVOPT: ![[MD_HFDIV]] = !{float 1.000000e+00}
// NODIVOPT: ![[MD_HSQRT]] = !{float 1.500000e+00}
// NODIVOPT: ![[MD_FDIV]] = !{float 2.500000e+00}
// NODIVOPT: ![[MD_SQRT]] = !{float 3.000000e+00}
20 changes: 13 additions & 7 deletions clang/test/CodeGenOpenCL/sqrt-fpmath.cl
Original file line number Diff line number Diff line change
Expand Up @@ -134,46 +134,52 @@ double16 call_sqrt_v16f64(double16 x) {
}


// Half-precision sqrt library call. In the default-configuration checks the
// call is expected to carry !fpmath metadata (bound to 1.5 ulp at the end of
// this file); in the correctly-rounded checks the call line must end right
// after the attribute group.
// CHECK-LABEL: define {{.*}} half @call_sqrt_f16(
// DEFAULT: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH:\![0-9]+]]{{$}}
// CORRECTLYROUNDED: call {{.*}} half @_Z4sqrtDh(half noundef %{{.+}}) #{{[0-9]+$}}{{$}}
half call_sqrt_f16(half x) {
  return sqrt(x);
}


// half2 sqrt library call: expects the shared half-precision !fpmath node in
// the default-configuration checks and no trailing metadata in the
// correctly-rounded checks.
// CHECK-LABEL: define {{.*}} <2 x half> @call_sqrt_v2f16(
// DEFAULT: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
// CORRECTLYROUNDED: call {{.*}} <2 x half> @_Z4sqrtDv2_Dh(<2 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
half2 call_sqrt_v2f16(half2 x) {
  return sqrt(x);
}


// half3 sqrt library call: expects the shared half-precision !fpmath node in
// the default-configuration checks and no trailing metadata in the
// correctly-rounded checks.
// CHECK-LABEL: define {{.*}} <3 x half> @call_sqrt_v3f16(
// DEFAULT: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
// CORRECTLYROUNDED: call {{.*}} <3 x half> @_Z4sqrtDv3_Dh(<3 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
half3 call_sqrt_v3f16(half3 x) {
  return sqrt(x);
}


// half4 sqrt library call: expects the shared half-precision !fpmath node in
// the default-configuration checks and no trailing metadata in the
// correctly-rounded checks.
// CHECK-LABEL: define {{.*}} <4 x half> @call_sqrt_v4f16(
// DEFAULT: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
// CORRECTLYROUNDED: call {{.*}} <4 x half> @_Z4sqrtDv4_Dh(<4 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
half4 call_sqrt_v4f16(half4 x) {
  return sqrt(x);
}


// half8 sqrt library call: expects the shared half-precision !fpmath node in
// the default-configuration checks and no trailing metadata in the
// correctly-rounded checks.
// CHECK-LABEL: define {{.*}} <8 x half> @call_sqrt_v8f16(
// DEFAULT: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
// CORRECTLYROUNDED: call {{.*}} <8 x half> @_Z4sqrtDv8_Dh(<8 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
half8 call_sqrt_v8f16(half8 x) {
  return sqrt(x);
}


// half16 sqrt library call: expects the shared half-precision !fpmath node in
// the default-configuration checks and no trailing metadata in the
// correctly-rounded checks.
// CHECK-LABEL: define {{.*}} <16 x half> @call_sqrt_v16f16(
// DEFAULT: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+}}, !fpmath [[HFPMATH]]{{$}}
// CORRECTLYROUNDED: call {{.*}} <16 x half> @_Z4sqrtDv16_Dh(<16 x half> noundef %{{.+}}) #{{[0-9]+$}}{{$}}
half16 call_sqrt_v16f16(half16 x) {
  return sqrt(x);
}

// DEFAULT: [[FPMATH]] = !{float 3.000000e+00}
// DEFAULT: [[HFPMATH]] = !{float 1.500000e+00}