@@ -106,6 +106,31 @@ KERNEL(softmax)(
        max_value = max(max_value, in);
        data[cls*TMP_CLASS_PITCH] = in;
    }

    // Handle IEEE-754 case when max_value is INF
    if (isinf((float)max_value)) {
        for (cls = 0; cls < class_num; ++cls) {
            ACCUMULATOR_TYPE v = data[cls * TMP_CLASS_PITCH];
            if (v == max_value)
                data[cls * TMP_CLASS_PITCH] = (ACCUMULATOR_TYPE)NAN;
            else
                data[cls * TMP_CLASS_PITCH] = (ACCUMULATOR_TYPE)0.0f;
        }

        // Write results and exit
        for (cls = 0; cls < class_num; ++cls) {
#if INPUT0_SIMPLE == 1
            const uint output_idx = out_depth_offset + cls*OUTPUT_CLASS_PITCH;
#else
#if INPUT0_DIMS == 5
            const uint output_idx = OUTPUT_GET_INDEX(b + *b_offset, f + *f_offset, z + *z_offset, y + *y_offset, x + *x_offset);
#else
            const uint output_idx = OUTPUT_GET_INDEX(b + *b_offset, f + *f_offset, y + *y_offset, x + *x_offset);
#endif
#endif
            output[output_idx] = data[cls * TMP_CLASS_PITCH];
        }
        return;
    }
Contributor commented on lines +109 to +133:

I think we need to apply fused ops in this early-exit case too.
Please check my suggestion below.

Suggested change
    for (cls = 0; cls < class_num; ++cls) {
        // Handle IEEE-754 case when max_value is INF
        if (isinf((float)max_value)) {
            if (data[cls*TMP_CLASS_PITCH] == max_value)
                data[cls*TMP_CLASS_PITCH] = TO_ACCUMULATOR_TYPE(NAN);
            else
                data[cls*TMP_CLASS_PITCH] = TO_ACCUMULATOR_TYPE(0.0f);
        } else {
            ACCUMULATOR_TYPE t = native_exp(data[cls*TMP_CLASS_PITCH] - max_value);
            denominator += t;
            data[cls*TMP_CLASS_PITCH] = t;
        }
    }
    ....
    for (cls = 0; cls < class_num; ++cls) {
        ACCUMULATOR_TYPE res = data[cls*TMP_CLASS_PITCH];
        if (!isinf((float)max_value)) {
            res = res / denominator;
        }
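
For reference, most clDNN kernels route the final store through the fused-ops macros. A minimal sketch of what the early-exit write could look like with that pattern applied, assuming the usual HAS_FUSED_OPS / FUSED_OPS / FUSED_OPS_RESULT macros and a fused-ops input variable named res (both are assumptions; this diff does not show the kernel's fused-ops configuration):

    // Sketch only: early-exit store with fused ops applied. The macro names
    // and the res variable are assumed from the common clDNN kernel pattern.
    ACCUMULATOR_TYPE res = data[cls * TMP_CLASS_PITCH];
#if HAS_FUSED_OPS
    FUSED_OPS;
    output[output_idx] = FUSED_OPS_RESULT;
#else
    output[output_idx] = TO_OUTPUT_TYPE(res);
#endif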


    // TODO: currently we accumulate in float32, because the long chain of adds
    // otherwise gets stuck at the value 8192.0f in fp16
    ACCUMULATOR_TYPE denominator = 0.0;
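
As an aside, the absorption effect that TODO describes is easy to reproduce host-side in float32, where the same thing happens at 2^24 instead of 8192 (a standalone demo, not part of the kernel):

#include <cstdio>

int main() {
    // float32 analogue of the fp16 saturation: past 2^24 the gap between
    // adjacent floats exceeds 2, so adding 1.0f is absorbed by rounding.
    float sum = 16777216.0f;  // 2^24
    sum += 1.0f;
    std::printf("%.1f\n", sum);  // still 16777216.0
    return 0;
}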
@@ -0,0 +1,104 @@
// Copyright (C) 2018-2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0

#include <gtest/gtest.h>
#include <gmock/gmock.h>

#include <cmath>
#include <limits>
#include <map>
#include <vector>

#include "shared_test_classes/single_op/softmax.hpp"
#include "common_test_utils/ov_tensor_utils.hpp"

namespace ov::test::subgraph {

// =======================
// GPU Numerical Edge Cases
// =======================

static void prepare_input(const std::vector<float>& values,
                          std::map<std::string, ov::Tensor>& inputsData,
                          std::vector<ov::PartialShape>& inputDynamicShapes) {
    // The base fixture stores dynamic shapes as ov::PartialShape, not ov::Shape.
    inputDynamicShapes.clear();
    inputDynamicShapes.push_back(ov::PartialShape{ov::Dimension(values.size())});

    // Recreate the tensor so its element count matches the test vector before
    // copying; the pre-existing tensor may have a different shape.
    auto& tensor = inputsData.begin()->second;
    tensor = ov::Tensor(ov::element::f32, ov::Shape{values.size()});
    auto* data = tensor.data<float>();
    for (size_t i = 0; i < values.size(); ++i) {
        data[i] = values[i];
    }
}

static void check_output(const std::vector<float>& expected,
                         const std::vector<float>& actual) {
    ASSERT_EQ(expected.size(), actual.size());

    for (size_t i = 0; i < expected.size(); ++i) {
        if (std::isnan(expected[i])) {
            EXPECT_TRUE(std::isnan(actual[i])) << "element " << i;
        } else {
            EXPECT_NEAR(expected[i], actual[i], 1e-6f) << "element " << i;
        }
    }
}

// Copies the first plugin output into a host-side vector; ov::Tensor has no
// as<std::vector<float>>() accessor, so the raw data pointer is used instead.
static std::vector<float> first_output_as_vector(const std::vector<ov::Tensor>& outputs) {
    auto t = outputs.front();  // ov::Tensor is a cheap shared handle
    return {t.data<float>(), t.data<float>() + t.get_size()};
}
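
For context, a scalar host-side sketch of the IEEE-754 behavior these tests encode (plain C++, not the GPU kernel; it assumes the kernel's max ignores NaN operands the way std::fmax does):

// Reference semantics (sketch): with max == +INF, exp(+INF - +INF) = exp(NaN)
// = NaN and exp(x - +INF) = 0 otherwise; the NaN denominator means no
// normalization. With NaN inputs, the NaN denominator propagates NaN everywhere.
static std::vector<float> softmax_reference(const std::vector<float>& in) {
    float mx = -INFINITY;
    for (float v : in)
        mx = std::fmax(mx, v);  // fmax returns the non-NaN operand

    std::vector<float> out(in.size());
    float denom = 0.f;
    for (size_t i = 0; i < in.size(); ++i) {
        out[i] = std::exp(in[i] - mx);
        denom += out[i];
    }
    if (!std::isinf(mx)) {  // mirrors the kernel's early exit when max is INF
        for (float& v : out)
            v /= denom;
    }
    return out;
}

Under this reference, every expected vector in the tests below is just softmax_reference(input).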

TEST_P(SoftMaxLayerTest, MixedInfinityCases) {
    std::vector<std::pair<std::vector<float>, std::vector<float>>> cases = {
        {{INFINITY, 1.f, 2.f}, {NAN, 0.f, 0.f}},
        {{INFINITY, -INFINITY, 1.f}, {NAN, 0.f, 0.f}}
    };

    for (auto& c : cases) {
        prepare_input(c.first, inputsData, inputDynamicShapes);
        run();
        auto out = first_output_as_vector(get_plugin_outputs());
        check_output(c.second, out);
    }
}

TEST_P(SoftMaxLayerTest, MultipleInfinityCases) {
    std::vector<std::pair<std::vector<float>, std::vector<float>>> cases = {
        {{INFINITY, INFINITY, 1.f}, {NAN, NAN, 0.f}},
        {{INFINITY, INFINITY, INFINITY}, {NAN, NAN, NAN}},
        {{INFINITY, -INFINITY, -INFINITY}, {NAN, 0.f, 0.f}}
    };

    for (auto& c : cases) {
        prepare_input(c.first, inputsData, inputDynamicShapes);
        run();
        auto out = first_output_as_vector(get_plugin_outputs());
        check_output(c.second, out);
    }
}

TEST_P(SoftMaxLayerTest, NegativeInfinityOnlyCase) {
    prepare_input({-INFINITY, 1.f, 2.f}, inputsData, inputDynamicShapes);
    run();
    auto out = first_output_as_vector(get_plugin_outputs());

    std::vector<float> expected = {0.f, 0.2689414f, 0.7310586f};

    // Reviewer suggestion applied (both forms); exact float equality is too
    // strict here, so both matchers use a small tolerance.
    EXPECT_THAT(out, ::testing::Pointwise(::testing::FloatNear(1e-6f), expected));
    EXPECT_THAT(out, ::testing::ElementsAre(::testing::FloatNear(0.f, 1e-6f),
                                            ::testing::FloatNear(0.2689414f, 1e-6f),
                                            ::testing::FloatNear(0.7310586f, 1e-6f)));
}

TEST_P(SoftMaxLayerTest, NaNPropagationCases) {
    std::vector<std::vector<float>> cases = {
        {NAN, 1.f, 2.f},
        {1.f, NAN, 2.f},
        {NAN, NAN, NAN}
    };

    for (auto& c : cases) {
        prepare_input(c, inputsData, inputDynamicShapes);
        run();
        auto out = first_output_as_vector(get_plugin_outputs());

        std::vector<float> expected(out.size(), std::numeric_limits<float>::quiet_NaN());

        // Reviewer suggestion applied; NaN != NaN under operator==, so the
        // checks must use NaN-aware matchers.
        EXPECT_THAT(out, ::testing::Pointwise(::testing::NanSensitiveFloatEq(), expected));
        EXPECT_THAT(out, ::testing::Each(::testing::IsNan()));
    }
}

} // namespace ov::test::subgraph
@@ -1,4 +1,4 @@
-// Copyright (C) 2018-2025 Intel Corporation
+// Copyright (C) 2018-2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//
