@@ -6,7 +6,6 @@

Contributor: (random spot) This change itself looks good, but I think it is necessary to check for potential side effects. Could you trigger the onepunch accuracy test?

#define OUTPUT_TYPE_BLOCK MAKE_VECTOR_TYPE(OUTPUT_TYPE, VEC_SIZE)
#define TO_TYPE(type, val) CAT(convert_, type)(val)
#define TO_TYPE_SAT(type, val) CAT(CAT(convert_, type), _sat)(val)

#if ELTWISE_BROADCAST
#define GET_INDEX(prefix, num, idx_order) CAT(CAT(prefix, num), _GET_INDEX_SAFE)(idx_order)
@@ -75,13 +74,9 @@ KERNEL(eltwise_blocked_opt)(INPUTS_DECLS
#if HAS_FUSED_OPS
FUSED_OPS;
OUTPUT_TYPE_BLOCK out = TO_TYPE(MAKE_VECTOR_TYPE(OUTPUT_TYPE, VEC_SIZE), FUSED_OPS_RESULT);
#else
#if QUANTIZATION_TERM && !OUTPUT_IS_FP
OUTPUT_TYPE_BLOCK out = ACTIVATION_TYPED(TO_TYPE_SAT(MAKE_VECTOR_TYPE(OUTPUT_TYPE, VEC_SIZE), res), ACTIVATION_PARAMS_TYPED);
#else
OUTPUT_TYPE_BLOCK out = ACTIVATION_TYPED(TO_TYPE(MAKE_VECTOR_TYPE(OUTPUT_TYPE, VEC_SIZE), res), ACTIVATION_PARAMS_TYPED);
#endif
#endif

#ifdef LEFTOVERS
// Overwrite
@@ -105,4 +100,3 @@ KERNEL(eltwise_blocked_opt)(INPUTS_DECLS

#undef OUTPUT_TYPE_BLOCK
#undef TO_TYPE
#undef TO_TYPE_SAT
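
Note: the hunks above appear to drop the saturating conversion path (TO_TYPE_SAT, i.e. convert_<type>_sat) from the blocked eltwise kernel, so an out-of-range integer result is narrowed with plain two's-complement wraparound instead of being clamped to the output range. A minimal host-side C++ sketch of the two behaviors (the helper names to_i8_sat/to_i8_wrap are illustrative only, not part of the kernel code):

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Analog of OpenCL convert_char_sat: clamp to the int8 range before narrowing.
static int8_t to_i8_sat(int16_t v) {
    return static_cast<int8_t>(std::clamp<int16_t>(v, -128, 127));
}

// Analog of plain convert_char: the narrowing cast keeps only the low 8 bits
// on two's-complement targets, i.e. the value wraps around.
static int8_t to_i8_wrap(int16_t v) {
    return static_cast<int8_t>(v);
}

int main() {
    int16_t r = 100 + 100;  // 200, outside the int8 range
    std::printf("saturate: %d\n", to_i8_sat(r));   // 127
    std::printf("wrap:     %d\n", to_i8_wrap(r));  // -56
    return 0;
}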
@@ -190,7 +190,7 @@ KERNEL(eltwise)(
#endif

#if QUANTIZATION_TERM && !OUTPUT_IS_FP
output[output_offset] = TO_OUTPUT_TYPE_SAT(ACTIVATION(out, ACTIVATION_PARAMS));
output[output_offset] = TO_OUTPUT_TYPE(ACTIVATION(out, ACTIVATION_PARAMS));
#else
output[output_offset] = TO_OUTPUT_TYPE(ACTIVATION_TYPED(out, ACTIVATION_PARAMS_TYPED));
#endif
@@ -2634,6 +2634,100 @@ TEST(eltwise_gpu_int, basic_in4x4x4x4) {
}
}

TEST(eltwise_gpu_int, i8_overflow_wraparound) {
// Test that int8 eltwise operations correctly wrap around on overflow
// instead of saturating. This tests values that overflow the [-128, 127] range.
// Subtraction examples: -100 - 100 = -200 -> wraps to 56
// Addition examples: 100 + 100 = 200 -> wraps to -56

auto& engine = get_test_engine();

auto input1 = engine.allocate_memory({ data_types::i8, format::bfyx, { 1, 1, 4, 1 } });
auto input2 = engine.allocate_memory({ data_types::i8, format::bfyx, { 1, 1, 4, 1 } });

std::vector<int8_t> input1_data = { -100, 100, 127, -128 };
std::vector<int8_t> input2_data = { 100, -100, -1, 1 };

set_values(input1, input1_data);
set_values(input2, input2_data);

for (auto mode : { eltwise_mode::sub, eltwise_mode::sum }) {
topology topology;
topology.add(input_layout("input1", input1->get_layout()));
topology.add(input_layout("input2", input2->get_layout()));
topology.add(eltwise("eltwise", { input_info("input1"), input_info("input2") }, mode));

network network(engine, topology, get_test_default_config(engine));
network.set_input_data("input1", input1);
network.set_input_data("input2", input2);
auto outputs = network.execute();

auto output = outputs.at("eltwise").get_memory();
cldnn::mem_lock<int8_t> output_ptr(output, get_test_stream());

for (size_t i = 0; i < input1_data.size(); ++i) {
int16_t wide_result = (mode == eltwise_mode::sub)
? static_cast<int16_t>(input1_data[i]) - static_cast<int16_t>(input2_data[i])
: static_cast<int16_t>(input1_data[i]) + static_cast<int16_t>(input2_data[i]);
int8_t expected = static_cast<int8_t>(wide_result);
ASSERT_EQ(expected, output_ptr[i])
<< "Mode: " << (mode == eltwise_mode::sub ? "sub" : "sum")
<< ", index " << i << ": " << static_cast<int>(input1_data[i])
<< (mode == eltwise_mode::sub ? " - " : " + ") << static_cast<int>(input2_data[i]);
}
}
}
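
For reference, a small sketch of what the four test vectors give under the new wraparound behavior versus the previous saturating behavior for the subtraction case (worked out with the same int16 widening the test uses; the saturating column is what the old TO_OUTPUT_TYPE_SAT path would have produced):

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
    const int16_t a[] = { -100, 100, 127, -128 };
    const int16_t b[] = {  100, -100,  -1,    1 };
    for (int i = 0; i < 4; ++i) {
        int16_t diff = static_cast<int16_t>(a[i] - b[i]);
        int8_t wrapped   = static_cast<int8_t>(diff);                                 // new kernel behavior
        int8_t saturated = static_cast<int8_t>(std::clamp<int16_t>(diff, -128, 127)); // previous behavior
        std::printf("%4d - %4d = %4d -> wrap %4d, sat %4d\n", a[i], b[i], diff, wrapped, saturated);
    }
    // -200 -> wrap 56 (sat -128), 200 -> wrap -56 (sat 127),
    //  128 -> wrap -128 (sat 127), -129 -> wrap 127 (sat -128)
    return 0;
}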

TEST(eltwise_gpu_int, i8_overflow_wraparound_blocked_format) {
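// Same wraparound check as above, but the inputs are reordered to b_fs_yx_fsv16 so the
// blocked-layout eltwise kernel path is exercised rather than the plain bfyx path.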
auto& engine = get_test_engine();

const int batch = 1, features = 32, height = 1, width = 4;
tensor input_tensor(batch, features, width, height);

auto input1 = engine.allocate_memory({ data_types::i8, format::bfyx, input_tensor });
auto input2 = engine.allocate_memory({ data_types::i8, format::bfyx, input_tensor });

std::vector<int8_t> input1_data(batch * features * height * width);
std::vector<int8_t> input2_data(batch * features * height * width);

for (size_t i = 0; i < input1_data.size(); ++i) {
input1_data[i] = (i % 4 == 0) ? -100 : ((i % 4 == 1) ? 100 : ((i % 4 == 2) ? 127 : -128));
input2_data[i] = (i % 4 == 0) ? 100 : ((i % 4 == 1) ? -100 : ((i % 4 == 2) ? -1 : 1));
}

set_values(input1, input1_data);
set_values(input2, input2_data);

for (auto mode : { eltwise_mode::sub, eltwise_mode::sum }) {
topology topology;
topology.add(input_layout("input1", input1->get_layout()));
topology.add(input_layout("input2", input2->get_layout()));
topology.add(reorder("reorder1", input_info("input1"), layout(data_types::i8, format::b_fs_yx_fsv16, input_tensor)));
topology.add(reorder("reorder2", input_info("input2"), layout(data_types::i8, format::b_fs_yx_fsv16, input_tensor)));
topology.add(eltwise("eltwise", { input_info("reorder1"), input_info("reorder2") }, mode));
topology.add(reorder("output_reorder", input_info("eltwise"), layout(data_types::i8, format::bfyx, input_tensor)));

network network(engine, topology, get_test_default_config(engine));
network.set_input_data("input1", input1);
network.set_input_data("input2", input2);
auto outputs = network.execute();

auto output = outputs.at("output_reorder").get_memory();
cldnn::mem_lock<int8_t> output_ptr(output, get_test_stream());

for (size_t i = 0; i < input1_data.size(); ++i) {
int16_t wide_result = (mode == eltwise_mode::sub)
? static_cast<int16_t>(input1_data[i]) - static_cast<int16_t>(input2_data[i])
: static_cast<int16_t>(input1_data[i]) + static_cast<int16_t>(input2_data[i]);
int8_t expected = static_cast<int8_t>(wide_result);
ASSERT_EQ(expected, output_ptr[i])
<< "Mode: " << (mode == eltwise_mode::sub ? "sub" : "sum")
<< ", index " << i << ": " << static_cast<int>(input1_data[i])
<< (mode == eltwise_mode::sub ? " - " : " + ") << static_cast<int>(input2_data[i]);
}
}
}

TEST(eltwise_gpu_int, div_gather_fusing) {
auto& engine = get_test_engine();
