Skip to content

Commit 9d5f1c7

Browse files
committed
Add test cases for ov_gpu_unit_tests
1 parent c779a7f commit 9d5f1c7

File tree

1 file changed

+145
-6
lines changed

1 file changed

+145
-6
lines changed

src/plugins/intel_gpu/tests/unit/test_cases/rms_gpu_test.cpp

Lines changed: 145 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -14,18 +14,21 @@ using namespace ::tests;
1414
// Value-parameterized GoogleTest fixture; the parameter type is cldnn::format.
// The fixture adds no members of its own.
class rms_gpu_test
    : public ::testing::TestWithParam<cldnn::format> {
};
1515

1616
template <typename T>
17-
void rms_ref(const memory::ptr input, const memory::ptr gamma, memory::ptr output, float epsilon) {
17+
void rms_ref(const memory::ptr input, const memory::ptr gamma, memory::ptr output, float epsilon, bool has_gamma = true) {
1818
auto input_layout = input->get_layout();
19-
auto gamma_layout = gamma->get_layout();
2019

2120
uint32_t batch_size = input_layout.batch();
2221
uint32_t feature_size = input_layout.feature();
2322
uint32_t y_size = input_layout.spatial(1);
2423
uint32_t x_size = input_layout.spatial(0);
2524

2625
cldnn::mem_lock<T> src(input, get_test_stream());
27-
cldnn::mem_lock<T> weight(gamma, get_test_stream());
2826
cldnn::mem_lock<T> dst(output, get_test_stream());
27+
28+
std::unique_ptr<cldnn::mem_lock<T>> weight;
29+
if (has_gamma && gamma) {
30+
weight = std::make_unique<cldnn::mem_lock<T>>(gamma, get_test_stream());
31+
}
2932

3033
for (uint32_t b = 0; b < batch_size; ++b) {
3134
for (uint32_t f = 0; f < feature_size; ++f) {
@@ -44,12 +47,18 @@ void rms_ref(const memory::ptr input, const memory::ptr gamma, memory::ptr outpu
4447
for (uint32_t y = 0; y < y_size; ++y) {
4548
for (uint32_t x = 0; x < x_size; ++x) {
4649
auto tensor_src = tensor(batch(b), feature(f), spatial(x, y, 0, 0));
47-
auto tensor_weight = tensor(batch(0), feature(0), spatial(x, y, 0, 0));
4850
auto tensor_dst = tensor(batch(b), feature(f), spatial(x, y, 0, 0));
4951
size_t src_offset = input_layout.get_linear_offset(tensor_src);
50-
size_t weight_offset = input_layout.get_linear_offset(tensor_weight);
5152
size_t dst_offset = input_layout.get_linear_offset(tensor_dst);
52-
float result = rms * static_cast<float>(src[src_offset]) * static_cast<float>(weight[weight_offset]);
53+
54+
float gamma_val = 1.0f;
55+
if (has_gamma && weight) {
56+
auto tensor_weight = tensor(batch(0), feature(0), spatial(x, y, 0, 0));
57+
size_t weight_offset = input_layout.get_linear_offset(tensor_weight);
58+
gamma_val = static_cast<float>((*weight)[weight_offset]);
59+
}
60+
61+
float result = rms * static_cast<float>(src[src_offset]) * gamma_val;
5362
dst[dst_offset] = static_cast<T>(result);
5463
}
5564
}
@@ -418,3 +427,133 @@ TEST(rms_gpu_test, rms_test_bfyx_opt_padding) {
418427
ASSERT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3) << " index=" << i;
419428
}
420429
}
430+
431+
// Runs the rms primitive (constructed with its trailing flag set to false) on a
// static {1, 2, 6} f32 bfyx input and compares every output element against
// rms_ref<float>() evaluated with has_gamma=false.
// NOTE(review): gamma is filled with 1.0f, so the expected values are the same
// whether or not gamma is applied; presumably the trailing `false` disables the
// gamma scaling in the primitive - confirm against the rms primitive declaration.
TEST(rms_gpu_test, rms_test_without_gamma_bfyx_ref) {
    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ov::PartialShape{1, 2, 6}, data_types::f32, format::bfyx});
    auto gamma = engine.allocate_memory({ov::PartialShape{1, 6}, data_types::f32, format::bfyx});
    auto output_ref = engine.allocate_memory({ov::PartialShape{1, 2, 6}, data_types::f32, format::bfyx});

    set_values(input, {
        0.001839f, -0.003815f, 0.000961f, 0.002930f, -0.003998f, -0.008057f,
        0.006744f, -0.000004f, 0.004303f, -0.002380f, 0.000072f, 0.001404f
    });
    set_values(gamma, {
        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f
    });

    // Reference result; has_gamma=false makes rms_ref use a unit multiplier.
    rms_ref<float>(input, gamma, output_ref, 1e-5f, false);

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(input_layout("gamma", gamma->get_layout()));
    topology.add(rms("rms", input_info("input"), input_info("gamma"), 1e-5f, false));

    network network(engine, topology, get_test_default_config(engine));

    network.set_input_data("input", input);
    network.set_input_data("gamma", gamma);

    auto outputs = network.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "rms");

    auto output = outputs.begin()->second.get_memory();

    // Guard the element-wise loop below: it indexes both buffers with the
    // reference count, so a size mismatch must fail here instead of reading
    // past the end of the network output.
    const size_t element_count = output_ref->count();
    ASSERT_EQ(output->count(), element_count);

    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    cldnn::mem_lock<float> output_ref_ptr(output_ref, get_test_stream());

    for (size_t i = 0; i < element_count; ++i) {
        // Report the failing element, matching the other tests in this file.
        EXPECT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3) << " index=" << i;
    }
}
470+
471+
// Same scenario as the {1, 2, 6} "ref" variant but with a {1, 2, 16} f32 bfyx
// input: the rms primitive is built with its trailing flag set to false and the
// output is compared element-wise against rms_ref<float>() with has_gamma=false.
// NOTE(review): the "_opt" suffix suggests the wider innermost dimension is meant
// to select an optimized kernel - the test itself does not assert which
// implementation was chosen; confirm if that matters.
TEST(rms_gpu_test, rms_test_without_gamma_bfyx_opt) {
    auto& engine = get_test_engine();

    auto input = engine.allocate_memory({ov::PartialShape{1, 2, 16}, data_types::f32, format::bfyx});
    auto gamma = engine.allocate_memory({ov::PartialShape{1, 16}, data_types::f32, format::bfyx});
    auto output_ref = engine.allocate_memory({ov::PartialShape{1, 2, 16}, data_types::f32, format::bfyx});

    set_values(input, {
        0.001839f, -0.003815f, 0.000961f, 0.002930f, -0.003998f, -0.008057f, -0.005402f, -0.002945f,
        0.006744f, -0.000004f, 0.004303f, -0.002380f, 0.000072f, 0.001404f, 0.000568f, 0.002579f,
        0.003098f, -0.006989f, -0.000244f, 0.010193f, 0.002899f, -0.005798f, -0.026978f, 0.008789f,
        0.002258f, 0.006500f, 0.003159f, -0.012329f, 0.026245f, -0.001839f, 0.000259f, 0.002670f
    });
    set_values(gamma, {
        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f,
        1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f
    });

    // Reference result; has_gamma=false makes rms_ref use a unit multiplier.
    rms_ref<float>(input, gamma, output_ref, 1e-5f, false);

    topology topology;
    topology.add(input_layout("input", input->get_layout()));
    topology.add(input_layout("gamma", gamma->get_layout()));
    topology.add(rms("rms", input_info("input"), input_info("gamma"), 1e-5f, false));

    network network(engine, topology, get_test_default_config(engine));

    network.set_input_data("input", input);
    network.set_input_data("gamma", gamma);

    auto outputs = network.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "rms");

    auto output = outputs.begin()->second.get_memory();

    // Guard the element-wise loop below: it indexes both buffers with the
    // reference count, so a size mismatch must fail here instead of reading
    // past the end of the network output.
    const size_t element_count = output_ref->count();
    ASSERT_EQ(output->count(), element_count);

    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    cldnn::mem_lock<float> output_ref_ptr(output_ref, get_test_stream());

    for (size_t i = 0; i < element_count; ++i) {
        // Report the failing element, matching the other tests in this file.
        EXPECT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3) << " index=" << i;
    }
}
513+
514+
// Dynamic-shape variant: the topology declares the input with two dynamic leading
// dimensions and a fixed innermost size of 4096, then feeds a concrete
// {2, 1, 4096} f32 bfyx tensor. The test asserts that the selected rms
// implementation reports is_dynamic() and that the output matches
// rms_ref<float>() evaluated with has_gamma=false.
TEST(rms_gpu_test, rms_test_without_gamma_dyn) {
    auto& engine = get_test_engine();

    auto input_layout_dynamic = layout{ov::PartialShape{ov::Dimension::dynamic(), ov::Dimension::dynamic(), 4096},
                                       data_types::f32, format::bfyx};
    auto input = engine.allocate_memory({ov::PartialShape{2, 1, 4096}, data_types::f32, format::bfyx});
    auto gamma = engine.allocate_memory({ov::PartialShape{1, 1, 4096}, data_types::f32, format::bfyx});
    auto output_ref = engine.allocate_memory({ov::PartialShape{2, 1, 4096}, data_types::f32, format::bfyx});

    tests::set_random_values<float>(input, true, 8, 100);
    // Set gamma to all 1.0 for has_gamma=false case
    std::vector<float> gamma_data(4096, 1.0f);
    set_values(gamma, gamma_data);

    // Reference result; has_gamma=false makes rms_ref use a unit multiplier.
    rms_ref<float>(input, gamma, output_ref, 1e-5f, false);

    topology topology;
    topology.add(input_layout("input", input_layout_dynamic));
    topology.add(input_layout("gamma", gamma->get_layout()));
    topology.add(rms("rms", input_info("input"), input_info("gamma"), 1e-5f, false));

    ExecutionConfig config = get_test_default_config(engine);
    config.set_property(ov::intel_gpu::allow_new_shape_infer(true));

    network network(engine, topology, config);

    network.set_input_data("input", input);
    network.set_input_data("gamma", gamma);

    // The implementation picked for the dynamic input layout must itself be dynamic.
    auto inst = network.get_primitive("rms");
    auto impl = inst->get_impl();
    ASSERT_TRUE(impl != nullptr);
    ASSERT_TRUE(impl->is_dynamic());

    auto outputs = network.execute();
    ASSERT_EQ(outputs.size(), size_t(1));
    ASSERT_EQ(outputs.begin()->first, "rms");

    auto output = outputs.begin()->second.get_memory();

    // Guard the element-wise loop below: it indexes both buffers with the
    // reference count, so a size mismatch must fail here instead of reading
    // past the end of the network output.
    const size_t element_count = output_ref->count();
    ASSERT_EQ(output->count(), element_count);

    cldnn::mem_lock<float> output_ptr(output, get_test_stream());
    cldnn::mem_lock<float> output_ref_ptr(output_ref, get_test_stream());

    for (size_t i = 0; i < element_count; ++i) {
        // Report the failing element, matching the other tests in this file.
        EXPECT_NEAR(output_ptr[i], output_ref_ptr[i], 1e-3) << " index=" << i;
    }
}

0 commit comments

Comments
 (0)