From 4e8a10eedc6bef21d0b2bc5a786748658eaa47a9 Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Wed, 4 Feb 2026 11:10:23 +0100 Subject: [PATCH 1/5] [Snippets][CPU] Introduce EliminateInplaceOps pass --- .../lowered/pass/eliminate_inplace_ops.hpp | 64 +++++++++++++++ .../lowered/pass/eliminate_inplace_ops.cpp | 80 +++++++++++++++++++ src/common/snippets/src/op/subgraph.cpp | 22 ++++- .../snippets/aarch64/jit_fill_emitter.cpp | 18 ++--- .../snippets/x64/jit_fill_emitter.cpp | 16 ++-- 5 files changed, 178 insertions(+), 22 deletions(-) create mode 100644 src/common/snippets/include/snippets/lowered/pass/eliminate_inplace_ops.hpp create mode 100644 src/common/snippets/src/lowered/pass/eliminate_inplace_ops.cpp diff --git a/src/common/snippets/include/snippets/lowered/pass/eliminate_inplace_ops.hpp b/src/common/snippets/include/snippets/lowered/pass/eliminate_inplace_ops.hpp new file mode 100644 index 00000000000000..75b9aef23a55c9 --- /dev/null +++ b/src/common/snippets/include/snippets/lowered/pass/eliminate_inplace_ops.hpp @@ -0,0 +1,64 @@ +// Copyright (C) 2018-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#pragma once + +#include + +#include "openvino/core/rtti.hpp" +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/lowered/pass/pass.hpp" + +namespace ov::snippets::lowered::pass { + +/** + * @interface EliminateInplaceOps + * @brief Eliminates operations that are effectively inplace (input == output). + * Currently handles Fill operations where offset equals register capacity, + * which means the operation doesn't actually fill any new data. + * This pass should run after InsertSpecificIterations and before InitRegisters. + * @ingroup snippets + */ +class EliminateInplaceOps : public Pass { +public: + OPENVINO_RTTI("EliminateInplaceOps", "", Pass); + + /** + * @brief Callback type for determining if a Fill operation is inplace. + * Takes offset and element size, returns true if the Fill is inplace. 
+ */ + using IsInplaceFillCallback = std::function; + + /** + * @brief Constructor with callback for inplace detection + * @param is_inplace_fill_callback Function to determine if a Fill is inplace based on offset and element size + */ + explicit EliminateInplaceOps(IsInplaceFillCallback is_inplace_fill_callback); + + /** + * @brief Apply the pass to the Linear IR + * @param linear_ir the target Linear IR + * @return status of the pass (true if any changes were made) + */ + bool run(LinearIR& linear_ir) override; + +private: + /** + * @brief Check if a Fill operation is inplace using the configured callback + * @param fill_expr expression containing Fill operation + * @return true if the Fill operation is inplace and can be eliminated + */ + bool is_inplace_fill(const ExpressionPtr& fill_expr) const; + + /** + * @brief Remove inplace Fill operation from the linear IR + * @param linear_ir the target Linear IR + * @param fill_expr expression containing inplace Fill operation + */ + static void eliminate_fill(LinearIR& linear_ir, const ExpressionPtr& fill_expr); + + IsInplaceFillCallback m_is_inplace_fill_callback; +}; + +} // namespace ov::snippets::lowered::pass diff --git a/src/common/snippets/src/lowered/pass/eliminate_inplace_ops.cpp b/src/common/snippets/src/lowered/pass/eliminate_inplace_ops.cpp new file mode 100644 index 00000000000000..bd5639b2a8ce69 --- /dev/null +++ b/src/common/snippets/src/lowered/pass/eliminate_inplace_ops.cpp @@ -0,0 +1,80 @@ +// Copyright (C) 2018-2026 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "snippets/lowered/pass/eliminate_inplace_ops.hpp" + +#include + +#include "openvino/core/except.hpp" +#include "openvino/core/type.hpp" +#include "snippets/itt.hpp" +#include "snippets/lowered/expression.hpp" +#include "snippets/lowered/expression_port.hpp" +#include "snippets/lowered/linear_ir.hpp" +#include "snippets/op/fill.hpp" + +namespace ov::snippets::lowered::pass { + 
+EliminateInplaceOps::EliminateInplaceOps(IsInplaceFillCallback is_inplace_fill_callback) + : m_is_inplace_fill_callback(std::move(is_inplace_fill_callback)) { + OPENVINO_ASSERT(m_is_inplace_fill_callback, "Callback for inplace Fill detection must be provided"); +} + +bool EliminateInplaceOps::is_inplace_fill(const ExpressionPtr& fill_expr) const { + const auto fill = ov::as_type_ptr(fill_expr->get_node()); + if (!fill) { + return false; + } + + const auto offset = fill->get_offset(); + const auto element_size = fill->get_output_element_type(0).size(); + + return m_is_inplace_fill_callback(offset, element_size); +} + +void EliminateInplaceOps::eliminate_fill(LinearIR& linear_ir, const ExpressionPtr& fill_expr) { + // Inplace Fill has one input and one output + // We need to redirect all consumers of the Fill's output to use the Fill's input instead + + OPENVINO_ASSERT(fill_expr->get_input_count() == 1, "Fill should have exactly one input"); + OPENVINO_ASSERT(fill_expr->get_output_count() == 1, "Fill should have exactly one output"); + + const auto& fill_input_connector = fill_expr->get_input_port_connector(0); + const auto& fill_output_connector = fill_expr->get_output_port_connector(0); + + // Get all consumers of this Fill operation + const auto consumers = fill_output_connector->get_consumers(); + + // Redirect all consumers to use the input of Fill directly + lowered::replace_input_port_connectors(consumers, fill_input_connector); + + // Remove Fill from the linear IR + linear_ir.erase(linear_ir.find(fill_expr)); +} + +bool EliminateInplaceOps::run(LinearIR& linear_ir) { + OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::EliminateInplaceOps"); + + bool modified = false; + + // Iterate through all expressions and eliminate inplace Fill operations + // We iterate from the end to avoid iterator invalidation issues when erasing + auto expr_it = linear_ir.begin(); + while (expr_it != linear_ir.end()) { + const auto& expr = *expr_it; + + if 
(is_inplace_fill(expr)) { + eliminate_fill(linear_ir, expr); + modified = true; + // After erasing, we need to reset the iterator + expr_it = linear_ir.begin(); + } else { + ++expr_it; + } + } + + return modified; +} + +} // namespace ov::snippets::lowered::pass diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 1c0fb0386e5da6..cd89639b8520e4 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -48,6 +48,7 @@ #include "snippets/lowered/pass/allocate_buffers.hpp" #include "snippets/lowered/pass/clean_repeated_ptr_shifts.hpp" #include "snippets/lowered/pass/cleanup_loop_offsets.hpp" +#include "snippets/lowered/pass/eliminate_inplace_ops.hpp" #include "snippets/lowered/pass/extract_loop_invariants.hpp" #include "snippets/lowered/pass/fuse_loops.hpp" #include "snippets/lowered/pass/init_loops.hpp" @@ -571,12 +572,29 @@ void Subgraph::control_flow_transformations( // 1. AssignRegisters must be called after InsertSpecificIterations since specific loops maybe have // different expressions and connections each other. AssignRegisters should be performed on the expanded // loops. - // 2. CleanupLoopOffsets must be called after InsertSpecificIterations to avoid violating the proportionality of + // 2. EliminateInplaceOps must be called after InsertSpecificIterations to eliminate inplace Fill operations + // that appear with offset == register_capacity after loop decomposition. This resolves ticket 126270. + // 3. CleanupLoopOffsets must be called after InsertSpecificIterations to avoid violating the proportionality of // the pointer increments // (this might happen if tail loop and main loop have different increments) - // 3. OptimizeLoopSingleEvaluation must be called after CleanupLoopOffsets + // 4. 
OptimizeLoopSingleEvaluation must be called after CleanupLoopOffsets // since CleanupLoopOffsets can't handle loops with evaluate_once = true gen_pipeline.register_pass(); + // Callback to determine if Fill operation is inplace based on actual register capacity + // get_lanes() returns the number of float32 (4-byte) elements that fit in a vector register + const size_t lanes_for_float32 = get_generator()->get_target_machine()->get_lanes(); + auto is_inplace_fill_callback = [lanes_for_float32](size_t offset, size_t element_size) -> bool { + // When offset is 0, Fill fills the entire register (not inplace) + if (offset == 0) { + return false; + } + // Calculate register capacity for the given element size + // Scale from float32 capacity: capacity(T) = capacity(float32) * sizeof(float32) / sizeof(T) + const size_t register_capacity = (lanes_for_float32 * sizeof(float)) / element_size; + // Fill is inplace when offset equals the register capacity + return offset == register_capacity; + }; + gen_pipeline.register_pass(is_inplace_fill_callback); gen_pipeline.register_pass(get_generator(), lowered_pass_config); gen_pipeline.register_pass(); gen_pipeline.register_pass(); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.cpp index e299636a6c4981..d4936e2600a8b0 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.cpp @@ -14,6 +14,7 @@ #include "emitters/plugin/aarch64/jit_emitter.hpp" #include "emitters/utils.hpp" +#include "openvino/core/except.hpp" #include "openvino/core/type.hpp" #include "openvino/core/type/element_type.hpp" #include "snippets/lowered/expression.hpp" @@ -63,17 +64,12 @@ void jit_fill_emitter::emit_impl(const std::vector& in, const std::vecto template void jit_fill_emitter::emit_isa(const std::vector& in, const std::vector& out) const { 
const size_t supported_et_size = dnnl::impl::cpu::aarch64::cpu_isa_traits::vlen / exec_prc_.size(); - if (offset == supported_et_size) { - // WA: since AssignRegisters doesn't support inplace logic, Fill ops with offset = register_capacity can't be - // removed from the LIR - // TODO: when inplace is supported, remove such Fill ops from the LIR and remove this logic. - // Ticket: 126270 - auto src = in[0]; - auto dst = out[0]; - if (src != dst) { - h->mov(Xbyak_aarch64::VReg16B(dst), Xbyak_aarch64::VReg16B(src)); - } - } else if (is_full_reg()) { + OPENVINO_ASSERT(offset <= supported_et_size, + "Fill emitter offset ", + offset, + " exceeds register capacity ", + supported_et_size); + if (is_full_reg()) { fill_full(out); } else { fill_tail(in, out); diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp index 2013cc0ca770da..1114fd8a54e7c0 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp @@ -15,6 +15,7 @@ #include "emitters/plugin/x64/jit_emitter.hpp" #include "emitters/utils.hpp" +#include "openvino/core/except.hpp" #include "openvino/core/type.hpp" #include "openvino/core/type/element_type.hpp" #include "snippets/lowered/expression.hpp" @@ -79,15 +80,12 @@ void jit_fill_emitter::emit_isa(const std::vector& in, const std::vector const size_t supported_et_size = 4; const auto register_capacity = (src_vmm.getBit() / 8) / supported_et_size; - if (offset == register_capacity) { - // WA: since AssignRegisters doesn't support inplace logic, Fill ops with offset = register_capacity can't be - // removed from the LIR - // TODO: when inplace is supported, remove such Fill ops from the LIR and remove this logic. 
- // Ticket: 126270 - if (src_vmm.getIdx() != dst_vmm.getIdx()) { - h->uni_vmovups(dst_vmm, src_vmm); - } - } else if (is_full_reg()) { + OPENVINO_ASSERT(offset <= register_capacity, + "Fill emitter offset ", + offset, + " exceeds register capacity ", + register_capacity); + if (is_full_reg()) { fill_full(dst_vmm); } else { fill_tail(src_vmm, dst_vmm); From 3aaed2cee4469fb60f71dbc523254034d1ae2a30 Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Wed, 4 Feb 2026 11:16:30 +0100 Subject: [PATCH 2/5] Fix assert condition --- .../src/emitters/snippets/aarch64/jit_fill_emitter.cpp | 2 +- .../intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.cpp index d4936e2600a8b0..e40c78f416c437 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/aarch64/jit_fill_emitter.cpp @@ -64,7 +64,7 @@ void jit_fill_emitter::emit_impl(const std::vector& in, const std::vecto template void jit_fill_emitter::emit_isa(const std::vector& in, const std::vector& out) const { const size_t supported_et_size = dnnl::impl::cpu::aarch64::cpu_isa_traits::vlen / exec_prc_.size(); - OPENVINO_ASSERT(offset <= supported_et_size, + OPENVINO_ASSERT(offset < supported_et_size, "Fill emitter offset ", offset, " exceeds register capacity ", diff --git a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp index 1114fd8a54e7c0..2c323d0275347c 100644 --- a/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp +++ b/src/plugins/intel_cpu/src/emitters/snippets/x64/jit_fill_emitter.cpp @@ -80,7 +80,7 @@ void jit_fill_emitter::emit_isa(const std::vector& in, const std::vector const size_t supported_et_size = 4; const 
auto register_capacity = (src_vmm.getBit() / 8) / supported_et_size; - OPENVINO_ASSERT(offset <= register_capacity, + OPENVINO_ASSERT(offset < register_capacity, "Fill emitter offset ", offset, " exceeds register capacity ", From 66ed10db620cad3f1462f6fd7c44db509297c58e Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Mon, 9 Feb 2026 16:17:52 +0100 Subject: [PATCH 3/5] Apply review comment: insert Fill in ReduceDecomposition only where needed instead of running EliminateInplaceOps --- .../src/lowered/pass/reduce_decomposition.cpp | 145 +++++++++++++++--- src/common/snippets/src/op/subgraph.cpp | 19 +-- .../snippets/reduce.cpp | 3 + 3 files changed, 129 insertions(+), 38 deletions(-) diff --git a/src/common/snippets/src/lowered/pass/reduce_decomposition.cpp b/src/common/snippets/src/lowered/pass/reduce_decomposition.cpp index 03e23b6cc1b794..bb95aa6d3e7b0c 100644 --- a/src/common/snippets/src/lowered/pass/reduce_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/reduce_decomposition.cpp @@ -4,6 +4,7 @@ #include "snippets/lowered/pass/reduce_decomposition.hpp" +#include #include #include #include @@ -23,12 +24,12 @@ #include "snippets/lowered/loop_info.hpp" #include "snippets/lowered/loop_manager.hpp" #include "snippets/lowered/loop_port.hpp" -#include "snippets/lowered/pass/iter_handler.hpp" #include "snippets/lowered/pass/pass.hpp" #include "snippets/lowered/specific_loop_iter_types.hpp" #include "snippets/op/fill.hpp" #include "snippets/op/horizon_max.hpp" #include "snippets/op/horizon_sum.hpp" +#include "snippets/op/loop.hpp" #include "snippets/op/memory_access.hpp" #include "snippets/op/reduce.hpp" #include "snippets/op/vector_buffer.hpp" @@ -36,20 +37,107 @@ namespace ov::snippets::lowered::pass { +namespace { +uint32_t get_initial_value(const ov::DiscreteTypeInfo& type_info) { + static const std::map reduce_initial_values{ + {op::ReduceMax::get_type_info_static(), static_cast(0xff7fffff)}, + {op::ReduceSum::get_type_info_static(), static_cast(0x00000000)}, + }; + OPENVINO_ASSERT(reduce_initial_values.count(type_info), "Unexpected 
ReduceType"); + return reduce_initial_values.at(type_info); +} + +uint32_t get_fill_value_for_accumulation(const std::shared_ptr& accumulation) { + if (ov::is_type(accumulation)) { + return get_initial_value(op::ReduceMax::get_type_info_static()); + } + if (ov::is_type(accumulation)) { + return get_initial_value(op::ReduceSum::get_type_info_static()); + } + OPENVINO_THROW("InsertTailFill supports only Maximum/Add accumulation but got: ", accumulation->get_type_info()); +} +} // namespace + +class InsertTailFill : public RangedPass { +public: + explicit InsertTailFill(size_t offset) : RangedPass(), m_offset(offset) {} + OPENVINO_RTTI("InsertTailFill", "", RangedPass); + + bool run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) override { + OPENVINO_ASSERT(begin != end, "InsertTailFill expects non-empty range."); + const auto& loop_end = ov::as_type_ptr(end->get()->get_node()); + OPENVINO_ASSERT(loop_end, "InsertTailFill expected LoopEnd node in iterator `end`."); + const auto& loop_info = linear_ir.get_loop_manager()->get_loop_info(loop_end->get_id()); + const auto& output_ports = loop_info->get_output_ports(); + const auto accumulation_output_it = + std::find_if(output_ports.begin(), output_ports.end(), [](const LoopPort& output_loop_port) { + const auto& output_expr = output_loop_port.get_expr_port()->get_expr(); + const auto& output_node = output_expr->get_node(); + return ov::is_type_any_of(output_node); + }); + OPENVINO_ASSERT(accumulation_output_it != output_ports.end(), + "InsertTailFill failed to find accumulation output port."); + const auto& accumulation_expr = accumulation_output_it->get_expr_port()->get_expr(); + OPENVINO_ASSERT(accumulation_expr, "InsertTailFill failed to get accumulation expression."); + + size_t recurrent_input_port_idx = utils::get_dynamic_value(); + for (const auto& input_loop_port : loop_info->get_input_ports()) { + const auto& input_port = input_loop_port.get_expr_port(); + if (input_port->get_type() 
== ExpressionPort::Input && input_port->get_expr() == accumulation_expr) { + recurrent_input_port_idx = input_port->get_index(); + break; + } + } + OPENVINO_ASSERT(!utils::is_dynamic_value(recurrent_input_port_idx), + "InsertTailFill failed to find recurrent accumulation input port."); + + size_t data_input_port_idx = utils::get_dynamic_value(); + for (size_t i = 0; i < accumulation_expr->get_input_count(); ++i) { + if (i != recurrent_input_port_idx) { + data_input_port_idx = i; + break; + } + } + OPENVINO_ASSERT(!utils::is_dynamic_value(data_input_port_idx), + "InsertTailFill failed to find data accumulation input port."); + + const auto accumulation_input_port = accumulation_expr->get_input_port(data_input_port_idx); + const auto accumulation_it = linear_ir.find(begin, end, accumulation_expr); + + const auto source = accumulation_expr->get_input_port_connector(data_input_port_idx)->get_source(); + const auto source_output = source.get_expr()->get_node()->output(source.get_index()); + const auto fill_value = get_fill_value_for_accumulation(accumulation_expr->get_node()); + const auto fill = linear_ir.insert_node(accumulation_it, source_output, m_offset, fill_value); + + fill.first->get()->set_loop_ids(accumulation_expr->get_loop_ids()); + replace_input_port_connectors({accumulation_input_port}, fill.first->get()->get_output_port_connector(0)); + linear_ir.get_loop_manager()->update_loop_ports(*fill.first); + accumulation_expr->updateShapes(); + + return true; + } + + std::shared_ptr merge(const std::shared_ptr& other) override { + if (!other) { + return shared_from_this(); + } + const auto casted_pass = ov::as_type_ptr(other); + size_t merged_offset = 0; + if (!casted_pass || !ov::snippets::utils::merge_dynamic_dim(merged_offset, m_offset, casted_pass->m_offset)) { + return nullptr; + } + return std::make_shared(merged_offset); + } + +private: + size_t m_offset = 0; +}; + ReduceDecomposition::ReduceDecomposition(size_t vector_size) : RangedPass(), 
m_vector_size{vector_size} {} bool ReduceDecomposition::run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) { OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ReduceMaxDecompositionLowered") - auto get_initial_value = [](const ov::DiscreteTypeInfo& type_info) { - static const std::map reduce_initial_values{ - {op::ReduceMax::get_type_info_static(), static_cast(0xff7fffff)}, - {op::ReduceSum::get_type_info_static(), static_cast(0x00000000)}, - }; - OPENVINO_ASSERT(reduce_initial_values.count(type_info), "Unexpected ReduceType"); - return reduce_initial_values.at(type_info); - }; - auto insert_accumulation_node = [&linear_ir]( const LinearIR::constExprIt& expr_it, @@ -98,33 +186,48 @@ bool ReduceDecomposition::run(LinearIR& linear_ir, LinearIR::constExprIt begin, // Float constant values in byte representation const auto fill_value = get_initial_value(reduce_type_info); + const auto is_single_iteration = !utils::is_dynamic_value(work_amount) && work_amount == increment; + const auto tail_size = utils::is_dynamic_value(work_amount) ? 1LU : work_amount % increment; + const bool insert_fill_in_loop = is_single_iteration; + const bool insert_fill_in_last_iter = !is_single_iteration && tail_size != 0; // Note: VectorBuffer is a special case, since it should go before the initial Load. 
// The buffer must be initialized with fill_value before reduction const auto vector_buffer = linear_ir.insert_node(expr_it); const auto initial_fill = linear_ir.insert_node(expr_it, vector_buffer.second, 0, fill_value); - // Reduce loop - const auto fill = - linear_ir.insert_node(expr_it, reduce->get_input_source_output(0), increment, fill_value); - const auto accumulation = insert_accumulation_node(expr_it, fill.second, initial_fill.second, reduce_type_info); + ov::Output accumulation_input = reduce->get_input_source_output(0); + LinearIR::constExprIt reduce_loop_begin = expr_it; + ExpressionPort reduce_loop_input_port; + if (insert_fill_in_loop) { + const auto fill = linear_ir.insert_node(expr_it, accumulation_input, increment, fill_value); + accumulation_input = fill.second; + reduce_loop_begin = fill.first; + reduce_loop_input_port = (*fill.first)->get_input_port(0); + } + + const auto accumulation = + insert_accumulation_node(expr_it, accumulation_input, initial_fill.second, reduce_type_info); + if (!insert_fill_in_loop) { + reduce_loop_begin = accumulation.first; + reduce_loop_input_port = (*accumulation.first)->get_input_port(0); + } const auto reduce_loop_id = loop_manager->mark_loop( - fill.first, + reduce_loop_begin, expr_it, work_amount, increment, - {LoopPort::create((*fill.first)->get_input_port(0), 0), + {LoopPort::create(reduce_loop_input_port, 0), LoopPort::create((*accumulation.first)->get_input_port(1), 0)}, {LoopPort::create((*accumulation.first)->get_output_port(0), 0)}); - const auto tail_size = utils::is_dynamic_value(work_amount) ? 
1LU : work_amount % increment; - if (tail_size != 0) { + if (insert_fill_in_last_iter) { const auto loop_info = loop_manager->get_loop_info(reduce_loop_id); - loop_info->register_pass_to_handler(tail_size); + loop_info->register_pass_to_handler(tail_size); } const auto horizon = insert_horizon_node(expr_it, accumulation.second, reduce_type_info); // Transfer original ExpressionPorts - replace_input_port_connectors({fill.first->get()->get_input_port(0)}, reduce_expr->get_input_port_connector(0)); + replace_input_port_connectors({reduce_loop_input_port}, reduce_expr->get_input_port_connector(0)); const auto reduce_consumers = reduce_expr->get_output_port_connector(0)->get_consumers(); replace_input_port_connectors(reduce_consumers, horizon.first->get()->get_output_port_connector(0)); @@ -134,7 +237,7 @@ bool ReduceDecomposition::run(LinearIR& linear_ir, LinearIR::constExprIt begin, } // Update Loop info for outer loops - const std::vector input_ports{(*fill.first)->get_input_port(0)}; + const std::vector input_ports{reduce_loop_input_port}; const std::vector output_ports{(*horizon.first)->get_output_port(0)}; for (auto loop_id : reduce_expr->get_loop_ids()) { loop_manager diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index cd89639b8520e4..115f1bd814c9fc 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -572,29 +572,14 @@ void Subgraph::control_flow_transformations( // 1. AssignRegisters must be called after InsertSpecificIterations since specific loops maybe have // different expressions and connections each other. AssignRegisters should be performed on the expanded // loops. - // 2. EliminateInplaceOps must be called after InsertSpecificIterations to eliminate inplace Fill operations - // that appear with offset == register_capacity after loop decomposition. This resolves ticket 126270. - // 3. 
CleanupLoopOffsets must be called after InsertSpecificIterations to avoid violating the proportionality of + // 2. CleanupLoopOffsets must be called after InsertSpecificIterations to avoid violating the proportionality of // the pointer increments // (this might happen if tail loop and main loop have different increments) - // 4. OptimizeLoopSingleEvaluation must be called after CleanupLoopOffsets + // 3. OptimizeLoopSingleEvaluation must be called after CleanupLoopOffsets // since CleanupLoopOffsets can't handle loops with evaluate_once = true gen_pipeline.register_pass(); // Callback to determine if Fill operation is inplace based on actual register capacity // get_lanes() returns the number of float32 (4-byte) elements that fit in a vector register - const size_t lanes_for_float32 = get_generator()->get_target_machine()->get_lanes(); - auto is_inplace_fill_callback = [lanes_for_float32](size_t offset, size_t element_size) -> bool { - // When offset is 0, Fill fills the entire register (not inplace) - if (offset == 0) { - return false; - } - // Calculate register capacity for the given element size - // Scale from float32 capacity: capacity(T) = capacity(float32) * sizeof(float32) / sizeof(T) - const size_t register_capacity = (lanes_for_float32 * sizeof(float)) / element_size; - // Fill is inplace when offset equals the register capacity - return offset == register_capacity; - }; - gen_pipeline.register_pass(is_inplace_fill_callback); gen_pipeline.register_pass(get_generator(), lowered_pass_config); gen_pipeline.register_pass(); gen_pipeline.register_pass(); diff --git a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/reduce.cpp b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/reduce.cpp index 12b58ebeeb1937..3b2926e2356bee 100644 --- a/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/reduce.cpp +++ b/src/plugins/intel_cpu/tests/functional/shared_tests_instances/snippets/reduce.cpp @@ -13,6 
+13,9 @@ namespace snippets { namespace { const std::vector inputShape = { + {{}, {{1, 3, 128, 1}}}, + {{}, {{1, 3, 128, 7}}}, + {{}, {{1, 3, 128, 9}}}, {{}, {{1, 3, 128, 128}}}, {{}, {{1, 3, 128, 15}}}, {{}, {{1, 3, 15, 16}}}, From a03abcdb8910ffe7f55bbdeccdce5c49f12571fd Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Mon, 9 Feb 2026 19:09:14 +0100 Subject: [PATCH 4/5] Fixes: delete EliminateInplaceOps sources, insert in-loop Fill only if increment < vector size --- .../lowered/pass/eliminate_inplace_ops.hpp | 64 --------------- .../lowered/pass/eliminate_inplace_ops.cpp | 80 ------------------- .../src/lowered/pass/reduce_decomposition.cpp | 9 ++- src/common/snippets/src/op/subgraph.cpp | 1 - 4 files changed, 5 insertions(+), 149 deletions(-) delete mode 100644 src/common/snippets/include/snippets/lowered/pass/eliminate_inplace_ops.hpp delete mode 100644 src/common/snippets/src/lowered/pass/eliminate_inplace_ops.cpp diff --git a/src/common/snippets/include/snippets/lowered/pass/eliminate_inplace_ops.hpp b/src/common/snippets/include/snippets/lowered/pass/eliminate_inplace_ops.hpp deleted file mode 100644 index 75b9aef23a55c9..00000000000000 --- a/src/common/snippets/include/snippets/lowered/pass/eliminate_inplace_ops.hpp +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (C) 2018-2026 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#pragma once - -#include - -#include "openvino/core/rtti.hpp" -#include "snippets/lowered/linear_ir.hpp" -#include "snippets/lowered/pass/pass.hpp" - -namespace ov::snippets::lowered::pass { - -/** - * @interface EliminateInplaceOps - * @brief Eliminates operations that are effectively inplace (input == output). - * Currently handles Fill operations where offset equals register capacity, - * which means the operation doesn't actually fill any new data. - * This pass should run after InsertSpecificIterations and before InitRegisters. 
- * @ingroup snippets - */ -class EliminateInplaceOps : public Pass { -public: - OPENVINO_RTTI("EliminateInplaceOps", "", Pass); - - /** - * @brief Callback type for determining if a Fill operation is inplace. - * Takes offset and element size, returns true if the Fill is inplace. - */ - using IsInplaceFillCallback = std::function; - - /** - * @brief Constructor with callback for inplace detection - * @param is_inplace_fill_callback Function to determine if a Fill is inplace based on offset and element size - */ - explicit EliminateInplaceOps(IsInplaceFillCallback is_inplace_fill_callback); - - /** - * @brief Apply the pass to the Linear IR - * @param linear_ir the target Linear IR - * @return status of the pass (true if any changes were made) - */ - bool run(LinearIR& linear_ir) override; - -private: - /** - * @brief Check if a Fill operation is inplace using the configured callback - * @param fill_expr expression containing Fill operation - * @return true if the Fill operation is inplace and can be eliminated - */ - bool is_inplace_fill(const ExpressionPtr& fill_expr) const; - - /** - * @brief Remove inplace Fill operation from the linear IR - * @param linear_ir the target Linear IR - * @param fill_expr expression containing inplace Fill operation - */ - static void eliminate_fill(LinearIR& linear_ir, const ExpressionPtr& fill_expr); - - IsInplaceFillCallback m_is_inplace_fill_callback; -}; - -} // namespace ov::snippets::lowered::pass diff --git a/src/common/snippets/src/lowered/pass/eliminate_inplace_ops.cpp b/src/common/snippets/src/lowered/pass/eliminate_inplace_ops.cpp deleted file mode 100644 index bd5639b2a8ce69..00000000000000 --- a/src/common/snippets/src/lowered/pass/eliminate_inplace_ops.cpp +++ /dev/null @@ -1,80 +0,0 @@ -// Copyright (C) 2018-2026 Intel Corporation -// SPDX-License-Identifier: Apache-2.0 -// - -#include "snippets/lowered/pass/eliminate_inplace_ops.hpp" - -#include - -#include "openvino/core/except.hpp" -#include 
"openvino/core/type.hpp" -#include "snippets/itt.hpp" -#include "snippets/lowered/expression.hpp" -#include "snippets/lowered/expression_port.hpp" -#include "snippets/lowered/linear_ir.hpp" -#include "snippets/op/fill.hpp" - -namespace ov::snippets::lowered::pass { - -EliminateInplaceOps::EliminateInplaceOps(IsInplaceFillCallback is_inplace_fill_callback) - : m_is_inplace_fill_callback(std::move(is_inplace_fill_callback)) { - OPENVINO_ASSERT(m_is_inplace_fill_callback, "Callback for inplace Fill detection must be provided"); -} - -bool EliminateInplaceOps::is_inplace_fill(const ExpressionPtr& fill_expr) const { - const auto fill = ov::as_type_ptr(fill_expr->get_node()); - if (!fill) { - return false; - } - - const auto offset = fill->get_offset(); - const auto element_size = fill->get_output_element_type(0).size(); - - return m_is_inplace_fill_callback(offset, element_size); -} - -void EliminateInplaceOps::eliminate_fill(LinearIR& linear_ir, const ExpressionPtr& fill_expr) { - // Inplace Fill has one input and one output - // We need to redirect all consumers of the Fill's output to use the Fill's input instead - - OPENVINO_ASSERT(fill_expr->get_input_count() == 1, "Fill should have exactly one input"); - OPENVINO_ASSERT(fill_expr->get_output_count() == 1, "Fill should have exactly one output"); - - const auto& fill_input_connector = fill_expr->get_input_port_connector(0); - const auto& fill_output_connector = fill_expr->get_output_port_connector(0); - - // Get all consumers of this Fill operation - const auto consumers = fill_output_connector->get_consumers(); - - // Redirect all consumers to use the input of Fill directly - lowered::replace_input_port_connectors(consumers, fill_input_connector); - - // Remove Fill from the linear IR - linear_ir.erase(linear_ir.find(fill_expr)); -} - -bool EliminateInplaceOps::run(LinearIR& linear_ir) { - OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::EliminateInplaceOps"); - - bool modified = false; - - 
// Iterate through all expressions and eliminate inplace Fill operations - // We iterate from the end to avoid iterator invalidation issues when erasing - auto expr_it = linear_ir.begin(); - while (expr_it != linear_ir.end()) { - const auto& expr = *expr_it; - - if (is_inplace_fill(expr)) { - eliminate_fill(linear_ir, expr); - modified = true; - // After erasing, we need to reset the iterator - expr_it = linear_ir.begin(); - } else { - ++expr_it; - } - } - - return modified; -} - -} // namespace ov::snippets::lowered::pass diff --git a/src/common/snippets/src/lowered/pass/reduce_decomposition.cpp b/src/common/snippets/src/lowered/pass/reduce_decomposition.cpp index bb95aa6d3e7b0c..9aeb5ed2438aaf 100644 --- a/src/common/snippets/src/lowered/pass/reduce_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/reduce_decomposition.cpp @@ -15,6 +15,7 @@ #include "openvino/core/except.hpp" #include "openvino/core/node.hpp" #include "openvino/core/node_output.hpp" +#include "openvino/core/rtti.hpp" #include "openvino/core/type.hpp" #include "openvino/op/add.hpp" #include "openvino/op/maximum.hpp" @@ -80,7 +81,7 @@ class InsertTailFill : public RangedPass { const auto& accumulation_expr = accumulation_output_it->get_expr_port()->get_expr(); OPENVINO_ASSERT(accumulation_expr, "InsertTailFill failed to get accumulation expression."); - size_t recurrent_input_port_idx = utils::get_dynamic_value(); + auto recurrent_input_port_idx = utils::get_dynamic_value(); for (const auto& input_loop_port : loop_info->get_input_ports()) { const auto& input_port = input_loop_port.get_expr_port(); if (input_port->get_type() == ExpressionPort::Input && input_port->get_expr() == accumulation_expr) { @@ -91,7 +92,7 @@ class InsertTailFill : public RangedPass { OPENVINO_ASSERT(!utils::is_dynamic_value(recurrent_input_port_idx), "InsertTailFill failed to find recurrent accumulation input port."); - size_t data_input_port_idx = utils::get_dynamic_value(); + auto data_input_port_idx = 
utils::get_dynamic_value(); for (size_t i = 0; i < accumulation_expr->get_input_count(); ++i) { if (i != recurrent_input_port_idx) { data_input_port_idx = i; @@ -188,7 +189,7 @@ bool ReduceDecomposition::run(LinearIR& linear_ir, LinearIR::constExprIt begin, const auto fill_value = get_initial_value(reduce_type_info); const auto is_single_iteration = !utils::is_dynamic_value(work_amount) && work_amount == increment; const auto tail_size = utils::is_dynamic_value(work_amount) ? 1LU : work_amount % increment; - const bool insert_fill_in_loop = is_single_iteration; + const bool insert_fill_in_loop = is_single_iteration && increment < m_vector_size; const bool insert_fill_in_last_iter = !is_single_iteration && tail_size != 0; // Note: VectorBuffer is a special case, since it should go before the initial Load. // The buffer must be initialized with fill_value before reduction @@ -196,7 +197,7 @@ bool ReduceDecomposition::run(LinearIR& linear_ir, LinearIR::constExprIt begin, const auto initial_fill = linear_ir.insert_node(expr_it, vector_buffer.second, 0, fill_value); ov::Output accumulation_input = reduce->get_input_source_output(0); - LinearIR::constExprIt reduce_loop_begin = expr_it; + auto reduce_loop_begin = expr_it; ExpressionPort reduce_loop_input_port; if (insert_fill_in_loop) { const auto fill = linear_ir.insert_node(expr_it, accumulation_input, increment, fill_value); diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index 115f1bd814c9fc..d437fe6bdbcd8d 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -48,7 +48,6 @@ #include "snippets/lowered/pass/allocate_buffers.hpp" #include "snippets/lowered/pass/clean_repeated_ptr_shifts.hpp" #include "snippets/lowered/pass/cleanup_loop_offsets.hpp" -#include "snippets/lowered/pass/eliminate_inplace_ops.hpp" #include "snippets/lowered/pass/extract_loop_invariants.hpp" #include "snippets/lowered/pass/fuse_loops.hpp" #include 
"snippets/lowered/pass/init_loops.hpp" From 544e5432e0a446ac19adb3dd7684b840d8dcb414 Mon Sep 17 00:00:00 2001 From: Arseniy Obolenskiy Date: Tue, 10 Feb 2026 19:48:39 +0100 Subject: [PATCH 5/5] Address review comments --- .../snippets/lowered/pass/iter_handler.hpp | 17 ---- .../src/lowered/pass/iter_handler.cpp | 25 ------ .../src/lowered/pass/reduce_decomposition.cpp | 82 +++++++++++-------- src/common/snippets/src/op/subgraph.cpp | 2 - 4 files changed, 49 insertions(+), 77 deletions(-) diff --git a/src/common/snippets/include/snippets/lowered/pass/iter_handler.hpp b/src/common/snippets/include/snippets/lowered/pass/iter_handler.hpp index 450edd324dc1f0..ffdd45376d8061 100644 --- a/src/common/snippets/include/snippets/lowered/pass/iter_handler.hpp +++ b/src/common/snippets/include/snippets/lowered/pass/iter_handler.hpp @@ -31,23 +31,6 @@ class UpdateMemoryAccessCounts : public pass::RangedPass { size_t m_count; }; -/** - * @interface SetFillOffset - * @brief The pass changes offset of all Fill ops - * @param m_offset - offset which must be set - * @ingroup snippets - */ -class SetFillOffset : public pass::RangedPass { -public: - explicit SetFillOffset(size_t offset); - OPENVINO_RTTI("SetFillOffset", "", RangedPass); - bool run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) override; - std::shared_ptr merge(const std::shared_ptr& other) override; - -private: - size_t m_offset; -}; - /** * @interface SetLoopIncrementOne * @brief The pass set `increment = 1` to ExpandedLoopInfo which is mapped on LoopEnd in the passed iterator `end` and diff --git a/src/common/snippets/src/lowered/pass/iter_handler.cpp b/src/common/snippets/src/lowered/pass/iter_handler.cpp index b69aba2fed588d..f59cb1b5123394 100644 --- a/src/common/snippets/src/lowered/pass/iter_handler.cpp +++ b/src/common/snippets/src/lowered/pass/iter_handler.cpp @@ -14,7 +14,6 @@ #include "snippets/lowered/loop_info.hpp" #include "snippets/lowered/loop_manager.hpp" #include 
"snippets/lowered/pass/pass.hpp" -#include "snippets/op/fill.hpp" #include "snippets/op/loop.hpp" #include "snippets/op/memory_access.hpp" #include "snippets/utils/utils.hpp" @@ -67,30 +66,6 @@ std::shared_ptr UpdateMemoryAccessCounts::merge(const std::share return std::make_shared(merged_count); } -SetFillOffset::SetFillOffset(size_t offset) : RangedPass(), m_offset(offset) {} - -bool SetFillOffset::run([[maybe_unused]] LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) { - for (auto expr_it = begin; expr_it != end; expr_it++) { - const auto& node = expr_it->get()->get_node(); - if (const auto fill = ov::as_type_ptr(node)) { - fill->set_offset(m_offset); - } - } - return true; -} - -std::shared_ptr SetFillOffset::merge(const std::shared_ptr& other) { - if (!other) { - return shared_from_this(); - } - const auto casted_pass = ov::as_type_ptr(other); - size_t merged_offset = 0; - if (!casted_pass || !ov::snippets::utils::merge_dynamic_dim(merged_offset, m_offset, casted_pass->m_offset)) { - return nullptr; - } - return std::make_shared(merged_offset); -} - bool SetLoopIncrementOne::run(LinearIR& linear_ir, [[maybe_unused]] LinearIR::constExprIt begin, LinearIR::constExprIt end) { diff --git a/src/common/snippets/src/lowered/pass/reduce_decomposition.cpp b/src/common/snippets/src/lowered/pass/reduce_decomposition.cpp index 9aeb5ed2438aaf..239c8c8237be94 100644 --- a/src/common/snippets/src/lowered/pass/reduce_decomposition.cpp +++ b/src/common/snippets/src/lowered/pass/reduce_decomposition.cpp @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include @@ -20,6 +22,7 @@ #include "openvino/op/add.hpp" #include "openvino/op/maximum.hpp" #include "snippets/itt.hpp" +#include "snippets/lowered/expression.hpp" #include "snippets/lowered/expression_port.hpp" #include "snippets/lowered/linear_ir.hpp" #include "snippets/lowered/loop_info.hpp" @@ -57,6 +60,39 @@ uint32_t get_fill_value_for_accumulation(const std::shared_ptr& 
accumu } OPENVINO_THROW("InsertTailFill supports only Maximum/Add accumulation but got: ", accumulation->get_type_info()); } + +bool is_fill_from_vector_buffer(const ExpressionPtr& expr) { + if (!expr || !ov::is_type(expr->get_node())) { + return false; + } + const auto& parent_expr = expr->get_input_expr_ptr(0); + return parent_expr && ov::is_type(parent_expr->get_node()); +} + +bool is_supported_accumulation(const ExpressionPtr& accumulation_expr) { + return accumulation_expr && ov::is_type_any_of(accumulation_expr->get_node()); +} + +std::optional find_data_input_port_idx(const ExpressionPtr& accumulation_expr) { + if (!accumulation_expr || accumulation_expr->get_input_count() != 2) { + return std::nullopt; + } + const auto input0_is_initial_fill = is_fill_from_vector_buffer(accumulation_expr->get_input_expr_ptr(0)); + const auto input1_is_initial_fill = is_fill_from_vector_buffer(accumulation_expr->get_input_expr_ptr(1)); + if (input0_is_initial_fill == input1_is_initial_fill) { + return std::nullopt; + } + return input0_is_initial_fill ? 
1 : 0; +} + +size_t get_data_input_port_idx(const ExpressionPtr& accumulation_expr) { + OPENVINO_ASSERT(is_supported_accumulation(accumulation_expr), + "InsertTailFill expected Maximum/Add accumulation expression."); + const auto data_input_port_idx = find_data_input_port_idx(accumulation_expr); + OPENVINO_ASSERT(data_input_port_idx.has_value(), + "InsertTailFill failed to detect unique Fill(VectorBuffer) accumulation input."); + return *data_input_port_idx; +} } // namespace class InsertTailFill : public RangedPass { @@ -72,47 +108,27 @@ class InsertTailFill : public RangedPass { const auto& output_ports = loop_info->get_output_ports(); const auto accumulation_output_it = std::find_if(output_ports.begin(), output_ports.end(), [](const LoopPort& output_loop_port) { - const auto& output_expr = output_loop_port.get_expr_port()->get_expr(); - const auto& output_node = output_expr->get_node(); - return ov::is_type_any_of(output_node); + const auto& accumulation_expr = output_loop_port.get_expr_port()->get_expr(); + return is_supported_accumulation(accumulation_expr) && + find_data_input_port_idx(accumulation_expr).has_value(); }); OPENVINO_ASSERT(accumulation_output_it != output_ports.end(), - "InsertTailFill failed to find accumulation output port."); + "InsertTailFill failed to find accumulation output port with Fill(VectorBuffer) input."); const auto& accumulation_expr = accumulation_output_it->get_expr_port()->get_expr(); - OPENVINO_ASSERT(accumulation_expr, "InsertTailFill failed to get accumulation expression."); - - auto recurrent_input_port_idx = utils::get_dynamic_value(); - for (const auto& input_loop_port : loop_info->get_input_ports()) { - const auto& input_port = input_loop_port.get_expr_port(); - if (input_port->get_type() == ExpressionPort::Input && input_port->get_expr() == accumulation_expr) { - recurrent_input_port_idx = input_port->get_index(); - break; - } - } - OPENVINO_ASSERT(!utils::is_dynamic_value(recurrent_input_port_idx), - "InsertTailFill 
failed to find recurrent accumulation input port."); - - auto data_input_port_idx = utils::get_dynamic_value(); - for (size_t i = 0; i < accumulation_expr->get_input_count(); ++i) { - if (i != recurrent_input_port_idx) { - data_input_port_idx = i; - break; - } - } - OPENVINO_ASSERT(!utils::is_dynamic_value(data_input_port_idx), - "InsertTailFill failed to find data accumulation input port."); - + const auto data_input_port_idx = get_data_input_port_idx(accumulation_expr); const auto accumulation_input_port = accumulation_expr->get_input_port(data_input_port_idx); const auto accumulation_it = linear_ir.find(begin, end, accumulation_expr); const auto source = accumulation_expr->get_input_port_connector(data_input_port_idx)->get_source(); const auto source_output = source.get_expr()->get_node()->output(source.get_index()); const auto fill_value = get_fill_value_for_accumulation(accumulation_expr->get_node()); - const auto fill = linear_ir.insert_node(accumulation_it, source_output, m_offset, fill_value); - - fill.first->get()->set_loop_ids(accumulation_expr->get_loop_ids()); - replace_input_port_connectors({accumulation_input_port}, fill.first->get()->get_output_port_connector(0)); - linear_ir.get_loop_manager()->update_loop_ports(*fill.first); + const auto fill_node = std::make_shared(source_output, m_offset, fill_value); + linear_ir.insert_node(fill_node, + std::vector{source}, + accumulation_expr->get_loop_ids(), + true, + accumulation_it, + std::set{accumulation_input_port}); accumulation_expr->updateShapes(); return true; @@ -187,7 +203,7 @@ bool ReduceDecomposition::run(LinearIR& linear_ir, LinearIR::constExprIt begin, // Float constant values in byte representation const auto fill_value = get_initial_value(reduce_type_info); - const auto is_single_iteration = !utils::is_dynamic_value(work_amount) && work_amount == increment; + const auto is_single_iteration = work_amount == increment; const auto tail_size = utils::is_dynamic_value(work_amount) ? 
1LU : work_amount % increment; const bool insert_fill_in_loop = is_single_iteration && increment < m_vector_size; const bool insert_fill_in_last_iter = !is_single_iteration && tail_size != 0; diff --git a/src/common/snippets/src/op/subgraph.cpp b/src/common/snippets/src/op/subgraph.cpp index d437fe6bdbcd8d..1c0fb0386e5da6 100644 --- a/src/common/snippets/src/op/subgraph.cpp +++ b/src/common/snippets/src/op/subgraph.cpp @@ -577,8 +577,6 @@ void Subgraph::control_flow_transformations( // 3. OptimizeLoopSingleEvaluation must be called after CleanupLoopOffsets // since CleanupLoopOffsets can't handle loops with evaluate_once = true gen_pipeline.register_pass(); - // Callback to determine if Fill operation is inplace based on actual register capacity - // get_lanes() returns the number of float32 (4-byte) elements that fit in a vector register gen_pipeline.register_pass(get_generator(), lowered_pass_config); gen_pipeline.register_pass(); gen_pipeline.register_pass();