Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright (C) 2018-2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <functional>

#include "openvino/core/rtti.hpp"
#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/pass/pass.hpp"

namespace ov::snippets::lowered::pass {

/**
* @interface EliminateInplaceOps
* @brief Eliminates operations that are effectively inplace (input == output).
* Currently handles only Fill operations: a Fill is removed when the user-provided
* callback reports it as inplace (e.g. its offset equals the register capacity,
* which means the operation doesn't actually fill any new data). All consumers of
* the removed Fill are reconnected to the Fill's input.
* This pass should run after InsertSpecificIterations and before InitRegisters.
* @ingroup snippets
*/
class EliminateInplaceOps : public Pass {
public:
OPENVINO_RTTI("EliminateInplaceOps", "", Pass);

/**
* @brief Callback type for determining if a Fill operation is inplace.
* It receives the offset of the Fill (Fill::get_offset()) and the size of the
* element type of the Fill's first output, and returns true if the Fill is inplace.
*/
using IsInplaceFillCallback = std::function<bool(size_t offset, size_t element_size)>;

/**
* @brief Constructor with callback for inplace detection
* @param is_inplace_fill_callback Function to determine if a Fill is inplace based on offset and element size.
* Must be callable: an empty callback is rejected by an assertion in the constructor.
*/
explicit EliminateInplaceOps(IsInplaceFillCallback is_inplace_fill_callback);

/**
* @brief Apply the pass to the Linear IR: every expression recognized as an inplace Fill is removed
* @param linear_ir the target Linear IR
* @return status of the pass (true if at least one Fill was eliminated, false otherwise)
*/
bool run(LinearIR& linear_ir) override;

private:
/**
* @brief Check if a Fill operation is inplace using the configured callback
* @param fill_expr expression to check; it may hold any node type
* @return true if the expression holds a Fill operation that is inplace and can be eliminated,
* false otherwise (including the case when the node is not a Fill)
*/
bool is_inplace_fill(const ExpressionPtr& fill_expr) const;

/**
* @brief Remove inplace Fill operation from the linear IR.
* Consumers of the Fill's output are redirected to the Fill's input before the expression is erased.
* The expression is asserted to have exactly one input and exactly one output.
* @param linear_ir the target Linear IR
* @param fill_expr expression containing inplace Fill operation
*/
static void eliminate_fill(LinearIR& linear_ir, const ExpressionPtr& fill_expr);

// Predicate supplied at construction time; invoked by is_inplace_fill() for each Fill found
IsInplaceFillCallback m_is_inplace_fill_callback;
};

} // namespace ov::snippets::lowered::pass
80 changes: 80 additions & 0 deletions src/common/snippets/src/lowered/pass/eliminate_inplace_ops.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright (C) 2018-2026 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#include "snippets/lowered/pass/eliminate_inplace_ops.hpp"

#include <utility>

#include "openvino/core/except.hpp"
#include "openvino/core/type.hpp"
#include "snippets/itt.hpp"
#include "snippets/lowered/expression.hpp"
#include "snippets/lowered/expression_port.hpp"
#include "snippets/lowered/linear_ir.hpp"
#include "snippets/op/fill.hpp"

namespace ov::snippets::lowered::pass {

// Stores the inplace-detection callback. An empty (non-callable) callback is rejected here,
// so is_inplace_fill() can invoke the stored callback without checking it again.
EliminateInplaceOps::EliminateInplaceOps(IsInplaceFillCallback is_inplace_fill_callback)
: m_is_inplace_fill_callback(std::move(is_inplace_fill_callback)) {
// The member is checked rather than the parameter: the parameter has already been moved from
OPENVINO_ASSERT(m_is_inplace_fill_callback, "Callback for inplace Fill detection must be provided");
}

// Tells whether `fill_expr` holds a snippets Fill op that the configured callback classifies as inplace.
// Expressions holding any other node type are never reported as inplace.
bool EliminateInplaceOps::is_inplace_fill(const ExpressionPtr& fill_expr) const {
    if (const auto fill_op = ov::as_type_ptr<snippets::op::Fill>(fill_expr->get_node())) {
        // The decision itself is delegated to the callback: it gets the Fill offset
        // and the size of the element type of the Fill's first output.
        const auto fill_offset = fill_op->get_offset();
        const auto out_element_size = fill_op->get_output_element_type(0).size();
        return m_is_inplace_fill_callback(fill_offset, out_element_size);
    }
    return false;
}

// Removes an inplace Fill expression from the Linear IR.
// Every consumer of the Fill's output is reconnected to the connector feeding the Fill,
// after which the Fill expression itself is erased from `linear_ir`.
void EliminateInplaceOps::eliminate_fill(LinearIR& linear_ir, const ExpressionPtr& fill_expr) {
    // An inplace Fill is a pure pass-through: exactly one input and exactly one output
    OPENVINO_ASSERT(fill_expr->get_input_count() == 1, "Fill should have exactly one input");
    OPENVINO_ASSERT(fill_expr->get_output_count() == 1, "Fill should have exactly one output");

    const auto& source_connector = fill_expr->get_input_port_connector(0);

    // Take a copy of the consumer set: reconnecting the consumers below detaches them
    // from the Fill's output connector
    const auto fill_consumers = fill_expr->get_output_port_connector(0)->get_consumers();
    lowered::replace_input_port_connectors(fill_consumers, source_connector);

    // Nothing reads the Fill's output anymore, so the expression can be dropped
    const auto fill_pos = linear_ir.find(fill_expr);
    linear_ir.erase(fill_pos);
}

// Runs the pass: walks the Linear IR once, front to back, and removes every expression
// that is_inplace_fill() recognizes.
// @param linear_ir the target Linear IR
// @return true if at least one Fill expression was eliminated, false otherwise
bool EliminateInplaceOps::run(LinearIR& linear_ir) {
    OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::EliminateInplaceOps");

    bool modified = false;

    for (auto expr_it = linear_ir.begin(); expr_it != linear_ir.end();) {
        // Hold the expression by value: eliminate_fill() erases the container element,
        // so a reference obtained from the iterator would dangle afterwards.
        const ExpressionPtr expr = *expr_it;
        // Step to the next expression BEFORE a possible erase. eliminate_fill() erases only
        // `expr` itself, so the advanced iterator keeps pointing to a live element and
        // the traversal does not have to be restarted from the beginning.
        ++expr_it;

        if (is_inplace_fill(expr)) {
            eliminate_fill(linear_ir, expr);
            modified = true;
        }
    }

    return modified;
}

} // namespace ov::snippets::lowered::pass
22 changes: 20 additions & 2 deletions src/common/snippets/src/op/subgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
#include "snippets/lowered/pass/allocate_buffers.hpp"
#include "snippets/lowered/pass/clean_repeated_ptr_shifts.hpp"
#include "snippets/lowered/pass/cleanup_loop_offsets.hpp"
#include "snippets/lowered/pass/eliminate_inplace_ops.hpp"
#include "snippets/lowered/pass/extract_loop_invariants.hpp"
#include "snippets/lowered/pass/fuse_loops.hpp"
#include "snippets/lowered/pass/init_loops.hpp"
Expand Down Expand Up @@ -571,12 +572,29 @@ void Subgraph::control_flow_transformations(
// 1. AssignRegisters must be called after InsertSpecificIterations since specific loops maybe have
// different expressions and connections each other. AssignRegisters should be performed on the expanded
// loops.
// 2. CleanupLoopOffsets must be called after InsertSpecificIterations to avoid violating the proportionality of
// 2. EliminateInplaceOps must be called after InsertSpecificIterations to eliminate inplace Fill operations
// that appear with offset == register_capacity after loop decomposition. This resolves ticket 126270.
// 3. CleanupLoopOffsets must be called after InsertSpecificIterations to avoid violating the proportionality of
// the pointer increments
// (this might happen if tail loop and main loop have different increments)
// 3. OptimizeLoopSingleEvaluation must be called after CleanupLoopOffsets
// 4. OptimizeLoopSingleEvaluation must be called after CleanupLoopOffsets
// since CleanupLoopOffsets can't handle loops with evaluate_once = true
gen_pipeline.register_pass<lowered::pass::InsertSpecificIterations>();
// Callback to determine if Fill operation is inplace based on actual register capacity
// get_lanes() returns the number of float32 (4-byte) elements that fit in a vector register
const size_t lanes_for_float32 = get_generator()->get_target_machine()->get_lanes();
// Predicate for EliminateInplaceOps: tells whether a Fill with the given offset and element size
// (in bytes) leaves the whole register untouched, i.e. offset == register capacity.
auto is_inplace_fill_callback = [lanes_for_float32](size_t offset, size_t element_size) -> bool {
    // When offset is 0, Fill fills the entire register (not inplace).
    // A zero element size cannot be mapped to a register capacity (and would divide by zero below),
    // so such a Fill is conservatively kept in the IR.
    if (offset == 0 || element_size == 0) {
        return false;
    }
    // Calculate register capacity for the given element size
    // Scale from float32 capacity: capacity(T) = capacity(float32) * sizeof(float32) / sizeof(T)
    const size_t register_capacity = (lanes_for_float32 * sizeof(float)) / element_size;
    // Fill is inplace when offset equals the register capacity
    return offset == register_capacity;
};
gen_pipeline.register_pass<lowered::pass::EliminateInplaceOps>(is_inplace_fill_callback);
gen_pipeline.register_pass<lowered::pass::InitRegisters>(get_generator(), lowered_pass_config);
gen_pipeline.register_pass<lowered::pass::NormalizeLoopIDs>();
gen_pipeline.register_pass<lowered::pass::ValidateExpandedLoops>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "emitters/plugin/aarch64/jit_emitter.hpp"
#include "emitters/utils.hpp"
#include "openvino/core/except.hpp"
#include "openvino/core/type.hpp"
#include "openvino/core/type/element_type.hpp"
#include "snippets/lowered/expression.hpp"
Expand Down Expand Up @@ -63,17 +64,12 @@ void jit_fill_emitter::emit_impl(const std::vector<size_t>& in, const std::vecto
template <cpu_isa_t isa>
void jit_fill_emitter::emit_isa(const std::vector<size_t>& in, const std::vector<size_t>& out) const {
const size_t supported_et_size = dnnl::impl::cpu::aarch64::cpu_isa_traits<isa>::vlen / exec_prc_.size();
if (offset == supported_et_size) {
// WA: since AssignRegisters doesn't support inplace logic, Fill ops with offset = register_capacity can't be
// removed from the LIR
// TODO: when inplace is supported, remove such Fill ops from the LIR and remove this logic.
// Ticket: 126270
auto src = in[0];
auto dst = out[0];
if (src != dst) {
h->mov(Xbyak_aarch64::VReg16B(dst), Xbyak_aarch64::VReg16B(src));
}
} else if (is_full_reg()) {
OPENVINO_ASSERT(offset < supported_et_size,
"Fill emitter offset ",
offset,
" exceeds register capacity ",
supported_et_size);
if (is_full_reg()) {
fill_full<isa>(out);
} else {
fill_tail<isa>(in, out);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "emitters/plugin/x64/jit_emitter.hpp"
#include "emitters/utils.hpp"
#include "openvino/core/except.hpp"
#include "openvino/core/type.hpp"
#include "openvino/core/type/element_type.hpp"
#include "snippets/lowered/expression.hpp"
Expand Down Expand Up @@ -79,15 +80,12 @@ void jit_fill_emitter::emit_isa(const std::vector<size_t>& in, const std::vector

const size_t supported_et_size = 4;
const auto register_capacity = (src_vmm.getBit() / 8) / supported_et_size;
if (offset == register_capacity) {
// WA: since AssignRegisters doesn't support inplace logic, Fill ops with offset = register_capacity can't be
// removed from the LIR
// TODO: when inplace is supported, remove such Fill ops from the LIR and remove this logic.
// Ticket: 126270
if (src_vmm.getIdx() != dst_vmm.getIdx()) {
h->uni_vmovups(dst_vmm, src_vmm);
}
} else if (is_full_reg()) {
OPENVINO_ASSERT(offset < register_capacity,
"Fill emitter offset ",
offset,
" exceeds register capacity ",
register_capacity);
if (is_full_reg()) {
fill_full<Vmm>(dst_vmm);
} else {
fill_tail<Vmm>(src_vmm, dst_vmm);
Expand Down
Loading