Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,6 @@ class UpdateMemoryAccessCounts : public pass::RangedPass {
size_t m_count;
};

/**
* @interface SetFillOffset
* @brief Ranged pass that assigns one fixed offset to every Fill op found in the processed expression range.
*        Expressions whose node is not a Fill op are left untouched.
*        Two SetFillOffset passes can be merged when their offsets are compatible (see `merge`).
* @param m_offset - offset which is set on each Fill op in the range
* @ingroup snippets
*/
class SetFillOffset : public pass::RangedPass {
public:
// Stores `offset` as the value that `run` writes to Fill ops.
explicit SetFillOffset(size_t offset);
OPENVINO_RTTI("SetFillOffset", "", RangedPass);
// Walks the expressions in [begin, end) and calls `set_offset(m_offset)` on each Fill node. Always returns true.
bool run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) override;
// Returns this pass when `other` is null, a new SetFillOffset carrying the merged offset when `other` is a
// SetFillOffset with a compatible offset, and nullptr otherwise.
std::shared_ptr<pass::PassBase> merge(const std::shared_ptr<pass::PassBase>& other) override;

private:
// Offset value applied to Fill ops by `run`.
size_t m_offset;
};

/**
* @interface SetLoopIncrementOne
* @brief The pass sets `increment = 1` to ExpandedLoopInfo which is mapped on LoopEnd in the passed iterator `end` and
Expand Down
25 changes: 0 additions & 25 deletions src/common/snippets/src/lowered/pass/iter_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
#include "snippets/lowered/loop_info.hpp"
#include "snippets/lowered/loop_manager.hpp"
#include "snippets/lowered/pass/pass.hpp"
#include "snippets/op/fill.hpp"
#include "snippets/op/loop.hpp"
#include "snippets/op/memory_access.hpp"
#include "snippets/utils/utils.hpp"
Expand Down Expand Up @@ -67,30 +66,6 @@ std::shared_ptr<pass::PassBase> UpdateMemoryAccessCounts::merge(const std::share
return std::make_shared<UpdateMemoryAccessCounts>(merged_count);
}

SetFillOffset::SetFillOffset(size_t offset) : RangedPass(), m_offset(offset) {}

// Sets the stored offset on every Fill op located in [begin, end).
// `linear_ir` is not used; the function always returns true.
bool SetFillOffset::run([[maybe_unused]] LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) {
    auto current = begin;
    while (current != end) {
        // Only Fill nodes are affected; any other expression is skipped.
        const auto fill_op = ov::as_type_ptr<ov::snippets::op::Fill>((*current)->get_node());
        if (fill_op) {
            fill_op->set_offset(m_offset);
        }
        ++current;
    }
    return true;
}

// Merges this pass with `other`.
// - `other` is null: this pass is returned unchanged.
// - `other` is not a SetFillOffset, or `merge_dynamic_dim` rejects the pair of offsets: nullptr is returned.
// - otherwise: a new SetFillOffset holding the offset produced by `merge_dynamic_dim` is returned.
std::shared_ptr<pass::PassBase> SetFillOffset::merge(const std::shared_ptr<pass::PassBase>& other) {
    if (!other) {
        return shared_from_this();
    }

    const auto rhs = ov::as_type_ptr<SetFillOffset>(other);
    if (!rhs) {
        return nullptr;
    }

    size_t merged_offset = 0;
    const bool offsets_compatible = ov::snippets::utils::merge_dynamic_dim(merged_offset, m_offset, rhs->m_offset);
    if (!offsets_compatible) {
        return nullptr;
    }
    return std::make_shared<SetFillOffset>(merged_offset);
}

bool SetLoopIncrementOne::run(LinearIR& linear_ir,
[[maybe_unused]] LinearIR::constExprIt begin,
LinearIR::constExprIt end) {
Expand Down
162 changes: 141 additions & 21 deletions src/common/snippets/src/lowered/pass/reduce_decomposition.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,52 +4,157 @@

#include "snippets/lowered/pass/reduce_decomposition.hpp"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>
#include <optional>
#include <set>
#include <utility>
#include <vector>

#include "openvino/core/except.hpp"
#include "openvino/core/node.hpp"
#include "openvino/core/node_output.hpp"
#include "openvino/core/rtti.hpp"
#include "openvino/core/type.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/maximum.hpp"
#include "snippets/itt.hpp"
#include "snippets/lowered/expression.hpp"
#include "snippets/lowered/expression_port.hpp"
#include "snippets/lowered/linear_ir.hpp"
#include "snippets/lowered/loop_info.hpp"
#include "snippets/lowered/loop_manager.hpp"
#include "snippets/lowered/loop_port.hpp"
#include "snippets/lowered/pass/iter_handler.hpp"
#include "snippets/lowered/pass/pass.hpp"
#include "snippets/lowered/specific_loop_iter_types.hpp"
#include "snippets/op/fill.hpp"
#include "snippets/op/horizon_max.hpp"
#include "snippets/op/horizon_sum.hpp"
#include "snippets/op/loop.hpp"
#include "snippets/op/memory_access.hpp"
#include "snippets/op/reduce.hpp"
#include "snippets/op/vector_buffer.hpp"
#include "snippets/utils/utils.hpp"

namespace ov::snippets::lowered::pass {

namespace {
// Returns the 32-bit pattern used to initialize the accumulator for the given Reduce type.
// Only ReduceMax and ReduceSum are known; any other type fails the assertion below.
uint32_t get_initial_value(const ov::DiscreteTypeInfo& type_info) {
    // Interpreted as IEEE-754 single precision, 0xff7fffff is the lowest finite float (-FLT_MAX)
    // and 0x00000000 is +0.0f.
    constexpr uint32_t reduce_max_init_bits = 0xff7fffff;
    constexpr uint32_t reduce_sum_init_bits = 0x00000000;

    static const std::map<ov::DiscreteTypeInfo, uint32_t> reduce_initial_values{
        {op::ReduceMax::get_type_info_static(), reduce_max_init_bits},
        {op::ReduceSum::get_type_info_static(), reduce_sum_init_bits},
    };

    OPENVINO_ASSERT(reduce_initial_values.count(type_info), "Unexpected ReduceType");
    // The key is guaranteed to exist after the assertion above.
    return reduce_initial_values.find(type_info)->second;
}

// Maps an accumulation node to the fill value of the matching Reduce type:
// Maximum -> initial value of ReduceMax, Add -> initial value of ReduceSum.
// Throws for any other node type.
uint32_t get_fill_value_for_accumulation(const std::shared_ptr<ov::Node>& accumulation) {
    const bool is_maximum = ov::is_type<ov::op::v1::Maximum>(accumulation);
    if (!is_maximum && !ov::is_type<ov::op::v1::Add>(accumulation)) {
        OPENVINO_THROW("InsertTailFill supports only Maximum/Add accumulation but got: ", accumulation->get_type_info());
    }
    return get_initial_value(is_maximum ? op::ReduceMax::get_type_info_static()
                                        : op::ReduceSum::get_type_info_static());
}

// Checks whether `expr` is a Fill op whose first input is produced by a VectorBuffer op.
// A null expression or a null producer yields false.
bool is_fill_from_vector_buffer(const ExpressionPtr& expr) {
    const bool is_fill = expr && ov::is_type<op::Fill>(expr->get_node());
    if (!is_fill) {
        return false;
    }

    const auto& producer = expr->get_input_expr_ptr(0);
    if (!producer) {
        return false;
    }
    return ov::is_type<op::VectorBuffer>(producer->get_node());
}

// Returns true when `accumulation_expr` is non-null and its node is either Maximum or Add.
bool is_supported_accumulation(const ExpressionPtr& accumulation_expr) {
    if (!accumulation_expr) {
        return false;
    }
    return ov::is_type_any_of<ov::op::v1::Maximum, ov::op::v1::Add>(accumulation_expr->get_node());
}

// Finds the index of the accumulation input that carries data, i.e. the one that is NOT fed by the
// Fill(VectorBuffer) initial value.
// Returns std::nullopt when the expression is null, does not have exactly two inputs, or when the
// Fill(VectorBuffer) input is not unique (neither or both inputs match).
std::optional<size_t> find_data_input_port_idx(const ExpressionPtr& accumulation_expr) {
    const bool is_binary = accumulation_expr && accumulation_expr->get_input_count() == 2;
    if (!is_binary) {
        return std::nullopt;
    }

    const bool initial_fill_on_port0 = is_fill_from_vector_buffer(accumulation_expr->get_input_expr_ptr(0));
    const bool initial_fill_on_port1 = is_fill_from_vector_buffer(accumulation_expr->get_input_expr_ptr(1));
    if (initial_fill_on_port0 != initial_fill_on_port1) {
        // Exactly one input is the initial fill, so the data arrives through the other port.
        return initial_fill_on_port0 ? size_t{1} : size_t{0};
    }
    return std::nullopt;
}

// Asserting counterpart of `find_data_input_port_idx`.
// Fails when `accumulation_expr` is not a Maximum/Add expression or when its data input cannot be
// identified unambiguously; otherwise returns the index of the data input port.
size_t get_data_input_port_idx(const ExpressionPtr& accumulation_expr) {
    OPENVINO_ASSERT(is_supported_accumulation(accumulation_expr),
                    "InsertTailFill expected Maximum/Add accumulation expression.");

    const auto data_input_port_idx = find_data_input_port_idx(accumulation_expr);
    OPENVINO_ASSERT(data_input_port_idx.has_value(),
                    "InsertTailFill failed to detect unique Fill(VectorBuffer) accumulation input.");

    // The optional is known to hold a value at this point.
    return data_input_port_idx.value();
}
} // namespace

class InsertTailFill : public RangedPass {
public:
explicit InsertTailFill(size_t offset) : RangedPass(), m_offset(offset) {}
OPENVINO_RTTI("InsertTailFill", "", RangedPass);

bool run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) override {
OPENVINO_ASSERT(begin != end, "InsertTailFill expects non-empty range.");
const auto& loop_end = ov::as_type_ptr<op::LoopEnd>(end->get()->get_node());
OPENVINO_ASSERT(loop_end, "InsertTailFill expected LoopEnd node in iterator `end`.");
const auto& loop_info = linear_ir.get_loop_manager()->get_loop_info<ExpandedLoopInfo>(loop_end->get_id());
const auto& output_ports = loop_info->get_output_ports();
const auto accumulation_output_it =
std::find_if(output_ports.begin(), output_ports.end(), [](const LoopPort& output_loop_port) {
const auto& accumulation_expr = output_loop_port.get_expr_port()->get_expr();
return is_supported_accumulation(accumulation_expr) &&
find_data_input_port_idx(accumulation_expr).has_value();
});
OPENVINO_ASSERT(accumulation_output_it != output_ports.end(),
"InsertTailFill failed to find accumulation output port with Fill(VectorBuffer) input.");
const auto& accumulation_expr = accumulation_output_it->get_expr_port()->get_expr();
const auto data_input_port_idx = get_data_input_port_idx(accumulation_expr);
const auto accumulation_input_port = accumulation_expr->get_input_port(data_input_port_idx);
const auto accumulation_it = linear_ir.find(begin, end, accumulation_expr);

const auto source = accumulation_expr->get_input_port_connector(data_input_port_idx)->get_source();
const auto source_output = source.get_expr()->get_node()->output(source.get_index());
const auto fill_value = get_fill_value_for_accumulation(accumulation_expr->get_node());
const auto fill_node = std::make_shared<op::Fill>(source_output, m_offset, fill_value);
linear_ir.insert_node(fill_node,
std::vector<ExpressionPort>{source},
accumulation_expr->get_loop_ids(),
true,
accumulation_it,
std::set<ExpressionPort>{accumulation_input_port});
accumulation_expr->updateShapes();

return true;
}

std::shared_ptr<PassBase> merge(const std::shared_ptr<PassBase>& other) override {
if (!other) {
return shared_from_this();
}
const auto casted_pass = ov::as_type_ptr<InsertTailFill>(other);
size_t merged_offset = 0;
if (!casted_pass || !ov::snippets::utils::merge_dynamic_dim(merged_offset, m_offset, casted_pass->m_offset)) {
return nullptr;
}
return std::make_shared<InsertTailFill>(merged_offset);
}

private:
size_t m_offset = 0;
};

// Creates the pass and stores `vector_size` for later use by `run`.
ReduceDecomposition::ReduceDecomposition(size_t vector_size) : RangedPass(), m_vector_size(vector_size) {}

bool ReduceDecomposition::run(LinearIR& linear_ir, LinearIR::constExprIt begin, LinearIR::constExprIt end) {
OV_ITT_SCOPED_TASK(ov::pass::itt::domains::SnippetsTransform, "Snippets::ReduceMaxDecompositionLowered")

auto get_initial_value = [](const ov::DiscreteTypeInfo& type_info) {
static const std::map<ov::DiscreteTypeInfo, uint32_t> reduce_initial_values{
{op::ReduceMax::get_type_info_static(), static_cast<uint32_t>(0xff7fffff)},
{op::ReduceSum::get_type_info_static(), static_cast<uint32_t>(0x00000000)},
};
OPENVINO_ASSERT(reduce_initial_values.count(type_info), "Unexpected ReduceType");
return reduce_initial_values.at(type_info);
};

auto insert_accumulation_node =
[&linear_ir](
const LinearIR::constExprIt& expr_it,
Expand Down Expand Up @@ -98,33 +203,48 @@ bool ReduceDecomposition::run(LinearIR& linear_ir, LinearIR::constExprIt begin,

// Float constant values in byte representation
const auto fill_value = get_initial_value(reduce_type_info);
const auto is_single_iteration = work_amount == increment;
const auto tail_size = utils::is_dynamic_value(work_amount) ? 1LU : work_amount % increment;
const bool insert_fill_in_loop = is_single_iteration && increment < m_vector_size;
const bool insert_fill_in_last_iter = !is_single_iteration && tail_size != 0;
// Note: VectorBuffer is a special case, since it should go before the initial Load.
// The buffer must be initialized with fill_value before reduction
const auto vector_buffer = linear_ir.insert_node<op::VectorBuffer>(expr_it);
const auto initial_fill = linear_ir.insert_node<op::Fill>(expr_it, vector_buffer.second, 0, fill_value);

// Reduce loop
const auto fill =
linear_ir.insert_node<op::Fill>(expr_it, reduce->get_input_source_output(0), increment, fill_value);
const auto accumulation = insert_accumulation_node(expr_it, fill.second, initial_fill.second, reduce_type_info);
ov::Output<ov::Node> accumulation_input = reduce->get_input_source_output(0);
auto reduce_loop_begin = expr_it;
ExpressionPort reduce_loop_input_port;
if (insert_fill_in_loop) {
const auto fill = linear_ir.insert_node<op::Fill>(expr_it, accumulation_input, increment, fill_value);
accumulation_input = fill.second;
reduce_loop_begin = fill.first;
reduce_loop_input_port = (*fill.first)->get_input_port(0);
}

const auto accumulation =
insert_accumulation_node(expr_it, accumulation_input, initial_fill.second, reduce_type_info);
if (!insert_fill_in_loop) {
reduce_loop_begin = accumulation.first;
reduce_loop_input_port = (*accumulation.first)->get_input_port(0);
}

const auto reduce_loop_id = loop_manager->mark_loop(
fill.first,
reduce_loop_begin,
expr_it,
work_amount,
increment,
{LoopPort::create<LoopPort::Type::Incremented>((*fill.first)->get_input_port(0), 0),
{LoopPort::create<LoopPort::Type::Incremented>(reduce_loop_input_port, 0),
LoopPort::create<LoopPort::Type::Incremented>((*accumulation.first)->get_input_port(1), 0)},
{LoopPort::create<LoopPort::Type::Incremented>((*accumulation.first)->get_output_port(0), 0)});
const auto tail_size = utils::is_dynamic_value(work_amount) ? 1LU : work_amount % increment;
if (tail_size != 0) {
if (insert_fill_in_last_iter) {
const auto loop_info = loop_manager->get_loop_info<UnifiedLoopInfo>(reduce_loop_id);
loop_info->register_pass_to_handler<SpecificLoopIterType::LAST_ITER, SetFillOffset>(tail_size);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like we can remove SetFillOffset pass

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done, thanks

loop_info->register_pass_to_handler<SpecificLoopIterType::LAST_ITER, InsertTailFill>(tail_size);
}
const auto horizon = insert_horizon_node(expr_it, accumulation.second, reduce_type_info);

// Transfer original ExpressionPorts
replace_input_port_connectors({fill.first->get()->get_input_port(0)}, reduce_expr->get_input_port_connector(0));
replace_input_port_connectors({reduce_loop_input_port}, reduce_expr->get_input_port_connector(0));
const auto reduce_consumers = reduce_expr->get_output_port_connector(0)->get_consumers();
replace_input_port_connectors(reduce_consumers, horizon.first->get()->get_output_port_connector(0));

Expand All @@ -134,7 +254,7 @@ bool ReduceDecomposition::run(LinearIR& linear_ir, LinearIR::constExprIt begin,
}

// Update Loop info for outer loops
const std::vector<ExpressionPort> input_ports{(*fill.first)->get_input_port(0)};
const std::vector<ExpressionPort> input_ports{reduce_loop_input_port};
const std::vector<ExpressionPort> output_ports{(*horizon.first)->get_output_port(0)};
for (auto loop_id : reduce_expr->get_loop_ids()) {
loop_manager
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "emitters/plugin/aarch64/jit_emitter.hpp"
#include "emitters/utils.hpp"
#include "openvino/core/except.hpp"
#include "openvino/core/type.hpp"
#include "openvino/core/type/element_type.hpp"
#include "snippets/lowered/expression.hpp"
Expand Down Expand Up @@ -63,17 +64,12 @@ void jit_fill_emitter::emit_impl(const std::vector<size_t>& in, const std::vecto
template <cpu_isa_t isa>
void jit_fill_emitter::emit_isa(const std::vector<size_t>& in, const std::vector<size_t>& out) const {
const size_t supported_et_size = dnnl::impl::cpu::aarch64::cpu_isa_traits<isa>::vlen / exec_prc_.size();
if (offset == supported_et_size) {
// WA: since AssignRegisters doesn't support inplace logic, Fill ops with offset = register_capacity can't be
// removed from the LIR
// TODO: when inplace is supported, remove such Fill ops from the LIR and remove this logic.
// Ticket: 126270
auto src = in[0];
auto dst = out[0];
if (src != dst) {
h->mov(Xbyak_aarch64::VReg16B(dst), Xbyak_aarch64::VReg16B(src));
}
} else if (is_full_reg()) {
OPENVINO_ASSERT(offset < supported_et_size,
"Fill emitter offset ",
offset,
" exceeds register capacity ",
supported_et_size);
if (is_full_reg()) {
fill_full<isa>(out);
} else {
fill_tail<isa>(in, out);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

#include "emitters/plugin/x64/jit_emitter.hpp"
#include "emitters/utils.hpp"
#include "openvino/core/except.hpp"
#include "openvino/core/type.hpp"
#include "openvino/core/type/element_type.hpp"
#include "snippets/lowered/expression.hpp"
Expand Down Expand Up @@ -79,15 +80,12 @@ void jit_fill_emitter::emit_isa(const std::vector<size_t>& in, const std::vector

const size_t supported_et_size = 4;
const auto register_capacity = (src_vmm.getBit() / 8) / supported_et_size;
if (offset == register_capacity) {
// WA: since AssignRegisters doesn't support inplace logic, Fill ops with offset = register_capacity can't be
// removed from the LIR
// TODO: when inplace is supported, remove such Fill ops from the LIR and remove this logic.
// Ticket: 126270
if (src_vmm.getIdx() != dst_vmm.getIdx()) {
h->uni_vmovups(dst_vmm, src_vmm);
}
} else if (is_full_reg()) {
OPENVINO_ASSERT(offset < register_capacity,
"Fill emitter offset ",
offset,
" exceeds register capacity ",
register_capacity);
if (is_full_reg()) {
fill_full<Vmm>(dst_vmm);
} else {
fill_tail<Vmm>(src_vmm, dst_vmm);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@ namespace snippets {
namespace {

const std::vector<InputShape> inputShape = {
{{}, {{1, 3, 128, 1}}},
{{}, {{1, 3, 128, 7}}},
{{}, {{1, 3, 128, 9}}},
{{}, {{1, 3, 128, 128}}},
{{}, {{1, 3, 128, 15}}},
{{}, {{1, 3, 15, 16}}},
Expand Down
Loading