@@ -626,9 +626,12 @@ void Partitioner::identifySubgraphs() {
626626 // Exception: param is registered via slice or convert
627627 const auto & links_from = result_cache.at (src_node);
628628 NPUW_ASSERT (links_from.size () > 0 );
629+ // Note: It may happen that one output layer has more than one
630+ // Result node, and these Result nodes are the same!
631+ // Please see the `result_cache` filling below.
629632 if (links_from.size () > 1 ) {
630633 LOG_INFO (" Parameter " << this_param->get_friendly_name ()
631- << " has more than one possible Result nodes to connect!"
634+ << " has more than one possible similar Result nodes to connect!"
632635 << " Will pick the first one: " << (*links_from.begin ()).second );
633636 }
634637 const auto link_to = LinkPtrTo{this_group_idx, this_param};
@@ -656,9 +659,6 @@ void Partitioner::identifySubgraphs() {
656659 // set as part of kvcache conversion routine.
657660 LOG_BLOCK ();
658661 std::set<std::string> output_layers_cache (group.output_layers .begin (), group.output_layers .end ());
659- for (auto && ol_name : group.output_layers ) {
660- LOG_VERB (" Initially registered output layer: " << ol_name);
661- }
662662
663663 // Have to switch clang-format here to make cpplint happy
664664 // clang-format off
@@ -709,9 +709,14 @@ void Partitioner::identifySubgraphs() {
709709 // v
710710 // op102
711711 bool has_external_readers = false ;
712- // NB: It turns out sometime we may end up with multiple
713- // readers from the output node and more than 1 of them
714- // will be the Result node!
712+ // NB: It turns out that sometimes we may end up with multiple
713+ // Result nodes from one output layer, but they should be equal.
714+ // Ex.: OmniThinker multi-outputs case with cut LM head.
715+ // Output embeddings (not logits) become a Result of the
716+ // prefill/kvcache model when the LM head is cut.
717+ // However, the last operation before the LM head already had
718+ // a connected Result node corresponding to the second
719+ // output of the original model.
715720 std::vector<NodeSPtr> maybe_results;
716721 auto readers = output_desc.get_target_inputs ();
716721 // This is possible when some of the layer's outputs are not used in the model.
@@ -732,13 +737,20 @@ void Partitioner::identifySubgraphs() {
732737 }
733738 }
734739 if (!maybe_results.empty ()) {
735- for (auto && mr: maybe_results) {
736- LOG_VERB (mr);
737- }
738740 // This layer's output was connected to Result already.
739741 // It happens when this layer is the original model's output
740742 // Keep it to make the ugly top-level I/O matching procedure work.
741743 // FIXME: This needs to be refactored
744+
745+ // Sanity check that if layer output connects with multiple Result nodes,
746+ // then all Result nodes share the same shape.
747+ if (maybe_results.size () > 1 ) {
748+ const auto shape = (*maybe_results.begin ())->get_shape ();
749+ for (int i = 1 ; i < maybe_results.size (); ++i) {
750+ OPENVINO_ASSERT (shape == maybe_results[i]->get_shape (),
751+ " Multiple results from one output layer should be similar!" );
752+ }
753+ }
742754 for (auto && mr : maybe_results) {
743755 group.sg ._results .push_back (ov::as_type_ptr<ov::op::v0::Result>(mr));
744756 result_cache[output_desc].push_back (
@@ -772,6 +784,7 @@ void Partitioner::identifySubgraphs() {
772784 }
773785 auto new_result = std::make_shared<ov::op::v0::Result>(result_src);
774786 result_cache[output_desc].push_back (LinkPtrFrom{this_group_idx, new_result});
787+
775788 ov::copy_runtime_info (output_desc.get_node_shared_ptr (), new_result);
776789 group.sg ._results .push_back (std::move (new_result));
777790 }
@@ -2446,28 +2459,18 @@ void Partitioner::finalizeLinks() {
24462459 // result order.. but how? But how... see above, the complexity
24472460 // is there.
24482461
2449- LOG_VERB (" before get_idx_param" );
24502462 std::size_t subgraph_idx_to;
24512463 PPtr subgraph_param_to;
24522464 std::tie (subgraph_idx_to, subgraph_param_to) = ptr_link.first ;
2453- LOG_VERB (" subgraph_idx_to: " << subgraph_idx_to);
2454- LOG_VERB (" subgraph_param_to: " << subgraph_param_to);
24552465 auto param_idx = get_idx_param (subgraph_idx_to, subgraph_param_to);
2456- LOG_VERB (" after get_idx_param" );
24572466
2458- LOG_VERB (" before get_idx_result" );
24592467 std::size_t subgraph_idx_from;
24602468 RPtr subgraph_result_from;
24612469 std::tie (subgraph_idx_from, subgraph_result_from) = ptr_link.second ;
2462- LOG_VERB (" subgraph_idx_from: " << subgraph_idx_from);
2463- LOG_VERB (" subgraph_result_from: " << subgraph_result_from);
24642470 auto result_idx = get_idx_result (subgraph_idx_from, subgraph_result_from);
2465- LOG_VERB (" after get_idx_result" );
24662471
2467- LOG_VERB (" before subgraph_links" );
24682472 subgraph_links[ov::npuw::LinkTo{subgraph_idx_to, param_idx}] =
24692473 ov::npuw::LinkFrom{subgraph_idx_from, result_idx};
2470- LOG_VERB (" after subgraph_links" );
24712474
24722475 LOG_BLOCK ();
24732476 LOG_DEBUG (" Record link [" << subgraph_idx_to << " ]:" << param_idx << " <--- [" << subgraph_idx_from << " ]/"
@@ -2560,11 +2563,8 @@ ov::npuw::Partitioning ov::npuw::getPartitioning(const std::shared_ptr<ov::Model
25602563 p.saveRepeatedConstants (func_group);
25612564 p.saveTailDictConstants (func_group);
25622565 p.matchParameters (func_group);
2563- std::cout << " here" << std::endl;
25642566 p.matchResults (func_group);
2565- std::cout << " here 2" << std::endl;
25662567 p.matchRepeatedSubgraphs (func_group);
2567-
25682568 p.spatial (func_group);
25692569 p.attention (func_group);
25702570 p.optimize (func_group);
0 commit comments