Merge pull request #1877 from roboflow/DetectionsListRollupFixClassNames2

grzegorz-roboflow · web-flow · commit e7342125327b · 2026-01-06T14:36:42.000+01:00
Preserve class names for rolled up detections
diff --git a/inference/core/workflows/core_steps/fusion/detections_list_rollup/v1.py b/inference/core/workflows/core_steps/fusion/detections_list_rollup/v1.py
@@ -265,6 +265,9 @@ def _merge_keypoint_detections(
                     "class_id": group[0]["class_id"],
                     "mask": None,
                     "keypoint_data": merged_kp_data,
+                    "detection_data": group[0].get(
+                        "detection_data", {}
+                    ),  # Preserve first detection's metadata
                 }
             )
 
@@ -408,6 +411,23 @@ def merge_crop_predictions(
                         "keypoints_confidence"
                     ][j]
 
+            # Collect per-detection data fields to preserve individual detection metadata
+            # This is crucial for preserving class_name and other fields when multiple
+            # detections have the same class_id but different values
+            detection_data = {}
+            for key in child_pred.data.keys():
+                if key not in [
+                    "detection_id",
+                    "parent_id",
+                    "inference_id",
+                    "keypoints_xy",
+                    "keypoints_class_name",
+                    "keypoints_class_id",
+                    "keypoints_confidence",
+                ]:
+                    if j < len(child_pred.data[key]):
+                        detection_data[key] = child_pred.data[key][j]
+
             if has_masks and child_pred.mask is not None:
                 # Instance segmentation - transform mask
                 mask = child_pred.mask[j]
@@ -426,6 +446,7 @@ def merge_crop_predictions(
                         "class_id": class_id,
                         "bbox": None,  # Will compute from mask
                         "keypoint_data": keypoint_data,
+                        "detection_data": detection_data,  # Store per-detection metadata
                     }
                 )
             else:
@@ -446,6 +467,7 @@ def merge_crop_predictions(
                         "class_id": class_id,
                         "mask": None,
                         "keypoint_data": keypoint_data,
+                        "detection_data": detection_data,  # Store per-detection metadata
                     }
                 )
 
@@ -616,8 +638,11 @@ def merge_crop_predictions(
                     # Prediction type should be 'instance-segmentation'
                     merged_data[key].append("instance-segmentation")
                 else:
-                    # For other fields like class_name, use the value associated with this class_id
-                    if (
+                    # For other fields like class_name, check pred dict first (per-detection data)
+                    # then fall back to class_id_to_data (class-level defaults)
+                    if key in pred.get("detection_data", {}):
+                        merged_data[key].append(pred["detection_data"][key])
+                    elif (
                         pred["class_id"] in class_id_to_data
                         and key in class_id_to_data[pred["class_id"]]
                     ):
@@ -789,6 +814,9 @@ def _merge_overlapping_masks(
                         "polygon": poly,
                         "confidence": pred["confidence"],
                         "class_id": pred["class_id"],
+                        "detection_data": pred.get(
+                            "detection_data", {}
+                        ),  # Preserve metadata
                     }
                 )
 
@@ -829,6 +857,9 @@ def _merge_overlapping_masks(
                             "mask": mask,
                             "confidence": merged_confidence,
                             "class_id": class_id,
+                            "detection_data": group[0].get(
+                                "detection_data", {}
+                            ),  # Preserve first detection's metadata
                         }
                     )
         else:
@@ -839,6 +870,9 @@ def _merge_overlapping_masks(
                         "mask": mask,
                         "confidence": merged_confidence,
                         "class_id": class_id,
+                        "detection_data": group[0].get(
+                            "detection_data", {}
+                        ),  # Preserve first detection's metadata
                     }
                 )
 
@@ -891,7 +925,14 @@ def _merge_overlapping_bboxes(
         merged_bbox = np.array([min(x_mins), min(y_mins), max(x_maxs), max(y_maxs)])
 
         merged_results.append(
-            {"bbox": merged_bbox, "confidence": merged_confidence, "class_id": class_id}
+            {
+                "bbox": merged_bbox,
+                "confidence": merged_confidence,
+                "class_id": class_id,
+                "detection_data": group[0].get(
+                    "detection_data", {}
+                ),  # Preserve first detection's metadata
+            }
         )
 
     return merged_results
diff --git a/tests/workflows/integration_tests/execution/test_workflow_with_detections_rollup.py b/tests/workflows/integration_tests/execution/test_workflow_with_detections_rollup.py
@@ -948,3 +948,76 @@ def test_dimension_rollup_with_different_overlap_thresholds(
     assert (
         count_high >= count_0 - 1
     ), "Higher threshold should not significantly reduce detection count"
+
+
+@pytest.mark.skipif(
+    WORKFLOWS_MAX_CONCURRENT_STEPS != -1,
+    reason="Skipping integration test due to WORKFLOWS_MAX_CONCURRENT_STEPS limits",
+)
+def test_detections_list_rollup_preserves_individual_class_names(
+    crowd_image, model_manager: ModelManager
+):
+    """
+    Check that every rolled-up detection carries a usable class_name value.
+
+    Regression guard for the detections_list_rollup step: after the rollup,
+    the "class_name" data field must hold exactly one entry per detection, and
+    for every class_id shared by more than one detection each entry must be
+    non-None and non-blank.
+
+    What the test does:
+    - Runs FULL_DIMENSION_ROLLUP_WORKFLOW on the crowd_image fixture
+    - Reads "rolled_up_detections" from the first result
+    - Asserts the detections are non-empty and class_name is fully populated
+    - Groups class_name values by class_id and asserts each one is valid
+
+    NOTE(review): the test does not build detections that share a class_id but
+    differ in class_name, and it never asserts that names differ or match the
+    pre-rollup values. It therefore cannot by itself detect a regression where
+    all detections of one class_id are given the same class_name - consider
+    adding a test with hand-built detections for that case.
+    """
+    # when
+    execution_engine = ExecutionEngine.init(
+        workflow_definition=FULL_DIMENSION_ROLLUP_WORKFLOW,
+        model_manager=model_manager,
+    )
+
+    result = execution_engine.run(
+        runtime_parameters={
+            "image": crowd_image,
+        }
+    )
+
+    # then
+    assert isinstance(result, list)
+    rolled_up_detections = result[0]["rolled_up_detections"]
+
+    # Guard against a vacuous pass: the checks below need at least one detection
+    assert len(rolled_up_detections) > 0, "Should have detections after rollup"
+
+    # A missing "class_name" field falls back to an empty list, which then fails
+    # the length check below instead of raising a KeyError here
+    class_names = rolled_up_detections.data.get("class_name", [])
+
+    # Exactly one class_name entry is expected per rolled-up detection
+    assert len(class_names) == len(
+        rolled_up_detections
+    ), "Each detection should have a class_name value"
+
+    # Collect the class_name values observed for each class_id so that they
+    # can be validated per class below
+    class_ids = rolled_up_detections.class_id
+    class_name_list = list(class_names)
+
+    # Group detections by class_id
+    class_id_to_names = {}
+    for idx, class_id in enumerate(class_ids):
+        if class_id not in class_id_to_names:
+            class_id_to_names[class_id] = []
+        # Defensive bound check; the length assertion above already implies it
+        if idx < len(class_name_list):
+            class_id_to_names[class_id].append(class_name_list[idx])
+
+    # Soft check: only validity of the names is asserted, not their diversity.
+    # class_ids with a single detection are not validated by this loop.
+    for class_id, names in class_id_to_names.items():
+        if len(names) > 1:
+            # Every name in a shared class_id group must be non-None and must
+            # not be blank once converted to a string and stripped
+            for name in names:
+                assert (
+                    name is not None and str(name).strip() != ""
+                ), f"Class_id {class_id} has detection with invalid class_name: {name}"