AyushExel · AyushExel · Dec 2, 2024 · Dec 2, 2024
diff --git a/.gitignore b/.gitignore
@@ -18,7 +18,8 @@ env/
 *.egg-info
 dist/
 build/
-data/
+
+/data/
 weights/
 output/
 *.jpg

diff --git a/trolo/configs/yaml/include/dataloader.yml b/trolo/configs/yaml/include/dataloader.yml
@@ -0,0 +1,38 @@
+
+train_dataloader: 
+  dataset: 
+    transforms:
+      ops:
+        - {type: RandomPhotometricDistort, p: 0.5}
+        - {type: RandomZoomOut, fill: 0}
+        - {type: RandomIoUCrop, p: 0.8}
+        - {type: SanitizeBoundingBoxes, min_size: 1}
+        - {type: RandomHorizontalFlip}
+        - {type: Resize, size: [640, 640], }
+        - {type: SanitizeBoundingBoxes, min_size: 1}
+        - {type: ConvertPILImage, dtype: 'float32', scale: True}   
+        - {type: ConvertBoxes, fmt: 'cxcywh', normalize: True}
+      policy:
+        name: stop_epoch
+        epoch: 71 # for epochs in [71, inf), stop applying the transforms listed in `ops` below
+        ops: ['RandomPhotometricDistort', 'RandomZoomOut', 'RandomIoUCrop']
+
+  collate_fn:
+    type: BatchImageCollateFunction
+    scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
+    stop_epoch: 71 # for epochs in [71, inf), stop using the multiple `scales` above
+
+  shuffle: True
+  total_batch_size: 16 # total batch size is 16 (4 * 4)
+  num_workers: 4
+
+
+val_dataloader:
+  dataset: 
+    transforms:
+      ops: 
+        - {type: Resize, size: [640, 640]}
+        - {type: ConvertPILImage, dtype: 'float32', scale: True}   
+  shuffle: False
+  total_batch_size: 32
+  num_workers: 4
diff --git a/trolo/configs/yaml/include/optimizer.yml b/trolo/configs/yaml/include/optimizer.yml
@@ -0,0 +1,37 @@
+
+use_amp: True
+use_ema: True 
+ema:
+  type: ModelEMA
+  decay: 0.9999
+  warmups: 2000
+
+
+epoches: 72
+clip_max_norm: 0.1
+
+
+optimizer:
+  type: AdamW
+  params: 
+    - 
+      params: '^(?=.*backbone)(?!.*norm).*$'
+      lr: 0.00001
+    - 
+      params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
+      weight_decay: 0.
+
+  lr: 0.0001
+  betas: [0.9, 0.999]
+  weight_decay: 0.0001
+
+
+lr_scheduler:
+  type: MultiStepLR
+  milestones: [1000]
+  gamma: 0.1
+
+
+lr_warmup_scheduler:
+  type: LinearWarmup
+  warmup_duration: 2000
diff --git a/trolo/configs/yaml/rtdetrv2/base.yml b/trolo/configs/yaml/rtdetrv2/base.yml
@@ -0,0 +1,82 @@
+task: detection
+
+model: RTDETR
+criterion: RTDETRCriterionv2
+postprocessor: RTDETRPostProcessor
+
+
+use_focal_loss: True
+eval_spatial_size: [640, 640] # h w
+
+
+RTDETR: 
+  backbone: PResNet
+  encoder: HybridEncoder
+  decoder: RTDETRTransformerv2
+
+
+PResNet:
+  depth: 50
+  variant: d
+  freeze_at: 0
+  return_idx: [1, 2, 3]
+  num_stages: 4
+  freeze_norm: True
+  pretrained: True 
+
+
+HybridEncoder:
+  in_channels: [512, 1024, 2048]
+  feat_strides: [8, 16, 32]
+
+  # intra
+  hidden_dim: 256
+  use_encoder_idx: [2]
+  num_encoder_layers: 1
+  nhead: 8
+  dim_feedforward: 1024
+  dropout: 0.
+  enc_act: 'gelu'
+
+  # cross
+  expansion: 1.0
+  depth_mult: 1
+  act: 'silu'
+
+
+RTDETRTransformerv2:
+  feat_channels: [256, 256, 256]
+  feat_strides: [8, 16, 32]
+  hidden_dim: 256
+  num_levels: 3
+
+  num_layers: 6
+  num_queries: 300
+
+  num_denoising: 100
+  label_noise_ratio: 0.5
+  box_noise_scale: 1.0 # 1.0 0.4
+
+  eval_idx: -1
+
+  # NEW
+  num_points: [4, 4, 4] # [3,3,3] [2,2,2]
+  cross_attn_method: default # default, discrete
+  query_select_method: default # default, agnostic 
+
+
+RTDETRPostProcessor:
+  num_top_queries: 300
+
+
+RTDETRCriterionv2:
+  weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,}
+  losses: ['vfl', 'boxes', ]
+  alpha: 0.75
+  gamma: 2.0
+
+  matcher:
+    type: HungarianMatcher
+    weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
+    alpha: 0.25
+    gamma: 2.0
diff --git a/...onfigs/yaml/rt-detrv2/rtdetrv2_s_coco.yml → trolo/configs/yaml/rtdetrv2/rtdetrv2_s.yml b/...onfigs/yaml/rt-detrv2/rtdetrv2_s_coco.yml → trolo/configs/yaml/rtdetrv2/rtdetrv2_s.yml
@@ -1,13 +1,13 @@
 __include__: [
-  '../dataset/coco_detection.yml',
+  '../dataset/dummy_coco.yml',
   '../runtime.yml',
-  './include/dataloader.yml',
-  './include/optimizer.yml',
-  './include/rtdetrv2_r50vd.yml',
+  '../include/dataloader.yml',
+  '../include/optimizer.yml',
+  'base.yml',
 ]
 
 
-output_dir: ./output/rtdetrv2_s_coco
+output_dir: ./output/rtdetrv2_r18vd_120e_coco
 
 
 PResNet:

diff --git a/trolo/data/dataloader.py b/trolo/data/dataloader.py
@@ -89,10 +89,13 @@ def __init__(
         ema_restart_decay=0.9999,
         base_size=640,
         base_size_repeat=None,
+        scales=None,
     ) -> None:
         super().__init__()
         self.base_size = base_size
-        self.scales = generate_scales(base_size, base_size_repeat) if base_size_repeat is not None else None
+        self.scales = scales
+        if scales is None:
+            self.scales = generate_scales(base_size, base_size_repeat) if base_size_repeat is not None else None
         self.stop_epoch = stop_epoch if stop_epoch is not None else 100000000
         self.ema_restart_decay = ema_restart_decay
         # self.interpolation = interpolation

diff --git a/trolo/loaders/maps.py b/trolo/loaders/maps.py
@@ -1,6 +1,7 @@
 from pathlib import Path
 from typing import Dict
 from trolo.models.dfine.maps import MODEL_CONFIG_MAP as DFINE_MODEL_CONFIG_MAP
+from trolo.models.rtdetrv2.maps import MODEL_CONFIG_MAP as RTDETRV2_MODEL_CONFIG_MAP
 
 # Get package root directory
 PKG_ROOT = Path(__file__).parent.parent
@@ -9,6 +10,7 @@
 # Map of model names to their config files
 MODEL_CONFIG_MAP = {
     **DFINE_MODEL_CONFIG_MAP,
+    **RTDETRV2_MODEL_CONFIG_MAP,
 }
 
 

diff --git a/trolo/models/__init__.py b/trolo/models/__init__.py
@@ -1,2 +1,2 @@
 from . import dfine
-from . import rtdetr
+from . import rtdetrv2
diff --git a/trolo/models/dfine/box_ops.py b/trolo/models/dfine/box_ops.py
@@ -1,88 +1,3 @@
-import torch
-from torch import Tensor
-from torchvision.ops.boxes import box_area
 
-
-def box_cxcywh_to_xyxy(x):
-    x_c, y_c, w, h = x.unbind(-1)
-    b = [
-        (x_c - 0.5 * w.clamp(min=0.0)),
-        (y_c - 0.5 * h.clamp(min=0.0)),
-        (x_c + 0.5 * w.clamp(min=0.0)),
-        (y_c + 0.5 * h.clamp(min=0.0)),
-    ]
-    return torch.stack(b, dim=-1)
-
-
-def box_xyxy_to_cxcywh(x: Tensor) -> Tensor:
-    x0, y0, x1, y1 = x.unbind(-1)
-    b = [(x0 + x1) / 2, (y0 + y1) / 2, (x1 - x0), (y1 - y0)]
-    return torch.stack(b, dim=-1)
-
-
-# modified from torchvision to also return the union
-def box_iou(boxes1: Tensor, boxes2: Tensor):
-    area1 = box_area(boxes1)
-    area2 = box_area(boxes2)
-
-    lt = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
-    rb = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]
-
-    wh = (rb - lt).clamp(min=0)  # [N,M,2]
-    inter = wh[:, :, 0] * wh[:, :, 1]  # [N,M]
-
-    union = area1[:, None] + area2 - inter
-
-    iou = inter / union
-    return iou, union
-
-
-def generalized_box_iou(boxes1, boxes2):
-    """
-    Generalized IoU from https://giou.stanford.edu/
-
-    The boxes should be in [x0, y0, x1, y1] format
-
-    Returns a [N, M] pairwise matrix, where N = len(boxes1)
-    and M = len(boxes2)
-    """
-    # degenerate boxes gives inf / nan results
-    # so do an early check
-    assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
-    assert (boxes2[:, 2:] >= boxes2[:, :2]).all()
-    iou, union = box_iou(boxes1, boxes2)
-
-    lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
-    rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])
-
-    wh = (rb - lt).clamp(min=0)  # [N,M,2]
-    area = wh[:, :, 0] * wh[:, :, 1]
-
-    return iou - (area - union) / area
-
-
-def masks_to_boxes(masks):
-    """Compute the bounding boxes around the provided masks
-
-    The masks should be in format [N, H, W] where N is the number of masks, (H, W) are the spatial dimensions.
-
-    Returns a [N, 4] tensors, with the boxes in xyxy format
-    """
-    if masks.numel() == 0:
-        return torch.zeros((0, 4), device=masks.device)
-
-    h, w = masks.shape[-2:]
-
-    y = torch.arange(0, h, dtype=torch.float)
-    x = torch.arange(0, w, dtype=torch.float)
-    y, x = torch.meshgrid(y, x)
-
-    x_mask = masks * x.unsqueeze(0)
-    x_max = x_mask.flatten(1).max(-1)[0]
-    x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
-
-    y_mask = masks * y.unsqueeze(0)
-    y_max = y_mask.flatten(1).max(-1)[0]
-    y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]
-
-    return torch.stack([x_min, y_min, x_max, y_max], 1)
+# NOTE: tech debt -- this module only re-exports trolo.utils.box_ops via the star import below.
+from trolo.utils.box_ops import * 
diff --git a/trolo/models/rtdetr/__init__.py b/trolo/models/rtdetr/__init__.py
-Original file line number
+Diff line change
@@ Expand Up / @@ -18,7 +18,8 @@ env/ @@
     *.egg-info
     dist/
     build/
-    data/
+    /data/
     weights/
     output/
     *.jpg
@@ Expand Down @@