Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ env/
*.egg-info
dist/
build/
data/

/data/
weights/
output/
*.jpg
Expand Down
38 changes: 38 additions & 0 deletions trolo/configs/yaml/include/dataloader.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

train_dataloader:
dataset:
transforms:
ops:
- {type: RandomPhotometricDistort, p: 0.5}
- {type: RandomZoomOut, fill: 0}
- {type: RandomIoUCrop, p: 0.8}
- {type: SanitizeBoundingBoxes, min_size: 1}
- {type: RandomHorizontalFlip}
- {type: Resize, size: [640, 640], }
- {type: SanitizeBoundingBoxes, min_size: 1}
- {type: ConvertPILImage, dtype: 'float32', scale: True}
- {type: ConvertBoxes, fmt: 'cxcywh', normalize: True}
policy:
name: stop_epoch
epoch: 71 # epoch in [71, ~) stop `ops`
ops: ['RandomPhotometricDistort', 'RandomZoomOut', 'RandomIoUCrop']

collate_fn:
type: BatchImageCollateFunction
scales: [480, 512, 544, 576, 608, 640, 640, 640, 672, 704, 736, 768, 800]
stop_epoch: 71 # epoch in [71, ~) stop `multiscales`

shuffle: True
total_batch_size: 16 # total batch size is 16 (4 * 4)
num_workers: 4


val_dataloader:
dataset:
transforms:
ops:
- {type: Resize, size: [640, 640]}
- {type: ConvertPILImage, dtype: 'float32', scale: True}
shuffle: False
total_batch_size: 32
num_workers: 4
37 changes: 37 additions & 0 deletions trolo/configs/yaml/include/optimizer.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

use_amp: True
use_ema: True
ema:
type: ModelEMA
decay: 0.9999
warmups: 2000


epoches: 72
clip_max_norm: 0.1


optimizer:
type: AdamW
params:
-
params: '^(?=.*backbone)(?!.*norm).*$'
lr: 0.00001
-
params: '^(?=.*(?:encoder|decoder))(?=.*(?:norm|bn)).*$'
weight_decay: 0.

lr: 0.0001
betas: [0.9, 0.999]
weight_decay: 0.0001


lr_scheduler:
type: MultiStepLR
milestones: [1000]
gamma: 0.1


lr_warmup_scheduler:
type: LinearWarmup
warmup_duration: 2000
82 changes: 82 additions & 0 deletions trolo/configs/yaml/rtdetrv2/base.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
task: detection

model: RTDETR
criterion: RTDETRCriterionv2
postprocessor: RTDETRPostProcessor


use_focal_loss: True
eval_spatial_size: [640, 640] # h w


RTDETR:
backbone: PResNet
encoder: HybridEncoder
decoder: RTDETRTransformerv2


PResNet:
depth: 50
variant: d
freeze_at: 0
return_idx: [1, 2, 3]
num_stages: 4
freeze_norm: True
pretrained: True


HybridEncoder:
in_channels: [512, 1024, 2048]
feat_strides: [8, 16, 32]

# intra
hidden_dim: 256
use_encoder_idx: [2]
num_encoder_layers: 1
nhead: 8
dim_feedforward: 1024
dropout: 0.
enc_act: 'gelu'

# cross
expansion: 1.0
depth_mult: 1
act: 'silu'


RTDETRTransformerv2:
feat_channels: [256, 256, 256]
feat_strides: [8, 16, 32]
hidden_dim: 256
num_levels: 3

num_layers: 6
num_queries: 300

num_denoising: 100
label_noise_ratio: 0.5
box_noise_scale: 1.0 # 1.0 0.4

eval_idx: -1

# NEW
num_points: [4, 4, 4] # [3,3,3] [2,2,2]
cross_attn_method: default # default, discrete
query_select_method: default # default, agnostic


RTDETRPostProcessor:
num_top_queries: 300


RTDETRCriterionv2:
weight_dict: {loss_vfl: 1, loss_bbox: 5, loss_giou: 2,}
losses: ['vfl', 'boxes', ]
alpha: 0.75
gamma: 2.0

matcher:
type: HungarianMatcher
weight_dict: {cost_class: 2, cost_bbox: 5, cost_giou: 2}
alpha: 0.25
gamma: 2.0
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
__include__: [
'../dataset/coco_detection.yml',
'../dataset/dummy_coco.yml',
'../runtime.yml',
'./include/dataloader.yml',
'./include/optimizer.yml',
'./include/rtdetrv2_r50vd.yml',
'../include/dataloader.yml',
'../include/optimizer.yml',
'base.yml',
]


output_dir: ./output/rtdetrv2_s_coco
output_dir: ./output/rtdetrv2_r18vd_120e_coco


PResNet:
Expand Down
5 changes: 4 additions & 1 deletion trolo/data/dataloader.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,13 @@ def __init__(
ema_restart_decay=0.9999,
base_size=640,
base_size_repeat=None,
scales=None,
) -> None:
super().__init__()
self.base_size = base_size
self.scales = generate_scales(base_size, base_size_repeat) if base_size_repeat is not None else None
self.scales = scales
if scales is None:
self.scales = generate_scales(base_size, base_size_repeat) if base_size_repeat is not None else None
self.stop_epoch = stop_epoch if stop_epoch is not None else 100000000
self.ema_restart_decay = ema_restart_decay
# self.interpolation = interpolation
Expand Down
2 changes: 2 additions & 0 deletions trolo/loaders/maps.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pathlib import Path
from typing import Dict
from trolo.models.dfine.maps import MODEL_CONFIG_MAP as DFINE_MODEL_CONFIG_MAP
from trolo.models.rtdetrv2.maps import MODEL_CONFIG_MAP as RTDETRV2_MODEL_CONFIG_MAP

# Get package root directory
PKG_ROOT = Path(__file__).parent.parent
Expand All @@ -9,6 +10,7 @@
# Map of model names to their config files
MODEL_CONFIG_MAP = {
**DFINE_MODEL_CONFIG_MAP,
**RTDETRV2_MODEL_CONFIG_MAP,
}


Expand Down
2 changes: 1 addition & 1 deletion trolo/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
from . import dfine
from . import rtdetr
from . import rtdetrv2
89 changes: 2 additions & 87 deletions trolo/models/dfine/box_ops.py
Original file line number Diff line number Diff line change
@@ -1,88 +1,3 @@
import torch
from torch import Tensor
from torchvision.ops.boxes import box_area


def box_cxcywh_to_xyxy(x):
    """Convert boxes from (cx, cy, w, h) to (x0, y0, x1, y1).

    The last dimension of ``x`` holds the four box coordinates. Widths and
    heights are clamped to be non-negative before conversion, so a negative
    extent collapses the box onto its centre along that axis.
    """
    center_x, center_y, width, height = x.unbind(-1)
    half_w = 0.5 * width.clamp(min=0.0)
    half_h = 0.5 * height.clamp(min=0.0)
    corners = (
        center_x - half_w,
        center_y - half_h,
        center_x + half_w,
        center_y + half_h,
    )
    return torch.stack(corners, dim=-1)


def box_xyxy_to_cxcywh(x: Tensor) -> Tensor:
    """Convert boxes from (x0, y0, x1, y1) to (cx, cy, w, h).

    The last dimension of ``x`` holds the four box coordinates.
    """
    left, top, right, bottom = x.unbind(-1)
    center_x = (left + right) / 2
    center_y = (top + bottom) / 2
    width = right - left
    height = bottom - top
    return torch.stack((center_x, center_y, width, height), dim=-1)


# modified from torchvision to also return the union
def box_iou(boxes1: Tensor, boxes2: Tensor):
    """Return the pairwise IoU and union area of two sets of xyxy boxes.

    ``boxes1`` is indexed as [N, 4] and ``boxes2`` as [M, 4]; both returned
    tensors are [N, M]. Unlike the torchvision version, the union is returned
    alongside the IoU so callers can reuse it.
    """
    areas_a = box_area(boxes1)
    areas_b = box_area(boxes2)

    # Corners of the intersection rectangle for every (N, M) pair.
    top_left = torch.max(boxes1[:, None, :2], boxes2[:, :2])  # [N,M,2]
    bottom_right = torch.min(boxes1[:, None, 2:], boxes2[:, 2:])  # [N,M,2]

    # Non-overlapping pairs get a zero-sized intersection.
    extent = (bottom_right - top_left).clamp(min=0)  # [N,M,2]
    intersection = extent[:, :, 0] * extent[:, :, 1]  # [N,M]

    union = areas_a[:, None] + areas_b - intersection

    return intersection / union, union


def generalized_box_iou(boxes1, boxes2):
    """
    Generalized IoU from https://giou.stanford.edu/

    Both inputs must hold boxes in [x0, y0, x1, y1] format.

    Returns a [N, M] pairwise matrix, where N = len(boxes1)
    and M = len(boxes2).
    """
    # Degenerate boxes (x1 < x0 or y1 < y0) give inf / nan results,
    # so reject them up front.
    assert (boxes1[:, 2:] >= boxes1[:, :2]).all()
    assert (boxes2[:, 2:] >= boxes2[:, :2]).all()
    iou, union = box_iou(boxes1, boxes2)

    # Smallest rectangle enclosing each (N, M) pair of boxes.
    enclosing_lt = torch.min(boxes1[:, None, :2], boxes2[:, :2])
    enclosing_rb = torch.max(boxes1[:, None, 2:], boxes2[:, 2:])

    enclosing_wh = (enclosing_rb - enclosing_lt).clamp(min=0)  # [N,M,2]
    enclosing_area = enclosing_wh[:, :, 0] * enclosing_wh[:, :, 1]

    # Penalise the IoU by the fraction of the enclosing box not covered
    # by the union.
    penalty = (enclosing_area - union) / enclosing_area
    return iou - penalty


def masks_to_boxes(masks):
    """Compute the bounding boxes around the provided masks.

    The masks should be in format [N, H, W] where N is the number of masks
    and (H, W) are the spatial dimensions.

    Returns a [N, 4] tensor with the boxes in xyxy format. An empty input
    (``masks.numel() == 0``) yields a [0, 4] tensor on the masks' device.
    """
    if masks.numel() == 0:
        return torch.zeros((0, 4), device=masks.device)

    h, w = masks.shape[-2:]

    # Build the coordinate grids on the same device as the masks; creating
    # them on the default device breaks the multiplications below whenever
    # the masks live elsewhere (e.g. on a GPU).
    y = torch.arange(0, h, dtype=torch.float, device=masks.device)
    x = torch.arange(0, w, dtype=torch.float, device=masks.device)
    # Explicit "ij" indexing keeps the historical [H, W] grid layout and
    # avoids the deprecation warning raised when `indexing` is omitted.
    y, x = torch.meshgrid(y, x, indexing="ij")

    # Maximum coordinate: background pixels contribute 0 after masking.
    # Minimum coordinate: background pixels are pushed to a large sentinel
    # (1e8) so they never win the min; a mask with no foreground therefore
    # reports 1e8 as its minimum.
    x_mask = masks * x.unsqueeze(0)
    x_max = x_mask.flatten(1).max(-1)[0]
    x_min = x_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]

    y_mask = masks * y.unsqueeze(0)
    y_max = y_mask.flatten(1).max(-1)[0]
    y_min = y_mask.masked_fill(~(masks.bool()), 1e8).flatten(1).min(-1)[0]

    return torch.stack([x_min, y_min, x_max, y_max], 1)
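# TODO: tech debt — this module only re-exports trolo.utils.box_ops via a star import.
# Import from trolo.utils.box_ops directly and remove this shim once nothing imports it.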
from trolo.utils.box_ops import *
1 change: 0 additions & 1 deletion trolo/models/rtdetr/__init__.py

This file was deleted.

Loading