Skip to content

input (in) and output (out) for docker image

王佳欣 edited this page Aug 17, 2022 · 1 revision

input (/in)

/in
├── annotations  # annotation files folder
├── assets  # image files folder
├── models # optional for training task: weight files folder
├── config.yaml  # hyper-parameter config file
├── env.yaml  # ymir config file
├── candidate-index.tsv # candidate index file (for mining/infer task)
├── train-index.tsv # training index file
└── val-index.tsv # validation index file

/in/config.yaml training task example

backbone_name: swin_nano
batch_size: 2
class_names:
- dog
epochs: 5
eval_size: 640
gpu_count: 1
gpu_id: '0'
learning_rate: 0.0001
num_workers: 2
task_id: t000000100000166d7761660213748
weight_save_interval: 100
pretrained_model_params: ['/in/models/pretrained.pth']  # optional, for training task only, weight files.
model_params_path: ['/in/models/checkpoint.pth']  # for infer and mining task, weight files

/in/env.yaml training task example

input:                                                                                                      
  annotations_dir: /in/annotations                                                                        
  assets_dir: /in/assets                                                                                  
  candidate_index_file: /in/candidate-index.tsv                                                                 
  config_file: /in/config.yaml                                                                            
  models_dir: /in/models                                                                                  
  root_dir: /in
  training_index_file: /in/train-index.tsv
  val_index_file: /in/val-index.tsv
output:
  infer_result_file: /out/infer-result.json
  mining_result_file: /out/result.tsv
  models_dir: /out/models
  monitor_file: /out/monitor.txt
  root_dir: /out
  tensorboard_dir: /out/tensorboard
  training_result_file: /out/models/result.yaml
run_infer: false
run_mining: false
run_training: true
task_id: t000000100000166d7761660213748

/in/train-index.tsv training task example

/in/assets/./30/a4d09c8c143d7638684a865389dfc21229cf1d30.jpeg   /in/annotations/./30/a4d09c8c143d7638684a865389dfc21229cf1d30.txt
/in/assets/./06/c8842f4316cfb6ab7fc7d3319bbf54f2361d5806.jpeg   /in/annotations/./06/c8842f4316cfb6ab7fc7d3319bbf54f2361d5806.txt
/in/assets/./c5/cc5b0b4ae72c9ac9c503e0429e943c1d441699c5.jpeg   /in/annotations/./c5/cc5b0b4ae72c9ac9c503e0429e943c1d441699c5.txt
...

/in/val-index.tsv training task example

/in/assets/./76/447209480f478da85f51b2eba236b7c78942f576.jpeg   /in/annotations/./76/447209480f478da85f51b2eba236b7c78942f576.txt
/in/assets/./b0/74ec6d6edc1d3222eb067fcfb5f698afa4c1a0b0.jpeg   /in/annotations/./b0/74ec6d6edc1d3222eb067fcfb5f698afa4c1a0b0.txt
/in/assets/./17/da72f037bc56b1c21a076c1c0aef1157f3f19417.jpeg   /in/annotations/./17/da72f037bc56b1c21a076c1c0aef1157f3f19417.txt
...

/in/candidate-index.tsv mining/infer task example

/in/assets/./76/447209480f478da85f51b2eba236b7c78942f576.jpeg
/in/assets/./b0/74ec6d6edc1d3222eb067fcfb5f698afa4c1a0b0.jpeg
/in/assets/./17/da72f037bc56b1c21a076c1c0aef1157f3f19417.jpeg
...

output (/out) training task example

/out
├── models
│   ├── checkpoint.pth # saved by user in docker image
│   ├── result.yaml  # saved by user in docker image
│   └── ymir-info.yaml  # auto generated by ymir
├── monitor.txt  # saved by user in docker image
├── tensorboard  # saved by user in docker image
└── ymir-executor-out.log  # auto generated by ymir

output (/out) infer task example

/out
├── infer-result.json
├── monitor.txt
└── ymir-executor-out.log  # auto generated by ymir

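output (/out) mining task example

/out
├── result.tsv  # mining result file (output.mining_result_file in /in/env.yaml)
├── monitor.txt
└── ymir-executor-out.log  # auto generated by ymir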

/out/models/result.yaml

map: 0.1986365956036518
model:
- result.yaml
- checkpoint.pth

/out/infer-result.json

{"detection": {"447209480f478da85f51b2eba236b7c78942f576.jpeg": {"annotations": [{"class_name": "dog", "score": 0.3103592097759247, "box": {"x": 45, "y": 39, "w": 287, "h": 308}}, {"class_name": "dog", "score": 0.3044982850551605, "box": {"x": -5, "y": 42, "w": 292, "h": 323}}, {"class_name": "dog", "score": 0.28284817934036255, "box": {"x": -28, "y": -6, "w": 273, "h": 282}}, ...

/out/models/ymir-info.yaml

class_names:
- dog
executor_config:
  backbone_name: swin_nano
  batch_size: 2
  class_names:
  - dog
  epochs: 5
  eval_size: 640
  gpu_count: 1
  gpu_id: '0'
  learning_rate: 0.0001
  num_workers: 2
  task_id: t000000100000166d7761660213748
  weight_save_interval: 100
models:
- result.yaml
- checkpoint.pth
task_context:
  dst_rev: t000000100000166d7761660213748@t000000100000166d7761660213748
  executor: youdaoyzbx/ymir-executor:ymir1.1.0-vidt-cu102-tmi-fast
  mAP: 0.1986365956036518
  producer: ymir
  src_revs: t000000100000166d7761660213748@t100000100000166d7761660213748
  task_parameters: '{"dataset_id": 2, "keywords": ["dog"], "extra_url": null, "labellers":
    null, "keep_annotations": null, "validation_dataset_id": 3, "network": "YOLO v4",
    "backbone": "darknet", "hyperparameter": null, "model_id": null, "mining_algorithm":
    null, "top_k": null, "generate_annotations": null, "docker_image": "youdaoyzbx/ymir-executor:ymir1.1.0-vidt-cu102-tmi-fast",
    "docker_image_id": 16}'
  type: 1

/out/monitor.txt

t000000100000166d7761660213748  1660214625.761469       1.00    3

/out/ymir-executor-out.log

INFO    : [20220811-10:29:13] merged config: {'param': {'backbone_name': 'swin_nano', 'batch_size': 2, 'class_names': ['dog'], 'epochs': 5, 'eval_size': 640, 'gpu_count': 1, 'gpu_id': '0', 'learning_rate': 0.0001, 'num_workers': 2, 'task_id': 't000000100000166d7761660213748', 'weight_save_interval': 100}, 'ymir': EnvConfig(task_id='t000000100000166d7761660213748', run_training=True, run_mining=False, run_infer=False, input=EnvInputConfig(root_dir='/in', assets_dir='/in/assets', annotations_dir='/in/annotations', models_dir='/in/models', training_index_file='/in/train-index.tsv', val_index_file='/in/val-index.tsv', candidate_index_file='', config_file='/in/config.yaml'), output=EnvOutputConfig(root_dir='/out', models_dir='/out/models', tensorboard_dir='/out/tensorboard', training_result_file='/out/models/result.yaml', mining_result_file='/out/result.tsv', infer_result_file='/out/infer-result.json', monitor_file='/out/monitor.txt', executor_log_file='/out/ymir-executor-out.log'))}
INFO    : [20220811-10:29:13] training: python3 ymir/ymir_training.py
log /out/models
| distributed init (rank 0 / world 1): env://
Namespace(aux_loss=False, backbone_name='swin_nano', batch_size=2, bbox_loss_coef=5, clip_max_norm=0.1, cls_loss_coef=2, coco_panoptic_path=None, coco_path='/in', cross_indices=[3], cross_scale_fusion=False, dataset_file='ymir', dec_layers=6, dec_n_points=4, decay_rate=0.1, det_token_num=100, device='cuda', dice_loss_coef=1, dim_feedforward=1024, dist_backend='nccl', dist_url='env://', distil_loss_coef=4.0, distil_model=None, distil_model_path=None, distributed=True, dropout=0.1, eos_coef=0.1, epochs=5, eval=False, eval_size=640, focal_alpha=0.25, giou_loss_coef=2, gpu=0, iou_aware=False, iouaware_loss_coef=2, load_from='', lr=0.0001, lr_backbone=1e-05, lr_drop=40, lr_linear_proj_mult=0.1, lr_linear_proj_names=['reference_points', 'sampling_offsets'], lr_noise=None, lr_noise_pct=0.67, lr_noise_std=1.0, mask_loss_coef=1, method='vidt', min_lr=1e-07, n_iter_to_acc=1, nheads=8, num_classes=1, num_feature_levels=4, num_workers=2, output_dir='/out/models', pos_dim=256, position_embedding='sine', pre_trained='imagenet', print_freq=500, rank=0, reduced_dim=256, remove_difficult=False, resume='', save_interval=100, sched='warmupcos', seed=42, set_cost_bbox=5, set_cost_class=2, set_cost_giou=2, start_epoch=0, tensorboard_dir='/out/tensorboard', token_label=False, token_loss_coef=2, warmup_epochs=0, warmup_lr=1e-06, weight_decay=0.0001, with_box_refine=False, world_size=1)
100%|██████████| 114M/114M [01:49<00:00, 1.09MB/s] 
Load the backbone pretrained on ImageNet 1K
number of params: 14769553
num of total trainable prams:14769553
loading annotations into memory...