File tree Expand file tree Collapse file tree 12 files changed +390
-86
lines changed
train/distillation_convnext Expand file tree Collapse file tree 12 files changed +390
-86
lines changed Original file line number Diff line number Diff line change @@ -169,6 +169,7 @@ crops:
169 169 - 0.229
170 170 - 0.224
171 171 - 0.225
172 + teacher_to_student_resolution_scale : 1.0
172 173 evaluation :
173 174 eval_period_iterations : 12500
174 175 low_freq_every : 5
Original file line number Diff line number Diff line change 1+ ibot :
2+ loss_weight : 1.0
3+ mask_sample_probability : 0.5
4+ mask_ratio_min_max :
5+ - 0.1
6+ - 0.5
7+ mask_random_circular_shift : false
8+ force_masking_even_with_zero_weight : false
9+ separate_head : true
10+ head_norm_last_layer : false
11+ head_nlayers : 3
12+ head_hidden_dim : 2048
13+ student :
14+ arch : convnext_base
15+ patch_size : 16
16+ drop_path_rate : 0.0
17+ block_chunks : 4
18+ optim :
19+ epochs : 500
20+ clip_grad : 3.0
21+ layerwise_decay : 1.0
22+ schedules :
23+ lr :
24+ start : 1e-6
25+ peak : 1e-4
26+ end : 1e-6
27+ warmup_epochs : 80
28+ freeze_last_layer_epochs : 1
29+ weight_decay :
30+ start : 0.02
31+ end : 0.2
32+ peak : 0.2
33+ warmup_epochs : 500
34+ teacher_temp :
35+ start : 0.04
36+ peak : 0.07
37+ end : 0.07
38+ warmup_epochs : 120
39+ momentum :
40+ start : 0.994
41+ peak : 1.0
42+ end : 1.0
43+ warmup_epochs : 500
Original file line number Diff line number Diff line change 1+ ibot :
2+ loss_weight : 1.0
3+ mask_sample_probability : 0.5
4+ mask_ratio_min_max :
5+ - 0.1
6+ - 0.5
7+ mask_random_circular_shift : false
8+ force_masking_even_with_zero_weight : false
9+ separate_head : true
10+ head_norm_last_layer : false
11+ head_nlayers : 3
12+ head_hidden_dim : 2048
13+ student :
14+ arch : convnext_large
15+ patch_size : 16
16+ drop_path_rate : 0.0
17+ block_chunks : 4
18+ optim :
19+ epochs : 500
20+ clip_grad : 3.0
21+ layerwise_decay : 1.0
22+ schedules :
23+ lr :
24+ start : 1e-6
25+ peak : 1e-4
26+ end : 1e-6
27+ warmup_epochs : 80
28+ freeze_last_layer_epochs : 1
29+ weight_decay :
30+ start : 0.04
31+ end : 0.2
32+ peak : 0.2
33+ warmup_epochs : 500
34+ teacher_temp :
35+ start : 0.04
36+ peak : 0.07
37+ end : 0.07
38+ warmup_epochs : 120
39+ momentum :
40+ start : 0.994
41+ peak : 1.0
42+ end : 1.0
43+ warmup_epochs : 500
Original file line number Diff line number Diff line change 1+ ibot :
2+ loss_weight : 1.0
3+ mask_sample_probability : 0.5
4+ mask_ratio_min_max :
5+ - 0.1
6+ - 0.5
7+ mask_random_circular_shift : false
8+ force_masking_even_with_zero_weight : false
9+ separate_head : true
10+ head_norm_last_layer : false
11+ head_nlayers : 3
12+ head_hidden_dim : 2048
13+ student :
14+ arch : convnext_small
15+ patch_size : 16
16+ drop_path_rate : 0.0
17+ block_chunks : 4
18+ optim :
19+ epochs : 500
20+ clip_grad : 3.0
21+ layerwise_decay : 1.0
22+ schedules :
23+ lr :
24+ start : 1e-6
25+ peak : 2e-4
26+ end : 1e-6
27+ warmup_epochs : 80
28+ freeze_last_layer_epochs : 1
29+ weight_decay :
30+ start : 0.04
31+ end : 0.2
32+ peak : 0.2
33+ warmup_epochs : 500
34+ teacher_temp :
35+ start : 0.04
36+ peak : 0.07
37+ end : 0.07
38+ warmup_epochs : 120
39+ momentum :
40+ start : 0.994
41+ peak : 1.0
42+ end : 1.0
43+ warmup_epochs : 500
Original file line number Diff line number Diff line change 1+ ibot :
2+ loss_weight : 1.0
3+ mask_sample_probability : 0.5
4+ mask_ratio_min_max :
5+ - 0.1
6+ - 0.5
7+ mask_random_circular_shift : false
8+ force_masking_even_with_zero_weight : false
9+ separate_head : true
10+ head_norm_last_layer : false
11+ head_nlayers : 3
12+ head_hidden_dim : 2048
13+ student :
14+ arch : convnext_tiny
15+ patch_size : 16
16+ drop_path_rate : 0.0
17+ block_chunks : 4
18+ optim :
19+ epochs : 500
20+ clip_grad : 3.0
21+ layerwise_decay : 1.0
22+ schedules :
23+ lr :
24+ start : 1e-6
25+ peak : 2e-4
26+ end : 1e-6
27+ warmup_epochs : 80
28+ freeze_last_layer_epochs : 1
29+ weight_decay :
30+ start : 0.04
31+ end : 0.2
32+ peak : 0.2
33+ warmup_epochs : 500
34+ teacher_temp :
35+ start : 0.04
36+ peak : 0.07
37+ end : 0.07
38+ warmup_epochs : 120
39+ momentum :
40+ start : 0.994
41+ peak : 1.0
42+ end : 1.0
43+ warmup_epochs : 500
Original file line number Diff line number Diff line change 1+ MODEL :
2+ META_ARCHITECTURE : MultiDistillationMetaArch
3+ multidistillation :
4+ enabled : true
5+ global_batch_size : 32 # 4096 for 16 nodes
6+ students :
7+ - name : convnext_tiny
8+ config_path : dinov3/configs/train/distillation_convnext/convnext_tiny_p16.yaml
9+ ranks_range :
10+ - 0
11+ - 2
12+ - name : convnext_small
13+ config_path : dinov3/configs/train/distillation_convnext/convnext_small_p16.yaml
14+ ranks_range :
15+ - 2
16+ - 4
17+ - name : convnext_base
18+ config_path : dinov3/configs/train/distillation_convnext/convnext_base_p16.yaml
19+ ranks_range :
20+ - 4
21+ - 6
22+ - name : convnext_large
23+ config_path : dinov3/configs/train/distillation_convnext/convnext_large_p16.yaml
24+ ranks_range :
25+ - 6
26+ - 8
27+ distillation : # teacher
28+ enabled : true
29+ full_cfg_path : dinov3/configs/train/vitl_im1k_lin834.yaml
30+ checkpoint_path : ignore
31+ crops :
32+ global_crops_size : 512
33+ local_crops_size : 224
34+ teacher_to_student_resolution_scale : 2.0
35+ train :
36+ dataset_path : ImageNet:split=TRAIN
37+ cache_dataset : false
38+ centering : "sinkhorn_knopp"
39+ compile : true
40+ ibot :
41+ separate_head : true
You can’t perform that action at this time.
0 commit comments