Need help in VGG16 Faster R-CNN implementation (2024)

I need help in implementing VGG16 for Faster R-CNN for my graduation thesis. This is my first time using mmdetection so I don't really have a clear idea how to create, train, and customize a custom model.

My environment is in google colab with CUDA 12.2.
l ran this code with no issue:

!pip3 install openmim!mim install mmengine!pip install torch==2.1.0 torchvision==0.16.0 torchaudio==2.1.0!pip install mmcv==2.1.0 -f https://download.openmmlab.com/mmcv/dist/cu121/torch2.1/index.html

I'm using a custom dataset from roboflow with COCO format with this format:

data|-- plat|--|-- test|--|--|-- _anotations.coco.json|--|--|-- [images_x].jpg|--|--|-- [images_x].jpg|--|--|-- [..................]|--|-- train|--|--|-- _anotations.coco.json|--|--|-- [images_x].jpg|--|--|-- [images_x].jpg|--|--|-- [..................]|--|-- valid|--|--|-- _anotations.coco.json|--|--|-- [images_x].jpg|--|--|-- [images_x].jpg|--|--|-- [..................]|--|-- readme.txt

This is my config in mmdetection > configs > faster_rcnn > faster_rcnn_v16_platBS16E20.py

base = [ "../base/models/faster-rcnn_v16.py", "../base/schedules/schedule_1x.py", "../base/default_runtime.py",]# Dataset settingsdataset_type = "PlatDataset"data_root = "data/plat/"train_ann_file = "train/_annotations.coco.json"train_data_prefix = "train/"val_ann_file = "valid/_annotations.coco.json"val_data_prefix = "valid/"test_ann_file = "test/_annotations.coco.json"test_data_prefix = "test/"class_name = "plat"num_classes = 1img_scale = (640, 640) # width, height# Train settingstrain_batch_size_per_gpu = 16train_num_workers = 10train_persistent_workers = True# Valid settingsval_batch_size_per_gpu = 16val_num_workers = 10val_persistent_workers = True# Train Val settingsbase_lr = 0.01 # Base learning rate for optim_wrappermax_epochs = 20 # Maximum training epochslearning_rate = 0.01momentum = 0.937weight_decay = 0.0005lr_start_factor = 1.0e-5backend_args = Nonemodel_test_cfg = dict( # The config of multi-label for multi-class prediction. multi_label=True, # The number of boxes before NMS nms_pre=30000, score_thr=0.001, # Threshold to filter out boxes. nms=dict(type="nms", iou_threshold=0.95), # NMS type and threshold max_per_img=300, # Max number of detections of each image)train_pipeline = [ dict(type="LoadImageFromFile", backend_args=backend_args), dict(type="LoadAnnotations", with_bbox=True), dict(type="PackDetInputs"),]test_pipeline = [ dict(type="LoadImageFromFile", backend_args=backend_args), dict(type='LoadAnnotations', with_bbox=True), dict( type="PackDetInputs", meta_keys=( "img_id", "img_path", "ori_shape", "img_shape", "scale_factor", ), ),]train_dataloader = dict( batch_size=train_batch_size_per_gpu, num_workers=train_num_workers, persistent_workers=train_persistent_workers, sampler=dict(type="DefaultSampler", shuffle=True), batch_sampler=dict(type="AspectRatioBatchSampler"), dataset=dict( type=dataset_type, data_root=data_root, ann_file=train_ann_file, data_prefix=dict(img=train_data_prefix), filter_cfg=dict(filter_empty_gt=True, min_size=32), pipeline=train_pipeline, backend_args=backend_args, ),)val_dataloader = dict( batch_size=val_batch_size_per_gpu, num_workers=val_num_workers, persistent_workers=val_persistent_workers, drop_last=False, sampler=dict(type="DefaultSampler", shuffle=False), dataset=dict( type=dataset_type, data_root=data_root, ann_file=val_ann_file, data_prefix=dict(img=val_data_prefix), test_mode=True, pipeline=test_pipeline, backend_args=backend_args, ),)test_dataloader = dict( batch_size=1, num_workers=2, persistent_workers=True, drop_last=False, sampler=dict(type="DefaultSampler", shuffle=False), dataset=dict( type=dataset_type, data_root=data_root, ann_file=test_ann_file, data_prefix=dict(img=test_data_prefix), test_mode=True, pipeline=test_pipeline, ),)val_evaluator = dict( type="CocoMetric", ann_file=data_root + val_ann_file, metric="bbox", format_only=False, backend_args=backend_args,)test_evaluator = dict( type="CocoMetric", metric="bbox", format_only=True, ann_file=data_root + test_ann_file, outfile_prefix="./work_dirs/plat_detection/test",)# Config of batch shapes. Only on val.batch_shapes_cfg = dict( type="BatchShapePolicy", batch_size=val_batch_size_per_gpu, img_size=img_scale[0], size_divisor=32, extra_pad_ratio=0.5,)optimizer = dict(type="SGD", lr=base_lr, momentum=momentum, weight_decay=weight_decay)optimizer_config = dict(_delete_=True)# learning rateparam_scheduler = [ dict( type="LinearLR", start_factor=lr_start_factor, by_epoch=True, begin=0, end=max_epochs, convert_to_iter_based=True, )]norm_cfg = dict(type="BN") # Normalization config# -----train val related-----lr_start_factor = 1.0e-5dsl_topk = 13 # Number of bbox selected in each levelloss_cls_weight = 1.0loss_bbox_weight = 2.0qfl_beta = 2.0 # beta of QualityFocalLossweight_decay = 0.05# Save model checkpoint and validation intervalssave_checkpoint_intervals = 10# The maximum checkpoints to keep.max_keep_ckpts = 3# single-scale training is recommended to# be turned on, which can speed up training.env_cfg = dict(cudnn_benchmark=True)# hooksdefault_hooks = dict( checkpoint=dict( type="CheckpointHook", interval=save_checkpoint_intervals, max_keep_ckpts=max_keep_ckpts, # only keep latest 3 checkpoints ))custom_hooks = [ dict( type="EMAHook", ema_type="ExpMomentumEMA", momentum=0.0002, update_buffers=True, strict_load=False, priority=49, )]train_cfg = dict( type="EpochBasedTrainLoop", max_epochs=max_epochs, val_interval=save_checkpoint_intervals,)val_cfg = dict(type="ValLoop")test_cfg = dict(type="TestLoop")

this is the model in mmdetection > configs > _base_ > models > faster-rcnn_v16.py

model = dict( type="FasterRCNN", backbone=dict( type="VGG", depth=16, num_stages=4, dilations=(1, 1, 1, 1), out_indices=(3,), frozen_stages=-1, bn_eval=True, with_bn=True, pretrained="torchvision://vgg16_bn", ), rpn_head=dict( type="RPNHead", in_channels=512, # channel changed feat_channels=512, # channel changed anchor_generator=dict( type="AnchorGenerator", scales=[2, 4, 8, 16, 32], ratios=[0.5, 1.0, 2.0], strides=[16], ), bbox_coder=dict( type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[1.0, 1.0, 1.0, 1.0], ), loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=True, loss_weight=1.0), loss_bbox=dict(type="L1Loss", loss_weight=1.0), ), roi_head=dict( type="StandardRoIHead", bbox_roi_extractor=dict( type="SingleRoIExtractor", roi_layer=dict(type="RoIAlign", output_size=7, sampling_ratio=0), out_channels=512, # channel changed featmap_strides=[16], ), bbox_head=dict( type="Shared2FCBBoxHead", in_channels=512, # channel changed fc_out_channels=1024, roi_feat_size=7, num_classes=2, bbox_coder=dict( type="DeltaXYWHBBoxCoder", target_means=[0.0, 0.0, 0.0, 0.0], target_stds=[0.1, 0.1, 0.2, 0.2], ), reg_class_agnostic=False, loss_cls=dict(type="CrossEntropyLoss", use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type="L1Loss", loss_weight=1.0), ), ), # model training and testing settings train_cfg=dict( rpn=dict( assigner=dict( type="MaxIoUAssigner", pos_iou_thr=0.7, neg_iou_thr=0.3, min_pos_iou=0.3, match_low_quality=True, ignore_iof_thr=-1, ), sampler=dict( type="RandomSampler", num=256, pos_fraction=0.5, neg_pos_ub=-1, add_gt_as_proposals=False, ), allowed_border=0, pos_weight=-1, debug=False, ), rpn_proposal=dict( nms_pre=12000, max_per_img=2000, nms=dict(type="nms", iou_threshold=0.7), min_bbox_size=0, ), rcnn=dict( assigner=dict( type="MaxIoUAssigner", pos_iou_thr=0.5, neg_iou_thr=0.5, min_pos_iou=0.5, match_low_quality=False, ignore_iof_thr=-1, ), sampler=dict( type="RandomSampler", num=512, pos_fraction=0.25, neg_pos_ub=-1, add_gt_as_proposals=True, ), pos_weight=-1, debug=False, ), ), test_cfg=dict( rpn=dict( nms=dict(type="nms", iou_threshold=0.7), nms_pre=6000, max_per_img=1000, min_bbox_size=0, ), rcnn=dict( score_thr=0.05, nms=dict(type="nms", iou_threshold=0.5), max_per_img=100 ), ),)

and this is my VGG model in mmdetection > mmdet > models > backbones > vgg.py

from mmdet.registry import MODELSfrom mmcv.cnn import VGG@MODELS.register_module()class VGG(VGG): def __init__(self, pretrained=None, *args, **kwargs): self.pretrained = pretrained super().__init__(*args, **kwargs) def init_weights(self, pretrained=None): super().init_weights(pretrained) def forward(self, x): outs = [] vgg_layers = getattr(self, self.module_name) for i in range(len(self.stage_blocks)): for j in range(*self.range_sub_modules[i]): vgg_layer = vgg_layers[j] x = vgg_layer(x) if i in self.out_indices: outs.append(x) if self.num_classes > 0: x = x.view(x.size(0), -1) x = self.classifier(x) outs.append(x) return tuple(outs)

I've also added the VGG class in __init__.py file.

I'm getting this error when i run this command on my colab

!python tools/train.py configs/faster_rcnn/faster_rcnn_v16_platBS16E20.py

this is the error:

Traceback (most recent call last): File "/content/ta-mmdetection/tools/train.py", line 121, in <module> main() File "/content/ta-mmdetection/tools/train.py", line 110, in main runner = Runner.from_cfg(cfg) File "/usr/local/lib/python3.10/dist-packages/mmengine/runner/runner.py", line 463, in from_cfg model=cfg['model'], File "/usr/local/lib/python3.10/dist-packages/mmengine/config/config.py", line 1502, in __getitem__ return self._cfg_dict.__getitem__(name) File "/usr/local/lib/python3.10/dist-packages/mmengine/config/config.py", line 138, in __getitem__ return self.build_lazy(super().__getitem__(key)) File "/usr/local/lib/python3.10/dist-packages/mmengine/config/config.py", line 105, in __missing__ raise KeyError(name)KeyError: 'model'

is there anything wrong with my code? I'm willing to change anything as long as it fix the code.
Hope anyone can help me ASAP since I'm short in time.
Thank you for your help!

Need help in VGG16 Faster R-CNN implementation (2024)

References