2024/02/29 12:00:04 - mmengine - INFO - ------------------------------------------------------------ System environment: sys.platform: linux Python: 3.8.16 (default, Mar 2 2023, 03:21:46) [GCC 11.2.0] CUDA available: True numpy_random_seed: 1635557597 GPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB CUDA_HOME: /mnt/petrelfs/share/cuda-11.3 NVCC: Cuda compilation tools, release 11.3, V11.3.109 GCC: gcc (GCC) 9.4.0 PyTorch: 1.11.0 PyTorch compiling details: PyTorch built with: - GCC 7.3 - C++ Version: 201402 - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications - Intel(R) MKL-DNN v2.5.2 (Git Hash a9302535553c73243c632ad3c4c80beec3d19a1e) - OpenMP 201511 (a.k.a. OpenMP 4.5) - LAPACK is enabled (usually provided by MKL) - NNPACK is enabled - CPU capability usage: AVX2 - CUDA Runtime 11.5 - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37 - CuDNN 8.3.2 - Magma 2.6.1 - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.5, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.11.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, TorchVision: 0.12.0 OpenCV: 4.7.0 MMEngine: 0.8.0 Runtime environment: cudnn_benchmark: False mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0} dist_cfg: {'backend': 'nccl', 'port': 29320} seed: 1635557597 Distributed launcher: slurm Distributed training: True GPU number: 8 ------------------------------------------------------------ 2024/02/29 12:00:04 - mmengine - INFO - Config: default_scope = 'embodiedscan' default_hooks = dict( timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=50), param_scheduler=dict(type='ParamSchedulerHook'), checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3), sampler_seed=dict(type='DistSamplerSeedHook')) env_cfg = dict( cudnn_benchmark=False, mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), dist_cfg=dict(backend='nccl', port=29320)) log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) log_level = 'INFO' load_from = '/mnt/petrelfs/wangtai/EmbodiedScan/work_dirs/mv-3ddet-challenge/epoch_12.pth' resume = False n_points = 100000 backend_args = None metainfo = dict(classes='all') model = dict( type='SparseFeatureFusion3DGrounder', num_queries=256, voxel_size=0.01, data_preprocessor=dict( type='Det3DDataPreprocessor', mean=[ 123.675, 116.28, 103.53, ], std=[ 58.395, 57.12, 57.375, ], bgr_to_rgb=True, pad_size_divisor=32), backbone=dict( type='mmdet.ResNet', depth=50, base_channels=16, num_stages=4, out_indices=( 0, 1, 2, 3, ), frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=False), norm_eval=True, init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), style='pytorch'), backbone_lidar=dict(type='MinkResNet', in_channels=3, depth=34), use_xyz_feat=True, neck_3d=dict( type='MinkNeck', num_classes=1, in_channels=[ 128, 256, 512, 1024, ], out_channels=256, voxel_size=0.01, pts_prune_threshold=1000), decoder=dict( num_layers=6, return_intermediate=True, layer_cfg=dict( self_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0), cross_attn_text_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0), cross_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0), ffn_cfg=dict( embed_dims=256, feedforward_channels=2048, ffn_drop=0.0)), post_norm_cfg=None), bbox_head=dict( type='GroundingHead', num_classes=256, sync_cls_avg_factor=True, decouple_bbox_loss=True, decouple_groups=4, share_pred_layer=True, decouple_weights=[ 0.2, 0.2, 0.2, 0.4, ], contrastive_cfg=dict(max_text_len=256, log_scale='auto', bias=True), loss_cls=dict( type='mmdet.FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), loss_bbox=dict( type='BBoxCDLoss', mode='l1', loss_weight=1.0, group='g8')), coord_type='DEPTH', train_cfg=dict( assigner=dict( type='HungarianAssigner3D', match_costs=[ dict(type='BinaryFocalLossCost', weight=1.0), dict(type='BBox3DL1Cost', weight=2.0), dict(type='IoU3DCost', weight=2.0), ])), test_cfg=None) dataset_type = 'MultiView3DGroundingDataset' data_root = 'data' train_pipeline = [ dict(type='LoadAnnotations3D'), dict( type='MultiViewPipeline', n_images=20, transforms=[ dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadDepthFromFile', backend_args=None), dict(type='ConvertRGBDToPoints', coord_type='CAMERA'), dict(type='PointSample', num_points=10000), dict(type='Resize', scale=( 480, 480, ), keep_ratio=False), ]), dict(type='AggregateMultiViewPoints', coord_type='DEPTH'), dict(type='PointSample', num_points=100000), dict( type='GlobalRotScaleTrans', rot_range=[ -0.087266, 0.087266, ], scale_ratio_range=[ 0.9, 1.1, ], translation_std=[ 0.1, 0.1, 0.1, ], shift_height=False), dict( type='Pack3DDetInputs', keys=[ 'img', 'points', 'gt_bboxes_3d', 'gt_labels_3d', ]), ] test_pipeline = [ dict(type='LoadAnnotations3D'), dict( type='MultiViewPipeline', n_images=50, ordered=True, transforms=[ dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadDepthFromFile', backend_args=None), dict(type='ConvertRGBDToPoints', coord_type='CAMERA'), dict(type='PointSample', num_points=10000), dict(type='Resize', scale=( 480, 480, ), keep_ratio=False), ]), dict(type='AggregateMultiViewPoints', coord_type='DEPTH'), dict(type='PointSample', num_points=100000), dict( type='Pack3DDetInputs', keys=[ 'img', 'points', 'gt_bboxes_3d', 'gt_labels_3d', ]), ] train_dataloader = dict( batch_size=12, num_workers=12, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=True), dataset=dict( type='RepeatDataset', times=1, dataset=dict( type='MultiView3DGroundingDataset', data_root='data', ann_file='embodiedscan_infos_train_split_filtered.pkl', vg_file='embodiedscan_train_vg.json', metainfo=dict(classes='all'), pipeline=[ dict(type='LoadAnnotations3D'), dict( type='MultiViewPipeline', n_images=20, transforms=[ dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadDepthFromFile', backend_args=None), dict(type='ConvertRGBDToPoints', coord_type='CAMERA'), dict(type='PointSample', num_points=10000), dict( type='Resize', scale=( 480, 480, ), keep_ratio=False), ]), dict(type='AggregateMultiViewPoints', coord_type='DEPTH'), dict(type='PointSample', num_points=100000), dict( type='GlobalRotScaleTrans', rot_range=[ -0.087266, 0.087266, ], scale_ratio_range=[ 0.9, 1.1, ], translation_std=[ 0.1, 0.1, 0.1, ], shift_height=False), dict( type='Pack3DDetInputs', keys=[ 'img', 'points', 'gt_bboxes_3d', 'gt_labels_3d', ]), ], test_mode=False, filter_empty_gt=True, box_type_3d='Euler-Depth'))) val_dataloader = dict( batch_size=12, num_workers=12, persistent_workers=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False), dataset=dict( type='MultiView3DGroundingDataset', data_root='data', ann_file='embodiedscan_infos_val_split_filtered.pkl', vg_file='embodiedscan_val_vg.json', metainfo=dict(classes='all'), pipeline=[ dict(type='LoadAnnotations3D'), dict( type='MultiViewPipeline', n_images=50, ordered=True, transforms=[ dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadDepthFromFile', backend_args=None), dict(type='ConvertRGBDToPoints', coord_type='CAMERA'), dict(type='PointSample', num_points=10000), dict(type='Resize', scale=( 480, 480, ), keep_ratio=False), ]), dict(type='AggregateMultiViewPoints', coord_type='DEPTH'), dict(type='PointSample', num_points=100000), dict( type='Pack3DDetInputs', keys=[ 'img', 'points', 'gt_bboxes_3d', 'gt_labels_3d', ]), ], test_mode=True, filter_empty_gt=True, box_type_3d='Euler-Depth')) test_dataloader = dict( batch_size=12, num_workers=12, persistent_workers=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False), dataset=dict( type='MultiView3DGroundingDataset', data_root='data', ann_file='embodiedscan_infos_val_split_filtered.pkl', vg_file='embodiedscan_val_vg.json', metainfo=dict(classes='all'), pipeline=[ dict(type='LoadAnnotations3D'), dict( type='MultiViewPipeline', n_images=50, ordered=True, transforms=[ dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadDepthFromFile', backend_args=None), dict(type='ConvertRGBDToPoints', coord_type='CAMERA'), dict(type='PointSample', num_points=10000), dict(type='Resize', scale=( 480, 480, ), keep_ratio=False), ]), dict(type='AggregateMultiViewPoints', coord_type='DEPTH'), dict(type='PointSample', num_points=100000), dict( type='Pack3DDetInputs', keys=[ 'img', 'points', 'gt_bboxes_3d', 'gt_labels_3d', ]), ], test_mode=True, filter_empty_gt=True, box_type_3d='Euler-Depth')) val_evaluator = dict(type='GroundingMetric') test_evaluator = dict(type='GroundingMetric') train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=3) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') lr = 0.0005 optim_wrapper = dict( type='OptimWrapper', optimizer=dict(type='AdamW', lr=0.0005, weight_decay=0.0005), paramwise_cfg=dict( custom_keys=dict( text_encoder=dict(lr_mult=0.0), decoder=dict(lr_mult=0.1, decay_mult=1.0))), clip_grad=dict(max_norm=10, norm_type=2)) param_scheduler = dict( type='MultiStepLR', begin=0, end=12, by_epoch=True, milestones=[ 8, 11, ], gamma=0.1) custom_hooks = [ dict(type='EmptyCacheHook', after_iter=True), ] find_unused_parameters = True launcher = 'slurm' work_dir = '/mnt/petrelfs/wangtai/EmbodiedScan/work_dirs/mv-grounding-challenge-split' 2024/02/29 12:00:04 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "vis_backend" registry tree. As a workaround, the current "vis_backend" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/02/29 12:02:23 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "hook" registry tree. As a workaround, the current "hook" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/02/29 12:02:23 - mmengine - INFO - Hooks will be executed in the following order: before_run: (VERY_HIGH ) RuntimeInfoHook (BELOW_NORMAL) LoggerHook -------------------- before_train: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook (VERY_LOW ) CheckpointHook -------------------- before_train_epoch: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook (NORMAL ) DistSamplerSeedHook (NORMAL ) EmptyCacheHook -------------------- before_train_iter: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook -------------------- after_train_iter: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook (BELOW_NORMAL) LoggerHook (LOW ) ParamSchedulerHook (VERY_LOW ) CheckpointHook -------------------- after_train_epoch: (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook (LOW ) ParamSchedulerHook (VERY_LOW ) CheckpointHook -------------------- before_val_epoch: (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook -------------------- before_val_iter: (NORMAL ) IterTimerHook -------------------- after_val_iter: (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook (BELOW_NORMAL) LoggerHook -------------------- after_val_epoch: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook (BELOW_NORMAL) LoggerHook (LOW ) ParamSchedulerHook (VERY_LOW ) CheckpointHook -------------------- after_train: (VERY_LOW ) CheckpointHook -------------------- before_test_epoch: (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook -------------------- before_test_iter: (NORMAL ) IterTimerHook -------------------- after_test_iter: (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook (BELOW_NORMAL) LoggerHook -------------------- after_test_epoch: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook (BELOW_NORMAL) LoggerHook -------------------- after_run: (BELOW_NORMAL) LoggerHook -------------------- 2024/02/29 12:02:24 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "loop" registry tree. As a workaround, the current "loop" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/02/29 12:03:25 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "data sampler" registry tree. As a workaround, the current "data sampler" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/02/29 12:03:25 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "optimizer wrapper constructor" registry tree. As a workaround, the current "optimizer wrapper constructor" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.word_embeddings.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.word_embeddings.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.word_embeddings.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.position_embeddings.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.position_embeddings.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.position_embeddings.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.token_type_embeddings.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.token_type_embeddings.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.token_type_embeddings.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.LayerNorm.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.LayerNorm.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.query.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.query.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.query.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.query.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.query.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.query.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.key.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.key.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.key.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.key.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.key.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.key.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.value.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.value.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.value.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.value.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.value.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.value.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.intermediate.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.intermediate.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.intermediate.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.intermediate.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.intermediate.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.intermediate.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.query.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.query.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.query.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.query.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.query.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.query.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.key.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.key.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.key.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.key.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.key.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.key.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.value.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.value.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.value.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.value.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.value.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.value.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.intermediate.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.intermediate.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.intermediate.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.intermediate.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.intermediate.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.intermediate.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.query.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.query.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.query.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.query.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.query.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.query.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.key.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.key.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.key.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.key.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.key.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.key.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.value.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.value.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.value.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.value.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.value.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.value.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.intermediate.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.intermediate.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.intermediate.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.intermediate.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.intermediate.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.intermediate.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.query.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.query.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.query.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.query.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.query.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.query.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.key.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.key.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.key.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.key.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.key.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.key.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.value.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.value.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.value.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.value.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.value.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.value.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.intermediate.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.intermediate.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.intermediate.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.intermediate.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.intermediate.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.intermediate.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.query.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.query.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.query.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.query.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.query.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.query.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.key.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.key.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.key.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.key.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.key.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.key.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.value.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.value.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.value.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.value.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.value.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.value.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.intermediate.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.intermediate.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.intermediate.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.intermediate.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.intermediate.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.intermediate.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.dense.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.dense.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:25 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.query.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.query.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.query.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.query.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.query.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.query.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.key.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.key.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.key.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.key.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.key.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.key.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.value.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.value.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.value.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.value.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.value.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.value.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.intermediate.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.intermediate.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.intermediate.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.intermediate.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.intermediate.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.intermediate.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.query.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.query.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.query.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.query.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.query.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.query.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.key.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.key.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.key.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.key.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.key.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.key.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.value.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.value.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.value.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.value.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.value.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.value.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.intermediate.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.intermediate.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.intermediate.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.intermediate.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.intermediate.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.intermediate.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.query.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.query.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.query.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.query.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.query.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.query.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.key.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.key.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.key.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.key.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.key.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.key.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.value.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.value.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.value.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.value.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.value.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.value.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.intermediate.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.intermediate.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.intermediate.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.intermediate.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.intermediate.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.intermediate.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.query.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.query.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.query.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.query.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.query.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.query.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.key.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.key.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.key.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.key.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.key.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.key.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.value.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.value.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.value.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.value.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.value.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.value.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.intermediate.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.intermediate.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.intermediate.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.intermediate.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.intermediate.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.intermediate.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.query.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.query.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.query.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.query.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.query.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.query.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.key.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.key.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.key.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.key.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.key.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.key.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.value.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.value.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.value.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.value.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.value.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.value.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.intermediate.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.intermediate.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.intermediate.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.intermediate.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.intermediate.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.intermediate.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.query.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.query.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.query.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.query.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.query.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.query.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.key.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.key.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.key.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.key.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.key.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.key.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.value.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.value.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.value.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.value.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.value.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.value.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.intermediate.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.intermediate.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.intermediate.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.intermediate.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.intermediate.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.intermediate.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.query.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.query.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.query.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.query.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.query.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.query.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.key.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.key.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.key.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.key.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.key.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.key.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.value.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.value.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.value.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.value.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.value.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.value.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.intermediate.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.intermediate.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.intermediate.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.intermediate.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.intermediate.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.intermediate.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.LayerNorm.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.LayerNorm.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.LayerNorm.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.LayerNorm.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.LayerNorm.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.LayerNorm.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.pooler.dense.weight:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.pooler.dense.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.pooler.dense.weight:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.pooler.dense.bias:lr=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.pooler.dense.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- text_encoder.pooler.dense.bias:lr_mult=0.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.bias:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.weight:lr=5e-05 2024/02/29 12:03:26 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.bias:lr=5e-05 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.weight:lr=5e-05 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.bias:lr=5e-05 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.norm.weight:lr=5e-05 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.norm.weight:weight_decay=0.0005 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.norm.weight:lr_mult=0.1 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.norm.weight:decay_mult=1.0 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.norm.bias:lr=5e-05 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.norm.bias:weight_decay=0.0005 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.norm.bias:lr_mult=0.1 2024/02/29 12:03:27 - mmengine - INFO - paramwise_options -- decoder.norm.bias:decay_mult=1.0 2024/02/29 12:03:27 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "optimizer" registry tree. As a workaround, the current "optimizer" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/02/29 12:03:27 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "optim_wrapper" registry tree. As a workaround, the current "optim_wrapper" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/02/29 12:03:27 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "parameter scheduler" registry tree. As a workaround, the current "parameter scheduler" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/02/29 12:03:41 - mmengine - WARNING - The prefix is not set in metric class GroundingMetric. 2024/02/29 12:03:41 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "weight initializer" registry tree. As a workaround, the current "weight initializer" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/02/29 12:03:41 - mmengine - INFO - load model from: torchvision://resnet50 2024/02/29 12:03:41 - mmengine - INFO - Loads checkpoint by torchvision backend from path: torchvision://resnet50 2024/02/29 12:03:42 - mmengine - WARNING - The model and loaded state dict do not match exactly size mismatch for conv1.weight: copying a param with shape torch.Size([64, 3, 7, 7]) from checkpoint, the shape in current model is torch.Size([16, 3, 7, 7]). size mismatch for bn1.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for bn1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for bn1.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for bn1.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.conv1.weight: copying a param with shape torch.Size([64, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([16, 16, 1, 1]). size mismatch for layer1.0.bn1.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.bn1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.bn1.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.bn1.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.conv2.weight: copying a param with shape torch.Size([64, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([16, 16, 3, 3]). size mismatch for layer1.0.bn2.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.bn2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.bn2.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.bn2.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.conv3.weight: copying a param with shape torch.Size([256, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 16, 1, 1]). size mismatch for layer1.0.bn3.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.bn3.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.bn3.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.bn3.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.downsample.0.weight: copying a param with shape torch.Size([256, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 16, 1, 1]). size mismatch for layer1.0.downsample.1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.downsample.1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.downsample.1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.downsample.1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.1.conv1.weight: copying a param with shape torch.Size([64, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([16, 64, 1, 1]). size mismatch for layer1.1.bn1.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.bn1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.bn1.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.bn1.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.conv2.weight: copying a param with shape torch.Size([64, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([16, 16, 3, 3]). size mismatch for layer1.1.bn2.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.bn2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.bn2.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.bn2.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.conv3.weight: copying a param with shape torch.Size([256, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 16, 1, 1]). size mismatch for layer1.1.bn3.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.1.bn3.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.1.bn3.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.1.bn3.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.2.conv1.weight: copying a param with shape torch.Size([64, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([16, 64, 1, 1]). size mismatch for layer1.2.bn1.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.bn1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.bn1.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.bn1.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.conv2.weight: copying a param with shape torch.Size([64, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([16, 16, 3, 3]). size mismatch for layer1.2.bn2.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.bn2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.bn2.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.bn2.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.conv3.weight: copying a param with shape torch.Size([256, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 16, 1, 1]). size mismatch for layer1.2.bn3.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.2.bn3.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.2.bn3.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.2.bn3.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer2.0.conv1.weight: copying a param with shape torch.Size([128, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 64, 1, 1]). size mismatch for layer2.0.bn1.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.bn1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.bn1.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.bn1.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.conv2.weight: copying a param with shape torch.Size([128, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]). size mismatch for layer2.0.bn2.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.bn2.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.bn2.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.bn2.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.conv3.weight: copying a param with shape torch.Size([512, 128, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 32, 1, 1]). size mismatch for layer2.0.bn3.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.bn3.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.bn3.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.bn3.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.downsample.0.weight: copying a param with shape torch.Size([512, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 64, 1, 1]). size mismatch for layer2.0.downsample.1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.downsample.1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.downsample.1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.downsample.1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.1.conv1.weight: copying a param with shape torch.Size([128, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 128, 1, 1]). size mismatch for layer2.1.bn1.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.bn1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.bn1.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.bn1.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.conv2.weight: copying a param with shape torch.Size([128, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]). size mismatch for layer2.1.bn2.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.bn2.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.bn2.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.bn2.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.conv3.weight: copying a param with shape torch.Size([512, 128, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 32, 1, 1]). size mismatch for layer2.1.bn3.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.1.bn3.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.1.bn3.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.1.bn3.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.2.conv1.weight: copying a param with shape torch.Size([128, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 128, 1, 1]). size mismatch for layer2.2.bn1.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.bn1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.bn1.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.bn1.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.conv2.weight: copying a param with shape torch.Size([128, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]). size mismatch for layer2.2.bn2.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.bn2.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.bn2.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.bn2.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.conv3.weight: copying a param with shape torch.Size([512, 128, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 32, 1, 1]). size mismatch for layer2.2.bn3.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.2.bn3.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.2.bn3.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.2.bn3.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.3.conv1.weight: copying a param with shape torch.Size([128, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 128, 1, 1]). size mismatch for layer2.3.bn1.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.bn1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.bn1.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.bn1.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.conv2.weight: copying a param with shape torch.Size([128, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]). size mismatch for layer2.3.bn2.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.bn2.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.bn2.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.bn2.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.conv3.weight: copying a param with shape torch.Size([512, 128, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 32, 1, 1]). size mismatch for layer2.3.bn3.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.3.bn3.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.3.bn3.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.3.bn3.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer3.0.conv1.weight: copying a param with shape torch.Size([256, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 128, 1, 1]). size mismatch for layer3.0.bn1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.bn1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.bn1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.bn1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]). size mismatch for layer3.0.bn2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.bn2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.bn2.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.bn2.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]). size mismatch for layer3.0.bn3.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.bn3.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.bn3.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.bn3.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.downsample.0.weight: copying a param with shape torch.Size([1024, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 128, 1, 1]). size mismatch for layer3.0.downsample.1.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.downsample.1.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.downsample.1.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.downsample.1.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.1.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]). size mismatch for layer3.1.bn1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.bn1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.bn1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.bn1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]). size mismatch for layer3.1.bn2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.bn2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.bn2.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.bn2.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]). size mismatch for layer3.1.bn3.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.1.bn3.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.1.bn3.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.1.bn3.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.2.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]). size mismatch for layer3.2.bn1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.bn1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.bn1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.bn1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]). size mismatch for layer3.2.bn2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.bn2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.bn2.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.bn2.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]). size mismatch for layer3.2.bn3.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.2.bn3.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.2.bn3.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.2.bn3.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.3.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]). size mismatch for layer3.3.bn1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.bn1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.bn1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.bn1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]). size mismatch for layer3.3.bn2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.bn2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.bn2.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.bn2.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]). size mismatch for layer3.3.bn3.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.3.bn3.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.3.bn3.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.3.bn3.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.4.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]). size mismatch for layer3.4.bn1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.bn1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.bn1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.bn1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]). size mismatch for layer3.4.bn2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.bn2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.bn2.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.bn2.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]). size mismatch for layer3.4.bn3.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.4.bn3.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.4.bn3.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.4.bn3.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.5.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]). size mismatch for layer3.5.bn1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.bn1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.bn1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.bn1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]). size mismatch for layer3.5.bn2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.bn2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.bn2.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.bn2.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]). size mismatch for layer3.5.bn3.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.5.bn3.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.5.bn3.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.5.bn3.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer4.0.conv1.weight: copying a param with shape torch.Size([512, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 256, 1, 1]). size mismatch for layer4.0.bn1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.bn1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.bn1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.bn1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.conv2.weight: copying a param with shape torch.Size([512, 512, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]). size mismatch for layer4.0.bn2.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.bn2.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.bn2.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.bn2.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.conv3.weight: copying a param with shape torch.Size([2048, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 128, 1, 1]). size mismatch for layer4.0.bn3.weight: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.bn3.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.bn3.running_mean: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.bn3.running_var: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.downsample.0.weight: copying a param with shape torch.Size([2048, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 256, 1, 1]). size mismatch for layer4.0.downsample.1.weight: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.downsample.1.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.downsample.1.running_mean: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.downsample.1.running_var: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.1.conv1.weight: copying a param with shape torch.Size([512, 2048, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 512, 1, 1]). size mismatch for layer4.1.bn1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.bn1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.bn1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.bn1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.conv2.weight: copying a param with shape torch.Size([512, 512, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]). size mismatch for layer4.1.bn2.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.bn2.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.bn2.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.bn2.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.conv3.weight: copying a param with shape torch.Size([2048, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 128, 1, 1]). size mismatch for layer4.1.bn3.weight: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.1.bn3.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.1.bn3.running_mean: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.1.bn3.running_var: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.2.conv1.weight: copying a param with shape torch.Size([512, 2048, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 512, 1, 1]). size mismatch for layer4.2.bn1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.bn1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.bn1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.bn1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.conv2.weight: copying a param with shape torch.Size([512, 512, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]). size mismatch for layer4.2.bn2.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.bn2.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.bn2.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.bn2.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.conv3.weight: copying a param with shape torch.Size([2048, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 128, 1, 1]). size mismatch for layer4.2.bn3.weight: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.2.bn3.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.2.bn3.running_mean: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.2.bn3.running_var: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). unexpected key in source state_dict: fc.weight, fc.bias Name of parameter - Initialization information backbone.conv1.weight - torch.Size([16, 3, 7, 7]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.bn1.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.bn1.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.conv1.weight - torch.Size([16, 16, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.bn1.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.bn1.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.conv2.weight - torch.Size([16, 16, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.bn2.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.bn2.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.conv3.weight - torch.Size([64, 16, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.bn3.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.bn3.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.downsample.0.weight - torch.Size([64, 16, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.downsample.1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.downsample.1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.conv1.weight - torch.Size([16, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.bn1.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.bn1.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.conv2.weight - torch.Size([16, 16, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.bn2.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.bn2.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.conv3.weight - torch.Size([64, 16, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.bn3.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.bn3.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.conv1.weight - torch.Size([16, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.bn1.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.bn1.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.conv2.weight - torch.Size([16, 16, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.bn2.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.bn2.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.conv3.weight - torch.Size([64, 16, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.bn3.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.bn3.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.conv1.weight - torch.Size([32, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.bn1.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.bn1.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.conv2.weight - torch.Size([32, 32, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.bn2.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.bn2.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.conv3.weight - torch.Size([128, 32, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.bn3.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.bn3.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.downsample.0.weight - torch.Size([128, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.downsample.1.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.downsample.1.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.conv1.weight - torch.Size([32, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.bn1.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.bn1.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.conv2.weight - torch.Size([32, 32, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.bn2.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.bn2.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.conv3.weight - torch.Size([128, 32, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.bn3.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.bn3.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.conv1.weight - torch.Size([32, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.bn1.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.bn1.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.conv2.weight - torch.Size([32, 32, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.bn2.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.bn2.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.conv3.weight - torch.Size([128, 32, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.bn3.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.bn3.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.conv1.weight - torch.Size([32, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.bn1.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.bn1.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.conv2.weight - torch.Size([32, 32, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.bn2.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.bn2.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.conv3.weight - torch.Size([128, 32, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.bn3.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.bn3.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.conv1.weight - torch.Size([64, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.bn1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.bn1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.conv2.weight - torch.Size([64, 64, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.bn2.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.bn2.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.conv3.weight - torch.Size([256, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.bn3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.bn3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.downsample.0.weight - torch.Size([256, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.downsample.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.downsample.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.conv1.weight - torch.Size([64, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.bn1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.bn1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.conv2.weight - torch.Size([64, 64, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.bn2.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.bn2.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.conv3.weight - torch.Size([256, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.bn3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.bn3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.conv1.weight - torch.Size([64, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.bn1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.bn1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.conv2.weight - torch.Size([64, 64, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.bn2.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.bn2.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.conv3.weight - torch.Size([256, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.bn3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.bn3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.conv1.weight - torch.Size([64, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.bn1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.bn1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.conv2.weight - torch.Size([64, 64, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.bn2.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.bn2.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.conv3.weight - torch.Size([256, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.bn3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.bn3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.conv1.weight - torch.Size([64, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.bn1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.bn1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.conv2.weight - torch.Size([64, 64, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.bn2.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.bn2.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.conv3.weight - torch.Size([256, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.bn3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.bn3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.conv1.weight - torch.Size([64, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.bn1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.bn1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.conv2.weight - torch.Size([64, 64, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.bn2.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.bn2.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.conv3.weight - torch.Size([256, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.bn3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.bn3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.conv1.weight - torch.Size([128, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.bn1.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.bn1.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.conv2.weight - torch.Size([128, 128, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.bn2.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.bn2.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.conv3.weight - torch.Size([512, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.bn3.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.bn3.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.downsample.0.weight - torch.Size([512, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.downsample.1.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.downsample.1.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.conv1.weight - torch.Size([128, 512, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.bn1.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.bn1.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.conv2.weight - torch.Size([128, 128, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.bn2.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.bn2.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.conv3.weight - torch.Size([512, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.bn3.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.bn3.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.conv1.weight - torch.Size([128, 512, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.bn1.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.bn1.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.conv2.weight - torch.Size([128, 128, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.bn2.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.bn2.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.conv3.weight - torch.Size([512, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.bn3.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.bn3.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.conv1.kernel - torch.Size([27, 3, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.norm1.weight - torch.Size([1, 64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.norm1.bias - torch.Size([1, 64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.0.conv1.kernel - torch.Size([27, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.0.norm1.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.0.norm1.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.0.conv2.kernel - torch.Size([27, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.0.norm2.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.0.norm2.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.0.downsample.0.kernel - torch.Size([1, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.0.downsample.1.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.0.downsample.1.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.1.conv1.kernel - torch.Size([27, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.1.norm1.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.1.norm1.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.1.conv2.kernel - torch.Size([27, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.1.norm2.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.1.norm2.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.2.conv1.kernel - torch.Size([27, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.2.norm1.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.2.norm1.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.2.conv2.kernel - torch.Size([27, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.2.norm2.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.2.norm2.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.0.conv1.kernel - torch.Size([27, 64, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.0.norm1.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.0.norm1.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.0.conv2.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.0.norm2.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.0.norm2.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.0.downsample.0.kernel - torch.Size([1, 64, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.0.downsample.1.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.0.downsample.1.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.1.conv1.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.1.norm1.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.1.norm1.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.1.conv2.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.1.norm2.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.1.norm2.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.2.conv1.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.2.norm1.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.2.norm1.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.2.conv2.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.2.norm2.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.2.norm2.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.3.conv1.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.3.norm1.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.3.norm1.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.3.conv2.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.3.norm2.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.3.norm2.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.0.conv1.kernel - torch.Size([27, 128, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.0.norm1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.0.norm1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.0.conv2.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.0.norm2.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.0.norm2.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.0.downsample.0.kernel - torch.Size([1, 128, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.0.downsample.1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.0.downsample.1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.1.conv1.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.1.norm1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.1.norm1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.1.conv2.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.1.norm2.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.1.norm2.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.2.conv1.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.2.norm1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.2.norm1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.2.conv2.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.2.norm2.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.2.norm2.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.3.conv1.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.3.norm1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.3.norm1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.3.conv2.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.3.norm2.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.3.norm2.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.4.conv1.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.4.norm1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.4.norm1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.4.conv2.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.4.norm2.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.4.norm2.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.5.conv1.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.5.norm1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.5.norm1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.5.conv2.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.5.norm2.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.5.norm2.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.0.conv1.kernel - torch.Size([27, 256, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.0.norm1.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.0.norm1.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.0.conv2.kernel - torch.Size([27, 512, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.0.norm2.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.0.norm2.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.0.downsample.0.kernel - torch.Size([1, 256, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.0.downsample.1.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.0.downsample.1.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.1.conv1.kernel - torch.Size([27, 512, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.1.norm1.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.1.norm1.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.1.conv2.kernel - torch.Size([27, 512, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.1.norm2.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.1.norm2.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.2.conv1.kernel - torch.Size([27, 512, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.2.norm1.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.2.norm1.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.2.conv2.kernel - torch.Size([27, 512, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.2.norm2.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.2.norm2.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_0.0.kernel - torch.Size([27, 128, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_0.1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_0.1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_1.0.kernel - torch.Size([8, 256, 128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_1.1.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_1.1.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_1.3.kernel - torch.Size([27, 128, 128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_1.4.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_1.4.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_1.0.kernel - torch.Size([27, 256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_1.1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_1.1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_2.0.kernel - torch.Size([8, 512, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_2.1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_2.1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_2.3.kernel - torch.Size([27, 256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_2.4.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_2.4.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_2.0.kernel - torch.Size([27, 512, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_2.1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_2.1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_3.0.kernel - torch.Size([8, 1024, 512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_3.1.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_3.1.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_3.3.kernel - torch.Size([27, 512, 512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_3.4.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_3.4.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_3.0.kernel - torch.Size([27, 1024, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_3.1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_3.1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.conv_cls.kernel - torch.Size([256, 1]): Initialized by user-defined `init_weights` in MinkNeck neck_3d.conv_cls.bias - torch.Size([1, 1]): Initialized by user-defined `init_weights` in MinkNeck bbox_head.cls_branches.0.bias - torch.Size([1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder bbox_head.reg_branches.0.0.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder bbox_head.reg_branches.0.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder bbox_head.reg_branches.0.2.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder bbox_head.reg_branches.0.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder bbox_head.reg_branches.0.4.weight - torch.Size([9, 256]): Initialized by user-defined `init_weights` in GroundingHead bbox_head.reg_branches.0.4.bias - torch.Size([9]): Initialized by user-defined `init_weights` in GroundingHead text_encoder.embeddings.word_embeddings.weight - torch.Size([50265, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.embeddings.position_embeddings.weight - torch.Size([514, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.embeddings.token_type_embeddings.weight - torch.Size([1, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.embeddings.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.embeddings.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.pooler.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.pooler.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn_text.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn_text.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn_text.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn_text.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.ffn.layers.0.0.weight - torch.Size([2048, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.ffn.layers.0.0.bias - torch.Size([2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.ffn.layers.1.weight - torch.Size([256, 2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.ffn.layers.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.0.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.2.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn_text.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn_text.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn_text.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn_text.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.ffn.layers.0.0.weight - torch.Size([2048, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.ffn.layers.0.0.bias - torch.Size([2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.ffn.layers.1.weight - torch.Size([256, 2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.ffn.layers.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.0.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.2.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn_text.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn_text.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn_text.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn_text.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.ffn.layers.0.0.weight - torch.Size([2048, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.ffn.layers.0.0.bias - torch.Size([2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.ffn.layers.1.weight - torch.Size([256, 2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.ffn.layers.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.0.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.2.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn_text.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn_text.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn_text.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn_text.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.ffn.layers.0.0.weight - torch.Size([2048, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.ffn.layers.0.0.bias - torch.Size([2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.ffn.layers.1.weight - torch.Size([256, 2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.ffn.layers.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.0.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.2.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn_text.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn_text.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn_text.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn_text.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.ffn.layers.0.0.weight - torch.Size([2048, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.ffn.layers.0.0.bias - torch.Size([2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.ffn.layers.1.weight - torch.Size([256, 2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.ffn.layers.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.0.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.2.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn_text.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn_text.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn_text.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn_text.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.ffn.layers.0.0.weight - torch.Size([2048, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.ffn.layers.0.0.bias - torch.Size([2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.ffn.layers.1.weight - torch.Size([256, 2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.ffn.layers.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.0.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.2.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.self_posembed.position_embedding_head.0.weight - torch.Size([256, 9, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.cross_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.cross_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.cross_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.cross_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.cross_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.cross_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.norm.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.norm.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_feat_map.weight - torch.Size([256, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_feat_map.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder 2024/02/29 12:03:45 - mmengine - INFO - Load checkpoint from /mnt/petrelfs/wangtai/EmbodiedScan/work_dirs/mv-3ddet-challenge/epoch_12.pth 2024/02/29 12:03:45 - mmengine - WARNING - "FileClient" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io 2024/02/29 12:03:45 - mmengine - WARNING - "HardDiskBackend" is the alias of "LocalBackend" and the former will be deprecated in future. 2024/02/29 12:03:45 - mmengine - INFO - Checkpoints will be saved to /mnt/petrelfs/wangtai/EmbodiedScan/work_dirs/mv-grounding-challenge-split. 2024/02/29 12:08:51 - mmengine - INFO - Epoch(train) [1][ 50/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 10:06:28 time: 6.1034 data_time: 1.4212 memory: 29365 grad_norm: 970.3000 loss: 44.1655 loss_cls: 4.0514 loss_bbox: 0.6740 d0.loss_cls: 11.0743 d0.loss_bbox: 0.6982 d1.loss_cls: 7.3764 d1.loss_bbox: 0.6934 d2.loss_cls: 6.4598 d2.loss_bbox: 0.6835 d3.loss_cls: 5.6101 d3.loss_bbox: 0.6782 d4.loss_cls: 5.4886 d4.loss_bbox: 0.6776 2024/02/29 12:12:23 - mmengine - INFO - Epoch(train) [1][100/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 8:29:42 time: 4.2424 data_time: 0.2600 memory: 28827 grad_norm: 27.0571 loss: 10.8315 loss_cls: 1.2506 loss_bbox: 0.5581 d0.loss_cls: 1.2337 d0.loss_bbox: 0.5757 d1.loss_cls: 1.2379 d1.loss_bbox: 0.5653 d2.loss_cls: 1.2407 d2.loss_bbox: 0.5607 d3.loss_cls: 1.2439 d3.loss_bbox: 0.5593 d4.loss_cls: 1.2469 d4.loss_bbox: 0.5587 2024/02/29 12:16:22 - mmengine - INFO - Epoch(train) [1][150/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 8:12:33 time: 4.7789 data_time: 0.4671 memory: 29301 grad_norm: 26.6285 loss: 10.7471 loss_cls: 1.2484 loss_bbox: 0.5519 d0.loss_cls: 1.2287 d0.loss_bbox: 0.5579 d1.loss_cls: 1.2301 d1.loss_bbox: 0.5541 d2.loss_cls: 1.2352 d2.loss_bbox: 0.5531 d3.loss_cls: 1.2393 d3.loss_bbox: 0.5526 d4.loss_cls: 1.2441 d4.loss_bbox: 0.5517 2024/02/29 12:20:12 - mmengine - INFO - Epoch(train) [1][200/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 7:57:45 time: 4.6034 data_time: 1.7426 memory: 29862 grad_norm: 26.1569 loss: 10.2521 loss_cls: 1.1869 loss_bbox: 0.5224 d0.loss_cls: 1.1837 d0.loss_bbox: 0.5270 d1.loss_cls: 1.1856 d1.loss_bbox: 0.5226 d2.loss_cls: 1.1865 d2.loss_bbox: 0.5221 d3.loss_cls: 1.1857 d3.loss_bbox: 0.5215 d4.loss_cls: 1.1866 d4.loss_bbox: 0.5214 2024/02/29 12:23:48 - mmengine - INFO - Epoch(train) [1][250/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 7:41:45 time: 4.3134 data_time: 0.8830 memory: 28478 grad_norm: 20.4095 loss: 10.1304 loss_cls: 1.1573 loss_bbox: 0.5346 d0.loss_cls: 1.1575 d0.loss_bbox: 0.5360 d1.loss_cls: 1.1517 d1.loss_bbox: 0.5343 d2.loss_cls: 1.1509 d2.loss_bbox: 0.5341 d3.loss_cls: 1.1523 d3.loss_bbox: 0.5345 d4.loss_cls: 1.1521 d4.loss_bbox: 0.5351 2024/02/29 12:27:38 - mmengine - INFO - Epoch(train) [1][300/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 7:34:32 time: 4.6056 data_time: 0.3283 memory: 28502 grad_norm: 22.1288 loss: 9.8361 loss_cls: 1.1280 loss_bbox: 0.5138 d0.loss_cls: 1.1334 d0.loss_bbox: 0.5075 d1.loss_cls: 1.1279 d1.loss_bbox: 0.5105 d2.loss_cls: 1.1286 d2.loss_bbox: 0.5080 d3.loss_cls: 1.1293 d3.loss_bbox: 0.5092 d4.loss_cls: 1.1318 d4.loss_bbox: 0.5081 2024/02/29 12:31:28 - mmengine - INFO - Epoch(train) [1][350/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 7:28:12 time: 4.6007 data_time: 0.2570 memory: 28796 grad_norm: 21.6131 loss: 9.4395 loss_cls: 1.1167 loss_bbox: 0.4585 d0.loss_cls: 1.1281 d0.loss_bbox: 0.4593 d1.loss_cls: 1.1114 d1.loss_bbox: 0.4575 d2.loss_cls: 1.1114 d2.loss_bbox: 0.4577 d3.loss_cls: 1.1085 d3.loss_bbox: 0.4578 d4.loss_cls: 1.1128 d4.loss_bbox: 0.4599 2024/02/29 12:35:12 - mmengine - INFO - Epoch(train) [1][400/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 7:21:03 time: 4.4763 data_time: 0.3427 memory: 28464 grad_norm: 20.9318 loss: 9.5441 loss_cls: 1.1495 loss_bbox: 0.4397 d0.loss_cls: 1.1721 d0.loss_bbox: 0.4301 d1.loss_cls: 1.1565 d1.loss_bbox: 0.4328 d2.loss_cls: 1.1554 d2.loss_bbox: 0.4348 d3.loss_cls: 1.1497 d3.loss_bbox: 0.4361 d4.loss_cls: 1.1496 d4.loss_bbox: 0.4379 2024/02/29 12:38:52 - mmengine - INFO - Epoch(train) [1][450/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 7:13:57 time: 4.4078 data_time: 0.2666 memory: 28673 grad_norm: 20.1978 loss: 9.4658 loss_cls: 1.1453 loss_bbox: 0.4347 d0.loss_cls: 1.1538 d0.loss_bbox: 0.4291 d1.loss_cls: 1.1432 d1.loss_bbox: 0.4311 d2.loss_cls: 1.1434 d2.loss_bbox: 0.4325 d3.loss_cls: 1.1421 d3.loss_bbox: 0.4341 d4.loss_cls: 1.1419 d4.loss_bbox: 0.4346 2024/02/29 12:42:14 - mmengine - INFO - Epoch(train) [1][500/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 7:04:06 time: 4.0342 data_time: 0.2528 memory: 28929 grad_norm: 19.5512 loss: 9.0546 loss_cls: 1.0578 loss_bbox: 0.4496 d0.loss_cls: 1.0794 d0.loss_bbox: 0.4394 d1.loss_cls: 1.0660 d1.loss_bbox: 0.4412 d2.loss_cls: 1.0636 d2.loss_bbox: 0.4428 d3.loss_cls: 1.0655 d3.loss_bbox: 0.4431 d4.loss_cls: 1.0606 d4.loss_bbox: 0.4456 2024/02/29 12:42:15 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 12:42:15 - mmengine - INFO - Saving checkpoint at 1 epochs 2024/02/29 12:47:14 - mmengine - INFO - Epoch(train) [2][ 50/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 7:08:18 time: 5.6749 data_time: 2.7047 memory: 28473 grad_norm: 19.2115 loss: 9.0066 loss_cls: 1.0910 loss_bbox: 0.4122 d0.loss_cls: 1.1042 d0.loss_bbox: 0.4032 d1.loss_cls: 1.0926 d1.loss_bbox: 0.4095 d2.loss_cls: 1.0903 d2.loss_bbox: 0.4095 d3.loss_cls: 1.0881 d3.loss_bbox: 0.4086 d4.loss_cls: 1.0856 d4.loss_bbox: 0.4118 2024/02/29 12:51:10 - mmengine - INFO - Epoch(train) [2][100/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 7:04:29 time: 4.7206 data_time: 1.8096 memory: 28716 grad_norm: 20.7641 loss: 8.9640 loss_cls: 1.0775 loss_bbox: 0.4173 d0.loss_cls: 1.0957 d0.loss_bbox: 0.4135 d1.loss_cls: 1.0800 d1.loss_bbox: 0.4135 d2.loss_cls: 1.0779 d2.loss_bbox: 0.4123 d3.loss_cls: 1.0745 d3.loss_bbox: 0.4138 d4.loss_cls: 1.0708 d4.loss_bbox: 0.4173 2024/02/29 12:54:50 - mmengine - INFO - Epoch(train) [2][150/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:58:25 time: 4.3949 data_time: 2.1173 memory: 28901 grad_norm: 20.3498 loss: 9.0339 loss_cls: 1.0915 loss_bbox: 0.4089 d0.loss_cls: 1.1211 d0.loss_bbox: 0.4018 d1.loss_cls: 1.1042 d1.loss_bbox: 0.4054 d2.loss_cls: 1.0986 d2.loss_bbox: 0.4066 d3.loss_cls: 1.0903 d3.loss_bbox: 0.4096 d4.loss_cls: 1.0846 d4.loss_bbox: 0.4113 2024/02/29 12:58:46 - mmengine - INFO - Epoch(train) [2][200/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:54:47 time: 4.7248 data_time: 0.8314 memory: 28942 grad_norm: 21.0510 loss: 8.9824 loss_cls: 1.0779 loss_bbox: 0.4183 d0.loss_cls: 1.1107 d0.loss_bbox: 0.4000 d1.loss_cls: 1.0945 d1.loss_bbox: 0.4022 d2.loss_cls: 1.0913 d2.loss_bbox: 0.4039 d3.loss_cls: 1.0796 d3.loss_bbox: 0.4110 d4.loss_cls: 1.0768 d4.loss_bbox: 0.4163 2024/02/29 13:02:08 - mmengine - INFO - Epoch(train) [2][250/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:47:10 time: 4.0502 data_time: 0.2560 memory: 28388 grad_norm: 21.0764 loss: 9.4054 loss_cls: 1.1316 loss_bbox: 0.4391 d0.loss_cls: 1.1505 d0.loss_bbox: 0.4246 d1.loss_cls: 1.1308 d1.loss_bbox: 0.4330 d2.loss_cls: 1.1264 d2.loss_bbox: 0.4390 d3.loss_cls: 1.1203 d3.loss_bbox: 0.4421 d4.loss_cls: 1.1250 d4.loss_bbox: 0.4431 2024/02/29 13:05:51 - mmengine - INFO - Epoch(train) [2][300/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:42:16 time: 4.4526 data_time: 0.3591 memory: 27820 grad_norm: 20.0827 loss: 9.0741 loss_cls: 1.0376 loss_bbox: 0.4716 d0.loss_cls: 1.0804 d0.loss_bbox: 0.4478 d1.loss_cls: 1.0596 d1.loss_bbox: 0.4571 d2.loss_cls: 1.0451 d2.loss_bbox: 0.4628 d3.loss_cls: 1.0395 d3.loss_bbox: 0.4653 d4.loss_cls: 1.0401 d4.loss_bbox: 0.4670 2024/02/29 13:09:22 - mmengine - INFO - Epoch(train) [2][350/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:36:19 time: 4.2192 data_time: 0.2813 memory: 29128 grad_norm: 19.8634 loss: 8.9352 loss_cls: 1.0170 loss_bbox: 0.4685 d0.loss_cls: 1.0581 d0.loss_bbox: 0.4473 d1.loss_cls: 1.0330 d1.loss_bbox: 0.4576 d2.loss_cls: 1.0202 d2.loss_bbox: 0.4665 d3.loss_cls: 1.0106 d3.loss_bbox: 0.4724 d4.loss_cls: 1.0138 d4.loss_bbox: 0.4702 2024/02/29 13:13:06 - mmengine - INFO - Epoch(train) [2][400/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:31:51 time: 4.4768 data_time: 0.2927 memory: 28099 grad_norm: 19.1553 loss: 8.8270 loss_cls: 1.0218 loss_bbox: 0.4527 d0.loss_cls: 1.0478 d0.loss_bbox: 0.4350 d1.loss_cls: 1.0245 d1.loss_bbox: 0.4414 d2.loss_cls: 1.0160 d2.loss_bbox: 0.4487 d3.loss_cls: 1.0149 d3.loss_bbox: 0.4548 d4.loss_cls: 1.0130 d4.loss_bbox: 0.4564 2024/02/29 13:16:46 - mmengine - INFO - Epoch(train) [2][450/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:27:07 time: 4.3955 data_time: 0.2544 memory: 28266 grad_norm: 21.0636 loss: 8.8694 loss_cls: 1.0525 loss_bbox: 0.4281 d0.loss_cls: 1.0650 d0.loss_bbox: 0.4199 d1.loss_cls: 1.0601 d1.loss_bbox: 0.4167 d2.loss_cls: 1.0574 d2.loss_bbox: 0.4186 d3.loss_cls: 1.0508 d3.loss_bbox: 0.4234 d4.loss_cls: 1.0516 d4.loss_bbox: 0.4252 2024/02/29 13:19:46 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 13:19:48 - mmengine - INFO - Epoch(train) [2][500/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:19:20 time: 3.6399 data_time: 0.2710 memory: 27571 grad_norm: 20.3706 loss: 8.7751 loss_cls: 1.0139 loss_bbox: 0.4445 d0.loss_cls: 1.0480 d0.loss_bbox: 0.4245 d1.loss_cls: 1.0257 d1.loss_bbox: 0.4379 d2.loss_cls: 1.0238 d2.loss_bbox: 0.4393 d3.loss_cls: 1.0185 d3.loss_bbox: 0.4419 d4.loss_cls: 1.0110 d4.loss_bbox: 0.4461 2024/02/29 13:19:48 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 13:19:49 - mmengine - INFO - Saving checkpoint at 2 epochs 2024/02/29 13:24:42 - mmengine - INFO - Epoch(train) [3][ 50/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:19:16 time: 5.5837 data_time: 1.5240 memory: 27891 grad_norm: 19.9260 loss: 8.4857 loss_cls: 0.9801 loss_bbox: 0.4257 d0.loss_cls: 1.0394 d0.loss_bbox: 0.3971 d1.loss_cls: 1.0191 d1.loss_bbox: 0.4013 d2.loss_cls: 1.0018 d2.loss_bbox: 0.4115 d3.loss_cls: 0.9887 d3.loss_bbox: 0.4173 d4.loss_cls: 0.9805 d4.loss_bbox: 0.4231 2024/02/29 13:28:05 - mmengine - INFO - Epoch(train) [3][100/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:13:25 time: 4.0392 data_time: 0.2589 memory: 28323 grad_norm: 20.7333 loss: 8.7044 loss_cls: 0.9813 loss_bbox: 0.4654 d0.loss_cls: 1.0229 d0.loss_bbox: 0.4427 d1.loss_cls: 1.0087 d1.loss_bbox: 0.4512 d2.loss_cls: 0.9924 d2.loss_bbox: 0.4549 d3.loss_cls: 0.9828 d3.loss_bbox: 0.4607 d4.loss_cls: 0.9779 d4.loss_bbox: 0.4635 2024/02/29 13:32:03 - mmengine - INFO - Epoch(train) [3][150/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:10:20 time: 4.7675 data_time: 0.2602 memory: 28168 grad_norm: 20.5750 loss: 8.4581 loss_cls: 0.9687 loss_bbox: 0.4304 d0.loss_cls: 1.0102 d0.loss_bbox: 0.4167 d1.loss_cls: 0.9931 d1.loss_bbox: 0.4235 d2.loss_cls: 0.9839 d2.loss_bbox: 0.4254 d3.loss_cls: 0.9815 d3.loss_bbox: 0.4250 d4.loss_cls: 0.9757 d4.loss_bbox: 0.4239 2024/02/29 13:35:55 - mmengine - INFO - Epoch(train) [3][200/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:06:43 time: 4.6339 data_time: 0.2584 memory: 27827 grad_norm: 21.7258 loss: 8.2262 loss_cls: 0.9426 loss_bbox: 0.4279 d0.loss_cls: 0.9711 d0.loss_bbox: 0.4141 d1.loss_cls: 0.9496 d1.loss_bbox: 0.4188 d2.loss_cls: 0.9470 d2.loss_bbox: 0.4233 d3.loss_cls: 0.9395 d3.loss_bbox: 0.4235 d4.loss_cls: 0.9452 d4.loss_bbox: 0.4234 2024/02/29 13:39:31 - mmengine - INFO - Epoch(train) [3][250/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 6:02:08 time: 4.3305 data_time: 0.2953 memory: 28172 grad_norm: 20.7003 loss: 8.6894 loss_cls: 1.0342 loss_bbox: 0.4081 d0.loss_cls: 1.0690 d0.loss_bbox: 0.3975 d1.loss_cls: 1.0488 d1.loss_bbox: 0.4025 d2.loss_cls: 1.0433 d2.loss_bbox: 0.4047 d3.loss_cls: 1.0424 d3.loss_bbox: 0.4004 d4.loss_cls: 1.0314 d4.loss_bbox: 0.4071 2024/02/29 13:43:02 - mmengine - INFO - Epoch(train) [3][300/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:57:19 time: 4.2255 data_time: 0.2551 memory: 28309 grad_norm: 20.5789 loss: 8.1393 loss_cls: 0.9585 loss_bbox: 0.3969 d0.loss_cls: 0.9839 d0.loss_bbox: 0.3877 d1.loss_cls: 0.9726 d1.loss_bbox: 0.3875 d2.loss_cls: 0.9591 d2.loss_bbox: 0.3896 d3.loss_cls: 0.9576 d3.loss_bbox: 0.3933 d4.loss_cls: 0.9560 d4.loss_bbox: 0.3967 2024/02/29 13:46:20 - mmengine - INFO - Epoch(train) [3][350/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:51:47 time: 3.9501 data_time: 0.2605 memory: 28672 grad_norm: 20.0627 loss: 8.3183 loss_cls: 0.9725 loss_bbox: 0.4085 d0.loss_cls: 1.0048 d0.loss_bbox: 0.3988 d1.loss_cls: 0.9845 d1.loss_bbox: 0.4033 d2.loss_cls: 0.9774 d2.loss_bbox: 0.4061 d3.loss_cls: 0.9741 d3.loss_bbox: 0.4083 d4.loss_cls: 0.9691 d4.loss_bbox: 0.4110 2024/02/29 13:50:41 - mmengine - INFO - Epoch(train) [3][400/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:49:55 time: 5.2270 data_time: 0.2731 memory: 29338 grad_norm: 21.0039 loss: 8.3135 loss_cls: 0.9711 loss_bbox: 0.4079 d0.loss_cls: 1.0043 d0.loss_bbox: 0.4014 d1.loss_cls: 0.9881 d1.loss_bbox: 0.4010 d2.loss_cls: 0.9802 d2.loss_bbox: 0.4029 d3.loss_cls: 0.9762 d3.loss_bbox: 0.4028 d4.loss_cls: 0.9736 d4.loss_bbox: 0.4041 2024/02/29 13:54:08 - mmengine - INFO - Epoch(train) [3][450/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:45:03 time: 4.1445 data_time: 0.3273 memory: 29806 grad_norm: 21.3081 loss: 8.3492 loss_cls: 0.9859 loss_bbox: 0.3987 d0.loss_cls: 1.0191 d0.loss_bbox: 0.3929 d1.loss_cls: 1.0003 d1.loss_bbox: 0.3921 d2.loss_cls: 0.9926 d2.loss_bbox: 0.3926 d3.loss_cls: 0.9920 d3.loss_bbox: 0.3962 d4.loss_cls: 0.9914 d4.loss_bbox: 0.3954 2024/02/29 13:57:11 - mmengine - INFO - Epoch(train) [3][500/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:39:03 time: 3.6550 data_time: 0.2581 memory: 28456 grad_norm: 21.6964 loss: 8.5459 loss_cls: 0.9911 loss_bbox: 0.4262 d0.loss_cls: 1.0262 d0.loss_bbox: 0.4173 d1.loss_cls: 1.0091 d1.loss_bbox: 0.4191 d2.loss_cls: 0.9975 d2.loss_bbox: 0.4225 d3.loss_cls: 0.9922 d3.loss_bbox: 0.4223 d4.loss_cls: 0.9953 d4.loss_bbox: 0.4272 2024/02/29 13:57:12 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 13:57:12 - mmengine - INFO - Saving checkpoint at 3 epochs 2024/02/29 14:08:02 - mmengine - INFO - Epoch(val) [3][ 50/123] eta: 0:15:26 time: 12.6963 data_time: 11.8385 memory: 13500 2024/02/29 14:18:09 - mmengine - INFO - Epoch(val) [3][100/123] eta: 0:04:45 time: 12.1477 data_time: 11.3316 memory: 13533 2024/02/29 14:21:59 - mmengine - INFO - Epoch(val) [3][123/123] Easy@0.25: 0.1507 Hard@0.25: 0.1451 View-Dep@0.25: 0.1077 View-Indep@0.25: 0.1726 Unique@0.25: 0.0000 Multi@0.25: 0.1502 Overall@0.25: 0.1502 Easy@0.5: 0.0294 Hard@0.5: 0.0179 View-Dep@0.5: 0.0121 View-Indep@0.5: 0.0371 Unique@0.5: 0.0000 Multi@0.5: 0.0284 Overall@0.5: 0.0284 data_time: 10.5651 time: 11.3925 2024/02/29 14:27:00 - mmengine - INFO - Epoch(train) [4][ 50/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:38:37 time: 6.0035 data_time: 1.1760 memory: 29233 grad_norm: 21.8595 loss: 8.3745 loss_cls: 0.9790 loss_bbox: 0.4087 d0.loss_cls: 1.0145 d0.loss_bbox: 0.4013 d1.loss_cls: 0.9955 d1.loss_bbox: 0.4065 d2.loss_cls: 0.9908 d2.loss_bbox: 0.4024 d3.loss_cls: 0.9841 d3.loss_bbox: 0.4032 d4.loss_cls: 0.9816 d4.loss_bbox: 0.4069 2024/02/29 14:30:45 - mmengine - INFO - Epoch(train) [4][100/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:34:41 time: 4.4947 data_time: 0.4434 memory: 29327 grad_norm: 21.4664 loss: 8.0750 loss_cls: 0.9573 loss_bbox: 0.3860 d0.loss_cls: 0.9904 d0.loss_bbox: 0.3816 d1.loss_cls: 0.9683 d1.loss_bbox: 0.3793 d2.loss_cls: 0.9612 d2.loss_bbox: 0.3806 d3.loss_cls: 0.9571 d3.loss_bbox: 0.3793 d4.loss_cls: 0.9533 d4.loss_bbox: 0.3807 2024/02/29 14:34:17 - mmengine - INFO - Epoch(train) [4][150/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:30:13 time: 4.2499 data_time: 1.6534 memory: 28696 grad_norm: 22.4202 loss: 8.2163 loss_cls: 0.9626 loss_bbox: 0.4005 d0.loss_cls: 1.0026 d0.loss_bbox: 0.3934 d1.loss_cls: 0.9787 d1.loss_bbox: 0.3952 d2.loss_cls: 0.9723 d2.loss_bbox: 0.3921 d3.loss_cls: 0.9677 d3.loss_bbox: 0.3932 d4.loss_cls: 0.9648 d4.loss_bbox: 0.3931 2024/02/29 14:38:01 - mmengine - INFO - Epoch(train) [4][200/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:26:17 time: 4.4816 data_time: 0.4037 memory: 29753 grad_norm: 22.2725 loss: 8.2656 loss_cls: 0.9779 loss_bbox: 0.3842 d0.loss_cls: 1.0401 d0.loss_bbox: 0.3742 d1.loss_cls: 1.0105 d1.loss_bbox: 0.3752 d2.loss_cls: 0.9925 d2.loss_bbox: 0.3794 d3.loss_cls: 0.9908 d3.loss_bbox: 0.3734 d4.loss_cls: 0.9890 d4.loss_bbox: 0.3785 2024/02/29 14:41:47 - mmengine - INFO - Epoch(train) [4][250/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:22:27 time: 4.5191 data_time: 0.2711 memory: 29194 grad_norm: 21.7413 loss: 8.3399 loss_cls: 0.9701 loss_bbox: 0.4139 d0.loss_cls: 1.0004 d0.loss_bbox: 0.4106 d1.loss_cls: 0.9851 d1.loss_bbox: 0.4071 d2.loss_cls: 0.9726 d2.loss_bbox: 0.4100 d3.loss_cls: 0.9772 d3.loss_bbox: 0.4105 d4.loss_cls: 0.9703 d4.loss_bbox: 0.4120 2024/02/29 14:45:22 - mmengine - INFO - Epoch(train) [4][300/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:18:10 time: 4.2877 data_time: 0.2471 memory: 28461 grad_norm: 20.8008 loss: 8.2432 loss_cls: 0.9674 loss_bbox: 0.3905 d0.loss_cls: 1.0210 d0.loss_bbox: 0.3814 d1.loss_cls: 1.0110 d1.loss_bbox: 0.3728 d2.loss_cls: 0.9927 d2.loss_bbox: 0.3798 d3.loss_cls: 0.9870 d3.loss_bbox: 0.3814 d4.loss_cls: 0.9717 d4.loss_bbox: 0.3867 2024/02/29 14:49:17 - mmengine - INFO - Epoch(train) [4][350/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:14:44 time: 4.7167 data_time: 0.2734 memory: 28955 grad_norm: 22.5928 loss: 7.7057 loss_cls: 0.9002 loss_bbox: 0.3718 d0.loss_cls: 0.9494 d0.loss_bbox: 0.3657 d1.loss_cls: 0.9253 d1.loss_bbox: 0.3675 d2.loss_cls: 0.9114 d2.loss_bbox: 0.3665 d3.loss_cls: 0.9071 d3.loss_bbox: 0.3671 d4.loss_cls: 0.9090 d4.loss_bbox: 0.3648 2024/02/29 14:52:44 - mmengine - INFO - Epoch(train) [4][400/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:10:14 time: 4.1391 data_time: 0.4140 memory: 28719 grad_norm: 21.6898 loss: 8.1290 loss_cls: 0.9393 loss_bbox: 0.4037 d0.loss_cls: 0.9897 d0.loss_bbox: 0.3988 d1.loss_cls: 0.9648 d1.loss_bbox: 0.3951 d2.loss_cls: 0.9560 d2.loss_bbox: 0.3956 d3.loss_cls: 0.9485 d3.loss_bbox: 0.3943 d4.loss_cls: 0.9437 d4.loss_bbox: 0.3994 2024/02/29 14:56:55 - mmengine - INFO - Epoch(train) [4][450/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:07:17 time: 5.0129 data_time: 0.3240 memory: 27523 grad_norm: 20.6769 loss: 8.0830 loss_cls: 0.9432 loss_bbox: 0.3926 d0.loss_cls: 0.9942 d0.loss_bbox: 0.3905 d1.loss_cls: 0.9706 d1.loss_bbox: 0.3827 d2.loss_cls: 0.9566 d2.loss_bbox: 0.3826 d3.loss_cls: 0.9469 d3.loss_bbox: 0.3884 d4.loss_cls: 0.9457 d4.loss_bbox: 0.3890 2024/02/29 14:59:38 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 14:59:44 - mmengine - INFO - Epoch(train) [4][500/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 5:01:34 time: 3.3863 data_time: 0.2546 memory: 28341 grad_norm: 22.4441 loss: 8.2163 loss_cls: 0.9523 loss_bbox: 0.4025 d0.loss_cls: 1.0081 d0.loss_bbox: 0.3942 d1.loss_cls: 0.9875 d1.loss_bbox: 0.3913 d2.loss_cls: 0.9740 d2.loss_bbox: 0.3913 d3.loss_cls: 0.9669 d3.loss_bbox: 0.3935 d4.loss_cls: 0.9604 d4.loss_bbox: 0.3943 2024/02/29 14:59:45 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 14:59:45 - mmengine - INFO - Saving checkpoint at 4 epochs 2024/02/29 15:04:48 - mmengine - INFO - Epoch(train) [5][ 50/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:59:37 time: 5.7579 data_time: 3.5047 memory: 27945 grad_norm: 24.5522 loss: 7.8848 loss_cls: 0.9340 loss_bbox: 0.3698 d0.loss_cls: 0.9864 d0.loss_bbox: 0.3574 d1.loss_cls: 0.9570 d1.loss_bbox: 0.3655 d2.loss_cls: 0.9429 d2.loss_bbox: 0.3639 d3.loss_cls: 0.9405 d3.loss_bbox: 0.3669 d4.loss_cls: 0.9325 d4.loss_bbox: 0.3680 2024/02/29 15:08:13 - mmengine - INFO - Epoch(train) [5][100/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:55:08 time: 4.0903 data_time: 0.3598 memory: 28542 grad_norm: 22.8423 loss: 7.8957 loss_cls: 0.9287 loss_bbox: 0.3703 d0.loss_cls: 0.9847 d0.loss_bbox: 0.3627 d1.loss_cls: 0.9612 d1.loss_bbox: 0.3610 d2.loss_cls: 0.9457 d2.loss_bbox: 0.3691 d3.loss_cls: 0.9394 d3.loss_bbox: 0.3691 d4.loss_cls: 0.9345 d4.loss_bbox: 0.3694 2024/02/29 15:12:18 - mmengine - INFO - Epoch(train) [5][150/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:51:55 time: 4.9100 data_time: 1.4096 memory: 27739 grad_norm: 22.8370 loss: 7.8550 loss_cls: 0.9224 loss_bbox: 0.3715 d0.loss_cls: 0.9657 d0.loss_bbox: 0.3686 d1.loss_cls: 0.9539 d1.loss_bbox: 0.3693 d2.loss_cls: 0.9386 d2.loss_bbox: 0.3692 d3.loss_cls: 0.9332 d3.loss_bbox: 0.3687 d4.loss_cls: 0.9294 d4.loss_bbox: 0.3646 2024/02/29 15:15:49 - mmengine - INFO - Epoch(train) [5][200/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:47:40 time: 4.2090 data_time: 0.8302 memory: 28798 grad_norm: 23.5755 loss: 7.9709 loss_cls: 0.9539 loss_bbox: 0.3645 d0.loss_cls: 1.0092 d0.loss_bbox: 0.3509 d1.loss_cls: 0.9840 d1.loss_bbox: 0.3463 d2.loss_cls: 0.9728 d2.loss_bbox: 0.3529 d3.loss_cls: 0.9592 d3.loss_bbox: 0.3614 d4.loss_cls: 0.9515 d4.loss_bbox: 0.3645 2024/02/29 15:19:45 - mmengine - INFO - Epoch(train) [5][250/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:44:10 time: 4.7339 data_time: 1.5195 memory: 29728 grad_norm: 23.8346 loss: 7.7189 loss_cls: 0.9019 loss_bbox: 0.3670 d0.loss_cls: 0.9668 d0.loss_bbox: 0.3609 d1.loss_cls: 0.9353 d1.loss_bbox: 0.3632 d2.loss_cls: 0.9221 d2.loss_bbox: 0.3588 d3.loss_cls: 0.9124 d3.loss_bbox: 0.3629 d4.loss_cls: 0.9044 d4.loss_bbox: 0.3633 2024/02/29 15:23:14 - mmengine - INFO - Epoch(train) [5][300/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:39:55 time: 4.1861 data_time: 2.0112 memory: 28686 grad_norm: 24.0006 loss: 7.8041 loss_cls: 0.9359 loss_bbox: 0.3522 d0.loss_cls: 0.9838 d0.loss_bbox: 0.3515 d1.loss_cls: 0.9599 d1.loss_bbox: 0.3397 d2.loss_cls: 0.9599 d2.loss_bbox: 0.3422 d3.loss_cls: 0.9420 d3.loss_bbox: 0.3485 d4.loss_cls: 0.9357 d4.loss_bbox: 0.3530 2024/02/29 15:27:08 - mmengine - INFO - Epoch(train) [5][350/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:36:18 time: 4.6584 data_time: 2.4680 memory: 28054 grad_norm: 22.4448 loss: 7.7261 loss_cls: 0.9202 loss_bbox: 0.3543 d0.loss_cls: 0.9691 d0.loss_bbox: 0.3560 d1.loss_cls: 0.9457 d1.loss_bbox: 0.3453 d2.loss_cls: 0.9307 d2.loss_bbox: 0.3526 d3.loss_cls: 0.9267 d3.loss_bbox: 0.3505 d4.loss_cls: 0.9218 d4.loss_bbox: 0.3531 2024/02/29 15:30:54 - mmengine - INFO - Epoch(train) [5][400/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:32:31 time: 4.5210 data_time: 2.3438 memory: 28032 grad_norm: 23.1528 loss: 7.6504 loss_cls: 0.9006 loss_bbox: 0.3590 d0.loss_cls: 0.9569 d0.loss_bbox: 0.3540 d1.loss_cls: 0.9363 d1.loss_bbox: 0.3500 d2.loss_cls: 0.9152 d2.loss_bbox: 0.3529 d3.loss_cls: 0.9128 d3.loss_bbox: 0.3527 d4.loss_cls: 0.9046 d4.loss_bbox: 0.3554 2024/02/29 15:34:22 - mmengine - INFO - Epoch(train) [5][450/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:28:18 time: 4.1709 data_time: 1.2375 memory: 28429 grad_norm: 24.3472 loss: 8.4436 loss_cls: 0.9835 loss_bbox: 0.4057 d0.loss_cls: 1.0579 d0.loss_bbox: 0.3948 d1.loss_cls: 1.0172 d1.loss_bbox: 0.3996 d2.loss_cls: 1.0050 d2.loss_bbox: 0.3971 d3.loss_cls: 0.9924 d3.loss_bbox: 0.4006 d4.loss_cls: 0.9815 d4.loss_bbox: 0.4085 2024/02/29 15:37:36 - mmengine - INFO - Epoch(train) [5][500/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:23:47 time: 3.8855 data_time: 0.3426 memory: 28868 grad_norm: 23.2621 loss: 7.8836 loss_cls: 0.9151 loss_bbox: 0.3791 d0.loss_cls: 0.9760 d0.loss_bbox: 0.3742 d1.loss_cls: 0.9589 d1.loss_bbox: 0.3678 d2.loss_cls: 0.9456 d2.loss_bbox: 0.3684 d3.loss_cls: 0.9306 d3.loss_bbox: 0.3707 d4.loss_cls: 0.9280 d4.loss_bbox: 0.3691 2024/02/29 15:37:37 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 15:37:37 - mmengine - INFO - Saving checkpoint at 5 epochs 2024/02/29 15:42:28 - mmengine - INFO - Epoch(train) [6][ 50/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:20:59 time: 5.4991 data_time: 1.0749 memory: 29948 grad_norm: 24.6834 loss: 7.6768 loss_cls: 0.8941 loss_bbox: 0.3751 d0.loss_cls: 0.9461 d0.loss_bbox: 0.3676 d1.loss_cls: 0.9208 d1.loss_bbox: 0.3645 d2.loss_cls: 0.9037 d2.loss_bbox: 0.3735 d3.loss_cls: 0.8973 d3.loss_bbox: 0.3708 d4.loss_cls: 0.8893 d4.loss_bbox: 0.3740 2024/02/29 15:46:08 - mmengine - INFO - Epoch(train) [6][100/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:17:05 time: 4.4127 data_time: 0.2924 memory: 28607 grad_norm: 23.5465 loss: 7.5887 loss_cls: 0.8981 loss_bbox: 0.3577 d0.loss_cls: 0.9379 d0.loss_bbox: 0.3508 d1.loss_cls: 0.9188 d1.loss_bbox: 0.3494 d2.loss_cls: 0.9113 d2.loss_bbox: 0.3493 d3.loss_cls: 0.9101 d3.loss_bbox: 0.3534 d4.loss_cls: 0.8977 d4.loss_bbox: 0.3544 2024/02/29 15:49:51 - mmengine - INFO - Epoch(train) [6][150/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:13:13 time: 4.4447 data_time: 1.9256 memory: 29585 grad_norm: 23.2336 loss: 7.6846 loss_cls: 0.9166 loss_bbox: 0.3482 d0.loss_cls: 0.9838 d0.loss_bbox: 0.3375 d1.loss_cls: 0.9548 d1.loss_bbox: 0.3379 d2.loss_cls: 0.9337 d2.loss_bbox: 0.3399 d3.loss_cls: 0.9254 d3.loss_bbox: 0.3407 d4.loss_cls: 0.9225 d4.loss_bbox: 0.3435 2024/02/29 15:53:14 - mmengine - INFO - Epoch(train) [6][200/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:08:59 time: 4.0674 data_time: 0.7346 memory: 29080 grad_norm: 22.4759 loss: 7.7878 loss_cls: 0.9363 loss_bbox: 0.3501 d0.loss_cls: 0.9871 d0.loss_bbox: 0.3471 d1.loss_cls: 0.9549 d1.loss_bbox: 0.3501 d2.loss_cls: 0.9431 d2.loss_bbox: 0.3487 d3.loss_cls: 0.9424 d3.loss_bbox: 0.3456 d4.loss_cls: 0.9306 d4.loss_bbox: 0.3519 2024/02/29 15:57:34 - mmengine - INFO - Epoch(train) [6][250/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:05:53 time: 5.1997 data_time: 0.2664 memory: 28429 grad_norm: 23.5845 loss: 7.3233 loss_cls: 0.8633 loss_bbox: 0.3433 d0.loss_cls: 0.9135 d0.loss_bbox: 0.3433 d1.loss_cls: 0.8878 d1.loss_bbox: 0.3381 d2.loss_cls: 0.8714 d2.loss_bbox: 0.3406 d3.loss_cls: 0.8753 d3.loss_bbox: 0.3384 d4.loss_cls: 0.8700 d4.loss_bbox: 0.3385 2024/02/29 16:01:08 - mmengine - INFO - Epoch(train) [6][300/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 4:01:53 time: 4.2879 data_time: 0.2940 memory: 28685 grad_norm: 25.2007 loss: 7.4403 loss_cls: 0.8764 loss_bbox: 0.3506 d0.loss_cls: 0.9385 d0.loss_bbox: 0.3353 d1.loss_cls: 0.8986 d1.loss_bbox: 0.3454 d2.loss_cls: 0.8856 d2.loss_bbox: 0.3485 d3.loss_cls: 0.8838 d3.loss_bbox: 0.3503 d4.loss_cls: 0.8767 d4.loss_bbox: 0.3507 2024/02/29 16:04:49 - mmengine - INFO - Epoch(train) [6][350/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:58:00 time: 4.4130 data_time: 0.2565 memory: 29074 grad_norm: 23.2328 loss: 7.6662 loss_cls: 0.9116 loss_bbox: 0.3602 d0.loss_cls: 0.9663 d0.loss_bbox: 0.3429 d1.loss_cls: 0.9350 d1.loss_bbox: 0.3427 d2.loss_cls: 0.9203 d2.loss_bbox: 0.3531 d3.loss_cls: 0.9143 d3.loss_bbox: 0.3549 d4.loss_cls: 0.9086 d4.loss_bbox: 0.3563 2024/02/29 16:08:16 - mmengine - INFO - Epoch(train) [6][400/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:53:53 time: 4.1328 data_time: 0.3259 memory: 28800 grad_norm: 23.8272 loss: 7.6090 loss_cls: 0.8802 loss_bbox: 0.3691 d0.loss_cls: 0.9464 d0.loss_bbox: 0.3526 d1.loss_cls: 0.9148 d1.loss_bbox: 0.3657 d2.loss_cls: 0.8985 d2.loss_bbox: 0.3666 d3.loss_cls: 0.8977 d3.loss_bbox: 0.3670 d4.loss_cls: 0.8844 d4.loss_bbox: 0.3660 2024/02/29 16:11:57 - mmengine - INFO - Epoch(train) [6][450/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:50:02 time: 4.4179 data_time: 0.3916 memory: 29108 grad_norm: 22.9145 loss: 7.3620 loss_cls: 0.8502 loss_bbox: 0.3622 d0.loss_cls: 0.9128 d0.loss_bbox: 0.3468 d1.loss_cls: 0.8858 d1.loss_bbox: 0.3497 d2.loss_cls: 0.8727 d2.loss_bbox: 0.3516 d3.loss_cls: 0.8613 d3.loss_bbox: 0.3546 d4.loss_cls: 0.8568 d4.loss_bbox: 0.3575 2024/02/29 16:14:46 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 16:15:04 - mmengine - INFO - Epoch(train) [6][500/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:45:38 time: 3.7395 data_time: 0.2522 memory: 28733 grad_norm: 25.0700 loss: 7.4750 loss_cls: 0.8617 loss_bbox: 0.3586 d0.loss_cls: 0.9403 d0.loss_bbox: 0.3572 d1.loss_cls: 0.8961 d1.loss_bbox: 0.3621 d2.loss_cls: 0.8771 d2.loss_bbox: 0.3641 d3.loss_cls: 0.8686 d3.loss_bbox: 0.3630 d4.loss_cls: 0.8674 d4.loss_bbox: 0.3587 2024/02/29 16:15:04 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 16:15:04 - mmengine - INFO - Saving checkpoint at 6 epochs 2024/02/29 16:25:56 - mmengine - INFO - Epoch(val) [6][ 50/123] eta: 0:15:28 time: 12.7174 data_time: 11.8973 memory: 13501 2024/02/29 16:36:17 - mmengine - INFO - Epoch(val) [6][100/123] eta: 0:04:49 time: 12.4208 data_time: 11.6033 memory: 13534 2024/02/29 16:40:20 - mmengine - INFO - Epoch(val) [6][123/123] Easy@0.25: 0.2552 Hard@0.25: 0.2440 View-Dep@0.25: 0.2416 View-Indep@0.25: 0.2611 Unique@0.25: 0.0000 Multi@0.25: 0.2543 Overall@0.25: 0.2543 Easy@0.5: 0.0808 Hard@0.5: 0.0568 View-Dep@0.5: 0.0713 View-Indep@0.5: 0.0829 Unique@0.5: 0.0000 Multi@0.5: 0.0789 Overall@0.5: 0.0789 data_time: 10.6849 time: 11.5079 2024/02/29 16:45:16 - mmengine - INFO - Epoch(train) [7][ 50/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:42:53 time: 5.9230 data_time: 1.1641 memory: 27795 grad_norm: 23.0762 loss: 7.2099 loss_cls: 0.8646 loss_bbox: 0.3238 d0.loss_cls: 0.9246 d0.loss_bbox: 0.3149 d1.loss_cls: 0.8947 d1.loss_bbox: 0.3137 d2.loss_cls: 0.8767 d2.loss_bbox: 0.3179 d3.loss_cls: 0.8664 d3.loss_bbox: 0.3242 d4.loss_cls: 0.8667 d4.loss_bbox: 0.3217 2024/02/29 16:49:14 - mmengine - INFO - Epoch(train) [7][100/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:39:18 time: 4.7563 data_time: 0.2610 memory: 29507 grad_norm: 24.3135 loss: 7.9404 loss_cls: 0.9359 loss_bbox: 0.3700 d0.loss_cls: 0.9925 d0.loss_bbox: 0.3691 d1.loss_cls: 0.9704 d1.loss_bbox: 0.3642 d2.loss_cls: 0.9518 d2.loss_bbox: 0.3621 d3.loss_cls: 0.9508 d3.loss_bbox: 0.3618 d4.loss_cls: 0.9476 d4.loss_bbox: 0.3642 2024/02/29 16:52:36 - mmengine - INFO - Epoch(train) [7][150/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:35:10 time: 4.0442 data_time: 0.2723 memory: 28159 grad_norm: 23.8807 loss: 7.5627 loss_cls: 0.9013 loss_bbox: 0.3341 d0.loss_cls: 0.9756 d0.loss_bbox: 0.3283 d1.loss_cls: 0.9462 d1.loss_bbox: 0.3324 d2.loss_cls: 0.9283 d2.loss_bbox: 0.3297 d3.loss_cls: 0.9155 d3.loss_bbox: 0.3312 d4.loss_cls: 0.9093 d4.loss_bbox: 0.3308 2024/02/29 16:56:25 - mmengine - INFO - Epoch(train) [7][200/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:31:26 time: 4.5781 data_time: 0.2589 memory: 28225 grad_norm: 23.5170 loss: 7.4711 loss_cls: 0.8743 loss_bbox: 0.3447 d0.loss_cls: 0.9445 d0.loss_bbox: 0.3483 d1.loss_cls: 0.9192 d1.loss_bbox: 0.3367 d2.loss_cls: 0.9021 d2.loss_bbox: 0.3406 d3.loss_cls: 0.8892 d3.loss_bbox: 0.3434 d4.loss_cls: 0.8846 d4.loss_bbox: 0.3436 2024/02/29 17:00:14 - mmengine - INFO - Epoch(train) [7][250/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:27:43 time: 4.5820 data_time: 0.2663 memory: 28365 grad_norm: 24.4979 loss: 7.4425 loss_cls: 0.8868 loss_bbox: 0.3363 d0.loss_cls: 0.9477 d0.loss_bbox: 0.3354 d1.loss_cls: 0.9211 d1.loss_bbox: 0.3297 d2.loss_cls: 0.9046 d2.loss_bbox: 0.3315 d3.loss_cls: 0.8968 d3.loss_bbox: 0.3324 d4.loss_cls: 0.8850 d4.loss_bbox: 0.3354 2024/02/29 17:03:56 - mmengine - INFO - Epoch(train) [7][300/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:23:53 time: 4.4325 data_time: 0.2412 memory: 29013 grad_norm: 24.2786 loss: 7.5437 loss_cls: 0.9059 loss_bbox: 0.3361 d0.loss_cls: 0.9593 d0.loss_bbox: 0.3362 d1.loss_cls: 0.9350 d1.loss_bbox: 0.3300 d2.loss_cls: 0.9200 d2.loss_bbox: 0.3308 d3.loss_cls: 0.9136 d3.loss_bbox: 0.3302 d4.loss_cls: 0.9106 d4.loss_bbox: 0.3360 2024/02/29 17:07:44 - mmengine - INFO - Epoch(train) [7][350/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:20:08 time: 4.5563 data_time: 0.3702 memory: 28311 grad_norm: 24.7807 loss: 7.3694 loss_cls: 0.8795 loss_bbox: 0.3270 d0.loss_cls: 0.9428 d0.loss_bbox: 0.3332 d1.loss_cls: 0.9161 d1.loss_bbox: 0.3260 d2.loss_cls: 0.8975 d2.loss_bbox: 0.3264 d3.loss_cls: 0.8873 d3.loss_bbox: 0.3263 d4.loss_cls: 0.8799 d4.loss_bbox: 0.3274 2024/02/29 17:11:21 - mmengine - INFO - Epoch(train) [7][400/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:16:16 time: 4.3520 data_time: 0.2599 memory: 27876 grad_norm: 24.5267 loss: 7.3147 loss_cls: 0.8700 loss_bbox: 0.3305 d0.loss_cls: 0.9520 d0.loss_bbox: 0.3182 d1.loss_cls: 0.9124 d1.loss_bbox: 0.3244 d2.loss_cls: 0.8847 d2.loss_bbox: 0.3242 d3.loss_cls: 0.8753 d3.loss_bbox: 0.3256 d4.loss_cls: 0.8721 d4.loss_bbox: 0.3254 2024/02/29 17:15:00 - mmengine - INFO - Epoch(train) [7][450/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:12:24 time: 4.3643 data_time: 0.2560 memory: 28762 grad_norm: 24.0813 loss: 7.3435 loss_cls: 0.8487 loss_bbox: 0.3596 d0.loss_cls: 0.9118 d0.loss_bbox: 0.3534 d1.loss_cls: 0.8851 d1.loss_bbox: 0.3494 d2.loss_cls: 0.8703 d2.loss_bbox: 0.3492 d3.loss_cls: 0.8566 d3.loss_bbox: 0.3514 d4.loss_cls: 0.8495 d4.loss_bbox: 0.3585 2024/02/29 17:18:13 - mmengine - INFO - Epoch(train) [7][500/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:08:15 time: 3.8675 data_time: 0.2460 memory: 29252 grad_norm: 23.9920 loss: 7.5346 loss_cls: 0.8896 loss_bbox: 0.3397 d0.loss_cls: 0.9710 d0.loss_bbox: 0.3355 d1.loss_cls: 0.9376 d1.loss_bbox: 0.3313 d2.loss_cls: 0.9224 d2.loss_bbox: 0.3345 d3.loss_cls: 0.9024 d3.loss_bbox: 0.3362 d4.loss_cls: 0.8975 d4.loss_bbox: 0.3369 2024/02/29 17:18:14 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 17:18:14 - mmengine - INFO - Saving checkpoint at 7 epochs 2024/02/29 17:23:10 - mmengine - INFO - Epoch(train) [8][ 50/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:05:01 time: 5.6163 data_time: 3.4292 memory: 28393 grad_norm: 25.2682 loss: 7.5750 loss_cls: 0.9025 loss_bbox: 0.3445 d0.loss_cls: 0.9705 d0.loss_bbox: 0.3387 d1.loss_cls: 0.9356 d1.loss_bbox: 0.3362 d2.loss_cls: 0.9181 d2.loss_bbox: 0.3380 d3.loss_cls: 0.9104 d3.loss_bbox: 0.3369 d4.loss_cls: 0.9035 d4.loss_bbox: 0.3401 2024/02/29 17:26:57 - mmengine - INFO - Epoch(train) [8][100/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 3:01:16 time: 4.5547 data_time: 0.4364 memory: 28835 grad_norm: 24.0645 loss: 7.3141 loss_cls: 0.8616 loss_bbox: 0.3415 d0.loss_cls: 0.9164 d0.loss_bbox: 0.3447 d1.loss_cls: 0.8861 d1.loss_bbox: 0.3402 d2.loss_cls: 0.8723 d2.loss_bbox: 0.3368 d3.loss_cls: 0.8711 d3.loss_bbox: 0.3383 d4.loss_cls: 0.8627 d4.loss_bbox: 0.3423 2024/02/29 17:30:43 - mmengine - INFO - Epoch(train) [8][150/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 2:57:30 time: 4.5182 data_time: 0.4502 memory: 28323 grad_norm: 24.5275 loss: 7.3554 loss_cls: 0.8650 loss_bbox: 0.3459 d0.loss_cls: 0.9237 d0.loss_bbox: 0.3392 d1.loss_cls: 0.9039 d1.loss_bbox: 0.3392 d2.loss_cls: 0.8854 d2.loss_bbox: 0.3367 d3.loss_cls: 0.8747 d3.loss_bbox: 0.3392 d4.loss_cls: 0.8614 d4.loss_bbox: 0.3411 2024/02/29 17:34:29 - mmengine - INFO - Epoch(train) [8][200/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 2:53:43 time: 4.5118 data_time: 0.2554 memory: 29129 grad_norm: 24.7502 loss: 7.2536 loss_cls: 0.8446 loss_bbox: 0.3459 d0.loss_cls: 0.8994 d0.loss_bbox: 0.3450 d1.loss_cls: 0.8779 d1.loss_bbox: 0.3451 d2.loss_cls: 0.8682 d2.loss_bbox: 0.3433 d3.loss_cls: 0.8537 d3.loss_bbox: 0.3402 d4.loss_cls: 0.8476 d4.loss_bbox: 0.3427 2024/02/29 17:38:14 - mmengine - INFO - Epoch(train) [8][250/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 2:49:56 time: 4.5011 data_time: 0.2683 memory: 28832 grad_norm: 23.8424 loss: 7.6527 loss_cls: 0.8934 loss_bbox: 0.3632 d0.loss_cls: 0.9595 d0.loss_bbox: 0.3601 d1.loss_cls: 0.9313 d1.loss_bbox: 0.3564 d2.loss_cls: 0.9096 d2.loss_bbox: 0.3587 d3.loss_cls: 0.9033 d3.loss_bbox: 0.3622 d4.loss_cls: 0.8896 d4.loss_bbox: 0.3655 2024/02/29 17:41:59 - mmengine - INFO - Epoch(train) [8][300/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 2:46:09 time: 4.4900 data_time: 0.3211 memory: 28089 grad_norm: 23.2998 loss: 7.3770 loss_cls: 0.8998 loss_bbox: 0.3196 d0.loss_cls: 0.9499 d0.loss_bbox: 0.3221 d1.loss_cls: 0.9201 d1.loss_bbox: 0.3157 d2.loss_cls: 0.9074 d2.loss_bbox: 0.3143 d3.loss_cls: 0.8987 d3.loss_bbox: 0.3171 d4.loss_cls: 0.8940 d4.loss_bbox: 0.3183 2024/02/29 17:45:35 - mmengine - INFO - Epoch(train) [8][350/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 2:42:18 time: 4.3381 data_time: 0.2564 memory: 28870 grad_norm: 23.9164 loss: 7.3115 loss_cls: 0.8638 loss_bbox: 0.3377 d0.loss_cls: 0.9270 d0.loss_bbox: 0.3364 d1.loss_cls: 0.8951 d1.loss_bbox: 0.3302 d2.loss_cls: 0.8812 d2.loss_bbox: 0.3327 d3.loss_cls: 0.8689 d3.loss_bbox: 0.3361 d4.loss_cls: 0.8629 d4.loss_bbox: 0.3393 2024/02/29 17:49:34 - mmengine - INFO - Epoch(train) [8][400/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 2:38:39 time: 4.7744 data_time: 0.2757 memory: 28508 grad_norm: 24.0531 loss: 7.2719 loss_cls: 0.8540 loss_bbox: 0.3418 d0.loss_cls: 0.9143 d0.loss_bbox: 0.3444 d1.loss_cls: 0.8844 d1.loss_bbox: 0.3391 d2.loss_cls: 0.8702 d2.loss_bbox: 0.3381 d3.loss_cls: 0.8573 d3.loss_bbox: 0.3367 d4.loss_cls: 0.8545 d4.loss_bbox: 0.3371 2024/02/29 17:52:49 - mmengine - INFO - Epoch(train) [8][450/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 2:34:37 time: 3.8944 data_time: 0.2899 memory: 28601 grad_norm: 23.5191 loss: 7.3229 loss_cls: 0.8556 loss_bbox: 0.3475 d0.loss_cls: 0.9144 d0.loss_bbox: 0.3455 d1.loss_cls: 0.8769 d1.loss_bbox: 0.3517 d2.loss_cls: 0.8748 d2.loss_bbox: 0.3478 d3.loss_cls: 0.8644 d3.loss_bbox: 0.3452 d4.loss_cls: 0.8562 d4.loss_bbox: 0.3429 2024/02/29 17:55:58 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 17:56:15 - mmengine - INFO - Epoch(train) [8][500/501] base_lr: 5.0000e-04 lr: 5.0000e-04 eta: 2:30:41 time: 4.1295 data_time: 0.5256 memory: 29174 grad_norm: 23.4262 loss: 7.4433 loss_cls: 0.8499 loss_bbox: 0.3702 d0.loss_cls: 0.9205 d0.loss_bbox: 0.3615 d1.loss_cls: 0.8845 d1.loss_bbox: 0.3662 d2.loss_cls: 0.8757 d2.loss_bbox: 0.3672 d3.loss_cls: 0.8596 d3.loss_bbox: 0.3681 d4.loss_cls: 0.8486 d4.loss_bbox: 0.3713 2024/02/29 17:56:16 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 17:56:16 - mmengine - INFO - Saving checkpoint at 8 epochs 2024/02/29 18:00:56 - mmengine - INFO - Epoch(train) [9][ 50/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 2:27:08 time: 5.2879 data_time: 1.3401 memory: 28887 grad_norm: 22.1393 loss: 7.0556 loss_cls: 0.8211 loss_bbox: 0.3335 d0.loss_cls: 0.8901 d0.loss_bbox: 0.3288 d1.loss_cls: 0.8628 d1.loss_bbox: 0.3257 d2.loss_cls: 0.8480 d2.loss_bbox: 0.3289 d3.loss_cls: 0.8306 d3.loss_bbox: 0.3361 d4.loss_cls: 0.8132 d4.loss_bbox: 0.3368 2024/02/29 18:04:52 - mmengine - INFO - Epoch(train) [9][100/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 2:23:27 time: 4.7180 data_time: 2.4152 memory: 28434 grad_norm: 20.1974 loss: 7.0901 loss_cls: 0.8262 loss_bbox: 0.3381 d0.loss_cls: 0.8896 d0.loss_bbox: 0.3309 d1.loss_cls: 0.8641 d1.loss_bbox: 0.3299 d2.loss_cls: 0.8453 d2.loss_bbox: 0.3335 d3.loss_cls: 0.8282 d3.loss_bbox: 0.3408 d4.loss_cls: 0.8277 d4.loss_bbox: 0.3359 2024/02/29 18:08:20 - mmengine - INFO - Epoch(train) [9][150/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 2:19:33 time: 4.1724 data_time: 0.9272 memory: 28740 grad_norm: 21.6852 loss: 6.9631 loss_cls: 0.8266 loss_bbox: 0.3151 d0.loss_cls: 0.8791 d0.loss_bbox: 0.3236 d1.loss_cls: 0.8592 d1.loss_bbox: 0.3153 d2.loss_cls: 0.8451 d2.loss_bbox: 0.3176 d3.loss_cls: 0.8244 d3.loss_bbox: 0.3171 d4.loss_cls: 0.8244 d4.loss_bbox: 0.3155 2024/02/29 18:12:23 - mmengine - INFO - Epoch(train) [9][200/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 2:15:55 time: 4.8584 data_time: 0.6440 memory: 29277 grad_norm: 21.2956 loss: 6.8853 loss_cls: 0.7871 loss_bbox: 0.3364 d0.loss_cls: 0.8623 d0.loss_bbox: 0.3448 d1.loss_cls: 0.8218 d1.loss_bbox: 0.3377 d2.loss_cls: 0.8007 d2.loss_bbox: 0.3368 d3.loss_cls: 0.7976 d3.loss_bbox: 0.3307 d4.loss_cls: 0.7928 d4.loss_bbox: 0.3366 2024/02/29 18:16:02 - mmengine - INFO - Epoch(train) [9][250/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 2:12:06 time: 4.3794 data_time: 0.2604 memory: 28726 grad_norm: 20.9650 loss: 7.2275 loss_cls: 0.8586 loss_bbox: 0.3266 d0.loss_cls: 0.9225 d0.loss_bbox: 0.3322 d1.loss_cls: 0.8831 d1.loss_bbox: 0.3314 d2.loss_cls: 0.8686 d2.loss_bbox: 0.3297 d3.loss_cls: 0.8563 d3.loss_bbox: 0.3335 d4.loss_cls: 0.8562 d4.loss_bbox: 0.3287 2024/02/29 18:19:32 - mmengine - INFO - Epoch(train) [9][300/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 2:08:13 time: 4.1873 data_time: 0.6875 memory: 28437 grad_norm: 21.4877 loss: 6.9313 loss_cls: 0.8145 loss_bbox: 0.3176 d0.loss_cls: 0.8850 d0.loss_bbox: 0.3252 d1.loss_cls: 0.8432 d1.loss_bbox: 0.3287 d2.loss_cls: 0.8274 d2.loss_bbox: 0.3197 d3.loss_cls: 0.8133 d3.loss_bbox: 0.3199 d4.loss_cls: 0.8188 d4.loss_bbox: 0.3181 2024/02/29 18:23:14 - mmengine - INFO - Epoch(train) [9][350/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 2:04:26 time: 4.4437 data_time: 0.6287 memory: 29086 grad_norm: 21.9563 loss: 6.8362 loss_cls: 0.8068 loss_bbox: 0.3187 d0.loss_cls: 0.8624 d0.loss_bbox: 0.3209 d1.loss_cls: 0.8384 d1.loss_bbox: 0.3103 d2.loss_cls: 0.8168 d2.loss_bbox: 0.3163 d3.loss_cls: 0.8075 d3.loss_bbox: 0.3164 d4.loss_cls: 0.8025 d4.loss_bbox: 0.3193 2024/02/29 18:27:11 - mmengine - INFO - Epoch(train) [9][400/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 2:00:45 time: 4.7393 data_time: 0.2773 memory: 28263 grad_norm: 21.9667 loss: 7.0035 loss_cls: 0.8151 loss_bbox: 0.3289 d0.loss_cls: 0.8866 d0.loss_bbox: 0.3414 d1.loss_cls: 0.8532 d1.loss_bbox: 0.3297 d2.loss_cls: 0.8307 d2.loss_bbox: 0.3272 d3.loss_cls: 0.8229 d3.loss_bbox: 0.3282 d4.loss_cls: 0.8101 d4.loss_bbox: 0.3296 2024/02/29 18:30:50 - mmengine - INFO - Epoch(train) [9][450/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:56:56 time: 4.3852 data_time: 0.2674 memory: 28579 grad_norm: 21.6421 loss: 6.9015 loss_cls: 0.8160 loss_bbox: 0.3072 d0.loss_cls: 0.9056 d0.loss_bbox: 0.3095 d1.loss_cls: 0.8515 d1.loss_bbox: 0.3044 d2.loss_cls: 0.8330 d2.loss_bbox: 0.3071 d3.loss_cls: 0.8295 d3.loss_bbox: 0.3101 d4.loss_cls: 0.8219 d4.loss_bbox: 0.3056 2024/02/29 18:34:01 - mmengine - INFO - Epoch(train) [9][500/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:52:59 time: 3.8224 data_time: 0.3454 memory: 28655 grad_norm: 22.2966 loss: 6.9328 loss_cls: 0.8231 loss_bbox: 0.3058 d0.loss_cls: 0.9129 d0.loss_bbox: 0.2995 d1.loss_cls: 0.8693 d1.loss_bbox: 0.3007 d2.loss_cls: 0.8387 d2.loss_bbox: 0.3102 d3.loss_cls: 0.8350 d3.loss_bbox: 0.3037 d4.loss_cls: 0.8269 d4.loss_bbox: 0.3069 2024/02/29 18:34:02 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 18:34:02 - mmengine - INFO - Saving checkpoint at 9 epochs 2024/02/29 18:44:38 - mmengine - INFO - Epoch(val) [9][ 50/123] eta: 0:15:08 time: 12.4509 data_time: 11.6434 memory: 13501 2024/02/29 18:56:30 - mmengine - INFO - Epoch(val) [9][100/123] eta: 0:05:06 time: 14.2256 data_time: 13.3895 memory: 13532 2024/02/29 18:59:23 - mmengine - INFO - Epoch(val) [9][123/123] Easy@0.25: 0.3394 Hard@0.25: 0.3218 View-Dep@0.25: 0.3432 View-Indep@0.25: 0.3352 Unique@0.25: 0.0000 Multi@0.25: 0.3380 Overall@0.25: 0.3380 Easy@0.5: 0.1428 Hard@0.5: 0.1430 View-Dep@0.5: 0.1365 View-Indep@0.5: 0.1462 Unique@0.5: 0.0000 Multi@0.5: 0.1428 Overall@0.5: 0.1428 data_time: 10.7212 time: 11.5406 2024/02/29 19:04:26 - mmengine - INFO - Epoch(train) [10][ 50/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:49:33 time: 6.0644 data_time: 2.3661 memory: 29871 grad_norm: 21.8262 loss: 7.4873 loss_cls: 0.8779 loss_bbox: 0.3530 d0.loss_cls: 0.9505 d0.loss_bbox: 0.3541 d1.loss_cls: 0.9030 d1.loss_bbox: 0.3537 d2.loss_cls: 0.8938 d2.loss_bbox: 0.3483 d3.loss_cls: 0.8793 d3.loss_bbox: 0.3520 d4.loss_cls: 0.8682 d4.loss_bbox: 0.3536 2024/02/29 19:07:57 - mmengine - INFO - Epoch(train) [10][100/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:45:42 time: 4.2055 data_time: 2.0294 memory: 28154 grad_norm: 21.8389 loss: 7.1107 loss_cls: 0.8213 loss_bbox: 0.3411 d0.loss_cls: 0.8913 d0.loss_bbox: 0.3443 d1.loss_cls: 0.8607 d1.loss_bbox: 0.3356 d2.loss_cls: 0.8441 d2.loss_bbox: 0.3369 d3.loss_cls: 0.8355 d3.loss_bbox: 0.3359 d4.loss_cls: 0.8233 d4.loss_bbox: 0.3407 2024/02/29 19:11:43 - mmengine - INFO - Epoch(train) [10][150/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:41:56 time: 4.5275 data_time: 0.7646 memory: 28048 grad_norm: 21.6443 loss: 6.9410 loss_cls: 0.8165 loss_bbox: 0.3143 d0.loss_cls: 0.8923 d0.loss_bbox: 0.3159 d1.loss_cls: 0.8553 d1.loss_bbox: 0.3187 d2.loss_cls: 0.8374 d2.loss_bbox: 0.3169 d3.loss_cls: 0.8261 d3.loss_bbox: 0.3148 d4.loss_cls: 0.8196 d4.loss_bbox: 0.3132 2024/02/29 19:15:29 - mmengine - INFO - Epoch(train) [10][200/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:38:10 time: 4.5163 data_time: 0.3181 memory: 28156 grad_norm: 21.5958 loss: 6.8234 loss_cls: 0.8152 loss_bbox: 0.3010 d0.loss_cls: 0.8832 d0.loss_bbox: 0.2979 d1.loss_cls: 0.8522 d1.loss_bbox: 0.2919 d2.loss_cls: 0.8381 d2.loss_bbox: 0.2952 d3.loss_cls: 0.8302 d3.loss_bbox: 0.2959 d4.loss_cls: 0.8261 d4.loss_bbox: 0.2967 2024/02/29 19:19:01 - mmengine - INFO - Epoch(train) [10][250/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:34:20 time: 4.2424 data_time: 0.5292 memory: 28514 grad_norm: 22.5886 loss: 6.7624 loss_cls: 0.8138 loss_bbox: 0.2965 d0.loss_cls: 0.8790 d0.loss_bbox: 0.2974 d1.loss_cls: 0.8397 d1.loss_bbox: 0.2932 d2.loss_cls: 0.8239 d2.loss_bbox: 0.2926 d3.loss_cls: 0.8233 d3.loss_bbox: 0.2942 d4.loss_cls: 0.8126 d4.loss_bbox: 0.2961 2024/02/29 19:22:44 - mmengine - INFO - Epoch(train) [10][300/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:30:34 time: 4.4680 data_time: 0.2805 memory: 29609 grad_norm: 21.3532 loss: 6.8296 loss_cls: 0.8091 loss_bbox: 0.3072 d0.loss_cls: 0.8743 d0.loss_bbox: 0.3088 d1.loss_cls: 0.8447 d1.loss_bbox: 0.3018 d2.loss_cls: 0.8310 d2.loss_bbox: 0.3047 d3.loss_cls: 0.8225 d3.loss_bbox: 0.3047 d4.loss_cls: 0.8139 d4.loss_bbox: 0.3069 2024/02/29 19:26:25 - mmengine - INFO - Epoch(train) [10][350/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:26:47 time: 4.4188 data_time: 0.2772 memory: 29239 grad_norm: 21.9065 loss: 6.7227 loss_cls: 0.7822 loss_bbox: 0.3165 d0.loss_cls: 0.8562 d0.loss_bbox: 0.3185 d1.loss_cls: 0.8137 d1.loss_bbox: 0.3156 d2.loss_cls: 0.7980 d2.loss_bbox: 0.3185 d3.loss_cls: 0.7901 d3.loss_bbox: 0.3152 d4.loss_cls: 0.7815 d4.loss_bbox: 0.3167 2024/02/29 19:30:14 - mmengine - INFO - Epoch(train) [10][400/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:23:01 time: 4.5731 data_time: 0.2772 memory: 28278 grad_norm: 21.3380 loss: 7.1237 loss_cls: 0.8417 loss_bbox: 0.3226 d0.loss_cls: 0.9109 d0.loss_bbox: 0.3316 d1.loss_cls: 0.8787 d1.loss_bbox: 0.3227 d2.loss_cls: 0.8559 d2.loss_bbox: 0.3261 d3.loss_cls: 0.8489 d3.loss_bbox: 0.3234 d4.loss_cls: 0.8366 d4.loss_bbox: 0.3246 2024/02/29 19:33:58 - mmengine - INFO - Epoch(train) [10][450/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:19:15 time: 4.4832 data_time: 0.2948 memory: 28163 grad_norm: 22.0213 loss: 6.7369 loss_cls: 0.7907 loss_bbox: 0.3070 d0.loss_cls: 0.8725 d0.loss_bbox: 0.3074 d1.loss_cls: 0.8331 d1.loss_bbox: 0.3065 d2.loss_cls: 0.8059 d2.loss_bbox: 0.3066 d3.loss_cls: 0.8058 d3.loss_bbox: 0.3034 d4.loss_cls: 0.7962 d4.loss_bbox: 0.3017 2024/02/29 19:36:37 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 19:36:56 - mmengine - INFO - Epoch(train) [10][500/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:15:20 time: 3.5449 data_time: 0.2473 memory: 28326 grad_norm: 21.6436 loss: 6.7413 loss_cls: 0.7919 loss_bbox: 0.3051 d0.loss_cls: 0.8768 d0.loss_bbox: 0.3019 d1.loss_cls: 0.8453 d1.loss_bbox: 0.3022 d2.loss_cls: 0.8117 d2.loss_bbox: 0.3071 d3.loss_cls: 0.7974 d3.loss_bbox: 0.3049 d4.loss_cls: 0.7914 d4.loss_bbox: 0.3056 2024/02/29 19:36:56 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 19:36:56 - mmengine - INFO - Saving checkpoint at 10 epochs 2024/02/29 19:41:55 - mmengine - INFO - Epoch(train) [11][ 50/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:11:40 time: 5.6541 data_time: 3.1662 memory: 28055 grad_norm: 21.7992 loss: 6.8646 loss_cls: 0.8085 loss_bbox: 0.3089 d0.loss_cls: 0.8802 d0.loss_bbox: 0.3160 d1.loss_cls: 0.8468 d1.loss_bbox: 0.3089 d2.loss_cls: 0.8317 d2.loss_bbox: 0.3082 d3.loss_cls: 0.8267 d3.loss_bbox: 0.3087 d4.loss_cls: 0.8112 d4.loss_bbox: 0.3088 2024/02/29 19:45:33 - mmengine - INFO - Epoch(train) [11][100/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:07:53 time: 4.3747 data_time: 1.3249 memory: 28079 grad_norm: 21.8508 loss: 7.2395 loss_cls: 0.8634 loss_bbox: 0.3191 d0.loss_cls: 0.9345 d0.loss_bbox: 0.3220 d1.loss_cls: 0.9048 d1.loss_bbox: 0.3192 d2.loss_cls: 0.8837 d2.loss_bbox: 0.3213 d3.loss_cls: 0.8687 d3.loss_bbox: 0.3209 d4.loss_cls: 0.8601 d4.loss_bbox: 0.3219 2024/02/29 19:49:18 - mmengine - INFO - Epoch(train) [11][150/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:04:07 time: 4.5000 data_time: 1.4176 memory: 29145 grad_norm: 21.5004 loss: 6.6352 loss_cls: 0.7969 loss_bbox: 0.2823 d0.loss_cls: 0.8704 d0.loss_bbox: 0.2914 d1.loss_cls: 0.8352 d1.loss_bbox: 0.2870 d2.loss_cls: 0.8134 d2.loss_bbox: 0.2867 d3.loss_cls: 0.8064 d3.loss_bbox: 0.2837 d4.loss_cls: 0.7977 d4.loss_bbox: 0.2842 2024/02/29 19:52:40 - mmengine - INFO - Epoch(train) [11][200/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 1:00:17 time: 4.0298 data_time: 1.6810 memory: 28581 grad_norm: 21.5632 loss: 7.0080 loss_cls: 0.8207 loss_bbox: 0.3251 d0.loss_cls: 0.8890 d0.loss_bbox: 0.3242 d1.loss_cls: 0.8578 d1.loss_bbox: 0.3267 d2.loss_cls: 0.8405 d2.loss_bbox: 0.3254 d3.loss_cls: 0.8253 d3.loss_bbox: 0.3263 d4.loss_cls: 0.8189 d4.loss_bbox: 0.3280 2024/02/29 19:56:28 - mmengine - INFO - Epoch(train) [11][250/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 0:56:32 time: 4.5630 data_time: 0.5774 memory: 28255 grad_norm: 21.8772 loss: 6.9453 loss_cls: 0.8407 loss_bbox: 0.2918 d0.loss_cls: 0.9202 d0.loss_bbox: 0.2937 d1.loss_cls: 0.8775 d1.loss_bbox: 0.2868 d2.loss_cls: 0.8637 d2.loss_bbox: 0.2914 d3.loss_cls: 0.8535 d3.loss_bbox: 0.2869 d4.loss_cls: 0.8492 d4.loss_bbox: 0.2900 2024/02/29 20:00:20 - mmengine - INFO - Epoch(train) [11][300/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 0:52:47 time: 4.6454 data_time: 0.2701 memory: 30387 grad_norm: 21.4853 loss: 7.0110 loss_cls: 0.8397 loss_bbox: 0.3135 d0.loss_cls: 0.9002 d0.loss_bbox: 0.3180 d1.loss_cls: 0.8729 d1.loss_bbox: 0.3139 d2.loss_cls: 0.8419 d2.loss_bbox: 0.3122 d3.loss_cls: 0.8348 d3.loss_bbox: 0.3173 d4.loss_cls: 0.8305 d4.loss_bbox: 0.3161 2024/02/29 20:04:07 - mmengine - INFO - Epoch(train) [11][350/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 0:49:02 time: 4.5288 data_time: 0.2680 memory: 29184 grad_norm: 21.4502 loss: 6.9460 loss_cls: 0.8389 loss_bbox: 0.3027 d0.loss_cls: 0.8999 d0.loss_bbox: 0.3032 d1.loss_cls: 0.8646 d1.loss_bbox: 0.2987 d2.loss_cls: 0.8511 d2.loss_bbox: 0.3010 d3.loss_cls: 0.8409 d3.loss_bbox: 0.3025 d4.loss_cls: 0.8431 d4.loss_bbox: 0.2995 2024/02/29 20:07:34 - mmengine - INFO - Epoch(train) [11][400/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 0:45:14 time: 4.1347 data_time: 0.2626 memory: 29611 grad_norm: 20.5723 loss: 6.9149 loss_cls: 0.8064 loss_bbox: 0.3214 d0.loss_cls: 0.8762 d0.loss_bbox: 0.3214 d1.loss_cls: 0.8504 d1.loss_bbox: 0.3186 d2.loss_cls: 0.8286 d2.loss_bbox: 0.3202 d3.loss_cls: 0.8183 d3.loss_bbox: 0.3217 d4.loss_cls: 0.8096 d4.loss_bbox: 0.3221 2024/02/29 20:11:31 - mmengine - INFO - Epoch(train) [11][450/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 0:41:30 time: 4.7586 data_time: 0.2649 memory: 29752 grad_norm: 21.4003 loss: 6.5313 loss_cls: 0.7910 loss_bbox: 0.2755 d0.loss_cls: 0.8745 d0.loss_bbox: 0.2682 d1.loss_cls: 0.8342 d1.loss_bbox: 0.2729 d2.loss_cls: 0.8131 d2.loss_bbox: 0.2739 d3.loss_cls: 0.7931 d3.loss_bbox: 0.2765 d4.loss_cls: 0.7813 d4.loss_bbox: 0.2771 2024/02/29 20:17:26 - mmengine - INFO - Epoch(train) [11][500/501] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 0:37:56 time: 7.0837 data_time: 0.2467 memory: 29121 grad_norm: 21.8036 loss: 6.8238 loss_cls: 0.7944 loss_bbox: 0.3207 d0.loss_cls: 0.8681 d0.loss_bbox: 0.3153 d1.loss_cls: 0.8410 d1.loss_bbox: 0.3150 d2.loss_cls: 0.8152 d2.loss_bbox: 0.3179 d3.loss_cls: 0.7947 d3.loss_bbox: 0.3209 d4.loss_cls: 0.8009 d4.loss_bbox: 0.3197 2024/02/29 20:17:26 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 20:17:26 - mmengine - INFO - Saving checkpoint at 11 epochs 2024/02/29 20:22:24 - mmengine - INFO - Epoch(train) [12][ 50/501] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:34:09 time: 5.6581 data_time: 1.7355 memory: 27962 grad_norm: 21.6513 loss: 6.6843 loss_cls: 0.7961 loss_bbox: 0.2957 d0.loss_cls: 0.8704 d0.loss_bbox: 0.2943 d1.loss_cls: 0.8360 d1.loss_bbox: 0.2925 d2.loss_cls: 0.8134 d2.loss_bbox: 0.2938 d3.loss_cls: 0.8033 d3.loss_bbox: 0.2945 d4.loss_cls: 0.7998 d4.loss_bbox: 0.2944 2024/02/29 20:25:55 - mmengine - INFO - Epoch(train) [12][100/501] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:30:21 time: 4.2066 data_time: 0.2451 memory: 28232 grad_norm: 20.1979 loss: 6.8463 loss_cls: 0.8151 loss_bbox: 0.3037 d0.loss_cls: 0.8810 d0.loss_bbox: 0.3108 d1.loss_cls: 0.8543 d1.loss_bbox: 0.3061 d2.loss_cls: 0.8294 d2.loss_bbox: 0.3066 d3.loss_cls: 0.8228 d3.loss_bbox: 0.2999 d4.loss_cls: 0.8163 d4.loss_bbox: 0.3002 2024/02/29 20:29:45 - mmengine - INFO - Epoch(train) [12][150/501] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:26:34 time: 4.5992 data_time: 0.4221 memory: 28416 grad_norm: 20.9792 loss: 7.2507 loss_cls: 0.8497 loss_bbox: 0.3336 d0.loss_cls: 0.9303 d0.loss_bbox: 0.3380 d1.loss_cls: 0.8856 d1.loss_bbox: 0.3355 d2.loss_cls: 0.8643 d2.loss_bbox: 0.3373 d3.loss_cls: 0.8558 d3.loss_bbox: 0.3366 d4.loss_cls: 0.8492 d4.loss_bbox: 0.3348 2024/02/29 20:33:09 - mmengine - INFO - Epoch(train) [12][200/501] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:22:45 time: 4.0951 data_time: 0.2538 memory: 28726 grad_norm: 21.9318 loss: 7.2256 loss_cls: 0.8587 loss_bbox: 0.3257 d0.loss_cls: 0.9205 d0.loss_bbox: 0.3346 d1.loss_cls: 0.8786 d1.loss_bbox: 0.3302 d2.loss_cls: 0.8691 d2.loss_bbox: 0.3322 d3.loss_cls: 0.8616 d3.loss_bbox: 0.3294 d4.loss_cls: 0.8574 d4.loss_bbox: 0.3275 2024/02/29 20:36:55 - mmengine - INFO - Epoch(train) [12][250/501] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:18:59 time: 4.5186 data_time: 0.3717 memory: 28888 grad_norm: 20.4100 loss: 6.6412 loss_cls: 0.7914 loss_bbox: 0.2944 d0.loss_cls: 0.8551 d0.loss_bbox: 0.3006 d1.loss_cls: 0.8307 d1.loss_bbox: 0.2930 d2.loss_cls: 0.8103 d2.loss_bbox: 0.2926 d3.loss_cls: 0.7957 d3.loss_bbox: 0.2917 d4.loss_cls: 0.7931 d4.loss_bbox: 0.2925 2024/02/29 20:40:27 - mmengine - INFO - Epoch(train) [12][300/501] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:15:11 time: 4.2441 data_time: 0.2943 memory: 28116 grad_norm: 21.5722 loss: 6.8987 loss_cls: 0.8214 loss_bbox: 0.3104 d0.loss_cls: 0.8921 d0.loss_bbox: 0.3037 d1.loss_cls: 0.8560 d1.loss_bbox: 0.3062 d2.loss_cls: 0.8362 d2.loss_bbox: 0.3078 d3.loss_cls: 0.8282 d3.loss_bbox: 0.3095 d4.loss_cls: 0.8183 d4.loss_bbox: 0.3089 2024/02/29 20:44:01 - mmengine - INFO - Epoch(train) [12][350/501] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:11:24 time: 4.2719 data_time: 0.2549 memory: 28622 grad_norm: 21.4221 loss: 7.0551 loss_cls: 0.8431 loss_bbox: 0.3053 d0.loss_cls: 0.9166 d0.loss_bbox: 0.3166 d1.loss_cls: 0.8760 d1.loss_bbox: 0.3079 d2.loss_cls: 0.8622 d2.loss_bbox: 0.3098 d3.loss_cls: 0.8567 d3.loss_bbox: 0.3045 d4.loss_cls: 0.8497 d4.loss_bbox: 0.3069 2024/02/29 20:47:40 - mmengine - INFO - Epoch(train) [12][400/501] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:07:37 time: 4.3709 data_time: 0.3879 memory: 28730 grad_norm: 21.3299 loss: 7.2060 loss_cls: 0.8511 loss_bbox: 0.3232 d0.loss_cls: 0.9318 d0.loss_bbox: 0.3203 d1.loss_cls: 0.9046 d1.loss_bbox: 0.3173 d2.loss_cls: 0.8763 d2.loss_bbox: 0.3183 d3.loss_cls: 0.8653 d3.loss_bbox: 0.3200 d4.loss_cls: 0.8572 d4.loss_bbox: 0.3207 2024/02/29 20:51:14 - mmengine - INFO - Epoch(train) [12][450/501] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:03:51 time: 4.2779 data_time: 1.0600 memory: 29070 grad_norm: 21.0001 loss: 6.6370 loss_cls: 0.7970 loss_bbox: 0.2832 d0.loss_cls: 0.8707 d0.loss_bbox: 0.2842 d1.loss_cls: 0.8405 d1.loss_bbox: 0.2840 d2.loss_cls: 0.8140 d2.loss_bbox: 0.2848 d3.loss_cls: 0.8120 d3.loss_bbox: 0.2838 d4.loss_cls: 0.7984 d4.loss_bbox: 0.2844 2024/02/29 20:54:05 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 20:54:28 - mmengine - INFO - Epoch(train) [12][500/501] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:00:04 time: 3.8797 data_time: 0.2640 memory: 27890 grad_norm: 20.8922 loss: 6.8714 loss_cls: 0.8020 loss_bbox: 0.3176 d0.loss_cls: 0.8669 d0.loss_bbox: 0.3229 d1.loss_cls: 0.8464 d1.loss_bbox: 0.3193 d2.loss_cls: 0.8267 d2.loss_bbox: 0.3181 d3.loss_cls: 0.8106 d3.loss_bbox: 0.3187 d4.loss_cls: 0.8041 d4.loss_bbox: 0.3183 2024/02/29 20:54:28 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof_20240229_115959 2024/02/29 20:54:28 - mmengine - INFO - Saving checkpoint at 12 epochs 2024/02/29 21:06:40 - mmengine - INFO - Epoch(val) [12][ 50/123] eta: 0:17:16 time: 14.2009 data_time: 13.2853 memory: 13500 2024/02/29 21:17:17 - mmengine - INFO - Epoch(val) [12][100/123] eta: 0:05:09 time: 12.7432 data_time: 11.8442 memory: 13533 2024/02/29 21:19:51 - mmengine - INFO - Epoch(val) [12][123/123] Easy@0.25: 0.3387 Hard@0.25: 0.3049 View-Dep@0.25: 0.3355 View-Indep@0.25: 0.3361 Unique@0.25: 0.0000 Multi@0.25: 0.3359 Overall@0.25: 0.3359 Easy@0.5: 0.1458 Hard@0.5: 0.1241 View-Dep@0.5: 0.1392 View-Indep@0.5: 0.1465 Unique@0.5: 0.0000 Multi@0.5: 0.1440 Overall@0.5: 0.1440 data_time: 10.7070 time: 11.6027