2024/04/07 14:22:42 - mmengine - INFO - ------------------------------------------------------------ System environment: sys.platform: linux Python: 3.8.16 (default, Mar 2 2023, 03:21:46) [GCC 11.2.0] CUDA available: True numpy_random_seed: 785018322 GPU 0,1,2,3,4,5,6,7: NVIDIA A100-SXM4-80GB CUDA_HOME: /mnt/petrelfs/share/cuda-11.3 NVCC: Cuda compilation tools, release 11.3, V11.3.109 GCC: gcc (GCC) 9.4.0 PyTorch: 1.11.0 PyTorch compiling details: PyTorch built with: - GCC 7.3 - C++ Version: 201402 - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications - Intel(R) MKL-DNN v2.5.2 (Git Hash a9302535553c73243c632ad3c4c80beec3d19a1e) - OpenMP 201511 (a.k.a. OpenMP 4.5) - LAPACK is enabled (usually provided by MKL) - NNPACK is enabled - CPU capability usage: AVX2 - CUDA Runtime 11.5 - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37 - CuDNN 8.3.2 - Magma 2.6.1 - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.5, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-7/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.11.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, TorchVision: 0.12.0 OpenCV: 4.7.0 MMEngine: 0.8.0 Runtime environment: cudnn_benchmark: False mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0} dist_cfg: {'backend': 'nccl', 'port': 29320} seed: 785018322 Distributed launcher: slurm Distributed training: True GPU number: 8 ------------------------------------------------------------ 2024/04/07 14:22:42 - mmengine - INFO - Config: default_scope = 'embodiedscan' default_hooks = dict( timer=dict(type='IterTimerHook'), logger=dict(type='LoggerHook', interval=50), param_scheduler=dict(type='ParamSchedulerHook'), checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3), sampler_seed=dict(type='DistSamplerSeedHook')) env_cfg = dict( cudnn_benchmark=False, mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0), dist_cfg=dict(backend='nccl', port=29320)) log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True) log_level = 'INFO' load_from = None resume = True n_points = 100000 backend_args = None metainfo = dict(classes='all') model = dict( type='SparseFeatureFusion3DGrounder', num_queries=256, voxel_size=0.01, data_preprocessor=dict( type='Det3DDataPreprocessor', mean=[ 123.675, 116.28, 103.53, ], std=[ 58.395, 57.12, 57.375, ], bgr_to_rgb=True, pad_size_divisor=32), backbone=dict( type='mmdet.ResNet', depth=50, base_channels=16, num_stages=4, out_indices=( 0, 1, 2, 3, ), frozen_stages=1, norm_cfg=dict(type='BN', requires_grad=False), norm_eval=True, init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50'), style='pytorch'), backbone_lidar=dict(type='MinkResNet', in_channels=3, depth=34), use_xyz_feat=True, neck_3d=dict( type='MinkNeck', num_classes=1, in_channels=[ 128, 256, 512, 1024, ], out_channels=256, voxel_size=0.01, pts_prune_threshold=1000), decoder=dict( num_layers=6, return_intermediate=True, layer_cfg=dict( self_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0), cross_attn_text_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0), cross_attn_cfg=dict(embed_dims=256, num_heads=8, dropout=0.0), ffn_cfg=dict( embed_dims=256, feedforward_channels=2048, ffn_drop=0.0)), post_norm_cfg=None), bbox_head=dict( type='GroundingHead', num_classes=256, sync_cls_avg_factor=True, decouple_bbox_loss=True, decouple_groups=4, share_pred_layer=True, decouple_weights=[ 0.2, 0.2, 0.2, 0.4, ], contrastive_cfg=dict(max_text_len=256, log_scale='auto', bias=True), loss_cls=dict( type='mmdet.FocalLoss', use_sigmoid=True, gamma=2.0, alpha=0.25, loss_weight=1.0), loss_bbox=dict( type='BBoxCDLoss', mode='l1', loss_weight=1.0, group='g8')), coord_type='DEPTH', train_cfg=dict( assigner=dict( type='HungarianAssigner3D', match_costs=[ dict(type='BinaryFocalLossCost', weight=1.0), dict(type='BBox3DL1Cost', weight=2.0), dict(type='IoU3DCost', weight=2.0), ])), test_cfg=None) dataset_type = 'MultiView3DGroundingDataset' data_root = 'data' train_pipeline = [ dict(type='LoadAnnotations3D'), dict( type='MultiViewPipeline', n_images=20, transforms=[ dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadDepthFromFile', backend_args=None), dict(type='ConvertRGBDToPoints', coord_type='CAMERA'), dict(type='PointSample', num_points=10000), dict(type='Resize', scale=( 480, 480, ), keep_ratio=False), ]), dict(type='AggregateMultiViewPoints', coord_type='DEPTH'), dict(type='PointSample', num_points=100000), dict( type='GlobalRotScaleTrans', rot_range=[ -0.087266, 0.087266, ], scale_ratio_range=[ 0.9, 1.1, ], translation_std=[ 0.1, 0.1, 0.1, ], shift_height=False), dict( type='Pack3DDetInputs', keys=[ 'img', 'points', 'gt_bboxes_3d', 'gt_labels_3d', ]), ] test_pipeline = [ dict(type='LoadAnnotations3D'), dict( type='MultiViewPipeline', n_images=50, ordered=True, transforms=[ dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadDepthFromFile', backend_args=None), dict(type='ConvertRGBDToPoints', coord_type='CAMERA'), dict(type='PointSample', num_points=10000), dict(type='Resize', scale=( 480, 480, ), keep_ratio=False), ]), dict(type='AggregateMultiViewPoints', coord_type='DEPTH'), dict(type='PointSample', num_points=100000), dict( type='Pack3DDetInputs', keys=[ 'img', 'points', 'gt_bboxes_3d', 'gt_labels_3d', ]), ] train_dataloader = dict( batch_size=12, num_workers=12, persistent_workers=True, sampler=dict(type='DefaultSampler', shuffle=True), dataset=dict( type='RepeatDataset', times=1, dataset=dict( type='MultiView3DGroundingDataset', data_root='data', ann_file='embodiedscan_infos_train.pkl', vg_file='embodiedscan_train_vg.json', metainfo=dict(classes='all'), pipeline=[ dict(type='LoadAnnotations3D'), dict( type='MultiViewPipeline', n_images=20, transforms=[ dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadDepthFromFile', backend_args=None), dict(type='ConvertRGBDToPoints', coord_type='CAMERA'), dict(type='PointSample', num_points=10000), dict( type='Resize', scale=( 480, 480, ), keep_ratio=False), ]), dict(type='AggregateMultiViewPoints', coord_type='DEPTH'), dict(type='PointSample', num_points=100000), dict( type='GlobalRotScaleTrans', rot_range=[ -0.087266, 0.087266, ], scale_ratio_range=[ 0.9, 1.1, ], translation_std=[ 0.1, 0.1, 0.1, ], shift_height=False), dict( type='Pack3DDetInputs', keys=[ 'img', 'points', 'gt_bboxes_3d', 'gt_labels_3d', ]), ], test_mode=False, filter_empty_gt=True, box_type_3d='Euler-Depth'))) val_dataloader = dict( batch_size=12, num_workers=12, persistent_workers=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False), dataset=dict( type='MultiView3DGroundingDataset', data_root='data', ann_file='embodiedscan_infos_val.pkl', vg_file='embodiedscan_val_vg.json', metainfo=dict(classes='all'), pipeline=[ dict(type='LoadAnnotations3D'), dict( type='MultiViewPipeline', n_images=50, ordered=True, transforms=[ dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadDepthFromFile', backend_args=None), dict(type='ConvertRGBDToPoints', coord_type='CAMERA'), dict(type='PointSample', num_points=10000), dict(type='Resize', scale=( 480, 480, ), keep_ratio=False), ]), dict(type='AggregateMultiViewPoints', coord_type='DEPTH'), dict(type='PointSample', num_points=100000), dict( type='Pack3DDetInputs', keys=[ 'img', 'points', 'gt_bboxes_3d', 'gt_labels_3d', ]), ], test_mode=True, filter_empty_gt=True, box_type_3d='Euler-Depth')) test_dataloader = dict( batch_size=12, num_workers=12, persistent_workers=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False), dataset=dict( type='MultiView3DGroundingDataset', data_root='data', ann_file='embodiedscan_infos_val.pkl', vg_file='embodiedscan_val_vg.json', metainfo=dict(classes='all'), pipeline=[ dict(type='LoadAnnotations3D'), dict( type='MultiViewPipeline', n_images=50, ordered=True, transforms=[ dict(type='LoadImageFromFile', backend_args=None), dict(type='LoadDepthFromFile', backend_args=None), dict(type='ConvertRGBDToPoints', coord_type='CAMERA'), dict(type='PointSample', num_points=10000), dict(type='Resize', scale=( 480, 480, ), keep_ratio=False), ]), dict(type='AggregateMultiViewPoints', coord_type='DEPTH'), dict(type='PointSample', num_points=100000), dict( type='Pack3DDetInputs', keys=[ 'img', 'points', 'gt_bboxes_3d', 'gt_labels_3d', ]), ], test_mode=True, filter_empty_gt=True, box_type_3d='Euler-Depth')) val_evaluator = dict(type='GroundingMetric') test_evaluator = dict(type='GroundingMetric') train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=3) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') lr = 0.0005 optim_wrapper = dict( type='OptimWrapper', optimizer=dict(type='AdamW', lr=0.0005, weight_decay=0.0005), paramwise_cfg=dict( custom_keys=dict( text_encoder=dict(lr_mult=0.0), decoder=dict(lr_mult=0.1, decay_mult=1.0))), clip_grad=dict(max_norm=10, norm_type=2)) param_scheduler = dict( type='MultiStepLR', begin=0, end=12, by_epoch=True, milestones=[ 8, 11, ], gamma=0.1) custom_hooks = [ dict(type='EmptyCacheHook', after_iter=True), ] find_unused_parameters = True launcher = 'slurm' work_dir = '/mnt/petrelfs/wangtai/EmbodiedScan/work_dirs/mv-grounding-challenge-full' 2024/04/07 14:22:42 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "vis_backend" registry tree. As a workaround, the current "vis_backend" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/04/07 14:24:54 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "hook" registry tree. As a workaround, the current "hook" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/04/07 14:24:54 - mmengine - INFO - Hooks will be executed in the following order: before_run: (VERY_HIGH ) RuntimeInfoHook (BELOW_NORMAL) LoggerHook -------------------- before_train: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook (VERY_LOW ) CheckpointHook -------------------- before_train_epoch: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook (NORMAL ) DistSamplerSeedHook (NORMAL ) EmptyCacheHook -------------------- before_train_iter: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook -------------------- after_train_iter: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook (BELOW_NORMAL) LoggerHook (LOW ) ParamSchedulerHook (VERY_LOW ) CheckpointHook -------------------- after_train_epoch: (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook (LOW ) ParamSchedulerHook (VERY_LOW ) CheckpointHook -------------------- before_val_epoch: (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook -------------------- before_val_iter: (NORMAL ) IterTimerHook -------------------- after_val_iter: (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook (BELOW_NORMAL) LoggerHook -------------------- after_val_epoch: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook (BELOW_NORMAL) LoggerHook (LOW ) ParamSchedulerHook (VERY_LOW ) CheckpointHook -------------------- after_train: (VERY_LOW ) CheckpointHook -------------------- before_test_epoch: (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook -------------------- before_test_iter: (NORMAL ) IterTimerHook -------------------- after_test_iter: (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook (BELOW_NORMAL) LoggerHook -------------------- after_test_epoch: (VERY_HIGH ) RuntimeInfoHook (NORMAL ) IterTimerHook (NORMAL ) EmptyCacheHook (BELOW_NORMAL) LoggerHook -------------------- after_run: (BELOW_NORMAL) LoggerHook -------------------- 2024/04/07 14:24:55 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "loop" registry tree. As a workaround, the current "loop" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/04/07 14:28:43 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "data sampler" registry tree. As a workaround, the current "data sampler" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/04/07 14:28:43 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "optimizer wrapper constructor" registry tree. As a workaround, the current "optimizer wrapper constructor" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.word_embeddings.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.word_embeddings.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.word_embeddings.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.position_embeddings.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.position_embeddings.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.position_embeddings.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.token_type_embeddings.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.token_type_embeddings.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.token_type_embeddings.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.embeddings.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.query.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.query.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.query.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.query.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.query.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.query.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.key.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.key.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.key.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.key.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.key.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.key.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.value.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.value.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.value.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.value.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.value.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.self.value.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.attention.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.intermediate.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.intermediate.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.intermediate.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.intermediate.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.intermediate.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.intermediate.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.0.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.query.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.query.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.query.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.query.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.query.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.query.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.key.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.key.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.key.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.key.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.key.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.key.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.value.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.value.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.value.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.value.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.value.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.self.value.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.attention.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.intermediate.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.intermediate.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.intermediate.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.intermediate.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.intermediate.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.intermediate.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.1.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.query.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.query.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.query.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.query.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.query.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.query.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.key.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.key.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.key.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.key.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.key.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.key.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.value.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.value.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.value.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.value.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.value.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.self.value.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.attention.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.intermediate.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.intermediate.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.intermediate.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.intermediate.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.intermediate.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.intermediate.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.2.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.query.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.query.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.query.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.query.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.query.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.query.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.key.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.key.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.key.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.key.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.key.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.key.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.value.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.value.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.value.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.value.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.value.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.self.value.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.attention.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.intermediate.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.intermediate.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.intermediate.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.intermediate.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.intermediate.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.intermediate.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.3.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.query.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.query.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.query.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.query.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.query.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.query.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.key.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.key.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.key.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.key.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.key.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.key.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.value.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.value.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.value.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.value.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.value.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.self.value.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.attention.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.intermediate.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.intermediate.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.intermediate.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.intermediate.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.intermediate.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.intermediate.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.4.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.query.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.query.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.query.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.query.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.query.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.query.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.key.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.key.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.key.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.key.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.key.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.key.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.value.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.value.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.value.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.value.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.value.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.self.value.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.attention.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.intermediate.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.intermediate.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.intermediate.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.intermediate.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.intermediate.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.intermediate.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.5.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.query.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.query.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.query.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.query.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.query.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.query.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.key.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.key.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.key.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.key.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.key.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.key.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.value.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.value.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.value.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.value.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.value.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.self.value.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.attention.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.intermediate.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.intermediate.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.intermediate.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.intermediate.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.intermediate.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.intermediate.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.6.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.query.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.query.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.query.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.query.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.query.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.query.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.key.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.key.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.key.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.key.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.key.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.key.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.value.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.value.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.value.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.value.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.value.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.self.value.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.attention.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.intermediate.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.intermediate.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.intermediate.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.intermediate.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.intermediate.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.intermediate.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.7.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.query.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.query.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.query.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.query.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.query.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.query.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.key.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.key.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.key.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.key.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.key.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.key.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.value.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.value.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.value.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.value.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.value.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.self.value.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.attention.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.intermediate.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.intermediate.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.intermediate.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.intermediate.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.intermediate.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.intermediate.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.dense.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.dense.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.8.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.query.weight:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.query.weight:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.query.weight:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.query.bias:lr=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.query.bias:weight_decay=0.0005 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.query.bias:lr_mult=0.0 2024/04/07 14:28:43 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.key.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.key.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.key.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.key.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.key.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.key.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.value.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.value.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.value.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.value.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.value.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.self.value.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.dense.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.dense.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.attention.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.intermediate.dense.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.intermediate.dense.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.intermediate.dense.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.intermediate.dense.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.intermediate.dense.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.intermediate.dense.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.dense.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.dense.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.9.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.query.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.query.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.query.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.query.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.query.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.query.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.key.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.key.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.key.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.key.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.key.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.key.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.value.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.value.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.value.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.value.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.value.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.self.value.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.dense.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.dense.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.attention.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.intermediate.dense.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.intermediate.dense.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.intermediate.dense.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.intermediate.dense.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.intermediate.dense.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.intermediate.dense.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.dense.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.dense.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.10.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.query.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.query.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.query.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.query.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.query.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.query.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.key.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.key.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.key.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.key.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.key.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.key.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.value.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.value.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.value.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.value.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.value.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.self.value.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.dense.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.dense.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.attention.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.intermediate.dense.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.intermediate.dense.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.intermediate.dense.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.intermediate.dense.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.intermediate.dense.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.intermediate.dense.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.dense.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.dense.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.dense.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.dense.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.dense.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.dense.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.LayerNorm.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.LayerNorm.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.LayerNorm.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.LayerNorm.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.LayerNorm.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.encoder.layer.11.output.LayerNorm.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.pooler.dense.weight:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.pooler.dense.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.pooler.dense.weight:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.pooler.dense.bias:lr=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.pooler.dense.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- text_encoder.pooler.dense.bias:lr_mult=0.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_attn.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn_text.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.cross_attn.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.0.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.ffn.layers.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.2.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.norms.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.0.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_attn.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn_text.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.cross_attn.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.0.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.ffn.layers.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.2.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.norms.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.1.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_attn.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn_text.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.cross_attn.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.0.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.ffn.layers.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.2.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.norms.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.2.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_attn.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn_text.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.cross_attn.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.0.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.ffn.layers.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.2.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.norms.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.3.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_attn.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn_text.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.cross_attn.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.0.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.ffn.layers.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.2.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.norms.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.4.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_attn.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn_text.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.in_proj_bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.cross_attn.attn.out_proj.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.0.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.ffn.layers.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.2.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.norms.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.layers.5.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.self_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.0.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.1.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.cross_posembed.position_embedding_head.3.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.norm.weight:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.norm.weight:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.norm.weight:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.norm.weight:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.norm.bias:lr=5e-05 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.norm.bias:weight_decay=0.0005 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.norm.bias:lr_mult=0.1 2024/04/07 14:28:44 - mmengine - INFO - paramwise_options -- decoder.norm.bias:decay_mult=1.0 2024/04/07 14:28:44 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "optimizer" registry tree. As a workaround, the current "optimizer" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/04/07 14:28:45 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "optim_wrapper" registry tree. As a workaround, the current "optim_wrapper" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/04/07 14:28:45 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "parameter scheduler" registry tree. As a workaround, the current "parameter scheduler" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/04/07 14:29:36 - mmengine - WARNING - The prefix is not set in metric class GroundingMetric. 2024/04/07 14:29:38 - mmengine - WARNING - Failed to search registry with scope "embodiedscan" in the "weight initializer" registry tree. As a workaround, the current "weight initializer" registry in "mmengine" is used to build instance. This may cause unexpected failure when running the built modules. Please check whether "embodiedscan" is a correct scope, or whether the registry is initialized. 2024/04/07 14:29:38 - mmengine - INFO - load model from: torchvision://resnet50 2024/04/07 14:29:38 - mmengine - INFO - Loads checkpoint by torchvision backend from path: torchvision://resnet50 2024/04/07 14:29:43 - mmengine - WARNING - The model and loaded state dict do not match exactly size mismatch for conv1.weight: copying a param with shape torch.Size([64, 3, 7, 7]) from checkpoint, the shape in current model is torch.Size([16, 3, 7, 7]). size mismatch for bn1.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for bn1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for bn1.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for bn1.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.conv1.weight: copying a param with shape torch.Size([64, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([16, 16, 1, 1]). size mismatch for layer1.0.bn1.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.bn1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.bn1.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.bn1.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.conv2.weight: copying a param with shape torch.Size([64, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([16, 16, 3, 3]). size mismatch for layer1.0.bn2.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.bn2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.bn2.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.bn2.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.0.conv3.weight: copying a param with shape torch.Size([256, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 16, 1, 1]). size mismatch for layer1.0.bn3.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.bn3.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.bn3.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.bn3.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.downsample.0.weight: copying a param with shape torch.Size([256, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 16, 1, 1]). size mismatch for layer1.0.downsample.1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.downsample.1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.downsample.1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.0.downsample.1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.1.conv1.weight: copying a param with shape torch.Size([64, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([16, 64, 1, 1]). size mismatch for layer1.1.bn1.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.bn1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.bn1.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.bn1.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.conv2.weight: copying a param with shape torch.Size([64, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([16, 16, 3, 3]). size mismatch for layer1.1.bn2.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.bn2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.bn2.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.bn2.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.1.conv3.weight: copying a param with shape torch.Size([256, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 16, 1, 1]). size mismatch for layer1.1.bn3.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.1.bn3.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.1.bn3.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.1.bn3.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.2.conv1.weight: copying a param with shape torch.Size([64, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([16, 64, 1, 1]). size mismatch for layer1.2.bn1.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.bn1.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.bn1.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.bn1.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.conv2.weight: copying a param with shape torch.Size([64, 64, 3, 3]) from checkpoint, the shape in current model is torch.Size([16, 16, 3, 3]). size mismatch for layer1.2.bn2.weight: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.bn2.bias: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.bn2.running_mean: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.bn2.running_var: copying a param with shape torch.Size([64]) from checkpoint, the shape in current model is torch.Size([16]). size mismatch for layer1.2.conv3.weight: copying a param with shape torch.Size([256, 64, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 16, 1, 1]). size mismatch for layer1.2.bn3.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.2.bn3.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.2.bn3.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer1.2.bn3.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer2.0.conv1.weight: copying a param with shape torch.Size([128, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 64, 1, 1]). size mismatch for layer2.0.bn1.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.bn1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.bn1.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.bn1.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.conv2.weight: copying a param with shape torch.Size([128, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]). size mismatch for layer2.0.bn2.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.bn2.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.bn2.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.bn2.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.0.conv3.weight: copying a param with shape torch.Size([512, 128, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 32, 1, 1]). size mismatch for layer2.0.bn3.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.bn3.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.bn3.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.bn3.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.downsample.0.weight: copying a param with shape torch.Size([512, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 64, 1, 1]). size mismatch for layer2.0.downsample.1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.downsample.1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.downsample.1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.0.downsample.1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.1.conv1.weight: copying a param with shape torch.Size([128, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 128, 1, 1]). size mismatch for layer2.1.bn1.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.bn1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.bn1.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.bn1.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.conv2.weight: copying a param with shape torch.Size([128, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]). size mismatch for layer2.1.bn2.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.bn2.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.bn2.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.bn2.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.1.conv3.weight: copying a param with shape torch.Size([512, 128, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 32, 1, 1]). size mismatch for layer2.1.bn3.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.1.bn3.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.1.bn3.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.1.bn3.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.2.conv1.weight: copying a param with shape torch.Size([128, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 128, 1, 1]). size mismatch for layer2.2.bn1.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.bn1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.bn1.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.bn1.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.conv2.weight: copying a param with shape torch.Size([128, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]). size mismatch for layer2.2.bn2.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.bn2.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.bn2.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.bn2.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.2.conv3.weight: copying a param with shape torch.Size([512, 128, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 32, 1, 1]). size mismatch for layer2.2.bn3.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.2.bn3.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.2.bn3.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.2.bn3.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.3.conv1.weight: copying a param with shape torch.Size([128, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([32, 128, 1, 1]). size mismatch for layer2.3.bn1.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.bn1.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.bn1.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.bn1.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.conv2.weight: copying a param with shape torch.Size([128, 128, 3, 3]) from checkpoint, the shape in current model is torch.Size([32, 32, 3, 3]). size mismatch for layer2.3.bn2.weight: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.bn2.bias: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.bn2.running_mean: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.bn2.running_var: copying a param with shape torch.Size([128]) from checkpoint, the shape in current model is torch.Size([32]). size mismatch for layer2.3.conv3.weight: copying a param with shape torch.Size([512, 128, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 32, 1, 1]). size mismatch for layer2.3.bn3.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.3.bn3.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.3.bn3.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer2.3.bn3.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer3.0.conv1.weight: copying a param with shape torch.Size([256, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 128, 1, 1]). size mismatch for layer3.0.bn1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.bn1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.bn1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.bn1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]). size mismatch for layer3.0.bn2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.bn2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.bn2.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.bn2.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.0.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]). size mismatch for layer3.0.bn3.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.bn3.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.bn3.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.bn3.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.downsample.0.weight: copying a param with shape torch.Size([1024, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 128, 1, 1]). size mismatch for layer3.0.downsample.1.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.downsample.1.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.downsample.1.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.0.downsample.1.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.1.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]). size mismatch for layer3.1.bn1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.bn1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.bn1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.bn1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]). size mismatch for layer3.1.bn2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.bn2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.bn2.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.bn2.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.1.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]). size mismatch for layer3.1.bn3.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.1.bn3.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.1.bn3.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.1.bn3.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.2.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]). size mismatch for layer3.2.bn1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.bn1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.bn1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.bn1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]). size mismatch for layer3.2.bn2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.bn2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.bn2.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.bn2.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.2.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]). size mismatch for layer3.2.bn3.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.2.bn3.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.2.bn3.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.2.bn3.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.3.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]). size mismatch for layer3.3.bn1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.bn1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.bn1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.bn1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]). size mismatch for layer3.3.bn2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.bn2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.bn2.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.bn2.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.3.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]). size mismatch for layer3.3.bn3.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.3.bn3.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.3.bn3.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.3.bn3.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.4.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]). size mismatch for layer3.4.bn1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.bn1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.bn1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.bn1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]). size mismatch for layer3.4.bn2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.bn2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.bn2.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.bn2.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.4.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]). size mismatch for layer3.4.bn3.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.4.bn3.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.4.bn3.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.4.bn3.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.5.conv1.weight: copying a param with shape torch.Size([256, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([64, 256, 1, 1]). size mismatch for layer3.5.bn1.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.bn1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.bn1.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.bn1.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.conv2.weight: copying a param with shape torch.Size([256, 256, 3, 3]) from checkpoint, the shape in current model is torch.Size([64, 64, 3, 3]). size mismatch for layer3.5.bn2.weight: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.bn2.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.bn2.running_mean: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.bn2.running_var: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([64]). size mismatch for layer3.5.conv3.weight: copying a param with shape torch.Size([1024, 256, 1, 1]) from checkpoint, the shape in current model is torch.Size([256, 64, 1, 1]). size mismatch for layer3.5.bn3.weight: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.5.bn3.bias: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.5.bn3.running_mean: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer3.5.bn3.running_var: copying a param with shape torch.Size([1024]) from checkpoint, the shape in current model is torch.Size([256]). size mismatch for layer4.0.conv1.weight: copying a param with shape torch.Size([512, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 256, 1, 1]). size mismatch for layer4.0.bn1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.bn1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.bn1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.bn1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.conv2.weight: copying a param with shape torch.Size([512, 512, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]). size mismatch for layer4.0.bn2.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.bn2.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.bn2.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.bn2.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.0.conv3.weight: copying a param with shape torch.Size([2048, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 128, 1, 1]). size mismatch for layer4.0.bn3.weight: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.bn3.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.bn3.running_mean: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.bn3.running_var: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.downsample.0.weight: copying a param with shape torch.Size([2048, 1024, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 256, 1, 1]). size mismatch for layer4.0.downsample.1.weight: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.downsample.1.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.downsample.1.running_mean: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.0.downsample.1.running_var: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.1.conv1.weight: copying a param with shape torch.Size([512, 2048, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 512, 1, 1]). size mismatch for layer4.1.bn1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.bn1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.bn1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.bn1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.conv2.weight: copying a param with shape torch.Size([512, 512, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]). size mismatch for layer4.1.bn2.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.bn2.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.bn2.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.bn2.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.1.conv3.weight: copying a param with shape torch.Size([2048, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 128, 1, 1]). size mismatch for layer4.1.bn3.weight: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.1.bn3.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.1.bn3.running_mean: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.1.bn3.running_var: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.2.conv1.weight: copying a param with shape torch.Size([512, 2048, 1, 1]) from checkpoint, the shape in current model is torch.Size([128, 512, 1, 1]). size mismatch for layer4.2.bn1.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.bn1.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.bn1.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.bn1.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.conv2.weight: copying a param with shape torch.Size([512, 512, 3, 3]) from checkpoint, the shape in current model is torch.Size([128, 128, 3, 3]). size mismatch for layer4.2.bn2.weight: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.bn2.bias: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.bn2.running_mean: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.bn2.running_var: copying a param with shape torch.Size([512]) from checkpoint, the shape in current model is torch.Size([128]). size mismatch for layer4.2.conv3.weight: copying a param with shape torch.Size([2048, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([512, 128, 1, 1]). size mismatch for layer4.2.bn3.weight: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.2.bn3.bias: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.2.bn3.running_mean: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). size mismatch for layer4.2.bn3.running_var: copying a param with shape torch.Size([2048]) from checkpoint, the shape in current model is torch.Size([512]). unexpected key in source state_dict: fc.weight, fc.bias Name of parameter - Initialization information backbone.conv1.weight - torch.Size([16, 3, 7, 7]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.bn1.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.bn1.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.conv1.weight - torch.Size([16, 16, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.bn1.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.bn1.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.conv2.weight - torch.Size([16, 16, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.bn2.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.bn2.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.conv3.weight - torch.Size([64, 16, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.bn3.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.bn3.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.downsample.0.weight - torch.Size([64, 16, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.downsample.1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.0.downsample.1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.conv1.weight - torch.Size([16, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.bn1.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.bn1.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.conv2.weight - torch.Size([16, 16, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.bn2.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.bn2.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.conv3.weight - torch.Size([64, 16, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.bn3.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.1.bn3.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.conv1.weight - torch.Size([16, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.bn1.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.bn1.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.conv2.weight - torch.Size([16, 16, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.bn2.weight - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.bn2.bias - torch.Size([16]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.conv3.weight - torch.Size([64, 16, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.bn3.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer1.2.bn3.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.conv1.weight - torch.Size([32, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.bn1.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.bn1.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.conv2.weight - torch.Size([32, 32, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.bn2.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.bn2.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.conv3.weight - torch.Size([128, 32, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.bn3.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.bn3.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.downsample.0.weight - torch.Size([128, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.downsample.1.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.0.downsample.1.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.conv1.weight - torch.Size([32, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.bn1.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.bn1.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.conv2.weight - torch.Size([32, 32, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.bn2.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.bn2.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.conv3.weight - torch.Size([128, 32, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.bn3.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.1.bn3.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.conv1.weight - torch.Size([32, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.bn1.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.bn1.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.conv2.weight - torch.Size([32, 32, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.bn2.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.bn2.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.conv3.weight - torch.Size([128, 32, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.bn3.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.2.bn3.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.conv1.weight - torch.Size([32, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.bn1.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.bn1.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.conv2.weight - torch.Size([32, 32, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.bn2.weight - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.bn2.bias - torch.Size([32]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.conv3.weight - torch.Size([128, 32, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.bn3.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer2.3.bn3.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.conv1.weight - torch.Size([64, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.bn1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.bn1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.conv2.weight - torch.Size([64, 64, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.bn2.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.bn2.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.conv3.weight - torch.Size([256, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.bn3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.bn3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.downsample.0.weight - torch.Size([256, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.downsample.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.0.downsample.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.conv1.weight - torch.Size([64, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.bn1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.bn1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.conv2.weight - torch.Size([64, 64, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.bn2.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.bn2.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.conv3.weight - torch.Size([256, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.bn3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.1.bn3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.conv1.weight - torch.Size([64, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.bn1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.bn1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.conv2.weight - torch.Size([64, 64, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.bn2.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.bn2.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.conv3.weight - torch.Size([256, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.bn3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.2.bn3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.conv1.weight - torch.Size([64, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.bn1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.bn1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.conv2.weight - torch.Size([64, 64, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.bn2.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.bn2.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.conv3.weight - torch.Size([256, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.bn3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.3.bn3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.conv1.weight - torch.Size([64, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.bn1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.bn1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.conv2.weight - torch.Size([64, 64, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.bn2.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.bn2.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.conv3.weight - torch.Size([256, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.bn3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.4.bn3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.conv1.weight - torch.Size([64, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.bn1.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.bn1.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.conv2.weight - torch.Size([64, 64, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.bn2.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.bn2.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.conv3.weight - torch.Size([256, 64, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.bn3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer3.5.bn3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.conv1.weight - torch.Size([128, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.bn1.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.bn1.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.conv2.weight - torch.Size([128, 128, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.bn2.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.bn2.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.conv3.weight - torch.Size([512, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.bn3.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.bn3.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.downsample.0.weight - torch.Size([512, 256, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.downsample.1.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.0.downsample.1.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.conv1.weight - torch.Size([128, 512, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.bn1.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.bn1.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.conv2.weight - torch.Size([128, 128, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.bn2.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.bn2.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.conv3.weight - torch.Size([512, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.bn3.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.1.bn3.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.conv1.weight - torch.Size([128, 512, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.bn1.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.bn1.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.conv2.weight - torch.Size([128, 128, 3, 3]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.bn2.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.bn2.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.conv3.weight - torch.Size([512, 128, 1, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.bn3.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone.layer4.2.bn3.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.conv1.kernel - torch.Size([27, 3, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.norm1.weight - torch.Size([1, 64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.norm1.bias - torch.Size([1, 64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.0.conv1.kernel - torch.Size([27, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.0.norm1.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.0.norm1.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.0.conv2.kernel - torch.Size([27, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.0.norm2.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.0.norm2.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.0.downsample.0.kernel - torch.Size([1, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.0.downsample.1.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.0.downsample.1.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.1.conv1.kernel - torch.Size([27, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.1.norm1.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.1.norm1.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.1.conv2.kernel - torch.Size([27, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.1.norm2.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.1.norm2.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.2.conv1.kernel - torch.Size([27, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.2.norm1.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.2.norm1.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.2.conv2.kernel - torch.Size([27, 64, 64]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer1.2.norm2.bn.weight - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer1.2.norm2.bn.bias - torch.Size([64]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.0.conv1.kernel - torch.Size([27, 64, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.0.norm1.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.0.norm1.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.0.conv2.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.0.norm2.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.0.norm2.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.0.downsample.0.kernel - torch.Size([1, 64, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.0.downsample.1.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.0.downsample.1.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.1.conv1.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.1.norm1.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.1.norm1.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.1.conv2.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.1.norm2.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.1.norm2.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.2.conv1.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.2.norm1.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.2.norm1.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.2.conv2.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.2.norm2.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.2.norm2.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.3.conv1.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.3.norm1.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.3.norm1.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.3.conv2.kernel - torch.Size([27, 128, 128]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer2.3.norm2.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer2.3.norm2.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.0.conv1.kernel - torch.Size([27, 128, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.0.norm1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.0.norm1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.0.conv2.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.0.norm2.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.0.norm2.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.0.downsample.0.kernel - torch.Size([1, 128, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.0.downsample.1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.0.downsample.1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.1.conv1.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.1.norm1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.1.norm1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.1.conv2.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.1.norm2.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.1.norm2.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.2.conv1.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.2.norm1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.2.norm1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.2.conv2.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.2.norm2.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.2.norm2.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.3.conv1.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.3.norm1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.3.norm1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.3.conv2.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.3.norm2.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.3.norm2.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.4.conv1.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.4.norm1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.4.norm1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.4.conv2.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.4.norm2.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.4.norm2.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.5.conv1.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.5.norm1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.5.norm1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.5.conv2.kernel - torch.Size([27, 256, 256]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer3.5.norm2.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer3.5.norm2.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.0.conv1.kernel - torch.Size([27, 256, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.0.norm1.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.0.norm1.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.0.conv2.kernel - torch.Size([27, 512, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.0.norm2.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.0.norm2.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.0.downsample.0.kernel - torch.Size([1, 256, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.0.downsample.1.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.0.downsample.1.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.1.conv1.kernel - torch.Size([27, 512, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.1.norm1.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.1.norm1.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.1.conv2.kernel - torch.Size([27, 512, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.1.norm2.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.1.norm2.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.2.conv1.kernel - torch.Size([27, 512, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.2.norm1.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.2.norm1.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.2.conv2.kernel - torch.Size([27, 512, 512]): Initialized by user-defined `init_weights` in MinkResNet backbone_lidar.layer4.2.norm2.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder backbone_lidar.layer4.2.norm2.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_0.0.kernel - torch.Size([27, 128, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_0.1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_0.1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_1.0.kernel - torch.Size([8, 256, 128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_1.1.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_1.1.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_1.3.kernel - torch.Size([27, 128, 128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_1.4.bn.weight - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_1.4.bn.bias - torch.Size([128]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_1.0.kernel - torch.Size([27, 256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_1.1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_1.1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_2.0.kernel - torch.Size([8, 512, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_2.1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_2.1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_2.3.kernel - torch.Size([27, 256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_2.4.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_2.4.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_2.0.kernel - torch.Size([27, 512, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_2.1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_2.1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_3.0.kernel - torch.Size([8, 1024, 512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_3.1.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_3.1.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_3.3.kernel - torch.Size([27, 512, 512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_3.4.bn.weight - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.up_block_3.4.bn.bias - torch.Size([512]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_3.0.kernel - torch.Size([27, 1024, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_3.1.bn.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.out_block_3.1.bn.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder neck_3d.conv_cls.kernel - torch.Size([256, 1]): Initialized by user-defined `init_weights` in MinkNeck neck_3d.conv_cls.bias - torch.Size([1, 1]): Initialized by user-defined `init_weights` in MinkNeck bbox_head.cls_branches.0.bias - torch.Size([1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder bbox_head.reg_branches.0.0.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder bbox_head.reg_branches.0.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder bbox_head.reg_branches.0.2.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder bbox_head.reg_branches.0.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder bbox_head.reg_branches.0.4.weight - torch.Size([9, 256]): Initialized by user-defined `init_weights` in GroundingHead bbox_head.reg_branches.0.4.bias - torch.Size([9]): Initialized by user-defined `init_weights` in GroundingHead text_encoder.embeddings.word_embeddings.weight - torch.Size([50265, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.embeddings.position_embeddings.weight - torch.Size([514, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.embeddings.token_type_embeddings.weight - torch.Size([1, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.embeddings.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.embeddings.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.0.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.1.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.2.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.3.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.4.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.5.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.6.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.7.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.8.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.9.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.10.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.self.query.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.self.query.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.self.key.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.self.key.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.self.value.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.self.value.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.output.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.attention.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.intermediate.dense.weight - torch.Size([3072, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.intermediate.dense.bias - torch.Size([3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.output.dense.weight - torch.Size([768, 3072]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.output.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.output.LayerNorm.weight - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.encoder.layer.11.output.LayerNorm.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.pooler.dense.weight - torch.Size([768, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_encoder.pooler.dense.bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn_text.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn_text.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn_text.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn_text.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.cross_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.ffn.layers.0.0.weight - torch.Size([2048, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.ffn.layers.0.0.bias - torch.Size([2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.ffn.layers.1.weight - torch.Size([256, 2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.ffn.layers.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.0.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.2.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.norms.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.0.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn_text.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn_text.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn_text.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn_text.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.cross_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.ffn.layers.0.0.weight - torch.Size([2048, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.ffn.layers.0.0.bias - torch.Size([2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.ffn.layers.1.weight - torch.Size([256, 2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.ffn.layers.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.0.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.2.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.norms.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.1.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn_text.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn_text.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn_text.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn_text.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.cross_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.ffn.layers.0.0.weight - torch.Size([2048, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.ffn.layers.0.0.bias - torch.Size([2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.ffn.layers.1.weight - torch.Size([256, 2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.ffn.layers.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.0.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.2.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.norms.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.2.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn_text.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn_text.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn_text.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn_text.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.cross_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.ffn.layers.0.0.weight - torch.Size([2048, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.ffn.layers.0.0.bias - torch.Size([2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.ffn.layers.1.weight - torch.Size([256, 2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.ffn.layers.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.0.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.2.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.norms.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.3.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn_text.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn_text.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn_text.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn_text.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.cross_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.ffn.layers.0.0.weight - torch.Size([2048, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.ffn.layers.0.0.bias - torch.Size([2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.ffn.layers.1.weight - torch.Size([256, 2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.ffn.layers.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.0.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.2.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.norms.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.4.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn_text.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn_text.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn_text.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn_text.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn.attn.in_proj_weight - torch.Size([768, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn.attn.in_proj_bias - torch.Size([768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn.attn.out_proj.weight - torch.Size([256, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.cross_attn.attn.out_proj.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.ffn.layers.0.0.weight - torch.Size([2048, 256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.ffn.layers.0.0.bias - torch.Size([2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.ffn.layers.1.weight - torch.Size([256, 2048]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.ffn.layers.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.0.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.2.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.2.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.3.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.norms.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.layers.5.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.self_posembed.position_embedding_head.0.weight - torch.Size([256, 9, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.self_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.self_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.self_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.self_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.self_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.cross_posembed.position_embedding_head.0.weight - torch.Size([256, 3, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.cross_posembed.position_embedding_head.0.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.cross_posembed.position_embedding_head.1.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.cross_posembed.position_embedding_head.1.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.cross_posembed.position_embedding_head.3.weight - torch.Size([256, 256, 1]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.cross_posembed.position_embedding_head.3.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.norm.weight - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder decoder.norm.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_feat_map.weight - torch.Size([256, 768]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder text_feat_map.bias - torch.Size([256]): The value is the same before and after calling `init_weights` of SparseFeatureFusion3DGrounder 2024/04/07 14:29:43 - mmengine - INFO - Auto resumed from the latest checkpoint /mnt/petrelfs/wangtai/EmbodiedScan/work_dirs/mv-grounding-challenge-full/epoch_9.pth. 2024/04/07 14:29:52 - mmengine - INFO - Load checkpoint from /mnt/petrelfs/wangtai/EmbodiedScan/work_dirs/mv-grounding-challenge-full/epoch_9.pth 2024/04/07 14:29:52 - mmengine - INFO - resumed epoch: 9, iter: 21888 2024/04/07 14:29:52 - mmengine - WARNING - "FileClient" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io 2024/04/07 14:29:52 - mmengine - WARNING - "HardDiskBackend" is the alias of "LocalBackend" and the former will be deprecated in future. 2024/04/07 14:29:52 - mmengine - INFO - Checkpoints will be saved to /mnt/petrelfs/wangtai/EmbodiedScan/work_dirs/mv-grounding-challenge-full. 2024/04/07 14:34:53 - mmengine - INFO - Epoch(train) [10][ 50/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 12:08:11 time: 6.0298 data_time: 2.8323 memory: 29307 grad_norm: 21.5166 loss: 6.4494 loss_cls: 0.7404 loss_bbox: 0.2961 d0.loss_cls: 0.8597 d0.loss_bbox: 0.3122 d1.loss_cls: 0.7865 d1.loss_bbox: 0.3028 d2.loss_cls: 0.7610 d2.loss_bbox: 0.3026 d3.loss_cls: 0.7512 d3.loss_bbox: 0.3028 d4.loss_cls: 0.7337 d4.loss_bbox: 0.3004 2024/04/07 14:38:42 - mmengine - INFO - Epoch(train) [10][ 100/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 10:35:17 time: 4.5643 data_time: 1.8802 memory: 28217 grad_norm: 22.0447 loss: 5.8693 loss_cls: 0.6714 loss_bbox: 0.2658 d0.loss_cls: 0.7987 d0.loss_bbox: 0.2893 d1.loss_cls: 0.7234 d1.loss_bbox: 0.2696 d2.loss_cls: 0.6955 d2.loss_bbox: 0.2691 d3.loss_cls: 0.6873 d3.loss_bbox: 0.2641 d4.loss_cls: 0.6720 d4.loss_bbox: 0.2630 2024/04/07 14:39:44 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof-full_20240407_142237 2024/04/07 14:42:07 - mmengine - INFO - Epoch(train) [10][ 150/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 9:43:38 time: 4.1072 data_time: 1.4055 memory: 28204 grad_norm: 21.2040 loss: 6.1560 loss_cls: 0.7082 loss_bbox: 0.2790 d0.loss_cls: 0.8358 d0.loss_bbox: 0.2985 d1.loss_cls: 0.7665 d1.loss_bbox: 0.2847 d2.loss_cls: 0.7315 d2.loss_bbox: 0.2761 d3.loss_cls: 0.7067 d3.loss_bbox: 0.2790 d4.loss_cls: 0.7122 d4.loss_bbox: 0.2780 2024/04/07 14:45:49 - mmengine - INFO - Epoch(train) [10][ 200/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 9:26:12 time: 4.4491 data_time: 0.5398 memory: 28529 grad_norm: 21.7783 loss: 6.3876 loss_cls: 0.7404 loss_bbox: 0.2778 d0.loss_cls: 0.8706 d0.loss_bbox: 0.3094 d1.loss_cls: 0.8016 d1.loss_bbox: 0.2894 d2.loss_cls: 0.7646 d2.loss_bbox: 0.2847 d3.loss_cls: 0.7464 d3.loss_bbox: 0.2807 d4.loss_cls: 0.7429 d4.loss_bbox: 0.2791 2024/04/07 14:49:55 - mmengine - INFO - Epoch(train) [10][ 250/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 9:24:55 time: 4.9028 data_time: 0.3362 memory: 30209 grad_norm: 21.2864 loss: 6.3704 loss_cls: 0.7315 loss_bbox: 0.2799 d0.loss_cls: 0.8799 d0.loss_bbox: 0.3137 d1.loss_cls: 0.8033 d1.loss_bbox: 0.2875 d2.loss_cls: 0.7679 d2.loss_bbox: 0.2731 d3.loss_cls: 0.7401 d3.loss_bbox: 0.2779 d4.loss_cls: 0.7380 d4.loss_bbox: 0.2775 2024/04/07 14:53:31 - mmengine - INFO - Epoch(train) [10][ 300/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 9:11:24 time: 4.3216 data_time: 0.2982 memory: 29145 grad_norm: 22.3320 loss: 6.2169 loss_cls: 0.7161 loss_bbox: 0.2728 d0.loss_cls: 0.8443 d0.loss_bbox: 0.2911 d1.loss_cls: 0.7885 d1.loss_bbox: 0.2832 d2.loss_cls: 0.7423 d2.loss_bbox: 0.2793 d3.loss_cls: 0.7277 d3.loss_bbox: 0.2739 d4.loss_cls: 0.7194 d4.loss_bbox: 0.2782 2024/04/07 14:57:23 - mmengine - INFO - Epoch(train) [10][ 350/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 9:06:05 time: 4.6452 data_time: 1.4948 memory: 28276 grad_norm: 21.2111 loss: 6.2524 loss_cls: 0.7309 loss_bbox: 0.2699 d0.loss_cls: 0.8580 d0.loss_bbox: 0.3053 d1.loss_cls: 0.7877 d1.loss_bbox: 0.2778 d2.loss_cls: 0.7456 d2.loss_bbox: 0.2761 d3.loss_cls: 0.7327 d3.loss_bbox: 0.2695 d4.loss_cls: 0.7280 d4.loss_bbox: 0.2709 2024/04/07 15:01:07 - mmengine - INFO - Epoch(train) [10][ 400/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:58:41 time: 4.4756 data_time: 0.5386 memory: 28974 grad_norm: 21.4274 loss: 6.3353 loss_cls: 0.7217 loss_bbox: 0.2900 d0.loss_cls: 0.8515 d0.loss_bbox: 0.3180 d1.loss_cls: 0.7881 d1.loss_bbox: 0.2947 d2.loss_cls: 0.7490 d2.loss_bbox: 0.2899 d3.loss_cls: 0.7287 d3.loss_bbox: 0.2905 d4.loss_cls: 0.7226 d4.loss_bbox: 0.2906 2024/04/07 15:05:16 - mmengine - INFO - Epoch(train) [10][ 450/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:58:29 time: 4.9801 data_time: 0.2760 memory: 27836 grad_norm: 21.1355 loss: 6.1729 loss_cls: 0.7031 loss_bbox: 0.2823 d0.loss_cls: 0.8350 d0.loss_bbox: 0.3005 d1.loss_cls: 0.7781 d1.loss_bbox: 0.2804 d2.loss_cls: 0.7295 d2.loss_bbox: 0.2810 d3.loss_cls: 0.7190 d3.loss_bbox: 0.2777 d4.loss_cls: 0.7093 d4.loss_bbox: 0.2772 2024/04/07 15:08:46 - mmengine - INFO - Epoch(train) [10][ 500/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:48:44 time: 4.2051 data_time: 0.2796 memory: 28055 grad_norm: 21.8385 loss: 6.2649 loss_cls: 0.7220 loss_bbox: 0.2821 d0.loss_cls: 0.8507 d0.loss_bbox: 0.3095 d1.loss_cls: 0.7819 d1.loss_bbox: 0.2834 d2.loss_cls: 0.7389 d2.loss_bbox: 0.2874 d3.loss_cls: 0.7250 d3.loss_bbox: 0.2825 d4.loss_cls: 0.7188 d4.loss_bbox: 0.2829 2024/04/07 15:12:45 - mmengine - INFO - Epoch(train) [10][ 550/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:46:03 time: 4.7866 data_time: 0.6105 memory: 28312 grad_norm: 21.7192 loss: 6.2149 loss_cls: 0.7063 loss_bbox: 0.2849 d0.loss_cls: 0.8269 d0.loss_bbox: 0.3132 d1.loss_cls: 0.7662 d1.loss_bbox: 0.2972 d2.loss_cls: 0.7354 d2.loss_bbox: 0.2817 d3.loss_cls: 0.7189 d3.loss_bbox: 0.2868 d4.loss_cls: 0.7042 d4.loss_bbox: 0.2929 2024/04/07 15:16:12 - mmengine - INFO - Epoch(train) [10][ 600/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:37:05 time: 4.1337 data_time: 0.4026 memory: 28396 grad_norm: 21.4489 loss: 6.3359 loss_cls: 0.7356 loss_bbox: 0.2763 d0.loss_cls: 0.8615 d0.loss_bbox: 0.2975 d1.loss_cls: 0.7958 d1.loss_bbox: 0.2814 d2.loss_cls: 0.7646 d2.loss_bbox: 0.2821 d3.loss_cls: 0.7482 d3.loss_bbox: 0.2779 d4.loss_cls: 0.7382 d4.loss_bbox: 0.2766 2024/04/07 15:20:06 - mmengine - INFO - Epoch(train) [10][ 650/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:33:34 time: 4.6746 data_time: 0.2409 memory: 28928 grad_norm: 21.8338 loss: 6.2626 loss_cls: 0.7133 loss_bbox: 0.2806 d0.loss_cls: 0.8522 d0.loss_bbox: 0.3094 d1.loss_cls: 0.7738 d1.loss_bbox: 0.2967 d2.loss_cls: 0.7445 d2.loss_bbox: 0.2824 d3.loss_cls: 0.7199 d3.loss_bbox: 0.2808 d4.loss_cls: 0.7261 d4.loss_bbox: 0.2829 2024/04/07 15:23:49 - mmengine - INFO - Epoch(train) [10][ 700/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:28:23 time: 4.4675 data_time: 0.2650 memory: 27776 grad_norm: 21.5178 loss: 6.4120 loss_cls: 0.7444 loss_bbox: 0.2795 d0.loss_cls: 0.8777 d0.loss_bbox: 0.3061 d1.loss_cls: 0.8101 d1.loss_bbox: 0.2980 d2.loss_cls: 0.7652 d2.loss_bbox: 0.2846 d3.loss_cls: 0.7407 d3.loss_bbox: 0.2818 d4.loss_cls: 0.7409 d4.loss_bbox: 0.2830 2024/04/07 15:27:26 - mmengine - INFO - Epoch(train) [10][ 750/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:22:30 time: 4.3460 data_time: 0.4861 memory: 29054 grad_norm: 21.7628 loss: 6.4806 loss_cls: 0.7390 loss_bbox: 0.2930 d0.loss_cls: 0.8801 d0.loss_bbox: 0.3283 d1.loss_cls: 0.8004 d1.loss_bbox: 0.3006 d2.loss_cls: 0.7722 d2.loss_bbox: 0.2957 d3.loss_cls: 0.7452 d3.loss_bbox: 0.2914 d4.loss_cls: 0.7427 d4.loss_bbox: 0.2921 2024/04/07 15:31:46 - mmengine - INFO - Epoch(train) [10][ 800/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:22:36 time: 5.1873 data_time: 0.2558 memory: 28772 grad_norm: 22.1921 loss: 6.2656 loss_cls: 0.7202 loss_bbox: 0.2776 d0.loss_cls: 0.8507 d0.loss_bbox: 0.3150 d1.loss_cls: 0.7823 d1.loss_bbox: 0.2946 d2.loss_cls: 0.7396 d2.loss_bbox: 0.2850 d3.loss_cls: 0.7244 d3.loss_bbox: 0.2789 d4.loss_cls: 0.7131 d4.loss_bbox: 0.2843 2024/04/07 15:35:13 - mmengine - INFO - Epoch(train) [10][ 850/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:15:34 time: 4.1428 data_time: 0.2749 memory: 29525 grad_norm: 21.8583 loss: 6.5249 loss_cls: 0.7647 loss_bbox: 0.2770 d0.loss_cls: 0.8937 d0.loss_bbox: 0.2985 d1.loss_cls: 0.8383 d1.loss_bbox: 0.2950 d2.loss_cls: 0.7972 d2.loss_bbox: 0.2729 d3.loss_cls: 0.7790 d3.loss_bbox: 0.2723 d4.loss_cls: 0.7675 d4.loss_bbox: 0.2689 2024/04/07 15:39:04 - mmengine - INFO - Epoch(train) [10][ 900/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:11:46 time: 4.6195 data_time: 0.4348 memory: 28366 grad_norm: 23.4273 loss: 6.2579 loss_cls: 0.7286 loss_bbox: 0.2708 d0.loss_cls: 0.8626 d0.loss_bbox: 0.2977 d1.loss_cls: 0.7917 d1.loss_bbox: 0.2841 d2.loss_cls: 0.7516 d2.loss_bbox: 0.2711 d3.loss_cls: 0.7304 d3.loss_bbox: 0.2717 d4.loss_cls: 0.7237 d4.loss_bbox: 0.2739 2024/04/07 15:42:40 - mmengine - INFO - Epoch(train) [10][ 950/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:06:21 time: 4.3299 data_time: 1.2063 memory: 28863 grad_norm: 21.5130 loss: 6.1569 loss_cls: 0.7192 loss_bbox: 0.2796 d0.loss_cls: 0.8257 d0.loss_bbox: 0.2893 d1.loss_cls: 0.7624 d1.loss_bbox: 0.2814 d2.loss_cls: 0.7327 d2.loss_bbox: 0.2762 d3.loss_cls: 0.7221 d3.loss_bbox: 0.2761 d4.loss_cls: 0.7151 d4.loss_bbox: 0.2770 2024/04/07 15:46:33 - mmengine - INFO - Epoch(train) [10][1000/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 8:02:45 time: 4.6423 data_time: 0.2718 memory: 29119 grad_norm: 21.6437 loss: 6.7333 loss_cls: 0.7709 loss_bbox: 0.3088 d0.loss_cls: 0.8864 d0.loss_bbox: 0.3433 d1.loss_cls: 0.8294 d1.loss_bbox: 0.3230 d2.loss_cls: 0.7851 d2.loss_bbox: 0.3161 d3.loss_cls: 0.7759 d3.loss_bbox: 0.3118 d4.loss_cls: 0.7739 d4.loss_bbox: 0.3086 2024/04/07 15:50:09 - mmengine - INFO - Epoch(train) [10][1050/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:57:36 time: 4.3382 data_time: 0.4419 memory: 28755 grad_norm: 21.3545 loss: 6.2533 loss_cls: 0.7107 loss_bbox: 0.2933 d0.loss_cls: 0.8369 d0.loss_bbox: 0.3094 d1.loss_cls: 0.7670 d1.loss_bbox: 0.2972 d2.loss_cls: 0.7355 d2.loss_bbox: 0.2868 d3.loss_cls: 0.7198 d3.loss_bbox: 0.2902 d4.loss_cls: 0.7147 d4.loss_bbox: 0.2918 2024/04/07 15:53:49 - mmengine - INFO - Epoch(train) [10][1100/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:52:52 time: 4.3930 data_time: 0.7637 memory: 28433 grad_norm: 21.7231 loss: 6.0762 loss_cls: 0.7075 loss_bbox: 0.2605 d0.loss_cls: 0.8301 d0.loss_bbox: 0.2888 d1.loss_cls: 0.7579 d1.loss_bbox: 0.2762 d2.loss_cls: 0.7280 d2.loss_bbox: 0.2698 d3.loss_cls: 0.7146 d3.loss_bbox: 0.2677 d4.loss_cls: 0.7112 d4.loss_bbox: 0.2639 2024/04/07 15:54:45 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof-full_20240407_142237 2024/04/07 15:57:36 - mmengine - INFO - Epoch(train) [10][1150/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:48:53 time: 4.5429 data_time: 0.5244 memory: 28962 grad_norm: 22.4338 loss: 5.9482 loss_cls: 0.6715 loss_bbox: 0.2711 d0.loss_cls: 0.8218 d0.loss_bbox: 0.2958 d1.loss_cls: 0.7377 d1.loss_bbox: 0.2781 d2.loss_cls: 0.6984 d2.loss_bbox: 0.2748 d3.loss_cls: 0.6785 d3.loss_bbox: 0.2709 d4.loss_cls: 0.6831 d4.loss_bbox: 0.2666 2024/04/07 16:01:40 - mmengine - INFO - Epoch(train) [10][1200/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:46:22 time: 4.8840 data_time: 0.2365 memory: 28355 grad_norm: 21.7364 loss: 6.1433 loss_cls: 0.7165 loss_bbox: 0.2585 d0.loss_cls: 0.8644 d0.loss_bbox: 0.2822 d1.loss_cls: 0.7917 d1.loss_bbox: 0.2722 d2.loss_cls: 0.7398 d2.loss_bbox: 0.2638 d3.loss_cls: 0.7144 d3.loss_bbox: 0.2617 d4.loss_cls: 0.7153 d4.loss_bbox: 0.2628 2024/04/07 16:05:04 - mmengine - INFO - Epoch(train) [10][1250/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:40:27 time: 4.0691 data_time: 0.4603 memory: 28948 grad_norm: 21.8352 loss: 6.1469 loss_cls: 0.7311 loss_bbox: 0.2545 d0.loss_cls: 0.8503 d0.loss_bbox: 0.2820 d1.loss_cls: 0.7858 d1.loss_bbox: 0.2590 d2.loss_cls: 0.7543 d2.loss_bbox: 0.2550 d3.loss_cls: 0.7360 d3.loss_bbox: 0.2531 d4.loss_cls: 0.7347 d4.loss_bbox: 0.2513 2024/04/07 16:08:54 - mmengine - INFO - Epoch(train) [10][1300/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:36:45 time: 4.5989 data_time: 1.5173 memory: 29238 grad_norm: 20.9411 loss: 5.9085 loss_cls: 0.6945 loss_bbox: 0.2568 d0.loss_cls: 0.8020 d0.loss_bbox: 0.2808 d1.loss_cls: 0.7347 d1.loss_bbox: 0.2669 d2.loss_cls: 0.7107 d2.loss_bbox: 0.2583 d3.loss_cls: 0.6988 d3.loss_bbox: 0.2550 d4.loss_cls: 0.6994 d4.loss_bbox: 0.2506 2024/04/07 16:12:38 - mmengine - INFO - Epoch(train) [10][1350/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:32:36 time: 4.4776 data_time: 1.8267 memory: 28768 grad_norm: 21.0933 loss: 6.4125 loss_cls: 0.7550 loss_bbox: 0.2733 d0.loss_cls: 0.8798 d0.loss_bbox: 0.2921 d1.loss_cls: 0.8189 d1.loss_bbox: 0.2749 d2.loss_cls: 0.7892 d2.loss_bbox: 0.2713 d3.loss_cls: 0.7638 d3.loss_bbox: 0.2706 d4.loss_cls: 0.7504 d4.loss_bbox: 0.2732 2024/04/07 16:16:39 - mmengine - INFO - Epoch(train) [10][1400/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:29:40 time: 4.8171 data_time: 1.8590 memory: 28150 grad_norm: 22.0589 loss: 6.1350 loss_cls: 0.7243 loss_bbox: 0.2619 d0.loss_cls: 0.8442 d0.loss_bbox: 0.2823 d1.loss_cls: 0.7819 d1.loss_bbox: 0.2732 d2.loss_cls: 0.7389 d2.loss_bbox: 0.2636 d3.loss_cls: 0.7188 d3.loss_bbox: 0.2627 d4.loss_cls: 0.7221 d4.loss_bbox: 0.2612 2024/04/07 16:20:06 - mmengine - INFO - Epoch(train) [10][1450/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:24:25 time: 4.1476 data_time: 0.2573 memory: 29085 grad_norm: 22.4377 loss: 6.2333 loss_cls: 0.7342 loss_bbox: 0.2654 d0.loss_cls: 0.8633 d0.loss_bbox: 0.2851 d1.loss_cls: 0.7825 d1.loss_bbox: 0.2733 d2.loss_cls: 0.7591 d2.loss_bbox: 0.2630 d3.loss_cls: 0.7452 d3.loss_bbox: 0.2635 d4.loss_cls: 0.7359 d4.loss_bbox: 0.2628 2024/04/07 16:24:00 - mmengine - INFO - Epoch(train) [10][1500/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:20:59 time: 4.6759 data_time: 0.2920 memory: 28891 grad_norm: 20.9650 loss: 6.1776 loss_cls: 0.7034 loss_bbox: 0.2734 d0.loss_cls: 0.8593 d0.loss_bbox: 0.2966 d1.loss_cls: 0.7772 d1.loss_bbox: 0.2764 d2.loss_cls: 0.7369 d2.loss_bbox: 0.2735 d3.loss_cls: 0.7310 d3.loss_bbox: 0.2676 d4.loss_cls: 0.7088 d4.loss_bbox: 0.2734 2024/04/07 16:28:02 - mmengine - INFO - Epoch(train) [10][1550/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:18:03 time: 4.8436 data_time: 0.5869 memory: 29300 grad_norm: 20.9741 loss: 6.2995 loss_cls: 0.7331 loss_bbox: 0.2685 d0.loss_cls: 0.8638 d0.loss_bbox: 0.2929 d1.loss_cls: 0.7996 d1.loss_bbox: 0.2749 d2.loss_cls: 0.7652 d2.loss_bbox: 0.2720 d3.loss_cls: 0.7514 d3.loss_bbox: 0.2669 d4.loss_cls: 0.7439 d4.loss_bbox: 0.2673 2024/04/07 16:31:39 - mmengine - INFO - Epoch(train) [10][1600/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:13:34 time: 4.3475 data_time: 0.2753 memory: 28597 grad_norm: 21.0084 loss: 6.2377 loss_cls: 0.7089 loss_bbox: 0.2833 d0.loss_cls: 0.8485 d0.loss_bbox: 0.2932 d1.loss_cls: 0.7788 d1.loss_bbox: 0.2797 d2.loss_cls: 0.7455 d2.loss_bbox: 0.2830 d3.loss_cls: 0.7367 d3.loss_bbox: 0.2838 d4.loss_cls: 0.7149 d4.loss_bbox: 0.2814 2024/04/07 16:35:11 - mmengine - INFO - Epoch(train) [10][1650/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:08:46 time: 4.2238 data_time: 1.2058 memory: 28361 grad_norm: 21.1803 loss: 6.1909 loss_cls: 0.7220 loss_bbox: 0.2704 d0.loss_cls: 0.8436 d0.loss_bbox: 0.2946 d1.loss_cls: 0.7804 d1.loss_bbox: 0.2775 d2.loss_cls: 0.7431 d2.loss_bbox: 0.2715 d3.loss_cls: 0.7230 d3.loss_bbox: 0.2722 d4.loss_cls: 0.7230 d4.loss_bbox: 0.2696 2024/04/07 16:38:59 - mmengine - INFO - Epoch(train) [10][1700/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:05:00 time: 4.5634 data_time: 0.7795 memory: 28469 grad_norm: 21.9717 loss: 6.0565 loss_cls: 0.7005 loss_bbox: 0.2616 d0.loss_cls: 0.8280 d0.loss_bbox: 0.2862 d1.loss_cls: 0.7653 d1.loss_bbox: 0.2717 d2.loss_cls: 0.7376 d2.loss_bbox: 0.2625 d3.loss_cls: 0.7111 d3.loss_bbox: 0.2629 d4.loss_cls: 0.7086 d4.loss_bbox: 0.2604 2024/04/07 16:43:22 - mmengine - INFO - Epoch(train) [10][1750/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 7:03:05 time: 5.2704 data_time: 2.0177 memory: 28851 grad_norm: 21.9091 loss: 6.0287 loss_cls: 0.7086 loss_bbox: 0.2555 d0.loss_cls: 0.8144 d0.loss_bbox: 0.2931 d1.loss_cls: 0.7564 d1.loss_bbox: 0.2690 d2.loss_cls: 0.7350 d2.loss_bbox: 0.2618 d3.loss_cls: 0.7075 d3.loss_bbox: 0.2661 d4.loss_cls: 0.7058 d4.loss_bbox: 0.2555 2024/04/07 16:46:42 - mmengine - INFO - Epoch(train) [10][1800/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:57:48 time: 4.0030 data_time: 1.7409 memory: 28245 grad_norm: 20.9727 loss: 5.9808 loss_cls: 0.7150 loss_bbox: 0.2442 d0.loss_cls: 0.8207 d0.loss_bbox: 0.2747 d1.loss_cls: 0.7539 d1.loss_bbox: 0.2611 d2.loss_cls: 0.7309 d2.loss_bbox: 0.2541 d3.loss_cls: 0.7188 d3.loss_bbox: 0.2469 d4.loss_cls: 0.7147 d4.loss_bbox: 0.2457 2024/04/07 16:50:32 - mmengine - INFO - Epoch(train) [10][1850/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:54:06 time: 4.5990 data_time: 0.7698 memory: 28501 grad_norm: 21.3425 loss: 5.9077 loss_cls: 0.6929 loss_bbox: 0.2532 d0.loss_cls: 0.8121 d0.loss_bbox: 0.2755 d1.loss_cls: 0.7364 d1.loss_bbox: 0.2684 d2.loss_cls: 0.7106 d2.loss_bbox: 0.2587 d3.loss_cls: 0.6913 d3.loss_bbox: 0.2601 d4.loss_cls: 0.6975 d4.loss_bbox: 0.2511 2024/04/07 16:54:10 - mmengine - INFO - Epoch(train) [10][1900/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:49:48 time: 4.3548 data_time: 0.2447 memory: 28538 grad_norm: 21.3389 loss: 6.4263 loss_cls: 0.7425 loss_bbox: 0.2784 d0.loss_cls: 0.8711 d0.loss_bbox: 0.3126 d1.loss_cls: 0.8164 d1.loss_bbox: 0.2906 d2.loss_cls: 0.7730 d2.loss_bbox: 0.2895 d3.loss_cls: 0.7470 d3.loss_bbox: 0.2829 d4.loss_cls: 0.7455 d4.loss_bbox: 0.2768 2024/04/07 16:58:22 - mmengine - INFO - Epoch(train) [10][1950/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:47:05 time: 5.0298 data_time: 0.2645 memory: 29828 grad_norm: 21.7048 loss: 5.8641 loss_cls: 0.6643 loss_bbox: 0.2670 d0.loss_cls: 0.7973 d0.loss_bbox: 0.2942 d1.loss_cls: 0.7298 d1.loss_bbox: 0.2738 d2.loss_cls: 0.6954 d2.loss_bbox: 0.2654 d3.loss_cls: 0.6798 d3.loss_bbox: 0.2666 d4.loss_cls: 0.6647 d4.loss_bbox: 0.2656 2024/04/07 17:01:43 - mmengine - INFO - Epoch(train) [10][2000/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:42:05 time: 4.0285 data_time: 0.9138 memory: 30492 grad_norm: 21.4557 loss: 6.2700 loss_cls: 0.7444 loss_bbox: 0.2570 d0.loss_cls: 0.8635 d0.loss_bbox: 0.2882 d1.loss_cls: 0.8025 d1.loss_bbox: 0.2711 d2.loss_cls: 0.7597 d2.loss_bbox: 0.2659 d3.loss_cls: 0.7487 d3.loss_bbox: 0.2605 d4.loss_cls: 0.7493 d4.loss_bbox: 0.2590 2024/04/07 17:05:46 - mmengine - INFO - Epoch(train) [10][2050/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:38:56 time: 4.8548 data_time: 0.2851 memory: 28341 grad_norm: 20.7694 loss: 6.1085 loss_cls: 0.7298 loss_bbox: 0.2471 d0.loss_cls: 0.8532 d0.loss_bbox: 0.2746 d1.loss_cls: 0.7833 d1.loss_bbox: 0.2568 d2.loss_cls: 0.7511 d2.loss_bbox: 0.2493 d3.loss_cls: 0.7362 d3.loss_bbox: 0.2450 d4.loss_cls: 0.7331 d4.loss_bbox: 0.2492 2024/04/07 17:09:43 - mmengine - INFO - Epoch(train) [10][2100/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:35:31 time: 4.7521 data_time: 0.2587 memory: 29072 grad_norm: 21.6622 loss: 6.3366 loss_cls: 0.7305 loss_bbox: 0.2824 d0.loss_cls: 0.8456 d0.loss_bbox: 0.3014 d1.loss_cls: 0.7819 d1.loss_bbox: 0.2929 d2.loss_cls: 0.7599 d2.loss_bbox: 0.2875 d3.loss_cls: 0.7498 d3.loss_bbox: 0.2822 d4.loss_cls: 0.7345 d4.loss_bbox: 0.2879 2024/04/07 17:10:28 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof-full_20240407_142237 2024/04/07 17:13:05 - mmengine - INFO - Epoch(train) [10][2150/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:30:40 time: 4.0420 data_time: 0.3769 memory: 27953 grad_norm: 21.1069 loss: 6.2580 loss_cls: 0.7485 loss_bbox: 0.2602 d0.loss_cls: 0.8655 d0.loss_bbox: 0.2802 d1.loss_cls: 0.7930 d1.loss_bbox: 0.2633 d2.loss_cls: 0.7686 d2.loss_bbox: 0.2583 d3.loss_cls: 0.7616 d3.loss_bbox: 0.2519 d4.loss_cls: 0.7513 d4.loss_bbox: 0.2556 2024/04/07 17:17:04 - mmengine - INFO - Epoch(train) [10][2200/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:27:16 time: 4.7628 data_time: 1.3939 memory: 28591 grad_norm: 22.1460 loss: 6.1079 loss_cls: 0.7035 loss_bbox: 0.2586 d0.loss_cls: 0.8501 d0.loss_bbox: 0.2925 d1.loss_cls: 0.7732 d1.loss_bbox: 0.2776 d2.loss_cls: 0.7341 d2.loss_bbox: 0.2679 d3.loss_cls: 0.7195 d3.loss_bbox: 0.2609 d4.loss_cls: 0.7105 d4.loss_bbox: 0.2597 2024/04/07 17:20:49 - mmengine - INFO - Epoch(train) [10][2250/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:23:23 time: 4.5097 data_time: 1.1053 memory: 28036 grad_norm: 22.0117 loss: 6.2336 loss_cls: 0.7344 loss_bbox: 0.2661 d0.loss_cls: 0.8531 d0.loss_bbox: 0.2817 d1.loss_cls: 0.7956 d1.loss_bbox: 0.2715 d2.loss_cls: 0.7654 d2.loss_bbox: 0.2577 d3.loss_cls: 0.7424 d3.loss_bbox: 0.2621 d4.loss_cls: 0.7409 d4.loss_bbox: 0.2626 2024/04/07 17:24:30 - mmengine - INFO - Epoch(train) [10][2300/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:19:20 time: 4.4272 data_time: 0.2612 memory: 27915 grad_norm: 21.3877 loss: 6.1558 loss_cls: 0.7276 loss_bbox: 0.2590 d0.loss_cls: 0.8297 d0.loss_bbox: 0.2903 d1.loss_cls: 0.7788 d1.loss_bbox: 0.2621 d2.loss_cls: 0.7527 d2.loss_bbox: 0.2637 d3.loss_cls: 0.7346 d3.loss_bbox: 0.2612 d4.loss_cls: 0.7344 d4.loss_bbox: 0.2617 2024/04/07 17:28:37 - mmengine - INFO - Epoch(train) [10][2350/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:16:13 time: 4.9393 data_time: 0.2300 memory: 28456 grad_norm: 21.9400 loss: 6.1965 loss_cls: 0.7281 loss_bbox: 0.2644 d0.loss_cls: 0.8587 d0.loss_bbox: 0.2843 d1.loss_cls: 0.7848 d1.loss_bbox: 0.2695 d2.loss_cls: 0.7510 d2.loss_bbox: 0.2644 d3.loss_cls: 0.7386 d3.loss_bbox: 0.2595 d4.loss_cls: 0.7251 d4.loss_bbox: 0.2683 2024/04/07 17:31:57 - mmengine - INFO - Epoch(train) [10][2400/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:11:27 time: 3.9972 data_time: 0.2610 memory: 28560 grad_norm: 22.3924 loss: 6.1931 loss_cls: 0.7310 loss_bbox: 0.2604 d0.loss_cls: 0.8537 d0.loss_bbox: 0.2858 d1.loss_cls: 0.7789 d1.loss_bbox: 0.2747 d2.loss_cls: 0.7406 d2.loss_bbox: 0.2707 d3.loss_cls: 0.7274 d3.loss_bbox: 0.2695 d4.loss_cls: 0.7339 d4.loss_bbox: 0.2665 2024/04/07 17:33:56 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof-full_20240407_142237 2024/04/07 17:33:56 - mmengine - INFO - Saving checkpoint at 10 epochs 2024/04/07 17:39:16 - mmengine - INFO - Epoch(train) [11][ 50/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:06:47 time: 6.0581 data_time: 3.4581 memory: 29241 grad_norm: 21.7072 loss: 6.1763 loss_cls: 0.7240 loss_bbox: 0.2616 d0.loss_cls: 0.8519 d0.loss_bbox: 0.2929 d1.loss_cls: 0.7804 d1.loss_bbox: 0.2721 d2.loss_cls: 0.7474 d2.loss_bbox: 0.2639 d3.loss_cls: 0.7267 d3.loss_bbox: 0.2630 d4.loss_cls: 0.7283 d4.loss_bbox: 0.2641 2024/04/07 17:42:52 - mmengine - INFO - Epoch(train) [11][ 100/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 6:02:35 time: 4.3203 data_time: 1.8165 memory: 29000 grad_norm: 22.2006 loss: 6.3186 loss_cls: 0.7513 loss_bbox: 0.2613 d0.loss_cls: 0.8903 d0.loss_bbox: 0.2747 d1.loss_cls: 0.8199 d1.loss_bbox: 0.2577 d2.loss_cls: 0.7808 d2.loss_bbox: 0.2548 d3.loss_cls: 0.7603 d3.loss_bbox: 0.2627 d4.loss_cls: 0.7453 d4.loss_bbox: 0.2596 2024/04/07 17:46:33 - mmengine - INFO - Epoch(train) [11][ 150/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:58:34 time: 4.4294 data_time: 2.2066 memory: 28351 grad_norm: 22.0462 loss: 6.0860 loss_cls: 0.7017 loss_bbox: 0.2682 d0.loss_cls: 0.8342 d0.loss_bbox: 0.2908 d1.loss_cls: 0.7608 d1.loss_bbox: 0.2788 d2.loss_cls: 0.7365 d2.loss_bbox: 0.2706 d3.loss_cls: 0.7066 d3.loss_bbox: 0.2661 d4.loss_cls: 0.7031 d4.loss_bbox: 0.2686 2024/04/07 17:50:05 - mmengine - INFO - Epoch(train) [11][ 200/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:54:17 time: 4.2372 data_time: 1.8410 memory: 28494 grad_norm: 21.9881 loss: 6.1375 loss_cls: 0.7081 loss_bbox: 0.2775 d0.loss_cls: 0.8255 d0.loss_bbox: 0.3000 d1.loss_cls: 0.7676 d1.loss_bbox: 0.2817 d2.loss_cls: 0.7199 d2.loss_bbox: 0.2811 d3.loss_cls: 0.7067 d3.loss_bbox: 0.2817 d4.loss_cls: 0.7098 d4.loss_bbox: 0.2777 2024/04/07 17:54:02 - mmengine - INFO - Epoch(train) [11][ 250/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:50:44 time: 4.7336 data_time: 1.1095 memory: 28882 grad_norm: 22.0230 loss: 6.1533 loss_cls: 0.7117 loss_bbox: 0.2811 d0.loss_cls: 0.8246 d0.loss_bbox: 0.3131 d1.loss_cls: 0.7610 d1.loss_bbox: 0.2875 d2.loss_cls: 0.7185 d2.loss_bbox: 0.2800 d3.loss_cls: 0.7091 d3.loss_bbox: 0.2820 d4.loss_cls: 0.7029 d4.loss_bbox: 0.2818 2024/04/07 17:58:00 - mmengine - INFO - Epoch(train) [11][ 300/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:47:14 time: 4.7739 data_time: 1.8692 memory: 28108 grad_norm: 22.4509 loss: 6.5220 loss_cls: 0.7432 loss_bbox: 0.2883 d0.loss_cls: 0.8900 d0.loss_bbox: 0.3141 d1.loss_cls: 0.8155 d1.loss_bbox: 0.2950 d2.loss_cls: 0.7914 d2.loss_bbox: 0.2911 d3.loss_cls: 0.7655 d3.loss_bbox: 0.2887 d4.loss_cls: 0.7515 d4.loss_bbox: 0.2878 2024/04/07 18:01:40 - mmengine - INFO - Epoch(train) [11][ 350/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:43:12 time: 4.3992 data_time: 2.0379 memory: 29091 grad_norm: 22.1856 loss: 6.2841 loss_cls: 0.7146 loss_bbox: 0.2921 d0.loss_cls: 0.8397 d0.loss_bbox: 0.3118 d1.loss_cls: 0.7647 d1.loss_bbox: 0.3014 d2.loss_cls: 0.7379 d2.loss_bbox: 0.2950 d3.loss_cls: 0.7296 d3.loss_bbox: 0.2901 d4.loss_cls: 0.7171 d4.loss_bbox: 0.2901 2024/04/07 18:05:20 - mmengine - INFO - Epoch(train) [11][ 400/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:39:09 time: 4.3778 data_time: 0.5709 memory: 28342 grad_norm: 21.6981 loss: 6.0447 loss_cls: 0.6994 loss_bbox: 0.2714 d0.loss_cls: 0.8054 d0.loss_bbox: 0.2967 d1.loss_cls: 0.7481 d1.loss_bbox: 0.2761 d2.loss_cls: 0.7203 d2.loss_bbox: 0.2788 d3.loss_cls: 0.7022 d3.loss_bbox: 0.2746 d4.loss_cls: 0.6990 d4.loss_bbox: 0.2727 2024/04/07 18:08:56 - mmengine - INFO - Epoch(train) [11][ 450/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:35:05 time: 4.3430 data_time: 0.2655 memory: 27996 grad_norm: 21.5961 loss: 5.9881 loss_cls: 0.6884 loss_bbox: 0.2609 d0.loss_cls: 0.8282 d0.loss_bbox: 0.2822 d1.loss_cls: 0.7565 d1.loss_bbox: 0.2689 d2.loss_cls: 0.7269 d2.loss_bbox: 0.2625 d3.loss_cls: 0.7073 d3.loss_bbox: 0.2547 d4.loss_cls: 0.6957 d4.loss_bbox: 0.2558 2024/04/07 18:12:55 - mmengine - INFO - Epoch(train) [11][ 500/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:31:33 time: 4.7709 data_time: 0.2952 memory: 28783 grad_norm: 21.3349 loss: 6.0996 loss_cls: 0.7097 loss_bbox: 0.2695 d0.loss_cls: 0.8249 d0.loss_bbox: 0.2973 d1.loss_cls: 0.7582 d1.loss_bbox: 0.2809 d2.loss_cls: 0.7272 d2.loss_bbox: 0.2723 d3.loss_cls: 0.7122 d3.loss_bbox: 0.2728 d4.loss_cls: 0.7019 d4.loss_bbox: 0.2728 2024/04/07 18:17:01 - mmengine - INFO - Epoch(train) [11][ 550/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:28:12 time: 4.9268 data_time: 0.3987 memory: 28681 grad_norm: 21.0096 loss: 6.1113 loss_cls: 0.7223 loss_bbox: 0.2646 d0.loss_cls: 0.8249 d0.loss_bbox: 0.2885 d1.loss_cls: 0.7634 d1.loss_bbox: 0.2749 d2.loss_cls: 0.7349 d2.loss_bbox: 0.2676 d3.loss_cls: 0.7282 d3.loss_bbox: 0.2630 d4.loss_cls: 0.7148 d4.loss_bbox: 0.2643 2024/04/07 18:20:29 - mmengine - INFO - Epoch(train) [11][ 600/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:23:54 time: 4.1468 data_time: 0.2680 memory: 28859 grad_norm: 21.2165 loss: 6.3152 loss_cls: 0.7041 loss_bbox: 0.2898 d0.loss_cls: 0.8429 d0.loss_bbox: 0.3206 d1.loss_cls: 0.7859 d1.loss_bbox: 0.3043 d2.loss_cls: 0.7444 d2.loss_bbox: 0.2986 d3.loss_cls: 0.7294 d3.loss_bbox: 0.2918 d4.loss_cls: 0.7120 d4.loss_bbox: 0.2914 2024/04/07 18:24:21 - mmengine - INFO - Epoch(train) [11][ 650/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:20:12 time: 4.6447 data_time: 0.4485 memory: 29624 grad_norm: 21.4132 loss: 6.1933 loss_cls: 0.7171 loss_bbox: 0.2679 d0.loss_cls: 0.8488 d0.loss_bbox: 0.2998 d1.loss_cls: 0.7781 d1.loss_bbox: 0.2860 d2.loss_cls: 0.7395 d2.loss_bbox: 0.2738 d3.loss_cls: 0.7255 d3.loss_bbox: 0.2719 d4.loss_cls: 0.7120 d4.loss_bbox: 0.2729 2024/04/07 18:26:38 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof-full_20240407_142237 2024/04/07 18:28:15 - mmengine - INFO - Epoch(train) [11][ 700/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:16:32 time: 4.6746 data_time: 0.7531 memory: 28548 grad_norm: 21.8133 loss: 5.9067 loss_cls: 0.6792 loss_bbox: 0.2672 d0.loss_cls: 0.8053 d0.loss_bbox: 0.2796 d1.loss_cls: 0.7249 d1.loss_bbox: 0.2643 d2.loss_cls: 0.7130 d2.loss_bbox: 0.2571 d3.loss_cls: 0.6975 d3.loss_bbox: 0.2663 d4.loss_cls: 0.6901 d4.loss_bbox: 0.2623 2024/04/07 18:32:18 - mmengine - INFO - Epoch(train) [11][ 750/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:13:04 time: 4.8685 data_time: 1.3710 memory: 28993 grad_norm: 21.8126 loss: 6.0916 loss_cls: 0.6965 loss_bbox: 0.2765 d0.loss_cls: 0.8300 d0.loss_bbox: 0.2998 d1.loss_cls: 0.7545 d1.loss_bbox: 0.2839 d2.loss_cls: 0.7251 d2.loss_bbox: 0.2761 d3.loss_cls: 0.7035 d3.loss_bbox: 0.2753 d4.loss_cls: 0.6969 d4.loss_bbox: 0.2735 2024/04/07 18:35:43 - mmengine - INFO - Epoch(train) [11][ 800/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:08:46 time: 4.0980 data_time: 0.9881 memory: 27890 grad_norm: 22.2894 loss: 6.1205 loss_cls: 0.7007 loss_bbox: 0.2665 d0.loss_cls: 0.8503 d0.loss_bbox: 0.2945 d1.loss_cls: 0.7828 d1.loss_bbox: 0.2807 d2.loss_cls: 0.7253 d2.loss_bbox: 0.2732 d3.loss_cls: 0.7146 d3.loss_bbox: 0.2644 d4.loss_cls: 0.7007 d4.loss_bbox: 0.2668 2024/04/07 18:39:55 - mmengine - INFO - Epoch(train) [11][ 850/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:05:28 time: 5.0374 data_time: 0.2603 memory: 28861 grad_norm: 21.1796 loss: 6.0729 loss_cls: 0.7121 loss_bbox: 0.2582 d0.loss_cls: 0.8343 d0.loss_bbox: 0.2912 d1.loss_cls: 0.7702 d1.loss_bbox: 0.2680 d2.loss_cls: 0.7371 d2.loss_bbox: 0.2628 d3.loss_cls: 0.7106 d3.loss_bbox: 0.2572 d4.loss_cls: 0.7128 d4.loss_bbox: 0.2585 2024/04/07 18:43:35 - mmengine - INFO - Epoch(train) [11][ 900/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 5:01:29 time: 4.3981 data_time: 0.3414 memory: 29658 grad_norm: 21.7354 loss: 6.0995 loss_cls: 0.6952 loss_bbox: 0.2714 d0.loss_cls: 0.8400 d0.loss_bbox: 0.2947 d1.loss_cls: 0.7691 d1.loss_bbox: 0.2809 d2.loss_cls: 0.7263 d2.loss_bbox: 0.2712 d3.loss_cls: 0.7013 d3.loss_bbox: 0.2727 d4.loss_cls: 0.7064 d4.loss_bbox: 0.2703 2024/04/07 18:47:24 - mmengine - INFO - Epoch(train) [11][ 950/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:57:43 time: 4.5916 data_time: 1.2811 memory: 27968 grad_norm: 21.2730 loss: 6.1982 loss_cls: 0.7068 loss_bbox: 0.2781 d0.loss_cls: 0.8542 d0.loss_bbox: 0.2972 d1.loss_cls: 0.7711 d1.loss_bbox: 0.2882 d2.loss_cls: 0.7433 d2.loss_bbox: 0.2826 d3.loss_cls: 0.7164 d3.loss_bbox: 0.2760 d4.loss_cls: 0.7082 d4.loss_bbox: 0.2761 2024/04/07 18:51:10 - mmengine - INFO - Epoch(train) [11][1000/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:53:52 time: 4.5185 data_time: 1.7285 memory: 28692 grad_norm: 21.5022 loss: 6.3013 loss_cls: 0.7252 loss_bbox: 0.2744 d0.loss_cls: 0.8604 d0.loss_bbox: 0.3091 d1.loss_cls: 0.7945 d1.loss_bbox: 0.2876 d2.loss_cls: 0.7498 d2.loss_bbox: 0.2792 d3.loss_cls: 0.7371 d3.loss_bbox: 0.2754 d4.loss_cls: 0.7391 d4.loss_bbox: 0.2694 2024/04/07 18:55:07 - mmengine - INFO - Epoch(train) [11][1050/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:50:14 time: 4.7407 data_time: 1.6265 memory: 28584 grad_norm: 22.3019 loss: 6.0239 loss_cls: 0.6943 loss_bbox: 0.2737 d0.loss_cls: 0.8054 d0.loss_bbox: 0.3089 d1.loss_cls: 0.7377 d1.loss_bbox: 0.2907 d2.loss_cls: 0.7083 d2.loss_bbox: 0.2770 d3.loss_cls: 0.6877 d3.loss_bbox: 0.2744 d4.loss_cls: 0.6875 d4.loss_bbox: 0.2782 2024/04/07 18:58:58 - mmengine - INFO - Epoch(train) [11][1100/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:46:28 time: 4.6219 data_time: 1.4331 memory: 28199 grad_norm: 21.4989 loss: 6.0227 loss_cls: 0.6986 loss_bbox: 0.2648 d0.loss_cls: 0.8201 d0.loss_bbox: 0.2937 d1.loss_cls: 0.7552 d1.loss_bbox: 0.2728 d2.loss_cls: 0.7131 d2.loss_bbox: 0.2733 d3.loss_cls: 0.7079 d3.loss_bbox: 0.2662 d4.loss_cls: 0.6939 d4.loss_bbox: 0.2632 2024/04/07 19:02:42 - mmengine - INFO - Epoch(train) [11][1150/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:42:35 time: 4.4709 data_time: 0.3490 memory: 28610 grad_norm: 21.9099 loss: 5.9283 loss_cls: 0.6876 loss_bbox: 0.2616 d0.loss_cls: 0.8028 d0.loss_bbox: 0.2829 d1.loss_cls: 0.7497 d1.loss_bbox: 0.2655 d2.loss_cls: 0.7156 d2.loss_bbox: 0.2549 d3.loss_cls: 0.6926 d3.loss_bbox: 0.2587 d4.loss_cls: 0.6979 d4.loss_bbox: 0.2587 2024/04/07 19:06:13 - mmengine - INFO - Epoch(train) [11][1200/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:38:30 time: 4.2251 data_time: 0.2771 memory: 28539 grad_norm: 21.9811 loss: 6.0633 loss_cls: 0.7034 loss_bbox: 0.2649 d0.loss_cls: 0.8187 d0.loss_bbox: 0.2982 d1.loss_cls: 0.7502 d1.loss_bbox: 0.2797 d2.loss_cls: 0.7289 d2.loss_bbox: 0.2687 d3.loss_cls: 0.7195 d3.loss_bbox: 0.2626 d4.loss_cls: 0.7076 d4.loss_bbox: 0.2611 2024/04/07 19:10:20 - mmengine - INFO - Epoch(train) [11][1250/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:34:59 time: 4.9278 data_time: 0.2527 memory: 29013 grad_norm: 21.4251 loss: 5.9928 loss_cls: 0.7067 loss_bbox: 0.2545 d0.loss_cls: 0.8264 d0.loss_bbox: 0.2804 d1.loss_cls: 0.7558 d1.loss_bbox: 0.2633 d2.loss_cls: 0.7228 d2.loss_bbox: 0.2541 d3.loss_cls: 0.7116 d3.loss_bbox: 0.2532 d4.loss_cls: 0.7119 d4.loss_bbox: 0.2523 2024/04/07 19:14:03 - mmengine - INFO - Epoch(train) [11][1300/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:31:07 time: 4.4707 data_time: 0.2575 memory: 28566 grad_norm: 22.6510 loss: 6.1288 loss_cls: 0.7081 loss_bbox: 0.2734 d0.loss_cls: 0.8350 d0.loss_bbox: 0.2912 d1.loss_cls: 0.7628 d1.loss_bbox: 0.2774 d2.loss_cls: 0.7343 d2.loss_bbox: 0.2697 d3.loss_cls: 0.7240 d3.loss_bbox: 0.2691 d4.loss_cls: 0.7148 d4.loss_bbox: 0.2690 2024/04/07 19:17:35 - mmengine - INFO - Epoch(train) [11][1350/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:27:03 time: 4.2350 data_time: 0.2833 memory: 28513 grad_norm: 22.0978 loss: 6.1676 loss_cls: 0.6875 loss_bbox: 0.2900 d0.loss_cls: 0.8275 d0.loss_bbox: 0.3321 d1.loss_cls: 0.7504 d1.loss_bbox: 0.3051 d2.loss_cls: 0.7112 d2.loss_bbox: 0.3001 d3.loss_cls: 0.6954 d3.loss_bbox: 0.2920 d4.loss_cls: 0.6869 d4.loss_bbox: 0.2892 2024/04/07 19:21:32 - mmengine - INFO - Epoch(train) [11][1400/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:23:23 time: 4.7441 data_time: 0.2308 memory: 28434 grad_norm: 21.6201 loss: 6.1618 loss_cls: 0.7184 loss_bbox: 0.2673 d0.loss_cls: 0.8433 d0.loss_bbox: 0.2924 d1.loss_cls: 0.7721 d1.loss_bbox: 0.2804 d2.loss_cls: 0.7355 d2.loss_bbox: 0.2718 d3.loss_cls: 0.7252 d3.loss_bbox: 0.2647 d4.loss_cls: 0.7204 d4.loss_bbox: 0.2701 2024/04/07 19:25:27 - mmengine - INFO - Epoch(train) [11][1450/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:19:41 time: 4.7001 data_time: 0.2557 memory: 29094 grad_norm: 21.7632 loss: 6.1361 loss_cls: 0.7056 loss_bbox: 0.2660 d0.loss_cls: 0.8400 d0.loss_bbox: 0.2926 d1.loss_cls: 0.7792 d1.loss_bbox: 0.2817 d2.loss_cls: 0.7411 d2.loss_bbox: 0.2694 d3.loss_cls: 0.7148 d3.loss_bbox: 0.2729 d4.loss_cls: 0.7077 d4.loss_bbox: 0.2651 2024/04/07 19:29:12 - mmengine - INFO - Epoch(train) [11][1500/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:15:51 time: 4.5028 data_time: 0.2801 memory: 28531 grad_norm: 21.1055 loss: 6.1315 loss_cls: 0.7034 loss_bbox: 0.2773 d0.loss_cls: 0.8379 d0.loss_bbox: 0.3031 d1.loss_cls: 0.7752 d1.loss_bbox: 0.2823 d2.loss_cls: 0.7215 d2.loss_bbox: 0.2763 d3.loss_cls: 0.6998 d3.loss_bbox: 0.2800 d4.loss_cls: 0.7018 d4.loss_bbox: 0.2730 2024/04/07 19:32:52 - mmengine - INFO - Epoch(train) [11][1550/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:11:56 time: 4.4026 data_time: 0.3024 memory: 28438 grad_norm: 21.6711 loss: 6.1107 loss_cls: 0.7088 loss_bbox: 0.2662 d0.loss_cls: 0.8306 d0.loss_bbox: 0.2880 d1.loss_cls: 0.7765 d1.loss_bbox: 0.2720 d2.loss_cls: 0.7357 d2.loss_bbox: 0.2693 d3.loss_cls: 0.7157 d3.loss_bbox: 0.2684 d4.loss_cls: 0.7106 d4.loss_bbox: 0.2688 2024/04/07 19:36:45 - mmengine - INFO - Epoch(train) [11][1600/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:08:11 time: 4.6472 data_time: 0.2914 memory: 28192 grad_norm: 21.2713 loss: 5.8890 loss_cls: 0.6945 loss_bbox: 0.2500 d0.loss_cls: 0.8036 d0.loss_bbox: 0.2797 d1.loss_cls: 0.7408 d1.loss_bbox: 0.2614 d2.loss_cls: 0.7106 d2.loss_bbox: 0.2584 d3.loss_cls: 0.6935 d3.loss_bbox: 0.2489 d4.loss_cls: 0.6911 d4.loss_bbox: 0.2565 2024/04/07 19:40:28 - mmengine - INFO - Epoch(train) [11][1650/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:04:19 time: 4.4559 data_time: 0.4456 memory: 28224 grad_norm: 20.9779 loss: 6.0771 loss_cls: 0.6965 loss_bbox: 0.2713 d0.loss_cls: 0.8362 d0.loss_bbox: 0.2867 d1.loss_cls: 0.7599 d1.loss_bbox: 0.2781 d2.loss_cls: 0.7326 d2.loss_bbox: 0.2692 d3.loss_cls: 0.7052 d3.loss_bbox: 0.2698 d4.loss_cls: 0.6983 d4.loss_bbox: 0.2732 2024/04/07 19:42:37 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof-full_20240407_142237 2024/04/07 19:44:13 - mmengine - INFO - Epoch(train) [11][1700/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 4:00:29 time: 4.5015 data_time: 0.7202 memory: 28836 grad_norm: 21.6303 loss: 6.3267 loss_cls: 0.7319 loss_bbox: 0.2785 d0.loss_cls: 0.8463 d0.loss_bbox: 0.3117 d1.loss_cls: 0.7926 d1.loss_bbox: 0.2929 d2.loss_cls: 0.7588 d2.loss_bbox: 0.2849 d3.loss_cls: 0.7355 d3.loss_bbox: 0.2817 d4.loss_cls: 0.7353 d4.loss_bbox: 0.2764 2024/04/07 19:47:49 - mmengine - INFO - Epoch(train) [11][1750/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:56:32 time: 4.3236 data_time: 1.1089 memory: 28829 grad_norm: 22.9028 loss: 6.2928 loss_cls: 0.7289 loss_bbox: 0.2757 d0.loss_cls: 0.8436 d0.loss_bbox: 0.3011 d1.loss_cls: 0.7826 d1.loss_bbox: 0.2851 d2.loss_cls: 0.7633 d2.loss_bbox: 0.2790 d3.loss_cls: 0.7411 d3.loss_bbox: 0.2773 d4.loss_cls: 0.7394 d4.loss_bbox: 0.2757 2024/04/07 19:51:39 - mmengine - INFO - Epoch(train) [11][1800/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:52:46 time: 4.6103 data_time: 0.2474 memory: 28359 grad_norm: 21.2715 loss: 6.1426 loss_cls: 0.7096 loss_bbox: 0.2622 d0.loss_cls: 0.8441 d0.loss_bbox: 0.2951 d1.loss_cls: 0.7774 d1.loss_bbox: 0.2733 d2.loss_cls: 0.7382 d2.loss_bbox: 0.2649 d3.loss_cls: 0.7236 d3.loss_bbox: 0.2678 d4.loss_cls: 0.7212 d4.loss_bbox: 0.2652 2024/04/07 19:55:40 - mmengine - INFO - Epoch(train) [11][1850/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:49:07 time: 4.8078 data_time: 0.2943 memory: 28197 grad_norm: 21.5789 loss: 5.9061 loss_cls: 0.6718 loss_bbox: 0.2611 d0.loss_cls: 0.8222 d0.loss_bbox: 0.2853 d1.loss_cls: 0.7437 d1.loss_bbox: 0.2740 d2.loss_cls: 0.7018 d2.loss_bbox: 0.2656 d3.loss_cls: 0.6847 d3.loss_bbox: 0.2614 d4.loss_cls: 0.6704 d4.loss_bbox: 0.2641 2024/04/07 19:58:59 - mmengine - INFO - Epoch(train) [11][1900/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:44:59 time: 3.9912 data_time: 0.2537 memory: 28566 grad_norm: 22.1575 loss: 6.1133 loss_cls: 0.7148 loss_bbox: 0.2637 d0.loss_cls: 0.8389 d0.loss_bbox: 0.2814 d1.loss_cls: 0.7694 d1.loss_bbox: 0.2711 d2.loss_cls: 0.7344 d2.loss_bbox: 0.2692 d3.loss_cls: 0.7238 d3.loss_bbox: 0.2653 d4.loss_cls: 0.7197 d4.loss_bbox: 0.2616 2024/04/07 20:02:54 - mmengine - INFO - Epoch(train) [11][1950/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:41:16 time: 4.7002 data_time: 0.2648 memory: 28318 grad_norm: 22.4933 loss: 6.4938 loss_cls: 0.7611 loss_bbox: 0.2754 d0.loss_cls: 0.8792 d0.loss_bbox: 0.3202 d1.loss_cls: 0.8207 d1.loss_bbox: 0.2927 d2.loss_cls: 0.7842 d2.loss_bbox: 0.2820 d3.loss_cls: 0.7685 d3.loss_bbox: 0.2735 d4.loss_cls: 0.7575 d4.loss_bbox: 0.2788 2024/04/07 20:06:32 - mmengine - INFO - Epoch(train) [11][2000/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:37:22 time: 4.3453 data_time: 0.3125 memory: 29654 grad_norm: 21.7231 loss: 5.9605 loss_cls: 0.6893 loss_bbox: 0.2686 d0.loss_cls: 0.8049 d0.loss_bbox: 0.2869 d1.loss_cls: 0.7419 d1.loss_bbox: 0.2732 d2.loss_cls: 0.7182 d2.loss_bbox: 0.2721 d3.loss_cls: 0.6973 d3.loss_bbox: 0.2607 d4.loss_cls: 0.6891 d4.loss_bbox: 0.2582 2024/04/07 20:10:23 - mmengine - INFO - Epoch(train) [11][2050/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:33:36 time: 4.6335 data_time: 0.3856 memory: 28152 grad_norm: 21.1199 loss: 6.1331 loss_cls: 0.7103 loss_bbox: 0.2739 d0.loss_cls: 0.8366 d0.loss_bbox: 0.2998 d1.loss_cls: 0.7704 d1.loss_bbox: 0.2829 d2.loss_cls: 0.7208 d2.loss_bbox: 0.2734 d3.loss_cls: 0.7083 d3.loss_bbox: 0.2762 d4.loss_cls: 0.7088 d4.loss_bbox: 0.2716 2024/04/07 20:14:07 - mmengine - INFO - Epoch(train) [11][2100/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:29:46 time: 4.4829 data_time: 0.6682 memory: 29293 grad_norm: 21.6731 loss: 6.1986 loss_cls: 0.7163 loss_bbox: 0.2705 d0.loss_cls: 0.8479 d0.loss_bbox: 0.2969 d1.loss_cls: 0.7854 d1.loss_bbox: 0.2829 d2.loss_cls: 0.7405 d2.loss_bbox: 0.2769 d3.loss_cls: 0.7212 d3.loss_bbox: 0.2744 d4.loss_cls: 0.7150 d4.loss_bbox: 0.2707 2024/04/07 20:18:12 - mmengine - INFO - Epoch(train) [11][2150/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:26:09 time: 4.8960 data_time: 0.3010 memory: 28359 grad_norm: 21.5690 loss: 6.0942 loss_cls: 0.7139 loss_bbox: 0.2544 d0.loss_cls: 0.8578 d0.loss_bbox: 0.2708 d1.loss_cls: 0.7830 d1.loss_bbox: 0.2633 d2.loss_cls: 0.7442 d2.loss_bbox: 0.2513 d3.loss_cls: 0.7242 d3.loss_bbox: 0.2551 d4.loss_cls: 0.7223 d4.loss_bbox: 0.2538 2024/04/07 20:22:07 - mmengine - INFO - Epoch(train) [11][2200/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:22:25 time: 4.6929 data_time: 1.1922 memory: 27894 grad_norm: 22.2633 loss: 6.4122 loss_cls: 0.7431 loss_bbox: 0.2748 d0.loss_cls: 0.8749 d0.loss_bbox: 0.3123 d1.loss_cls: 0.8119 d1.loss_bbox: 0.2849 d2.loss_cls: 0.7712 d2.loss_bbox: 0.2841 d3.loss_cls: 0.7522 d3.loss_bbox: 0.2818 d4.loss_cls: 0.7456 d4.loss_bbox: 0.2754 2024/04/07 20:25:33 - mmengine - INFO - Epoch(train) [11][2250/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:18:25 time: 4.1261 data_time: 1.7425 memory: 29768 grad_norm: 21.1890 loss: 6.1565 loss_cls: 0.7368 loss_bbox: 0.2519 d0.loss_cls: 0.8546 d0.loss_bbox: 0.2815 d1.loss_cls: 0.7862 d1.loss_bbox: 0.2594 d2.loss_cls: 0.7535 d2.loss_bbox: 0.2548 d3.loss_cls: 0.7436 d3.loss_bbox: 0.2532 d4.loss_cls: 0.7302 d4.loss_bbox: 0.2508 2024/04/07 20:29:23 - mmengine - INFO - Epoch(train) [11][2300/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:14:38 time: 4.5961 data_time: 1.3596 memory: 27942 grad_norm: 20.9708 loss: 6.0329 loss_cls: 0.6944 loss_bbox: 0.2632 d0.loss_cls: 0.8217 d0.loss_bbox: 0.2879 d1.loss_cls: 0.7558 d1.loss_bbox: 0.2790 d2.loss_cls: 0.7165 d2.loss_bbox: 0.2738 d3.loss_cls: 0.7101 d3.loss_bbox: 0.2655 d4.loss_cls: 0.7018 d4.loss_bbox: 0.2629 2024/04/07 20:33:03 - mmengine - INFO - Epoch(train) [11][2350/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:10:46 time: 4.3929 data_time: 0.9164 memory: 28254 grad_norm: 21.5629 loss: 5.8060 loss_cls: 0.6760 loss_bbox: 0.2508 d0.loss_cls: 0.8059 d0.loss_bbox: 0.2765 d1.loss_cls: 0.7327 d1.loss_bbox: 0.2559 d2.loss_cls: 0.6936 d2.loss_bbox: 0.2502 d3.loss_cls: 0.6827 d3.loss_bbox: 0.2523 d4.loss_cls: 0.6811 d4.loss_bbox: 0.2482 2024/04/07 20:36:44 - mmengine - INFO - Epoch(train) [11][2400/2432] base_lr: 5.0000e-05 lr: 5.0000e-05 eta: 3:06:55 time: 4.4198 data_time: 0.6237 memory: 28196 grad_norm: 21.3945 loss: 6.1970 loss_cls: 0.7003 loss_bbox: 0.2784 d0.loss_cls: 0.8484 d0.loss_bbox: 0.3121 d1.loss_cls: 0.7727 d1.loss_bbox: 0.2888 d2.loss_cls: 0.7333 d2.loss_bbox: 0.2837 d3.loss_cls: 0.7169 d3.loss_bbox: 0.2776 d4.loss_cls: 0.7078 d4.loss_bbox: 0.2770 2024/04/07 20:38:36 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof-full_20240407_142237 2024/04/07 20:38:36 - mmengine - INFO - Saving checkpoint at 11 epochs 2024/04/07 20:43:26 - mmengine - INFO - Epoch(train) [12][ 50/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 3:00:49 time: 5.4927 data_time: 2.2230 memory: 29384 grad_norm: 22.3255 loss: 6.1213 loss_cls: 0.7126 loss_bbox: 0.2605 d0.loss_cls: 0.8434 d0.loss_bbox: 0.2898 d1.loss_cls: 0.7740 d1.loss_bbox: 0.2707 d2.loss_cls: 0.7472 d2.loss_bbox: 0.2604 d3.loss_cls: 0.7258 d3.loss_bbox: 0.2612 d4.loss_cls: 0.7107 d4.loss_bbox: 0.2649 2024/04/07 20:47:24 - mmengine - INFO - Epoch(train) [12][ 100/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:57:06 time: 4.7551 data_time: 0.2674 memory: 28930 grad_norm: 20.9947 loss: 6.0604 loss_cls: 0.6905 loss_bbox: 0.2751 d0.loss_cls: 0.8331 d0.loss_bbox: 0.2976 d1.loss_cls: 0.7533 d1.loss_bbox: 0.2810 d2.loss_cls: 0.7223 d2.loss_bbox: 0.2672 d3.loss_cls: 0.7049 d3.loss_bbox: 0.2682 d4.loss_cls: 0.6990 d4.loss_bbox: 0.2682 2024/04/07 20:51:01 - mmengine - INFO - Epoch(train) [12][ 150/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:53:13 time: 4.3425 data_time: 0.2438 memory: 28678 grad_norm: 21.5179 loss: 5.9446 loss_cls: 0.6778 loss_bbox: 0.2637 d0.loss_cls: 0.8174 d0.loss_bbox: 0.2848 d1.loss_cls: 0.7559 d1.loss_bbox: 0.2646 d2.loss_cls: 0.7130 d2.loss_bbox: 0.2664 d3.loss_cls: 0.6887 d3.loss_bbox: 0.2639 d4.loss_cls: 0.6889 d4.loss_bbox: 0.2594 2024/04/07 20:54:48 - mmengine - INFO - Epoch(train) [12][ 200/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:49:25 time: 4.5314 data_time: 0.5508 memory: 28918 grad_norm: 21.0962 loss: 6.1391 loss_cls: 0.7046 loss_bbox: 0.2712 d0.loss_cls: 0.8419 d0.loss_bbox: 0.2988 d1.loss_cls: 0.7714 d1.loss_bbox: 0.2793 d2.loss_cls: 0.7366 d2.loss_bbox: 0.2786 d3.loss_cls: 0.7191 d3.loss_bbox: 0.2697 d4.loss_cls: 0.7006 d4.loss_bbox: 0.2673 2024/04/07 20:58:13 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof-full_20240407_142237 2024/04/07 20:58:18 - mmengine - INFO - Epoch(train) [12][ 250/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:45:30 time: 4.1999 data_time: 0.5582 memory: 28735 grad_norm: 20.9927 loss: 5.8675 loss_cls: 0.6757 loss_bbox: 0.2634 d0.loss_cls: 0.7915 d0.loss_bbox: 0.2902 d1.loss_cls: 0.7299 d1.loss_bbox: 0.2742 d2.loss_cls: 0.7011 d2.loss_bbox: 0.2593 d3.loss_cls: 0.6833 d3.loss_bbox: 0.2624 d4.loss_cls: 0.6744 d4.loss_bbox: 0.2621 2024/04/07 21:02:19 - mmengine - INFO - Epoch(train) [12][ 300/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:41:48 time: 4.8223 data_time: 0.4152 memory: 29146 grad_norm: 21.2595 loss: 6.1393 loss_cls: 0.7085 loss_bbox: 0.2695 d0.loss_cls: 0.8493 d0.loss_bbox: 0.2993 d1.loss_cls: 0.7671 d1.loss_bbox: 0.2766 d2.loss_cls: 0.7301 d2.loss_bbox: 0.2783 d3.loss_cls: 0.7093 d3.loss_bbox: 0.2735 d4.loss_cls: 0.7076 d4.loss_bbox: 0.2702 2024/04/07 21:06:05 - mmengine - INFO - Epoch(train) [12][ 350/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:37:59 time: 4.5180 data_time: 1.3037 memory: 27815 grad_norm: 22.3463 loss: 5.9077 loss_cls: 0.6877 loss_bbox: 0.2514 d0.loss_cls: 0.8157 d0.loss_bbox: 0.2875 d1.loss_cls: 0.7471 d1.loss_bbox: 0.2628 d2.loss_cls: 0.7159 d2.loss_bbox: 0.2511 d3.loss_cls: 0.6961 d3.loss_bbox: 0.2510 d4.loss_cls: 0.6898 d4.loss_bbox: 0.2518 2024/04/07 21:09:56 - mmengine - INFO - Epoch(train) [12][ 400/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:34:13 time: 4.6271 data_time: 0.2640 memory: 28520 grad_norm: 21.1433 loss: 6.1567 loss_cls: 0.7156 loss_bbox: 0.2666 d0.loss_cls: 0.8422 d0.loss_bbox: 0.3015 d1.loss_cls: 0.7745 d1.loss_bbox: 0.2861 d2.loss_cls: 0.7314 d2.loss_bbox: 0.2734 d3.loss_cls: 0.7224 d3.loss_bbox: 0.2638 d4.loss_cls: 0.7127 d4.loss_bbox: 0.2665 2024/04/07 21:13:48 - mmengine - INFO - Epoch(train) [12][ 450/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:30:27 time: 4.6350 data_time: 0.2710 memory: 27788 grad_norm: 21.4711 loss: 6.3703 loss_cls: 0.7559 loss_bbox: 0.2662 d0.loss_cls: 0.8774 d0.loss_bbox: 0.2821 d1.loss_cls: 0.8136 d1.loss_bbox: 0.2719 d2.loss_cls: 0.7791 d2.loss_bbox: 0.2632 d3.loss_cls: 0.7735 d3.loss_bbox: 0.2608 d4.loss_cls: 0.7675 d4.loss_bbox: 0.2592 2024/04/07 21:17:29 - mmengine - INFO - Epoch(train) [12][ 500/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:26:37 time: 4.4140 data_time: 0.2720 memory: 28330 grad_norm: 21.3955 loss: 6.3118 loss_cls: 0.7241 loss_bbox: 0.2805 d0.loss_cls: 0.8526 d0.loss_bbox: 0.3086 d1.loss_cls: 0.7775 d1.loss_bbox: 0.3035 d2.loss_cls: 0.7557 d2.loss_bbox: 0.2855 d3.loss_cls: 0.7344 d3.loss_bbox: 0.2818 d4.loss_cls: 0.7259 d4.loss_bbox: 0.2818 2024/04/07 21:21:31 - mmengine - INFO - Epoch(train) [12][ 550/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:22:54 time: 4.8438 data_time: 0.2485 memory: 28062 grad_norm: 20.6297 loss: 5.9539 loss_cls: 0.6881 loss_bbox: 0.2665 d0.loss_cls: 0.7909 d0.loss_bbox: 0.2976 d1.loss_cls: 0.7339 d1.loss_bbox: 0.2799 d2.loss_cls: 0.7080 d2.loss_bbox: 0.2707 d3.loss_cls: 0.6962 d3.loss_bbox: 0.2657 d4.loss_cls: 0.6893 d4.loss_bbox: 0.2670 2024/04/07 21:25:02 - mmengine - INFO - Epoch(train) [12][ 600/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:19:00 time: 4.2154 data_time: 0.6408 memory: 28685 grad_norm: 21.5571 loss: 6.1334 loss_cls: 0.7081 loss_bbox: 0.2663 d0.loss_cls: 0.8478 d0.loss_bbox: 0.2901 d1.loss_cls: 0.7789 d1.loss_bbox: 0.2747 d2.loss_cls: 0.7478 d2.loss_bbox: 0.2667 d3.loss_cls: 0.7147 d3.loss_bbox: 0.2596 d4.loss_cls: 0.7145 d4.loss_bbox: 0.2643 2024/04/07 21:28:54 - mmengine - INFO - Epoch(train) [12][ 650/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:15:14 time: 4.6425 data_time: 0.4922 memory: 29309 grad_norm: 21.6767 loss: 6.2910 loss_cls: 0.7344 loss_bbox: 0.2707 d0.loss_cls: 0.8494 d0.loss_bbox: 0.3094 d1.loss_cls: 0.7891 d1.loss_bbox: 0.2834 d2.loss_cls: 0.7564 d2.loss_bbox: 0.2768 d3.loss_cls: 0.7392 d3.loss_bbox: 0.2745 d4.loss_cls: 0.7349 d4.loss_bbox: 0.2727 2024/04/07 21:32:46 - mmengine - INFO - Epoch(train) [12][ 700/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:11:28 time: 4.6460 data_time: 0.2528 memory: 29469 grad_norm: 21.4180 loss: 6.2102 loss_cls: 0.7137 loss_bbox: 0.2828 d0.loss_cls: 0.8404 d0.loss_bbox: 0.3109 d1.loss_cls: 0.7683 d1.loss_bbox: 0.2905 d2.loss_cls: 0.7292 d2.loss_bbox: 0.2857 d3.loss_cls: 0.7198 d3.loss_bbox: 0.2818 d4.loss_cls: 0.7048 d4.loss_bbox: 0.2824 2024/04/07 21:36:29 - mmengine - INFO - Epoch(train) [12][ 750/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:07:39 time: 4.4502 data_time: 0.2802 memory: 28732 grad_norm: 20.7700 loss: 6.0603 loss_cls: 0.7137 loss_bbox: 0.2566 d0.loss_cls: 0.8276 d0.loss_bbox: 0.2744 d1.loss_cls: 0.7727 d1.loss_bbox: 0.2628 d2.loss_cls: 0.7421 d2.loss_bbox: 0.2585 d3.loss_cls: 0.7211 d3.loss_bbox: 0.2572 d4.loss_cls: 0.7198 d4.loss_bbox: 0.2538 2024/04/07 21:40:32 - mmengine - INFO - Epoch(train) [12][ 800/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:03:55 time: 4.8642 data_time: 0.5535 memory: 29389 grad_norm: 20.8351 loss: 6.3828 loss_cls: 0.7336 loss_bbox: 0.2924 d0.loss_cls: 0.8480 d0.loss_bbox: 0.3192 d1.loss_cls: 0.7811 d1.loss_bbox: 0.3054 d2.loss_cls: 0.7529 d2.loss_bbox: 0.2979 d3.loss_cls: 0.7317 d3.loss_bbox: 0.2965 d4.loss_cls: 0.7298 d4.loss_bbox: 0.2943 2024/04/07 21:44:09 - mmengine - INFO - Epoch(train) [12][ 850/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 2:00:05 time: 4.3502 data_time: 0.2682 memory: 29017 grad_norm: 20.9715 loss: 6.1608 loss_cls: 0.7043 loss_bbox: 0.2788 d0.loss_cls: 0.8244 d0.loss_bbox: 0.3079 d1.loss_cls: 0.7623 d1.loss_bbox: 0.2923 d2.loss_cls: 0.7273 d2.loss_bbox: 0.2852 d3.loss_cls: 0.7117 d3.loss_bbox: 0.2818 d4.loss_cls: 0.7056 d4.loss_bbox: 0.2793 2024/04/07 21:48:02 - mmengine - INFO - Epoch(train) [12][ 900/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:56:18 time: 4.6496 data_time: 0.6317 memory: 28304 grad_norm: 21.2256 loss: 6.1282 loss_cls: 0.7129 loss_bbox: 0.2669 d0.loss_cls: 0.8290 d0.loss_bbox: 0.3040 d1.loss_cls: 0.7635 d1.loss_bbox: 0.2803 d2.loss_cls: 0.7348 d2.loss_bbox: 0.2702 d3.loss_cls: 0.7224 d3.loss_bbox: 0.2663 d4.loss_cls: 0.7104 d4.loss_bbox: 0.2674 2024/04/07 21:51:39 - mmengine - INFO - Epoch(train) [12][ 950/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:52:28 time: 4.3446 data_time: 0.2573 memory: 28926 grad_norm: 20.4526 loss: 6.1172 loss_cls: 0.7110 loss_bbox: 0.2597 d0.loss_cls: 0.8417 d0.loss_bbox: 0.2902 d1.loss_cls: 0.7768 d1.loss_bbox: 0.2716 d2.loss_cls: 0.7388 d2.loss_bbox: 0.2665 d3.loss_cls: 0.7199 d3.loss_bbox: 0.2632 d4.loss_cls: 0.7139 d4.loss_bbox: 0.2639 2024/04/07 21:55:30 - mmengine - INFO - Epoch(train) [12][1000/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:48:41 time: 4.6188 data_time: 1.0554 memory: 29354 grad_norm: 22.2947 loss: 6.2451 loss_cls: 0.7244 loss_bbox: 0.2702 d0.loss_cls: 0.8532 d0.loss_bbox: 0.2957 d1.loss_cls: 0.7967 d1.loss_bbox: 0.2761 d2.loss_cls: 0.7538 d2.loss_bbox: 0.2742 d3.loss_cls: 0.7319 d3.loss_bbox: 0.2697 d4.loss_cls: 0.7299 d4.loss_bbox: 0.2694 2024/04/07 21:59:41 - mmengine - INFO - Epoch(train) [12][1050/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:44:59 time: 5.0279 data_time: 0.2688 memory: 28842 grad_norm: 21.3915 loss: 6.1735 loss_cls: 0.7173 loss_bbox: 0.2717 d0.loss_cls: 0.8511 d0.loss_bbox: 0.2793 d1.loss_cls: 0.7791 d1.loss_bbox: 0.2737 d2.loss_cls: 0.7453 d2.loss_bbox: 0.2699 d3.loss_cls: 0.7229 d3.loss_bbox: 0.2703 d4.loss_cls: 0.7207 d4.loss_bbox: 0.2722 2024/04/07 22:02:51 - mmengine - INFO - Epoch(train) [12][1100/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:41:02 time: 3.7811 data_time: 0.2744 memory: 29180 grad_norm: 22.3834 loss: 6.1226 loss_cls: 0.7167 loss_bbox: 0.2739 d0.loss_cls: 0.8167 d0.loss_bbox: 0.3014 d1.loss_cls: 0.7526 d1.loss_bbox: 0.2752 d2.loss_cls: 0.7345 d2.loss_bbox: 0.2732 d3.loss_cls: 0.7162 d3.loss_bbox: 0.2762 d4.loss_cls: 0.7184 d4.loss_bbox: 0.2677 2024/04/07 22:06:57 - mmengine - INFO - Epoch(train) [12][1150/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:37:19 time: 4.9188 data_time: 0.2700 memory: 28420 grad_norm: 21.6165 loss: 5.9992 loss_cls: 0.6851 loss_bbox: 0.2779 d0.loss_cls: 0.8086 d0.loss_bbox: 0.2951 d1.loss_cls: 0.7453 d1.loss_bbox: 0.2770 d2.loss_cls: 0.7129 d2.loss_bbox: 0.2780 d3.loss_cls: 0.6880 d3.loss_bbox: 0.2729 d4.loss_cls: 0.6817 d4.loss_bbox: 0.2766 2024/04/07 22:11:14 - mmengine - INFO - Epoch(train) [12][1200/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:33:37 time: 5.1472 data_time: 0.2897 memory: 29024 grad_norm: 21.7809 loss: 6.1367 loss_cls: 0.7005 loss_bbox: 0.2748 d0.loss_cls: 0.8365 d0.loss_bbox: 0.3024 d1.loss_cls: 0.7696 d1.loss_bbox: 0.2862 d2.loss_cls: 0.7265 d2.loss_bbox: 0.2792 d3.loss_cls: 0.7073 d3.loss_bbox: 0.2763 d4.loss_cls: 0.6997 d4.loss_bbox: 0.2777 2024/04/07 22:14:49 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof-full_20240407_142237 2024/04/07 22:14:54 - mmengine - INFO - Epoch(train) [12][1250/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:29:47 time: 4.4058 data_time: 0.3007 memory: 27965 grad_norm: 21.2876 loss: 6.3198 loss_cls: 0.7398 loss_bbox: 0.2652 d0.loss_cls: 0.8883 d0.loss_bbox: 0.2876 d1.loss_cls: 0.7999 d1.loss_bbox: 0.2735 d2.loss_cls: 0.7799 d2.loss_bbox: 0.2648 d3.loss_cls: 0.7508 d3.loss_bbox: 0.2658 d4.loss_cls: 0.7394 d4.loss_bbox: 0.2648 2024/04/07 22:18:27 - mmengine - INFO - Epoch(train) [12][1300/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:25:57 time: 4.2638 data_time: 0.2606 memory: 28043 grad_norm: 20.3998 loss: 5.9811 loss_cls: 0.7091 loss_bbox: 0.2428 d0.loss_cls: 0.8258 d0.loss_bbox: 0.2742 d1.loss_cls: 0.7633 d1.loss_bbox: 0.2610 d2.loss_cls: 0.7387 d2.loss_bbox: 0.2519 d3.loss_cls: 0.7131 d3.loss_bbox: 0.2457 d4.loss_cls: 0.7089 d4.loss_bbox: 0.2466 2024/04/07 22:22:41 - mmengine - INFO - Epoch(train) [12][1350/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:22:13 time: 5.0639 data_time: 0.2524 memory: 28360 grad_norm: 21.1869 loss: 6.1259 loss_cls: 0.7216 loss_bbox: 0.2515 d0.loss_cls: 0.8583 d0.loss_bbox: 0.2774 d1.loss_cls: 0.7924 d1.loss_bbox: 0.2608 d2.loss_cls: 0.7480 d2.loss_bbox: 0.2586 d3.loss_cls: 0.7295 d3.loss_bbox: 0.2502 d4.loss_cls: 0.7280 d4.loss_bbox: 0.2496 2024/04/07 22:26:06 - mmengine - INFO - Epoch(train) [12][1400/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:18:22 time: 4.1044 data_time: 0.2945 memory: 27680 grad_norm: 20.7675 loss: 5.9679 loss_cls: 0.6879 loss_bbox: 0.2581 d0.loss_cls: 0.8303 d0.loss_bbox: 0.2984 d1.loss_cls: 0.7620 d1.loss_bbox: 0.2676 d2.loss_cls: 0.7115 d2.loss_bbox: 0.2600 d3.loss_cls: 0.6945 d3.loss_bbox: 0.2564 d4.loss_cls: 0.6836 d4.loss_bbox: 0.2577 2024/04/07 22:30:04 - mmengine - INFO - Epoch(train) [12][1450/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:14:36 time: 4.7756 data_time: 0.2655 memory: 28744 grad_norm: 22.8803 loss: 6.0432 loss_cls: 0.7135 loss_bbox: 0.2556 d0.loss_cls: 0.8390 d0.loss_bbox: 0.2765 d1.loss_cls: 0.7656 d1.loss_bbox: 0.2679 d2.loss_cls: 0.7223 d2.loss_bbox: 0.2577 d3.loss_cls: 0.7152 d3.loss_bbox: 0.2559 d4.loss_cls: 0.7147 d4.loss_bbox: 0.2593 2024/04/07 22:33:58 - mmengine - INFO - Epoch(train) [12][1500/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:10:48 time: 4.6684 data_time: 0.7442 memory: 28734 grad_norm: 21.1625 loss: 6.1561 loss_cls: 0.7065 loss_bbox: 0.2727 d0.loss_cls: 0.8373 d0.loss_bbox: 0.3131 d1.loss_cls: 0.7610 d1.loss_bbox: 0.2927 d2.loss_cls: 0.7258 d2.loss_bbox: 0.2845 d3.loss_cls: 0.7096 d3.loss_bbox: 0.2776 d4.loss_cls: 0.7010 d4.loss_bbox: 0.2743 2024/04/07 22:37:31 - mmengine - INFO - Epoch(train) [12][1550/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:06:58 time: 4.2643 data_time: 1.3494 memory: 27977 grad_norm: 22.3898 loss: 6.1294 loss_cls: 0.7096 loss_bbox: 0.2713 d0.loss_cls: 0.8381 d0.loss_bbox: 0.2871 d1.loss_cls: 0.7764 d1.loss_bbox: 0.2784 d2.loss_cls: 0.7352 d2.loss_bbox: 0.2738 d3.loss_cls: 0.7139 d3.loss_bbox: 0.2662 d4.loss_cls: 0.7137 d4.loss_bbox: 0.2658 2024/04/07 22:41:21 - mmengine - INFO - Epoch(train) [12][1600/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 1:03:11 time: 4.6067 data_time: 0.2428 memory: 29617 grad_norm: 20.6939 loss: 6.0170 loss_cls: 0.6880 loss_bbox: 0.2585 d0.loss_cls: 0.8460 d0.loss_bbox: 0.2825 d1.loss_cls: 0.7708 d1.loss_bbox: 0.2675 d2.loss_cls: 0.7209 d2.loss_bbox: 0.2682 d3.loss_cls: 0.7050 d3.loss_bbox: 0.2594 d4.loss_cls: 0.6896 d4.loss_bbox: 0.2605 2024/04/07 22:45:04 - mmengine - INFO - Epoch(train) [12][1650/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:59:22 time: 4.4518 data_time: 0.2620 memory: 28640 grad_norm: 20.4610 loss: 6.1685 loss_cls: 0.7134 loss_bbox: 0.2728 d0.loss_cls: 0.8454 d0.loss_bbox: 0.2947 d1.loss_cls: 0.7703 d1.loss_bbox: 0.2811 d2.loss_cls: 0.7385 d2.loss_bbox: 0.2746 d3.loss_cls: 0.7176 d3.loss_bbox: 0.2717 d4.loss_cls: 0.7181 d4.loss_bbox: 0.2703 2024/04/07 22:48:55 - mmengine - INFO - Epoch(train) [12][1700/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:55:35 time: 4.6259 data_time: 1.0260 memory: 28265 grad_norm: 20.9994 loss: 5.9256 loss_cls: 0.6717 loss_bbox: 0.2783 d0.loss_cls: 0.7923 d0.loss_bbox: 0.3095 d1.loss_cls: 0.7229 d1.loss_bbox: 0.2881 d2.loss_cls: 0.6860 d2.loss_bbox: 0.2838 d3.loss_cls: 0.6706 d3.loss_bbox: 0.2814 d4.loss_cls: 0.6618 d4.loss_bbox: 0.2792 2024/04/07 22:52:51 - mmengine - INFO - Epoch(train) [12][1750/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:51:48 time: 4.7092 data_time: 0.3276 memory: 29583 grad_norm: 21.4117 loss: 5.8492 loss_cls: 0.6559 loss_bbox: 0.2726 d0.loss_cls: 0.7935 d0.loss_bbox: 0.3017 d1.loss_cls: 0.7185 d1.loss_bbox: 0.2795 d2.loss_cls: 0.6902 d2.loss_bbox: 0.2700 d3.loss_cls: 0.6670 d3.loss_bbox: 0.2688 d4.loss_cls: 0.6645 d4.loss_bbox: 0.2671 2024/04/07 22:56:28 - mmengine - INFO - Epoch(train) [12][1800/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:47:59 time: 4.3465 data_time: 0.3385 memory: 29009 grad_norm: 21.9047 loss: 5.9975 loss_cls: 0.7032 loss_bbox: 0.2510 d0.loss_cls: 0.8448 d0.loss_bbox: 0.2677 d1.loss_cls: 0.7790 d1.loss_bbox: 0.2627 d2.loss_cls: 0.7280 d2.loss_bbox: 0.2565 d3.loss_cls: 0.7044 d3.loss_bbox: 0.2505 d4.loss_cls: 0.7019 d4.loss_bbox: 0.2478 2024/04/07 23:00:36 - mmengine - INFO - Epoch(train) [12][1850/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:44:13 time: 4.9466 data_time: 0.4580 memory: 28190 grad_norm: 21.0227 loss: 6.1195 loss_cls: 0.7049 loss_bbox: 0.2750 d0.loss_cls: 0.8244 d0.loss_bbox: 0.3047 d1.loss_cls: 0.7563 d1.loss_bbox: 0.2859 d2.loss_cls: 0.7282 d2.loss_bbox: 0.2773 d3.loss_cls: 0.7133 d3.loss_bbox: 0.2749 d4.loss_cls: 0.7033 d4.loss_bbox: 0.2711 2024/04/07 23:04:25 - mmengine - INFO - Epoch(train) [12][1900/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:40:25 time: 4.5795 data_time: 0.2715 memory: 28561 grad_norm: 21.2693 loss: 6.0654 loss_cls: 0.7027 loss_bbox: 0.2663 d0.loss_cls: 0.8494 d0.loss_bbox: 0.2986 d1.loss_cls: 0.7742 d1.loss_bbox: 0.2719 d2.loss_cls: 0.7193 d2.loss_bbox: 0.2577 d3.loss_cls: 0.6957 d3.loss_bbox: 0.2657 d4.loss_cls: 0.7007 d4.loss_bbox: 0.2632 2024/04/07 23:08:09 - mmengine - INFO - Epoch(train) [12][1950/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:36:37 time: 4.4965 data_time: 0.2758 memory: 29074 grad_norm: 22.0031 loss: 6.1766 loss_cls: 0.7245 loss_bbox: 0.2703 d0.loss_cls: 0.8383 d0.loss_bbox: 0.2915 d1.loss_cls: 0.7721 d1.loss_bbox: 0.2716 d2.loss_cls: 0.7433 d2.loss_bbox: 0.2700 d3.loss_cls: 0.7321 d3.loss_bbox: 0.2670 d4.loss_cls: 0.7292 d4.loss_bbox: 0.2666 2024/04/07 23:11:52 - mmengine - INFO - Epoch(train) [12][2000/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:32:49 time: 4.4583 data_time: 0.3454 memory: 27982 grad_norm: 21.6891 loss: 6.0931 loss_cls: 0.7123 loss_bbox: 0.2636 d0.loss_cls: 0.8285 d0.loss_bbox: 0.2940 d1.loss_cls: 0.7757 d1.loss_bbox: 0.2757 d2.loss_cls: 0.7248 d2.loss_bbox: 0.2696 d3.loss_cls: 0.7182 d3.loss_bbox: 0.2607 d4.loss_cls: 0.7080 d4.loss_bbox: 0.2620 2024/04/07 23:15:42 - mmengine - INFO - Epoch(train) [12][2050/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:29:01 time: 4.5949 data_time: 1.7574 memory: 28201 grad_norm: 21.3195 loss: 5.9308 loss_cls: 0.6939 loss_bbox: 0.2542 d0.loss_cls: 0.8121 d0.loss_bbox: 0.2841 d1.loss_cls: 0.7428 d1.loss_bbox: 0.2634 d2.loss_cls: 0.7186 d2.loss_bbox: 0.2605 d3.loss_cls: 0.6887 d3.loss_bbox: 0.2591 d4.loss_cls: 0.6943 d4.loss_bbox: 0.2592 2024/04/07 23:19:37 - mmengine - INFO - Epoch(train) [12][2100/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:25:13 time: 4.6998 data_time: 1.7299 memory: 29773 grad_norm: 21.0593 loss: 6.0156 loss_cls: 0.6985 loss_bbox: 0.2601 d0.loss_cls: 0.8282 d0.loss_bbox: 0.2851 d1.loss_cls: 0.7584 d1.loss_bbox: 0.2707 d2.loss_cls: 0.7278 d2.loss_bbox: 0.2671 d3.loss_cls: 0.7036 d3.loss_bbox: 0.2588 d4.loss_cls: 0.6997 d4.loss_bbox: 0.2577 2024/04/07 23:23:25 - mmengine - INFO - Epoch(train) [12][2150/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:21:25 time: 4.5566 data_time: 0.2540 memory: 28435 grad_norm: 20.8279 loss: 5.9400 loss_cls: 0.6876 loss_bbox: 0.2610 d0.loss_cls: 0.8035 d0.loss_bbox: 0.2976 d1.loss_cls: 0.7419 d1.loss_bbox: 0.2746 d2.loss_cls: 0.7080 d2.loss_bbox: 0.2634 d3.loss_cls: 0.6880 d3.loss_bbox: 0.2624 d4.loss_cls: 0.6889 d4.loss_bbox: 0.2629 2024/04/07 23:27:05 - mmengine - INFO - Epoch(train) [12][2200/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:17:37 time: 4.3920 data_time: 0.4874 memory: 28015 grad_norm: 21.4541 loss: 6.0061 loss_cls: 0.6991 loss_bbox: 0.2640 d0.loss_cls: 0.8371 d0.loss_bbox: 0.2843 d1.loss_cls: 0.7607 d1.loss_bbox: 0.2662 d2.loss_cls: 0.7202 d2.loss_bbox: 0.2628 d3.loss_cls: 0.7016 d3.loss_bbox: 0.2601 d4.loss_cls: 0.6904 d4.loss_bbox: 0.2596 2024/04/07 23:30:57 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof-full_20240407_142237 2024/04/07 23:31:02 - mmengine - INFO - Epoch(train) [12][2250/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:13:49 time: 4.7452 data_time: 0.2934 memory: 28481 grad_norm: 21.7981 loss: 6.1811 loss_cls: 0.7299 loss_bbox: 0.2574 d0.loss_cls: 0.8498 d0.loss_bbox: 0.2922 d1.loss_cls: 0.7981 d1.loss_bbox: 0.2695 d2.loss_cls: 0.7471 d2.loss_bbox: 0.2677 d3.loss_cls: 0.7259 d3.loss_bbox: 0.2616 d4.loss_cls: 0.7204 d4.loss_bbox: 0.2615 2024/04/07 23:34:51 - mmengine - INFO - Epoch(train) [12][2300/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:10:01 time: 4.5822 data_time: 0.8825 memory: 28361 grad_norm: 22.7415 loss: 6.0293 loss_cls: 0.6818 loss_bbox: 0.2807 d0.loss_cls: 0.8116 d0.loss_bbox: 0.3034 d1.loss_cls: 0.7505 d1.loss_bbox: 0.2842 d2.loss_cls: 0.7089 d2.loss_bbox: 0.2827 d3.loss_cls: 0.6918 d3.loss_bbox: 0.2758 d4.loss_cls: 0.6774 d4.loss_bbox: 0.2807 2024/04/07 23:38:15 - mmengine - INFO - Epoch(train) [12][2350/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:06:13 time: 4.0748 data_time: 0.2951 memory: 27856 grad_norm: 21.3151 loss: 5.9898 loss_cls: 0.6927 loss_bbox: 0.2673 d0.loss_cls: 0.8133 d0.loss_bbox: 0.2857 d1.loss_cls: 0.7485 d1.loss_bbox: 0.2689 d2.loss_cls: 0.7139 d2.loss_bbox: 0.2706 d3.loss_cls: 0.6957 d3.loss_bbox: 0.2682 d4.loss_cls: 0.6954 d4.loss_bbox: 0.2697 2024/04/07 23:42:21 - mmengine - INFO - Epoch(train) [12][2400/2432] base_lr: 5.0000e-06 lr: 5.0000e-06 eta: 0:02:25 time: 4.9223 data_time: 0.2415 memory: 28466 grad_norm: 21.8721 loss: 6.0513 loss_cls: 0.6761 loss_bbox: 0.2810 d0.loss_cls: 0.8263 d0.loss_bbox: 0.3243 d1.loss_cls: 0.7493 d1.loss_bbox: 0.2878 d2.loss_cls: 0.7112 d2.loss_bbox: 0.2791 d3.loss_cls: 0.6858 d3.loss_bbox: 0.2763 d4.loss_cls: 0.6779 d4.loss_bbox: 0.2761 2024/04/07 23:44:02 - mmengine - INFO - Exp name: mv-grounding_8xb12_embodiedscan-vg-9dof-full_20240407_142237 2024/04/07 23:44:02 - mmengine - INFO - Saving checkpoint at 12 epochs 2024/04/07 23:55:20 - mmengine - INFO - Epoch(val) [12][ 50/593] eta: 1:59:46 time: 13.2341 data_time: 12.2426 memory: 27977 2024/04/08 00:04:04 - mmengine - INFO - Epoch(val) [12][100/593] eta: 1:37:22 time: 10.4689 data_time: 9.4796 memory: 13559 2024/04/08 00:12:28 - mmengine - INFO - Epoch(val) [12][150/593] eta: 1:23:08 time: 10.0810 data_time: 9.1422 memory: 13545 2024/04/08 00:16:08 - mmengine - INFO - Epoch(val) [12][200/593] eta: 1:02:31 time: 4.3961 data_time: 3.5485 memory: 13552