# This file is part of COAT, and is distributed under the
# OSI-approved BSD 3-Clause License. See top-level LICENSE file or
# https://github.com/Kitware/COAT/blob/master/LICENSE for details.

from yacs.config import CfgNode as CN

_C = CN()

# -------------------------------------------------------- #
#                           Input                           #
# -------------------------------------------------------- #
_C.INPUT = CN()
_C.INPUT.DATASET = "CUHK-SYSU"
_C.INPUT.DATA_ROOT = "data/CUHK-SYSU"

# Size of the smallest side of the image
_C.INPUT.MIN_SIZE = 900
# Maximum size of the longer side of the image
_C.INPUT.MAX_SIZE = 1500

# Number of images per batch
_C.INPUT.BATCH_SIZE_TRAIN = 5
_C.INPUT.BATCH_SIZE_TEST = 1

# Number of data loading threads
_C.INPUT.NUM_WORKERS_TRAIN = 5
_C.INPUT.NUM_WORKERS_TEST = 1

# Image augmentation
_C.INPUT.IMAGE_CUTOUT = False
_C.INPUT.IMAGE_ERASE = False
_C.INPUT.IMAGE_MIXUP = False

# -------------------------------------------------------- #
#                           GRID                            #
# -------------------------------------------------------- #
_C.INPUT.IMAGE_GRID = False
_C.GRID = CN()
_C.GRID.ROTATE = 1
_C.GRID.OFFSET = 0
_C.GRID.RATIO = 0.5
_C.GRID.MODE = 1
_C.GRID.PROB = 0.5

# -------------------------------------------------------- #
#                          Solver                           #
# -------------------------------------------------------- #
_C.SOLVER = CN()
_C.SOLVER.MAX_EPOCHS = 13

# Learning rate settings
_C.SOLVER.BASE_LR = 0.003

# The epoch milestones at which to decrease the learning rate by GAMMA
_C.SOLVER.LR_DECAY_MILESTONES = [10, 14]
_C.SOLVER.GAMMA = 0.1

_C.SOLVER.WEIGHT_DECAY = 0.0005
_C.SOLVER.SGD_MOMENTUM = 0.9

# Loss weight of RPN regression
_C.SOLVER.LW_RPN_REG = 1
# Loss weight of RPN classification
_C.SOLVER.LW_RPN_CLS = 1

# Loss weights of Cascade R-CNN and Re-ID (OIM)
_C.SOLVER.LW_RCNN_REG_1ST = 10
_C.SOLVER.LW_RCNN_CLS_1ST = 1
_C.SOLVER.LW_RCNN_REG_2ND = 10
_C.SOLVER.LW_RCNN_CLS_2ND = 1
_C.SOLVER.LW_RCNN_REG_3RD = 10
_C.SOLVER.LW_RCNN_CLS_3RD = 1
_C.SOLVER.LW_RCNN_REID_2ND = 0.5
_C.SOLVER.LW_RCNN_REID_3RD = 0.5
# Loss weight of the box re-id softmax loss
_C.SOLVER.LW_RCNN_SOFTMAX_2ND = 0.5
_C.SOLVER.LW_RCNN_SOFTMAX_3RD = 0.5

# Set to a negative value to disable gradient clipping
_C.SOLVER.CLIP_GRADIENTS = 10.0

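# Illustrative sketch (not part of the original config; names below are
# assumptions): a training script would typically map the solver options above
# onto a standard SGD optimizer, a multi-step LR schedule, and gradient-norm
# clipping, roughly as follows:
#
#   optimizer = torch.optim.SGD(
#       model.parameters(),
#       lr=cfg.SOLVER.BASE_LR,
#       momentum=cfg.SOLVER.SGD_MOMENTUM,
#       weight_decay=cfg.SOLVER.WEIGHT_DECAY,
#   )
#   lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
#       optimizer,
#       milestones=cfg.SOLVER.LR_DECAY_MILESTONES,
#       gamma=cfg.SOLVER.GAMMA,
#   )
#   if cfg.SOLVER.CLIP_GRADIENTS > 0:
#       torch.nn.utils.clip_grad_norm_(model.parameters(), cfg.SOLVER.CLIP_GRADIENTS)
#
# Here `model` and `cfg` are hypothetical names; the actual wiring lives in the
# training code, not in this defaults file.
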
# -------------------------------------------------------- #
#                            RPN                            #
# -------------------------------------------------------- #
_C.MODEL = CN()
_C.MODEL.RPN = CN()
# NMS threshold used on RoIs
_C.MODEL.RPN.NMS_THRESH = 0.7
# Number of anchors per image used to train RPN
_C.MODEL.RPN.BATCH_SIZE_TRAIN = 256
# Target fraction of foreground examples per RPN minibatch
_C.MODEL.RPN.POS_FRAC_TRAIN = 0.5
# Overlap threshold for an anchor to be considered foreground (if >= POS_THRESH_TRAIN)
_C.MODEL.RPN.POS_THRESH_TRAIN = 0.7
# Overlap threshold for an anchor to be considered background (if < NEG_THRESH_TRAIN)
_C.MODEL.RPN.NEG_THRESH_TRAIN = 0.3
# Number of top scoring RPN RoIs to keep before applying NMS
_C.MODEL.RPN.PRE_NMS_TOPN_TRAIN = 12000
_C.MODEL.RPN.PRE_NMS_TOPN_TEST = 6000
# Number of top scoring RPN RoIs to keep after applying NMS
_C.MODEL.RPN.POST_NMS_TOPN_TRAIN = 2000
_C.MODEL.RPN.POST_NMS_TOPN_TEST = 300

# -------------------------------------------------------- #
#                         RoI head                          #
# -------------------------------------------------------- #
_C.MODEL.ROI_HEAD = CN()
# Whether to use a BN neck (i.e., batch normalization after the linear layer)
_C.MODEL.ROI_HEAD.BN_NECK = True
# Number of RoIs per image used to train RoI head
_C.MODEL.ROI_HEAD.BATCH_SIZE_TRAIN = 128
# Target fraction of foreground examples per RoI minibatch
_C.MODEL.ROI_HEAD.POS_FRAC_TRAIN = 0.25  # 0.5

_C.MODEL.ROI_HEAD.USE_DIFF_THRESH = True
# Overlap threshold for an RoI to be considered foreground (if >= POS_THRESH_TRAIN)
_C.MODEL.ROI_HEAD.POS_THRESH_TRAIN = 0.5
_C.MODEL.ROI_HEAD.POS_THRESH_TRAIN_2ND = 0.6
_C.MODEL.ROI_HEAD.POS_THRESH_TRAIN_3RD = 0.7
# Overlap threshold for an RoI to be considered background (if < NEG_THRESH_TRAIN)
_C.MODEL.ROI_HEAD.NEG_THRESH_TRAIN = 0.5
_C.MODEL.ROI_HEAD.NEG_THRESH_TRAIN_2ND = 0.6
_C.MODEL.ROI_HEAD.NEG_THRESH_TRAIN_3RD = 0.7
# Minimum score threshold
_C.MODEL.ROI_HEAD.SCORE_THRESH_TEST = 0.5
# NMS threshold used on boxes
_C.MODEL.ROI_HEAD.NMS_THRESH_TEST = 0.4
_C.MODEL.ROI_HEAD.NMS_THRESH_TEST_1ST = 0.4
_C.MODEL.ROI_HEAD.NMS_THRESH_TEST_2ND = 0.4
_C.MODEL.ROI_HEAD.NMS_THRESH_TEST_3RD = 0.5
# Maximum number of detected objects
_C.MODEL.ROI_HEAD.DETECTIONS_PER_IMAGE_TEST = 300

# -------------------------------------------------------- #
#                      Transformer head                     #
# -------------------------------------------------------- #
_C.MODEL.TRANSFORMER = CN()
_C.MODEL.TRANSFORMER.DIM_MODEL = 512
_C.MODEL.TRANSFORMER.ENCODER_LAYERS = 1
_C.MODEL.TRANSFORMER.N_HEAD = 8
_C.MODEL.TRANSFORMER.USE_OUTPUT_LAYER = False
_C.MODEL.TRANSFORMER.DROPOUT = 0.0
_C.MODEL.TRANSFORMER.USE_LOCAL_SHORTCUT = True
_C.MODEL.TRANSFORMER.USE_GLOBAL_SHORTCUT = True

_C.MODEL.TRANSFORMER.USE_DIFF_SCALE = True
_C.MODEL.TRANSFORMER.NAMES_1ST = ['scale1', 'scale2']
_C.MODEL.TRANSFORMER.NAMES_2ND = ['scale1', 'scale2']
_C.MODEL.TRANSFORMER.NAMES_3RD = ['scale1', 'scale2']
_C.MODEL.TRANSFORMER.KERNEL_SIZE_1ST = [(1, 1), (3, 3)]
_C.MODEL.TRANSFORMER.KERNEL_SIZE_2ND = [(1, 1), (3, 3)]
_C.MODEL.TRANSFORMER.KERNEL_SIZE_3RD = [(1, 1), (3, 3)]
_C.MODEL.TRANSFORMER.USE_MASK_1ST = False
_C.MODEL.TRANSFORMER.USE_MASK_2ND = True
_C.MODEL.TRANSFORMER.USE_MASK_3RD = True
_C.MODEL.TRANSFORMER.USE_PATCH2VEC = True

#### Feature masking and feature-level augmentation ####
_C.MODEL.USE_FEATURE_MASK = True
_C.MODEL.FEATURE_AUG_TYPE = 'exchange_token'  # 'exchange_token', 'jigsaw_token', 'cutout_patch', 'erase_patch', 'mixup_patch', 'jigsaw_patch'
_C.MODEL.FEATURE_MASK_SIZE = 4
_C.MODEL.MASK_SHAPE = 'stripe'  # 'square', 'random'
_C.MODEL.MASK_SIZE = 1
_C.MODEL.MASK_MODE = 'random_direction'  # 'horizontal', 'vertical' for stripe; 'random_size' for square
_C.MODEL.MASK_PERCENT = 0.1
####
_C.MODEL.EMBEDDING_DIM = 256

# -------------------------------------------------------- #
#                           Loss                            #
# -------------------------------------------------------- #
_C.MODEL.LOSS = CN()
# Size of the lookup table in OIM
_C.MODEL.LOSS.LUT_SIZE = 5532
# Size of the circular queue in OIM
_C.MODEL.LOSS.CQ_SIZE = 5000
_C.MODEL.LOSS.OIM_MOMENTUM = 0.5
_C.MODEL.LOSS.OIM_SCALAR = 30.0

_C.MODEL.LOSS.USE_SOFTMAX = True

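# Illustrative note (assumption, not part of the original config): an OIM-style
# loss keeps a lookup table of LUT_SIZE labeled-identity features and a circular
# queue of CQ_SIZE unlabeled features, both updated with momentum OIM_MOMENTUM;
# similarities against these entries are scaled by OIM_SCALAR before the
# softmax, roughly:
#
#   scores = cfg.MODEL.LOSS.OIM_SCALAR * feats @ torch.cat([lut, cq]).t()
#
# where `feats`, `lut`, and `cq` are hypothetical tensor names.
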
# -------------------------------------------------------- #
#                         Evaluation                        #
# -------------------------------------------------------- #
# The period to evaluate the model during training
_C.EVAL_PERIOD = 1
# Evaluation with GT boxes to verify the upper bound of person search performance
_C.EVAL_USE_GT = False
# Fast evaluation with cached features
_C.EVAL_USE_CACHE = False
# Evaluation with Context Bipartite Graph Matching (CBGM) algorithm
_C.EVAL_USE_CBGM = False
# Gallery size in evaluation, only for CUHK-SYSU
_C.EVAL_GALLERY_SIZE = 100
# Feature used for evaluation
_C.EVAL_FEATURE = 'concat'  # 'stage2', 'stage3'

# -------------------------------------------------------- #
#                           Misc                            #
# -------------------------------------------------------- #
# Save a checkpoint every this many epochs
_C.CKPT_PERIOD = 1
# The period (in terms of iterations) to display training losses
_C.DISP_PERIOD = 10
# Whether to use tensorboard for visualization
_C.TF_BOARD = True
# The device on which to load the model
_C.DEVICE = "cuda:0"
# Set the seed to a negative value to fully randomize everything
_C.SEED = 1
# Directory where output files are written
_C.OUTPUT_DIR = "./output"


def get_default_cfg():
    """
    Get a copy of the default config.
    """
    return _C.clone()
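

# Illustrative usage sketch (not part of the original file): how a training or
# evaluation script might consume this config with the standard yacs API
# (clone, merge_from_file, merge_from_list, freeze). The YAML path below is a
# hypothetical example and is therefore left commented out.
if __name__ == "__main__":
    cfg = get_default_cfg()
    # Override defaults from an experiment YAML file, if one exists:
    # cfg.merge_from_file("configs/cuhk_sysu.yaml")  # hypothetical path
    # Override individual options from a flat key/value list:
    cfg.merge_from_list(["INPUT.BATCH_SIZE_TRAIN", 3, "SOLVER.BASE_LR", 0.0015])
    cfg.freeze()  # make the config immutable before training starts
    print(cfg.INPUT.DATASET, cfg.SOLVER.BASE_LR)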