# COAT/coat-pvtv2-b2/defaults.py

# This file is part of COAT, and is distributed under the
# OSI-approved BSD 3-Clause License. See top-level LICENSE file or
# https://github.com/Kitware/COAT/blob/master/LICENSE for details.

from yacs.config import CfgNode as CN

# Root node of the default configuration tree; every option below is attached to it.
_C = CN()

# -------------------------------------------------------- #
#                           Input                          #
# -------------------------------------------------------- #
_C.INPUT = CN()
# Name of the dataset and the directory it is stored in
# NOTE(review): DATA_ROOT is a relative path -- presumably resolved against the
# working directory; confirm against the data loading code.
_C.INPUT.DATASET = "CUHK-SYSU"
_C.INPUT.DATA_ROOT = "data/CUHK-SYSU"

# Size of the smallest side of the image
_C.INPUT.MIN_SIZE = 900
# Maximum size of the side of the image
_C.INPUT.MAX_SIZE = 1500

# Number of images per batch (separate values for training and testing)
_C.INPUT.BATCH_SIZE_TRAIN = 5
_C.INPUT.BATCH_SIZE_TEST = 1

# Number of data loading workers (separate values for training and testing)
_C.INPUT.NUM_WORKERS_TRAIN = 5
_C.INPUT.NUM_WORKERS_TEST = 1

# Image augmentation switches; all of them are disabled by default
_C.INPUT.IMAGE_CUTOUT = False
_C.INPUT.IMAGE_ERASE = False
_C.INPUT.IMAGE_MIXUP = False

# -------------------------------------------------------- #
#                           GRID                           #
# -------------------------------------------------------- #
# On/off switch for the grid augmentation. It is stored under INPUT next to the
# other image augmentation flags, while its parameters live in the GRID node.
_C.INPUT.IMAGE_GRID = False
_C.GRID = CN()
# Parameters of the grid augmentation.
# NOTE(review): presumably rotation range, offset, keep ratio, mode and the
# probability of applying the augmentation -- confirm against the transform code.
_C.GRID.ROTATE = 1
_C.GRID.OFFSET = 0
_C.GRID.RATIO = 0.5
_C.GRID.MODE = 1
_C.GRID.PROB = 0.5

# -------------------------------------------------------- #
#                          Solver                          #
# -------------------------------------------------------- #
_C.SOLVER = CN()
# Total number of training epochs
_C.SOLVER.MAX_EPOCHS = 13

# Learning rate settings
_C.SOLVER.BASE_LR = 0.003

# The epoch milestones to decrease the learning rate by GAMMA
# NOTE(review): the second milestone (14) is larger than MAX_EPOCHS (13), so
# with these defaults presumably only the decay at epoch 10 is ever applied --
# confirm against the LR scheduler before relying on it.
_C.SOLVER.LR_DECAY_MILESTONES = [10, 14]
_C.SOLVER.GAMMA = 0.1

# Optimizer regularization and momentum
# NOTE(review): the SGD_MOMENTUM key name suggests an SGD optimizer -- confirm.
_C.SOLVER.WEIGHT_DECAY = 0.0005
_C.SOLVER.SGD_MOMENTUM = 0.9

# Loss weight of RPN regression
_C.SOLVER.LW_RPN_REG = 1
# Loss weight of RPN classification
_C.SOLVER.LW_RPN_CLS = 1

# Loss weight of Cascade R-CNN and Re-ID (OIM)
# One regression (REG) / classification (CLS) pair per cascade stage
# (1ST, 2ND, 3RD); Re-ID weights exist only for the 2nd and 3rd stages.
_C.SOLVER.LW_RCNN_REG_1ST = 10
_C.SOLVER.LW_RCNN_CLS_1ST = 1
_C.SOLVER.LW_RCNN_REG_2ND = 10
_C.SOLVER.LW_RCNN_CLS_2ND = 1
_C.SOLVER.LW_RCNN_REG_3RD = 10
_C.SOLVER.LW_RCNN_CLS_3RD = 1
_C.SOLVER.LW_RCNN_REID_2ND = 0.5
_C.SOLVER.LW_RCNN_REID_3RD = 0.5
# Loss weight of box reid, softmax loss
_C.SOLVER.LW_RCNN_SOFTMAX_2ND = 0.5
_C.SOLVER.LW_RCNN_SOFTMAX_3RD = 0.5

# Set to negative value to disable gradient clipping
_C.SOLVER.CLIP_GRADIENTS = 10.0

# -------------------------------------------------------- #
#                            RPN                           #
# -------------------------------------------------------- #
# Parent node of all model options (RPN, RoI head, transformer, loss, ...)
_C.MODEL = CN()
_C.MODEL.RPN = CN()
# NMS threshold used on RoIs
_C.MODEL.RPN.NMS_THRESH = 0.7
# Number of anchors per image used to train RPN
_C.MODEL.RPN.BATCH_SIZE_TRAIN = 256
# Target fraction of foreground examples per RPN minibatch
_C.MODEL.RPN.POS_FRAC_TRAIN = 0.5
# Overlap threshold for an anchor to be considered foreground (if >= POS_THRESH_TRAIN)
_C.MODEL.RPN.POS_THRESH_TRAIN = 0.7
# Overlap threshold for an anchor to be considered background (if < NEG_THRESH_TRAIN)
_C.MODEL.RPN.NEG_THRESH_TRAIN = 0.3
# Number of top scoring RPN RoIs to keep before applying NMS
# (separate values for training and testing)
_C.MODEL.RPN.PRE_NMS_TOPN_TRAIN = 12000
_C.MODEL.RPN.PRE_NMS_TOPN_TEST = 6000
# Number of top scoring RPN RoIs to keep after applying NMS
# (separate values for training and testing)
_C.MODEL.RPN.POST_NMS_TOPN_TRAIN = 2000
_C.MODEL.RPN.POST_NMS_TOPN_TEST = 300

# -------------------------------------------------------- #
#                         RoI head                         #
# -------------------------------------------------------- #
_C.MODEL.ROI_HEAD = CN()
# Whether to use bn neck (i.e. batch normalization after linear)
_C.MODEL.ROI_HEAD.BN_NECK = True
# Number of RoIs per image used to train RoI head
_C.MODEL.ROI_HEAD.BATCH_SIZE_TRAIN = 128
# Target fraction of foreground examples per RoI minibatch
_C.MODEL.ROI_HEAD.POS_FRAC_TRAIN = 0.25  # NOTE(review): 0.5 looks like a previously used value -- confirm

# NOTE(review): presumably selects the per-stage thresholds (_2ND / _3RD) below
# instead of a single shared threshold -- confirm against the RoI head code.
_C.MODEL.ROI_HEAD.USE_DIFF_THRESH = True
# Overlap threshold for an RoI to be considered foreground (if >= POS_THRESH_TRAIN)
# The un-suffixed key has per-stage variants for the 2nd and 3rd stages.
_C.MODEL.ROI_HEAD.POS_THRESH_TRAIN = 0.5
_C.MODEL.ROI_HEAD.POS_THRESH_TRAIN_2ND = 0.6
_C.MODEL.ROI_HEAD.POS_THRESH_TRAIN_3RD = 0.7
# Overlap threshold for an RoI to be considered background (if < NEG_THRESH_TRAIN)
# By default each value equals the matching foreground threshold above.
_C.MODEL.ROI_HEAD.NEG_THRESH_TRAIN = 0.5
_C.MODEL.ROI_HEAD.NEG_THRESH_TRAIN_2ND = 0.6
_C.MODEL.ROI_HEAD.NEG_THRESH_TRAIN_3RD = 0.7
# Minimum score threshold
_C.MODEL.ROI_HEAD.SCORE_THRESH_TEST = 0.5
# NMS threshold used on boxes (a generic value plus one value per stage)
_C.MODEL.ROI_HEAD.NMS_THRESH_TEST = 0.4
_C.MODEL.ROI_HEAD.NMS_THRESH_TEST_1ST = 0.4
_C.MODEL.ROI_HEAD.NMS_THRESH_TEST_2ND = 0.4
_C.MODEL.ROI_HEAD.NMS_THRESH_TEST_3RD = 0.5
# Maximum number of detected objects
_C.MODEL.ROI_HEAD.DETECTIONS_PER_IMAGE_TEST = 300

# -------------------------------------------------------- #
#                     Transformer head                     #
# -------------------------------------------------------- #
_C.MODEL.TRANSFORMER = CN()
# Core transformer hyper-parameters
# NOTE(review): presumably model width, number of encoder layers, number of
# attention heads and dropout rate -- confirm against the transformer module.
_C.MODEL.TRANSFORMER.DIM_MODEL = 512
_C.MODEL.TRANSFORMER.ENCODER_LAYERS = 1
_C.MODEL.TRANSFORMER.N_HEAD = 8
_C.MODEL.TRANSFORMER.USE_OUTPUT_LAYER = False
_C.MODEL.TRANSFORMER.DROPOUT = 0.
# Switches for the local and global shortcut connections
# (NOTE(review): exact wiring is defined in the model code -- confirm there)
_C.MODEL.TRANSFORMER.USE_LOCAL_SHORTCUT = True
_C.MODEL.TRANSFORMER.USE_GLOBAL_SHORTCUT = True

# Per-stage (1ST / 2ND / 3RD) multi-scale settings. Each stage has a list of
# scale names and a list of kernel sizes with the same number of entries
# (NOTE(review): presumably paired element-wise -- confirm).
_C.MODEL.TRANSFORMER.USE_DIFF_SCALE = True
_C.MODEL.TRANSFORMER.NAMES_1ST = ['scale1','scale2']
_C.MODEL.TRANSFORMER.NAMES_2ND = ['scale1','scale2']
_C.MODEL.TRANSFORMER.NAMES_3RD = ['scale1','scale2']
_C.MODEL.TRANSFORMER.KERNEL_SIZE_1ST = [(1,1),(3,3)]
_C.MODEL.TRANSFORMER.KERNEL_SIZE_2ND = [(1,1),(3,3)]
_C.MODEL.TRANSFORMER.KERNEL_SIZE_3RD = [(1,1),(3,3)]
# Per-stage mask switches; off for the 1st stage, on for the 2nd and 3rd
_C.MODEL.TRANSFORMER.USE_MASK_1ST = False
_C.MODEL.TRANSFORMER.USE_MASK_2ND = True
_C.MODEL.TRANSFORMER.USE_MASK_3RD = True
_C.MODEL.TRANSFORMER.USE_PATCH2VEC = True

#### Feature-level masking / augmentation options
# NOTE(review): how these keys interact (e.g. which ones apply to which
# FEATURE_AUG_TYPE) is decided by the model code -- confirm there.
_C.MODEL.USE_FEATURE_MASK = True
_C.MODEL.FEATURE_AUG_TYPE = 'exchange_token' # options: 'exchange_token', 'jigsaw_token', 'cutout_patch', 'erase_patch', 'mixup_patch', 'jigsaw_patch'
_C.MODEL.FEATURE_MASK_SIZE = 4
_C.MODEL.MASK_SHAPE = 'stripe' # other options: 'square', 'random'
_C.MODEL.MASK_SIZE = 1
_C.MODEL.MASK_MODE = 'random_direction' # 'horizontal', 'vertical' for stripe; 'random_size' for square
_C.MODEL.MASK_PERCENT = 0.1
####
# Embedding dimension (NOTE(review): presumably of the Re-ID feature -- confirm)
_C.MODEL.EMBEDDING_DIM = 256

# -------------------------------------------------------- #
#                           Loss                           #
# -------------------------------------------------------- #
_C.MODEL.LOSS = CN()
# Size of the lookup table in OIM
_C.MODEL.LOSS.LUT_SIZE = 5532
# Size of the circular queue in OIM
_C.MODEL.LOSS.CQ_SIZE = 5000
# Momentum and scalar of the OIM loss
# NOTE(review): presumably the lookup-table update momentum and the logit
# scaling factor -- confirm against the OIM loss implementation.
_C.MODEL.LOSS.OIM_MOMENTUM = 0.5
_C.MODEL.LOSS.OIM_SCALAR = 30.0

# NOTE(review): presumably enables the softmax loss weighted by
# SOLVER.LW_RCNN_SOFTMAX_2ND / _3RD -- confirm.
_C.MODEL.LOSS.USE_SOFTMAX = True

# -------------------------------------------------------- #
#                        Evaluation                        #
# -------------------------------------------------------- #
# The period to evaluate the model during training
# (NOTE(review): presumably counted in epochs -- confirm)
_C.EVAL_PERIOD = 1
# Evaluation with GT boxes to verify the upper bound of person search performance
_C.EVAL_USE_GT = False
# Fast evaluation with cached features
_C.EVAL_USE_CACHE = False
# Evaluation with Context Bipartite Graph Matching (CBGM) algorithm
_C.EVAL_USE_CBGM = False
# Gallery size in evaluation, only for CUHK-SYSU
_C.EVAL_GALLERY_SIZE = 100
# Feature used for evaluation
_C.EVAL_FEATURE = 'concat' # other options: 'stage2', 'stage3'

# -------------------------------------------------------- #
#                           Miscs                          #
# -------------------------------------------------------- #
# Save a checkpoint every CKPT_PERIOD epochs
_C.CKPT_PERIOD = 1
# The period (in terms of iterations) to display training losses
_C.DISP_PERIOD = 10
# Whether to use tensorboard for visualization
_C.TF_BOARD = True
# The device loading the model
_C.DEVICE = "cuda:0"
# Set seed to negative to fully randomize everything
_C.SEED = 1
# Directory where output files are written
_C.OUTPUT_DIR = "./output"


def get_default_cfg():
    """
    Build and return a clone of the module-level default config ``_C``.

    The clone is what callers receive; ``_C`` itself is never handed out.
    """
    defaults = _C.clone()
    return defaults