import torch
import torchvision
from collections import OrderedDict

# featmap_names (List[str]): the names of the feature maps that will be used
#     for the pooling.
# output_size (List[Tuple[int, int]] or List[int]): output size for the pooled region
# sampling_ratio (int): sampling ratio for ROIAlign

# canonical_scale (int, optional): canonical_scale for LevelMapper
# canonical_level (int, optional): canonical_level for LevelMapper
# 依次是要处理的特征图名字、输出尺寸、采样系数
roi = torchvision.ops.MultiScaleRoIAlign(['feat1', 'feat3'], 5, 2)
i = OrderedDict()
# 构建仿真的特征
i['feat1'] = torch.rand(1, 5, 64, 64)
# this feature won't be used in the pooling
i['feat2'] = torch.rand(1, 5, 32, 32)  
i['feat3'] = torch.rand(1, 5, 16, 16)
# 创建随机的矩形框
boxes = torch.rand(6, 4) * 256; boxes[:, 2:] += boxes[:, :2]
# original image size, before computing the feature maps
image_sizes = [(512, 512)]
output = roi(i, [boxes], image_sizes)
print(output.shape)
#print(output)

# 6个矩形框、5个通道、3x3是怎么来的？
# torch.Size([6, 5, 3, 3])