# This file is part of COAT, and is distributed under the # OSI-approved BSD 3-Clause License. See top-level LICENSE file or # https://github.com/Kitware/COAT/blob/master/LICENSE for details. import os.path as osp import numpy as np from scipy.io import loadmat from sklearn.metrics import average_precision_score from utils.km import run_kuhn_munkres from utils.utils import write_json def _compute_iou(a, b): x1 = max(a[0], b[0]) y1 = max(a[1], b[1]) x2 = min(a[2], b[2]) y2 = min(a[3], b[3]) inter = max(0, x2 - x1) * max(0, y2 - y1) union = (a[2] - a[0]) * (a[3] - a[1]) + (b[2] - b[0]) * (b[3] - b[1]) - inter return inter * 1.0 / union def eval_detection( gallery_dataset, gallery_dets, det_thresh=0.5, iou_thresh=0.5, labeled_only=False ): """ gallery_det (list of ndarray): n_det x [x1, y1, x2, y2, score] per image det_thresh (float): filter out gallery detections whose scores below this iou_thresh (float): treat as true positive if IoU is above this threshold labeled_only (bool): filter out unlabeled background people """ assert len(gallery_dataset) == len(gallery_dets) annos = gallery_dataset.annotations y_true, y_score = [], [] count_gt, count_tp = 0, 0 for anno, det in zip(annos, gallery_dets): gt_boxes = anno["boxes"] if labeled_only: # exclude the unlabeled people (pid == 5555) inds = np.where(anno["pids"].ravel() != 5555)[0] if len(inds) == 0: continue gt_boxes = gt_boxes[inds] num_gt = gt_boxes.shape[0] if det != []: det = np.asarray(det) inds = np.where(det[:, 4].ravel() >= det_thresh)[0] det = det[inds] num_det = det.shape[0] else: num_det = 0 if num_det == 0: count_gt += num_gt continue ious = np.zeros((num_gt, num_det), dtype=np.float32) for i in range(num_gt): for j in range(num_det): ious[i, j] = _compute_iou(gt_boxes[i], det[j, :4]) tfmat = ious >= iou_thresh # for each det, keep only the largest iou of all the gt for j in range(num_det): largest_ind = np.argmax(ious[:, j]) for i in range(num_gt): if i != largest_ind: tfmat[i, j] = False # for each gt, keep only the largest iou of all the det for i in range(num_gt): largest_ind = np.argmax(ious[i, :]) for j in range(num_det): if j != largest_ind: tfmat[i, j] = False for j in range(num_det): y_score.append(det[j, -1]) y_true.append(tfmat[:, j].any()) count_tp += tfmat.sum() count_gt += num_gt det_rate = count_tp * 1.0 / count_gt ap = average_precision_score(y_true, y_score) * det_rate print("{} detection:".format("labeled only" if labeled_only else "all")) print(" recall = {:.2%}".format(det_rate)) if not labeled_only: print(" ap = {:.2%}".format(ap)) return det_rate, ap def eval_search_cuhk( gallery_dataset, query_dataset, gallery_dets, gallery_feats, query_box_feats, query_dets, query_feats, k1=10, k2=3, det_thresh=0.5, cbgm=False, gallery_size=100, ): """ gallery_dataset/query_dataset: an instance of BaseDataset gallery_det (list of ndarray): n_det x [x1, x2, y1, y2, score] per image gallery_feat (list of ndarray): n_det x D features per image query_feat (list of ndarray): D dimensional features per query image det_thresh (float): filter out gallery detections whose scores below this gallery_size (int): gallery size [-1, 50, 100, 500, 1000, 2000, 4000] -1 for using full set """ assert len(gallery_dataset) == len(gallery_dets) assert len(gallery_dataset) == len(gallery_feats) assert len(query_dataset) == len(query_box_feats) use_full_set = gallery_size == -1 fname = "TestG{}".format(gallery_size if not use_full_set else 50) protoc = loadmat(osp.join(gallery_dataset.root, "annotation/test/train_test", fname + ".mat")) protoc = protoc[fname].squeeze() # mapping from gallery image to (det, feat) annos = gallery_dataset.annotations name_to_det_feat = {} for anno, det, feat in zip(annos, gallery_dets, gallery_feats): name = anno["img_name"] if det != []: scores = det[:, 4].ravel() inds = np.where(scores >= det_thresh)[0] if len(inds) > 0: name_to_det_feat[name] = (det[inds], feat[inds]) aps = [] accs = [] topk = [1, 5, 10] ret = {"image_root": gallery_dataset.img_prefix, "results": []} for i in range(len(query_dataset)): y_true, y_score = [], [] imgs, rois = [], [] count_gt, count_tp = 0, 0 # get L2-normalized feature vector feat_q = query_box_feats[i].ravel() # ignore the query image query_imname = str(protoc["Query"][i]["imname"][0, 0][0]) query_roi = protoc["Query"][i]["idlocate"][0, 0][0].astype(np.int32) query_roi[2:] += query_roi[:2] query_gt = [] tested = set([query_imname]) name2sim = {} name2gt = {} sims = [] imgs_cbgm = [] # 1. Go through the gallery samples defined by the protocol for item in protoc["Gallery"][i].squeeze(): gallery_imname = str(item[0][0]) # some contain the query (gt not empty), some not gt = item[1][0].astype(np.int32) count_gt += gt.size > 0 # compute distance between query and gallery dets if gallery_imname not in name_to_det_feat: continue det, feat_g = name_to_det_feat[gallery_imname] # no detection in this gallery, skip it if det.shape[0] == 0: continue # get L2-normalized feature matrix NxD assert feat_g.size == np.prod(feat_g.shape[:2]) feat_g = feat_g.reshape(feat_g.shape[:2]) # compute cosine similarities sim = feat_g.dot(feat_q).ravel() if gallery_imname in name2sim: continue name2sim[gallery_imname] = sim name2gt[gallery_imname] = gt sims.extend(list(sim)) imgs_cbgm.extend([gallery_imname] * len(sim)) # 2. Go through the remaining gallery images if using full set if use_full_set: for gallery_imname in gallery_dataset.imgs: if gallery_imname in tested: continue if gallery_imname not in name_to_det_feat: continue det, feat_g = name_to_det_feat[gallery_imname] # get L2-normalized feature matrix NxD assert feat_g.size == np.prod(feat_g.shape[:2]) feat_g = feat_g.reshape(feat_g.shape[:2]) # compute cosine similarities sim = feat_g.dot(feat_q).ravel() # guaranteed no target query in these gallery images label = np.zeros(len(sim), dtype=np.int32) y_true.extend(list(label)) y_score.extend(list(sim)) imgs.extend([gallery_imname] * len(sim)) rois.extend(list(det)) if cbgm: # -------- Context Bipartite Graph Matching (CBGM) ------- # sims = np.array(sims) imgs_cbgm = np.array(imgs_cbgm) # only process the top-k1 gallery images for efficiency inds = np.argsort(sims)[-k1:] imgs_cbgm = set(imgs_cbgm[inds]) for img in imgs_cbgm: sim = name2sim[img] det, feat_g = name_to_det_feat[img] # only regard the people with top-k2 detection confidence # in the query image as context information qboxes = query_dets[i][:k2] qfeats = query_feats[i][:k2] assert ( query_roi - qboxes[0][:4] ).sum() <= 0.001, "query_roi must be the first one in pboxes" # build the bipartite graph and run Kuhn-Munkres (K-M) algorithm # to find the best match graph = [] for indx_i, pfeat in enumerate(qfeats): for indx_j, gfeat in enumerate(feat_g): graph.append((indx_i, indx_j, (pfeat * gfeat).sum())) km_res, max_val = run_kuhn_munkres(graph) # revise the similarity between query person and its matching for indx_i, indx_j, _ in km_res: # 0 denotes the query roi if indx_i == 0: sim[indx_j] = max_val break for gallery_imname, sim in name2sim.items(): gt = name2gt[gallery_imname] det, feat_g = name_to_det_feat[gallery_imname] # assign label for each det label = np.zeros(len(sim), dtype=np.int32) if gt.size > 0: w, h = gt[2], gt[3] gt[2:] += gt[:2] query_gt.append({"img": str(gallery_imname), "roi": list(map(float, list(gt)))}) iou_thresh = min(0.5, (w * h * 1.0) / ((w + 10) * (h + 10))) inds = np.argsort(sim)[::-1] sim = sim[inds] det = det[inds] # only set the first matched det as true positive for j, roi in enumerate(det[:, :4]): if _compute_iou(roi, gt) >= iou_thresh: label[j] = 1 count_tp += 1 break y_true.extend(list(label)) y_score.extend(list(sim)) imgs.extend([gallery_imname] * len(sim)) rois.extend(list(det)) tested.add(gallery_imname) # 3. Compute AP for this query (need to scale by recall rate) y_score = np.asarray(y_score) y_true = np.asarray(y_true) assert count_tp <= count_gt recall_rate = count_tp * 1.0 / count_gt ap = 0 if count_tp == 0 else average_precision_score(y_true, y_score) * recall_rate aps.append(ap) inds = np.argsort(y_score)[::-1] y_score = y_score[inds] y_true = y_true[inds] accs.append([min(1, sum(y_true[:k])) for k in topk]) # 4. Save result for JSON dump new_entry = { "query_img": str(query_imname), "query_roi": list(map(float, list(query_roi))), "query_gt": query_gt, "gallery": [], } # only record wrong results if int(y_true[0]): continue # only save top-10 predictions for k in range(10): new_entry["gallery"].append( { "img": str(imgs[inds[k]]), "roi": list(map(float, list(rois[inds[k]]))), "score": float(y_score[k]), "correct": int(y_true[k]), } ) ret["results"].append(new_entry) print("search ranking:") print(" mAP = {:.2%}".format(np.mean(aps))) accs = np.mean(accs, axis=0) for i, k in enumerate(topk): print(" top-{:2d} = {:.2%}".format(k, accs[i])) write_json(ret, "vis/results.json") ret["mAP"] = np.mean(aps) ret["accs"] = accs return ret def eval_search_prw( gallery_dataset, query_dataset, gallery_dets, gallery_feats, query_box_feats, query_dets, query_feats, k1=30, k2=4, det_thresh=0.5, cbgm=False, gallery_size=None, # not used in PRW ignore_cam_id=True, ): """ gallery_det (list of ndarray): n_det x [x1, x2, y1, y2, score] per image gallery_feat (list of ndarray): n_det x D features per image query_feat (list of ndarray): D dimensional features per query image det_thresh (float): filter out gallery detections whose scores below this gallery_size (int): -1 for using full set ignore_cam_id (bool): Set to True acoording to CUHK-SYSU, although it's a common practice to focus on cross-cam match only. """ assert len(gallery_dataset) == len(gallery_dets) assert len(gallery_dataset) == len(gallery_feats) assert len(query_dataset) == len(query_box_feats) annos = gallery_dataset.annotations name_to_det_feat = {} for anno, det, feat in zip(annos, gallery_dets, gallery_feats): name = anno["img_name"] scores = det[:, 4].ravel() inds = np.where(scores >= det_thresh)[0] if len(inds) > 0: name_to_det_feat[name] = (det[inds], feat[inds]) aps = [] accs = [] topk = [1, 5, 10] ret = {"image_root": gallery_dataset.img_prefix, "results": []} for i in range(len(query_dataset)): y_true, y_score = [], [] imgs, rois = [], [] count_gt, count_tp = 0, 0 feat_p = query_box_feats[i].ravel() query_imname = query_dataset.annotations[i]["img_name"] query_roi = query_dataset.annotations[i]["boxes"] query_pid = query_dataset.annotations[i]["pids"] query_cam = query_dataset.annotations[i]["cam_id"] # Find all occurence of this query gallery_imgs = [] for x in annos: if query_pid in x["pids"] and x["img_name"] != query_imname: gallery_imgs.append(x) query_gts = {} for item in gallery_imgs: query_gts[item["img_name"]] = item["boxes"][item["pids"] == query_pid] # Construct gallery set for this query if ignore_cam_id: gallery_imgs = [] for x in annos: if x["img_name"] != query_imname: gallery_imgs.append(x) else: gallery_imgs = [] for x in annos: if x["img_name"] != query_imname and x["cam_id"] != query_cam: gallery_imgs.append(x) name2sim = {} sims = [] imgs_cbgm = [] # 1. Go through all gallery samples for item in gallery_imgs: gallery_imname = item["img_name"] # some contain the query (gt not empty), some not count_gt += gallery_imname in query_gts # compute distance between query and gallery dets if gallery_imname not in name_to_det_feat: continue det, feat_g = name_to_det_feat[gallery_imname] # get L2-normalized feature matrix NxD assert feat_g.size == np.prod(feat_g.shape[:2]) feat_g = feat_g.reshape(feat_g.shape[:2]) # compute cosine similarities sim = feat_g.dot(feat_p).ravel() if gallery_imname in name2sim: continue name2sim[gallery_imname] = sim sims.extend(list(sim)) imgs_cbgm.extend([gallery_imname] * len(sim)) if cbgm: sims = np.array(sims) imgs_cbgm = np.array(imgs_cbgm) inds = np.argsort(sims)[-k1:] imgs_cbgm = set(imgs_cbgm[inds]) for img in imgs_cbgm: sim = name2sim[img] det, feat_g = name_to_det_feat[img] qboxes = query_dets[i][:k2] qfeats = query_feats[i][:k2] # assert ( # query_roi - qboxes[0][:4] # ).sum() <= 0.001, "query_roi must be the first one in pboxes" graph = [] for indx_i, pfeat in enumerate(qfeats): for indx_j, gfeat in enumerate(feat_g): graph.append((indx_i, indx_j, (pfeat * gfeat).sum())) km_res, max_val = run_kuhn_munkres(graph) for indx_i, indx_j, _ in km_res: if indx_i == 0: sim[indx_j] = max_val break for gallery_imname, sim in name2sim.items(): det, feat_g = name_to_det_feat[gallery_imname] # assign label for each det label = np.zeros(len(sim), dtype=np.int32) if gallery_imname in query_gts: gt = query_gts[gallery_imname].ravel() w, h = gt[2] - gt[0], gt[3] - gt[1] iou_thresh = min(0.5, (w * h * 1.0) / ((w + 10) * (h + 10))) inds = np.argsort(sim)[::-1] sim = sim[inds] det = det[inds] # only set the first matched det as true positive for j, roi in enumerate(det[:, :4]): if _compute_iou(roi, gt) >= iou_thresh: label[j] = 1 count_tp += 1 break y_true.extend(list(label)) y_score.extend(list(sim)) imgs.extend([gallery_imname] * len(sim)) rois.extend(list(det)) # 2. Compute AP for this query (need to scale by recall rate) y_score = np.asarray(y_score) y_true = np.asarray(y_true) assert count_tp <= count_gt recall_rate = count_tp * 1.0 / count_gt ap = 0 if count_tp == 0 else average_precision_score(y_true, y_score) * recall_rate aps.append(ap) inds = np.argsort(y_score)[::-1] y_score = y_score[inds] y_true = y_true[inds] accs.append([min(1, sum(y_true[:k])) for k in topk]) # 4. Save result for JSON dump new_entry = { "query_img": str(query_imname), "query_roi": list(map(float, list(query_roi.squeeze()))), "query_gt": query_gts, "gallery": [], } # only save top-10 predictions for k in range(10): new_entry["gallery"].append( { "img": str(imgs[inds[k]]), "roi": list(map(float, list(rois[inds[k]]))), "score": float(y_score[k]), "correct": int(y_true[k]), } ) ret["results"].append(new_entry) print("search ranking:") mAP = np.mean(aps) print(" mAP = {:.2%}".format(mAP)) accs = np.mean(accs, axis=0) for i, k in enumerate(topk): print(" top-{:2d} = {:.2%}".format(k, accs[i])) # write_json(ret, "vis/results.json") ret["mAP"] = np.mean(aps) ret["accs"] = accs return ret