diff --git a/research/cv/FCOS/README_CN.md b/research/cv/FCOS/README_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..dd787c487d7d29117f6a407c04f09e9ed7212711
--- /dev/null
+++ b/research/cv/FCOS/README_CN.md
@@ -0,0 +1,240 @@
+# Contents
+
+<!-- TOC -->
+
+- [Contents](#contents)
+- [FCOS Description](#fcos-description)
+- [Model Architecture](#model-architecture)
+- [Dataset](#dataset)
+- [Environment Requirements](#environment-requirements)
+- [Quick Start](#quick-start)
+- [Script Description](#script-description)
+    - [Script and Sample Code](#script-and-sample-code)
+    - [Script Parameters](#script-parameters)
+    - [Training Process](#training-process)
+        - [Training](#training)
+        - [Distributed Training](#distributed-training)
+    - [Evaluation Process](#evaluation-process)
+        - [Evaluation](#evaluation)
+    - [Exporting a MindIR Model](#exporting-a-mindir-model)
+    - [Inference Process](#inference-process)
+        - [Usage](#usage)
+        - [Notes](#notes)
+        - [Result](#result)
+- [Model Description](#model-description)
+    - [Performance](#performance)
+        - [Evaluation Performance](#evaluation-performance)
+        - [Inference Performance](#inference-performance)
+- [Description of Random Situation](#description-of-random-situation)
+- [ModelZoo Homepage](#modelzoo-homepage)
+
+<!-- TOC -->
+
+# FCOS Description
+
+**FCOS** is an anchor-free detection model. It adds an FPN on top of the convolutional backbone and a center-ness branch to the loss computation, which gives higher detection accuracy; with a stronger backbone it can reach 44.7% AP.
+
+[Paper](https://arxiv.org/pdf/1904.01355.pdf): ```FCOS: Fully Convolutional One-Stage Object Detection.```
+
+[Official code](https://github.com/tianzhi0549/FCOS): <https://github.com/tianzhi0549/FCOS>
+
+# Model Architecture
+
+FCOS uses ResNet-50 as its backbone. The backbone's C3, C4 and C5 feature maps are fed into an FPN, which produces the P3, P4, P5, P6 and P7 feature maps that are passed to the detection heads. Each head contains three branches:
+
+- classification: predicts the category; C denotes the number of classes, which is equivalent to C binary classifiers.
+- regression: regresses the box location; the 4 outputs l, t, r, b are the distances from the anchor point to the left, top, right and bottom borders of the box.
+- center-ness: one center-ness score per anchor point, measuring how close the point is to the center of its box.
+
+In the detection head, both the classification branch and the regression branch first pass through 4 convolution layers for feature enhancement. The regression and center-ness targets are defined by the formulas below.
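+For a location (x, y) inside a ground-truth box (x0, y0, x1, y1), the regression targets and the center-ness target follow the FCOS paper; the same center-ness formula is used in `src/network_define.py` to build `cnt_targets`:
+
+```latex
+l^{*} = x - x_{0}, \quad t^{*} = y - y_{0}, \quad r^{*} = x_{1} - x, \quad b^{*} = y_{1} - y
+
+\text{centerness}^{*} = \sqrt{\frac{\min(l^{*}, r^{*})}{\max(l^{*}, r^{*})} \times \frac{\min(t^{*}, b^{*})}{\max(t^{*}, b^{*})}}
+```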
+# Dataset
+
+Dataset used: [COCO 2017](https://cocodataset.org/#download)
+
+Supported dataset: COCO 2017, or any dataset in the same format as MS COCO
+
+Supported annotations: COCO 2017, or annotations in the same format as MS COCO
+
+- The directory structure is as follows; directory and file names are defined by the user
+
+    ```text
+    ├── dataset
+        ├── coco2017
+            ├── annotations
+            │   ├─ train.json
+            │   └─ val.json
+            ├─ train
+            │   ├─ picture1.jpg
+            │   ├─ ...
+            │   └─ picturen.jpg
+            └─ val
+                ├─ picture1.jpg
+                ├─ ...
+                └─ picturen.jpg
+    ```
+
+- If you use your own dataset, convert it to the COCO format first and make sure the annotations in the JSON file correspond to the image files.
+
+# Environment Requirements
+
+- Hardware (GPU)
+    - Prepare a hardware environment with GPU processors.
+
+- Framework
+    - [MindSpore](https://www.mindspore.cn/install)
+
+- For more information, please check the resources below:
+    - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/zh-CN/master/index.html)
+    - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html)
+
+# Quick Start
+
+After installing MindSpore via the official website, you can start training and evaluation as follows:
+
+- Training on a local machine
+
+  ```shell
+  # distributed training on 8 GPUs via shell script
+  sh run_distribute_train_gpu.sh [TRAIN_DATA_PATH] [ANNO_DATA_PATH] [PRETRAIN_PATH] [CKPT_SAVE_PATH]
+  ```
+
+  ```shell
+  # standalone training on a single GPU via shell script
+  sh run_standalone_train_gpu.sh [TRAIN_DATA_PATH] [ANNO_DATA_PATH] [PRETRAIN_PATH] [CKPT_SAVE_PATH] [DEVICE_ID] [DEVICE_NUM]
+  ```
+
+- Evaluation on a local machine
+
+  ```shell
+  sh run_standalone_eval_gpu.sh [EVAL_DATA_PATH] [ANNO_DATA_PATH] [CKPT_PATH] [DEVICE_ID]
+  ```
+
+# Script Description
+
+## Script and Sample Code
+
+```text
+├── cv
+    ├── FCOS
+        ├── README.md                          // description of FCOS
+        ├── scripts
+        │   ├── run_distribute_train_gpu.sh    // multi-GPU training script
+        │   ├── run_standalone_eval_gpu.sh     // single-GPU evaluation script
+        │   └── run_standalone_train_gpu.sh    // single-GPU training script
+        ├── src
+        │   ├── COCO_dataset.py                // dataset creation
+        │   ├── augment.py                     // data augmentation
+        │   ├── resnet.py                      // backbone network
+        │   ├── eval_utils.py                  // evaluation utilities
+        │   ├── fcos.py                        // FCOS network
+        │   ├── fpn_neck.py                    // FPN
+        │   ├── head.py                        // detection head
+        │   ├── network_define.py              // network definition (targets and losses)
+        │   └── config.py                      // parameter configuration
+        ├── train.py                           // training script
+        └── eval.py                            // evaluation script
+```
+
+## Script Parameters
+
+The main parameters of train.py are as follows:
+
+```text
+--device_num            number of devices, default 8
+--device_id             id of the device to use, default 0
+--pretrain_ckpt_path    path of the pretrained ResNet-50 checkpoint
+--ckpt_save_path        absolute path for saving checkpoints after training
+--train_path            path of train2017
+--anno_path             path of instances_train2017.json
+```
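+For reference, this is the call that `run_standalone_train_gpu.sh` finally issues to `train.py`; the paths below are placeholders and need to be adapted to your environment:
+
+```shell
+# single-GPU run of train.py (example paths)
+python train.py \
+    --device_id=0 \
+    --device_num=1 \
+    --train_path=/data/coco2017/train2017 \
+    --anno_path=/data/coco2017/annotations/instances_train2017.json \
+    --pretrain_ckpt_path=./resnet50.ckpt \
+    --ckpt_save_path=./checkpoint > train.log 2>&1 &
+```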
+## Training Process
+
+### Distributed Training
+
+On GPU devices, run the shell script below to launch the distributed training example (8 devices)
+
+- Step 1
+
+  ```shell
+  # distributed training on 8 GPUs via shell script
+  sh run_distribute_train_gpu.sh /coco2017/train2017 /coco2017/annotations/instances_train2017.json resnet50.ckpt /checkpoint
+  ```
+
+  The shell script above runs distributed training in the background. The loss values are as follows:
+
+  ```log
+  epoch: 1 step: 1, loss is 0.7623271
+  epoch: 1 step: 1, loss is 0.7853986
+  epoch: 1 step: 1, loss is 0.8126975
+  epoch: 1 step: 1, loss is 0.63795793
+  epoch: 1 step: 1, loss is 0.6717266
+  epoch: 1 step: 1, loss is 0.5369471
+  epoch: 1 step: 1, loss is 0.50559396
+  epoch: 1 step: 1, loss is 0.6490997
+  epoch: 1 step: 2, loss is 0.7356057
+  epoch: 1 step: 2, loss is 0.7328874
+  epoch: 1 step: 2, loss is 0.79695445
+  epoch: 1 step: 2, loss is 0.8426137
+  epoch: 1 step: 2, loss is 0.87362385
+  epoch: 1 step: 2, loss is 0.7765503
+  epoch: 1 step: 2, loss is 0.67726403
+  epoch: 1 step: 2, loss is 0.48694384
+  ```
+
+## Evaluation Process
+
+### Evaluation
+
+```shell
+sh run_standalone_eval_gpu.sh /coco2017/val2017 instances_val2017.json ms8p_24epoch.ckpt 0
+```
+
+The mAP on the test dataset is as follows:
+
+```log
+ ===============================coco eval result===============================
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.381
+ Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.570
+ Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.410
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.225
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.414
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.497
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.311
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.509
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.554
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.356
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.603
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.688
+```
+
+# Model Description
+
+## Performance
+
+### Evaluation Performance
+
+FCOS on COCO 2017
+
+| Parameters          | FCOS-resnet50                                   |
+| ------------------- | ----------------------------------------------- |
+| Resource            | Tesla V100 x8; memory: 755 GB; OS: EulerOS 2.8  |
+| Uploaded Date       | 2022-08-16                                      |
+| MindSpore Version   | 1.5.0-alpha                                     |
+| Dataset             | 118000 images                                   |
+| Training Parameters | epoch=25, batch_size=16, lr=0.001, momentum=0.9 |
+| Optimizer           | SGD                                             |
+| Outputs             | boxes and labels                                |
+| Speed               | 530 ms/step                                     |
+| Total time          | 48 hours                                        |
+
+# Description of Random Situation
+
+The code applies random augmentation to the dataset, including rotating and cropping the images.
+
+# ModelZoo Homepage
+
+Please check the official [homepage](https://gitee.com/mindspore/models).
diff --git a/research/cv/FCOS/eval.py b/research/cv/FCOS/eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..b818b81597277b3998cca5c0081ab012ceb81c04
--- /dev/null
+++ b/research/cv/FCOS/eval.py
@@ -0,0 +1,214 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ +"""FCOS EVAL""" +import json +import os +import argparse +import cv2 +import numpy as np +import mindspore +import mindspore.ops as ops +import mindspore.dataset.vision.py_transforms as py_vision +import mindspore.dataset.vision.c_transforms as c_vision +from mindspore import Tensor +from mindspore import context +from mindspore.ops import stop_gradient + +from tqdm import tqdm +from PIL import Image +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +from src.fcos import FCOSDetector +from src.eval_utils import post_process +from src.eval_utils import ClipBoxes + +class COCOGenerator: + CLASSES_NAME = ( + '__back_ground__', 'person', 'bicycle', 'car', 'motorcycle', + 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', + 'fire hydrant', 'stop sign', 'parking meter', 'bench', + 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', + 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', + 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', + 'sports ball', 'kite', 'baseball bat', 'baseball glove', + 'skateboard', 'surfboard', 'tennis racket', 'bottle', + 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', + 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', + 'couch', 'potted plant', 'bed', 'dining table', 'toilet', + 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', + 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', + 'book', 'clock', 'vase', 'scissors', 'teddy bear', + 'hair drier', 'toothbrush') + + def __init__(self, dataset_dir, annotation_file, resize_size): + self.coco = COCO(annotation_file) + self.root = dataset_dir + ids = list(sorted(self.coco.imgs.keys())) + print("INFO====>check annos, filtering invalid data......") + new_ids = [] + for i in ids: + ann_id = self.coco.getAnnIds(imgIds=i, iscrowd=None) + ann = self.coco.loadAnns(ann_id) + if self._has_valid_annotation(ann): + new_ids.append(i) + self.ids = new_ids + + self.category2id = {v: i + 1 for i, v in enumerate(self.coco.getCatIds())} + self.id2category = {v: k for k, v in self.category2id.items()} + + self.resize_size = resize_size + + self.mean = [0.40789654, 0.44719302, 0.47026115] + self.std = [0.28863828, 0.27408164, 0.27809835] + + + + def getImg(self, index): + img_id = self.ids[index] + coco = self.coco + ann_ids = coco.getAnnIds(imgIds=img_id) + target = coco.loadAnns(ann_ids) + path = coco.loadImgs(img_id)[0]['file_name'] + + img = Image.open(os.path.join(self.root, path)).convert('RGB') + + return img, target + + def __getitem__(self, index): + + img, ann = self.getImg(index) + + ann = [o for o in ann if o['iscrowd'] == 0] + boxes = [o['bbox'] for o in ann] + boxes = np.array(boxes, dtype=np.float32) + # xywh-->xyxy + boxes[..., 2:] = boxes[..., 2:] + boxes[..., :2] + img = np.array(img) + img, boxes, scale = self.preprocess_img_boxes(img, boxes, self.resize_size) + classes = [o['category_id'] for o in ann] + classes = [self.category2id[c] for c in classes] + to_tensor = py_vision.ToTensor() + img = to_tensor(img) + max_h = 1344 + max_w = 1344 + max_num = 90 + img = np.pad(img, ((0, 0), (0, max(int(max_h - img.shape[1]), 0)), \ + (0, max(int(max_w - img.shape[2]), 0)))) + normalize_op = c_vision.Normalize(mean=[0.40789654, 0.44719302, 0.47026115], \ + std=[0.28863828, 0.27408164, 0.27809835]) + img = img.transpose(1, 2, 0) # chw to hwc + img = normalize_op(img) + img = img.transpose(2, 0, 1) #hwc to chw + 
boxes = np.pad(boxes, ((0, max(max_num-boxes.shape[0], 0)), (0, 0)), 'constant', constant_values=-1) + classes = np.pad(classes, (0, max(max_num - len(classes), 0)), 'constant', constant_values=-1).astype('int32') + box_info = {"boxes": boxes, "classes": classes, "scale": scale} + return img, box_info + def __len__(self): + return len(self.ids) + + def preprocess_img_boxes(self, image, boxes, input_ksize): + ''' + resize image and bboxes + Returns + image_paded: input_ksize + bboxes: [None,4] + ''' + min_side, max_side = input_ksize + h, w, _ = image.shape + smallest_side = min(w, h) + largest_side = max(w, h) + scale = min_side / smallest_side + if largest_side * scale > max_side: + scale = max_side / largest_side + nw, nh = int(scale * w), int(scale * h) + image_resized = cv2.resize(image, (nw, nh)) + pad_w = 32 - nw % 32 + pad_h = 32 - nh % 32 + image_paded = np.zeros(shape=[nh + pad_h, nw + pad_w, 3], dtype=np.uint8) + image_paded[:nh, :nw, :] = image_resized + if boxes is not None: + boxes[:, [0, 2]] = boxes[:, [0, 2]] * scale + boxes[:, [1, 3]] = boxes[:, [1, 3]] * scale + return image_paded, boxes, scale + + def _has_only_empty_bbox(self, annot): + return all(any(o <= 1 for o in obj['bbox'][2:]) for obj in annot) + + def _has_valid_annotation(self, annot): + if annot is None: + return False + if self._has_only_empty_bbox(annot): + return False + + return True + +def evaluate_coco(_generator, _model, threshold=0.05): + results = [] + image_ids = [] + for index in tqdm(range(len(_generator))): + img, box_info = _generator[index] + scale = box_info["scale"] + img = Tensor(img, mindspore.float32) + expand_dims = ops.ExpandDims() + img = expand_dims(img, 0) + batch_imgs = img + scores, labels, boxes = _model(img) + scores, labels, boxes = post_process([scores, labels, boxes], 0.05, 0.6) + boxes = ClipBoxes(batch_imgs, boxes) + scores = stop_gradient(scores) + labels = stop_gradient(labels) + boxes = stop_gradient(boxes) + boxes /= scale + boxes[:, :, 2] -= boxes[:, :, 0] + boxes[:, :, 3] -= boxes[:, :, 1] + boxes = boxes.asnumpy() + labels = labels.asnumpy() + scores = scores.asnumpy() + for box, score, label in zip(boxes[0], scores[0], labels[0]): + if score < threshold: + break + image_result = { + 'image_id': _generator.ids[index], + 'category_id': _generator.id2category[label], + 'score': float(score), + 'bbox': box.tolist(), + } + results.append(image_result) + image_ids.append(_generator.ids[index]) + json.dump(results, open('coco_bbox_results.json', 'w'), indent=4) + coco_true = _generator.coco + coco_pred = coco_true.loadRes('coco_bbox_results.json') + # run COCO evaluation + coco_eval = COCOeval(coco_true, coco_pred, 'bbox') + coco_eval.params.imgIds = image_ids + coco_eval.evaluate() + coco_eval.accumulate() + coco_eval.summarize() + return coco_eval.stats + +parser = argparse.ArgumentParser() +parser.add_argument("--device_id", type=int, default=0, help="DEVICE_ID to run ") +parser.add_argument("--eval_path", type=str, default="/data2/dataset/coco2017/val2017") +parser.add_argument("--anno_path", type=str, default="/data2/dataset/coco2017/annotations/instances_val2017.json") +parser.add_argument("--ckpt_path", type=str, default="/data1/FCOS/checkpoint/backbone/s1.ckpt") +opt = parser.parse_args() +if __name__ == "__main__": + context.set_context(mode=context.GRAPH_MODE, device_target='GPU', device_id=opt.device_id) + generator = COCOGenerator(opt.eval_path, opt.anno_path, [800, 1333]) + model = FCOSDetector(mode="inference") + model.set_train(False) + 
+    mindspore.load_param_into_net(model, mindspore.load_checkpoint(opt.ckpt_path))
+    evaluate_coco(generator, model)
diff --git a/research/cv/FCOS/scripts/run_distribute_train_gpu.sh b/research/cv/FCOS/scripts/run_distribute_train_gpu.sh
new file mode 100644
index 0000000000000000000000000000000000000000..7121c4ce40d1dc17f0f5a86e723df67143aa33f8
--- /dev/null
+++ b/research/cv/FCOS/scripts/run_distribute_train_gpu.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+if [ $# != 4 ]
+then
+    echo "Usage: sh run_distribute_train_gpu.sh [TRAIN_DATA_PATH] [ANNO_DATA_PATH] [PRETRAIN_PATH] [CKPT_SAVE_PATH]"
+exit 1
+fi
+
+get_real_path(){
+    if [ "${1:0:1}" == "/" ]; then
+        echo "$1"
+    else
+        echo "$(realpath -m $PWD/$1)"
+    fi
+}
+
+TRAIN_PATH=$(get_real_path $1)
+ANNO_PATH=$(get_real_path $2)
+PRETRAIN_PATH=$(get_real_path $3)
+SAVE_PATH=$(get_real_path $4)
+export RANK_SIZE=8
+
+echo $TRAIN_PATH
+echo $ANNO_PATH
+echo $PRETRAIN_PATH
+echo $SAVE_PATH
+
+if [ ! -d $TRAIN_PATH ]
+then
+    echo "error: TRAIN_DATA_PATH=$TRAIN_PATH is not a directory"
+exit 1
+fi
+
+if [ ! -f $ANNO_PATH ]
+then
+    echo "error: ANNO_DATA_PATH=$ANNO_PATH is not a file"
+exit 1
+fi
+
+if [ ! -f $PRETRAIN_PATH ]
+then
+    echo "error: PRETRAIN_PATH=$PRETRAIN_PATH is not a file"
+exit 1
+fi
+
+if [ ! -d $SAVE_PATH ]
+then
+    echo "error: CKPT_SAVE_PATH=$SAVE_PATH is not a directory"
+exit 1
+fi
+
+rm -rf device
+mkdir device
+cp -r ../src/ ./device
+cp ../train.py ./device
+echo "start training"
+cd ./device
+export TRAIN_PATH
+export ANNO_PATH
+export PRETRAIN_PATH
+export SAVE_PATH
+mpirun --allow-run-as-root -n 8 python train.py --train_path=$TRAIN_PATH --anno_path=$ANNO_PATH --pretrain_ckpt_path=$PRETRAIN_PATH --ckpt_save_path=$SAVE_PATH > train.log 2>&1 &
+
diff --git a/research/cv/FCOS/scripts/run_standalone_eval_gpu.sh b/research/cv/FCOS/scripts/run_standalone_eval_gpu.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b03538b416d2061477ec86bf2a966fef14875d6b
--- /dev/null
+++ b/research/cv/FCOS/scripts/run_standalone_eval_gpu.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ +if [ $# != 4 ] +then + echo "Usage: sh run_standalone_eval_gpu.sh [EVAL_DATA_PATH] [ANNO_DATA_PATH] [CKPT_PATH] [DEVICE_ID]" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} +EVAL_PATH=$(get_real_path $1) +ANNO_PATH=$(get_real_path $2) +CKPT_PATH=$(get_real_path $3) + +if [ ! -d $EVAL_PATH ] +then + echo "error: EVAL_DATA_PATH=$EVAL_PATH is not a directory" +exit 1 +fi + +if [ ! -f $ANNO_PATH ] +then + echo "error: ANNO_DATA_PATH=$ANNO_PATH is not a file" +exit 1 +fi + +if [ ! -f $CKPT_PATH ] +then + echo "error: CKPT_PATH=$CKPT_PATH is not a file" +exit 1 +fi + + +export DEVICE_ID=$4 + +rm -rf eval +mkdir eval +cp -r ../src/ ./eval +cp ../eval.py ./eval +echo "start eval" +cd ./eval +export EVAL_PATH=$1 +export ANNO_PATH=$2 +export CKPT_PATH=$3 + +python eval.py --device_id=$DEVICE_ID --eval_path=$EVAL_PATH --anno_path=$ANNO_PATH --ckpt_path=$CKPT_PATH > eval.log 2>&1 & + diff --git a/research/cv/FCOS/scripts/run_standalone_train_gpu.sh b/research/cv/FCOS/scripts/run_standalone_train_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..50e7239f5f7acaea4e72229e5645b8bbfe3fbacd --- /dev/null +++ b/research/cv/FCOS/scripts/run_standalone_train_gpu.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +if [ $# != 6 ] +then + echo "Usage: sh run_standalone_train_gpu.sh [TRAIN_DATA_PATH] [ANNO_DATA_PATH] [PRETRAIN_PATH] [CKPT_SAVE_PATH] [DEVICE_ID] [DEVICE_NUM]" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} +TRAIN_PATH=$(get_real_path $1) +ANNO_PATH=$(get_real_path $2) +PRETRAIN_PATH=$(get_real_path $3) +SAVE_PATH=$(get_real_path $4) + + +if [ ! -d $PATH1 ] +then + echo "error: TRAIN_DATA_PATH=$PATH1 is not a directory" +exit 1 +fi + +if [ ! -d $PATH2 ] +then + echo "error: ANNO_DATA_PATH=$PATH2 is not a directory" +exit 1 +fi + +if [ ! -d $PATH3 ] +then + echo "error: PRETRAIN_PATH=$PATH3 is not a directory" +exit 1 +fi + +if [ ! 
-d $PATH4 ] +then + echo "error: CKPT_SAVE_PATH=$PATH4 is not a directory" +exit 1 +fi + +export DEVICE_ID=$5 +export DEVICE_NUM=$6 + +rm -rf device +mkdir device +cp -r ../src/ ./device +cp ../train.py ./device +echo "start training" +cd ./device +export TRAIN_PATH=$1 +export ANNO_PATH=$2 +export PRETRAIN_PATH=$3 +export SAVE_PATH=$4 + +python train.py --device_id=$DEVICE_ID --train_path=$TRAIN_PATH --device_num=$DEVICE_NUM --anno_path=$ANNO_PATH --pretrain_ckpt_path=$PRETRAIN_PATH --ckpt_save_path=$SAVE_PATH > train.log 2>&1 & + diff --git a/research/cv/FCOS/src/COCO_dataset.py b/research/cv/FCOS/src/COCO_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..7d3b95872d442e51b43849264389bb0983f7742c --- /dev/null +++ b/research/cv/FCOS/src/COCO_dataset.py @@ -0,0 +1,163 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""COCO_dataset""" +import os +import random +import cv2 +import numpy as np +import mindspore.dataset as de +import mindspore.dataset.vision.py_transforms as py_vision +import mindspore.dataset.vision.c_transforms as c_vision + +from pycocotools.coco import COCO +from PIL import Image + +def flip(img, boxes): + img = img.transpose(Image.FLIP_LEFT_RIGHT) + w = img.width + if boxes.shape[0] != 0: + xmin = w - boxes[:, 2] + xmax = w - boxes[:, 0] + boxes[:, 2] = xmax + boxes[:, 0] = xmin + return img, boxes + + +class COCODataset: + CLASSES_NAME = ( + '__back_ground__', 'person', 'bicycle', 'car', 'motorcycle', + 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', + 'fire hydrant', 'stop sign', 'parking meter', 'bench', + 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', + 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', + 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', + 'sports ball', 'kite', 'baseball bat', 'baseball glove', + 'skateboard', 'surfboard', 'tennis racket', 'bottle', + 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', + 'banana', 'apple', 'sandwich', 'orange', 'broccoli', + 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', + 'couch', 'potted plant', 'bed', 'dining table', 'toilet', + 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', + 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', + 'book', 'clock', 'vase', 'scissors', 'teddy bear', + 'hair drier', 'toothbrush') + + def __init__(self, dataset_dir, annotation_file, resize_size, is_train=True, transform=None): + if resize_size is None: + resize_size = [800, 1333] + self.coco = COCO(annotation_file) + self.root = dataset_dir + ids = list(sorted(self.coco.imgs.keys())) + new_ids = [] + for i in ids: + ann_id = self.coco.getAnnIds(imgIds=i, iscrowd=None) + ann = self.coco.loadAnns(ann_id) + if self._has_valid_annotation(ann): + new_ids.append(i) + self.ids = new_ids + self.category2id = {v: i + 1 for i, v in enumerate(self.coco.getCatIds())} + self.id2category = {v: k for k, v in 
self.category2id.items()} + self.transform = transform + self.resize_size = resize_size + self.train = is_train + + def getImg(self, index): + img_id = self.ids[index] + coco = self.coco + ann_ids = coco.getAnnIds(imgIds=img_id) + target = coco.loadAnns(ann_ids) + path = coco.loadImgs(img_id)[0]['file_name'] + img = Image.open(os.path.join(self.root, path)).convert('RGB') + return img, target + + def __getitem__(self, index): + img, ann = self.getImg(index) + ann = [o for o in ann if o['iscrowd'] == 0] + boxes = [o['bbox'] for o in ann] + boxes = np.array(boxes, dtype=np.float32) + boxes[..., 2:] = boxes[..., 2:] + boxes[..., :2] + if self.train: + if random.random() < 0.5: + img, boxes = flip(img, boxes) + if self.transform is not None: + img, boxes = self.transform(img, boxes) + img = np.array(img) + img, boxes = self.preprocess_img_boxes(img, boxes, self.resize_size) + classes = [o['category_id'] for o in ann] + classes = [self.category2id[c] for c in classes] + to_tensor = py_vision.ToTensor() + img = to_tensor(img) + max_h = 1344 + max_w = 1344 + max_num = 90 + img = np.pad(img, ((0, 0), (0, max(int(max_h - img.shape[1]), 0)), (0, max(int(max_w - img.shape[2]), 0)))) + normalize_op = c_vision.Normalize(mean=[0.40789654, 0.44719302, 0.47026115], \ + std=[0.28863828, 0.27408164, 0.27809835]) + img = img.transpose(1, 2, 0) # chw to hwc + img = normalize_op(img) + img = img.transpose(2, 0, 1) #hwc to chw + boxes = np.pad(boxes, ((0, max(max_num-boxes.shape[0], 0)), (0, 0)), 'constant', constant_values=-1) + classes = np.pad(classes, (0, max(max_num - len(classes), 0)), 'constant', constant_values=-1).astype('int32') + + return img, boxes, classes + + def __len__(self): + return len(self.ids) + + def preprocess_img_boxes(self, image, boxes, input_ksize): + ''' + resize image and bboxes + Returns + image_paded: input_ksize + bboxes: [None,4] + ''' + min_side, max_side = input_ksize + h, w, _ = image.shape + smallest_side = min(w, h) + largest_side = max(w, h) + scale = min_side / smallest_side + if largest_side * scale > max_side: + scale = max_side / largest_side + nw, nh = int(scale * w), int(scale * h) + image_resized = cv2.resize(image, (nw, nh)) + pad_w = 32 - nw % 32 + pad_h = 32 - nh % 32 + image_paded = np.zeros(shape=[nh + pad_h, nw + pad_w, 3], dtype=np.uint8) + image_paded[:nh, :nw, :] = image_resized + if boxes is not None: + boxes[:, [0, 2]] = boxes[:, [0, 2]] * scale + boxes[:, [1, 3]] = boxes[:, [1, 3]] * scale + return image_paded, boxes + + def _has_only_empty_bbox(self, annot): + return all(any(o <= 1 for o in obj['bbox'][2:]) for obj in annot) + + def _has_valid_annotation(self, annot): + if annot is None: + return False + + if self._has_only_empty_bbox(annot): + return False + + return True +def create_coco_dataset(dataset_dir, annotation_file, batch_size, shuffle=True, \ + transform=None, num_parallel_workers=8, num_shards=None, shard_id=None): + cv2.setNumThreads(0) + dataset = COCODataset(dataset_dir, annotation_file, is_train=True, transform=transform) + dataset_column_names = ["img", "boxes", "class"] + ds = de.GeneratorDataset(dataset, column_names=dataset_column_names, \ + shuffle=shuffle, num_parallel_workers=min(8, num_parallel_workers), num_shards=num_shards, shard_id=shard_id) + ds = ds.batch(batch_size, num_parallel_workers=min(8, num_parallel_workers), drop_remainder=True) + return ds, len(dataset) diff --git a/research/cv/FCOS/src/augment.py b/research/cv/FCOS/src/augment.py new file mode 100644 index 
0000000000000000000000000000000000000000..a884bfd733e0a6fbe0a974f329a83dafa61646b6 --- /dev/null +++ b/research/cv/FCOS/src/augment.py @@ -0,0 +1,75 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""data_augment""" +import math +import random +import sys +import numpy as np +import mindspore.dataset.vision.py_transforms as transforms + +class Transforms(): + def __init__(self): + pass + def __call__(self, img, boxes): + if random.random() < 0.3: + img, boxes = colorJitter(img, boxes) + if random.random() < 0.5: + img, boxes = random_rotation(img, boxes) + return img, np.array(boxes) + +def colorJitter(img, boxes, brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1): + img = transforms.RandomColorAdjust(brightness=brightness, contrast=contrast, saturation=saturation, hue=hue)(img) + return img, boxes + +def random_rotation(img, boxes, degree=10): + d = random.uniform(-degree, degree) + w, h = img.size + rx0, ry0 = w / 2.0, h / 2.0 + img = img.rotate(d) + a = -d / 180.0 * math.pi + new_boxes = np.zeros_like(boxes) + new_boxes[:, 0] = boxes[:, 1] + new_boxes[:, 1] = boxes[:, 0] + new_boxes[:, 2] = boxes[:, 3] + new_boxes[:, 3] = boxes[:, 2] + for i in range(boxes.shape[0]): + ymin, xmin, ymax, xmax = new_boxes[i, :] + xmin, ymin, xmax, ymax = float(xmin), float(ymin), float(xmax), float(ymax) + x0, y0 = xmin, ymin + x1, y1 = xmin, ymax + x2, y2 = xmax, ymin + x3, y3 = xmax, ymax + z = np.array([[y0, x0], [y1, x1], [y2, x2], [y3, x3]], dtype=np.float32) + tp = np.zeros_like(z) + tp[:, 1] = (z[:, 1] - rx0) * math.cos(a) - (z[:, 0] - ry0) * math.sin(a) + rx0 + tp[:, 0] = (z[:, 1] - rx0) * math.sin(a) + (z[:, 0] - ry0) * math.cos(a) + ry0 + ymax, xmax = np.max(tp, axis=0) + ymin, xmin = np.min(tp, axis=0) + new_boxes[i] = np.stack([ymin, xmin, ymax, xmax]) + new_boxes[:, 1::2] = np.clip(new_boxes[:, 1::2], 0, w - 1) + new_boxes[:, 0::2] = np.clip(new_boxes[:, 0::2], 0, h - 1) + boxes[:, 0] = new_boxes[:, 1] + boxes[:, 1] = new_boxes[:, 0] + boxes[:, 2] = new_boxes[:, 3] + boxes[:, 3] = new_boxes[:, 2] + return img, boxes + +def _box_inter(box1, box2): + tl = np.maximum(box1[:, None, :2], box2[:, :2]) # [n,m,2] + br = np.minimum(box1[:, None, 2:], box2[:, 2:]) # [n,m,2] + inter_tensor = np.array((br-tl), dtype=np.float32) + hw = np.clip(inter_tensor, 0, sys.maxsize) # [n,m,2] + inter = hw[:, :, 0] * hw[:, :, 1] # [n,m] + return inter diff --git a/research/cv/FCOS/src/config.py b/research/cv/FCOS/src/config.py new file mode 100644 index 0000000000000000000000000000000000000000..4575444b0e5ffcab67aeeb62537ba61e603bf226 --- /dev/null +++ b/research/cv/FCOS/src/config.py @@ -0,0 +1,37 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Config""" +class DefaultConfig(): + #backbone + pretrained = True + freeze_stage_1 = True + freeze_bn = True + #fpn + fpn_out_channels = 256 + use_p5 = True + #head + class_num = 80 + use_GN_head = True + prior = 0.01 + add_centerness = True + cnt_on_reg = True + #training + strides = [8, 16, 32, 64, 128] + limit_range = [[-1, 64], [64, 128], [128, 256], [256, 512], [512, 999999]] + save_checkpoint = True + #inference + score_threshold = 0.05 + nms_iou_threshold = 0.6 + max_detection_boxes_num = 1000 diff --git a/research/cv/FCOS/src/eval_utils.py b/research/cv/FCOS/src/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..cf963009b22fa96f953c45184d8d84f6f7df44e3 --- /dev/null +++ b/research/cv/FCOS/src/eval_utils.py @@ -0,0 +1,116 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""eval_utils""" +import numpy as np +import mindspore +import mindspore.numpy as mnp +from mindspore import ops, Tensor + +def post_process(preds_topk, score_threshold, nms_iou_threshold): + ''' + cls_scores_topk [batch_size,max_num] + cls_classes_topk [batch_size,max_num] + boxes_topk [batch_size,max_num,4] + ''' + _cls_scores_post = [] + _cls_classes_post = [] + _boxes_post = [] + cls_scores_topk, cls_classes_topk, boxes_topk = preds_topk + cls_scores_topk = mindspore.numpy.squeeze(cls_scores_topk, axis=0) + cls_classes_topk = mindspore.numpy.squeeze(cls_classes_topk, axis=0) + boxes_topk = mindspore.numpy.squeeze(boxes_topk, axis=0) + for batch in range(cls_classes_topk.shape[0]): + mask = cls_scores_topk[batch] >= score_threshold + mul = ops.Mul() + _cls_scores_b = mul(cls_scores_topk[batch], mask) + _cls_scores_b = np.squeeze(_cls_scores_b) + _cls_classes_b = mul(cls_classes_topk[batch], mask) + _cls_classes_b = np.squeeze(_cls_classes_b) + expand_dims = ops.ExpandDims() + mask = expand_dims(mask, -1) + op = ops.Concat(-1) + mask = op((mask, mask, mask, mask)) + _boxes_b = mul(boxes_topk[batch], mask) + nms_ind = batched_nms(_boxes_b, _cls_scores_b, _cls_classes_b, nms_iou_threshold) + _cls_scores_post.append(_cls_scores_b[nms_ind]) + _cls_classes_post.append(_cls_classes_b[nms_ind]) + _boxes_post.append(_boxes_b[nms_ind]) + stack = ops.Stack(axis=0) + scores, classes, boxes = stack(_cls_scores_post), stack(_cls_classes_post), stack(_boxes_post) + return scores, classes, boxes + +def batched_nms(boxes, scores, idxs, iou_threshold): + if ops.Size()(boxes) == 0: + return mnp.empty((0,)) + argmax = ops.ArgMaxWithValue() + reshape = ops.Reshape() + squeeze = ops.Squeeze() + boxes2 = reshape(boxes, (-1, 1)) + _, max_coordinate = argmax(boxes2) + max_coordinate = squeeze(max_coordinate) + offsets = idxs * (max_coordinate + 1) + boxes_for_nms = boxes + offsets[:, None] + keep = box_nms(boxes_for_nms, scores, iou_threshold) + return keep + +def box_nms(boxes, scores, thr): + ''' + boxes: [?,4] + scores: [?] 
+ ''' + if boxes.shape[0] == 0: + return ops.Zeros(0, mindspore.float32) + boxes = boxes.asnumpy() + x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3] + areas = (x2 - x1 + 1) * (y2 - y1 + 1) + sort = ops.Sort(0, descending=True) + + _, order = sort(scores) + order = order.asnumpy() + keep = [] + while order.size > 0: + i = order[0] + keep.append(i) + xx1 = np.maximum(x1[i], x1[order[1:]]) + yy1 = np.maximum(y1[i], y1[order[1:]]) + xx2 = np.minimum(x2[i], x2[order[1:]]) + yy2 = np.minimum(y2[i], y2[order[1:]]) + + w = np.maximum(0.0, xx2 - xx1 + 1) + h = np.maximum(0.0, yy2 - yy1 + 1) + inter = w * h + ovr = inter / (areas[i] + areas[order[1:]] - inter) + center_x1 = (x1[i] + x2[i]) / 2 + center_x2 = (x1[order[1:]] + x2[order[1:]]) / 2 + center_y1 = (y1[i] + y2[i]) / 2 + center_y2 = (y1[order[1:]] + y2[order[1:]]) / 2 + inter_diag = (center_x2 - center_x1) ** 2 + (center_y2 - center_y1) ** 2 + out_max_x = np.maximum(x2[i], x2[order[1:]]) + out_max_y = np.maximum(y2[i], y2[order[1:]]) + out_min_x = np.minimum(x1[i], x1[order[1:]]) + out_min_y = np.minimum(y1[i], y1[order[1:]]) + outer_diag = (out_max_x - out_min_x) ** 2 + (out_max_y - out_min_y) ** 2 + diou = ovr - inter_diag / outer_diag + diou = np.clip(diou, -1, 1) + inds = np.where(diou <= 0.6)[0] + order = order[inds + 1] + return Tensor(keep, mindspore.int32) + +def ClipBoxes(batch_imgs, batch_boxes): + batch_boxes = ops.clip_by_value(batch_boxes, Tensor(0, mindspore.float32), Tensor(9999999, mindspore.float32)) + h, w = batch_imgs.shape[2:] + batch_boxes[..., [0, 2]] = ops.clip_by_value(batch_boxes[..., [0, 2]], Tensor(0, mindspore.float32), w - 1) + batch_boxes[..., [1, 3]] = ops.clip_by_value(batch_boxes[..., [1, 3]], Tensor(0, mindspore.float32), h - 1) + return batch_boxes diff --git a/research/cv/FCOS/src/fcos.py b/research/cv/FCOS/src/fcos.py new file mode 100644 index 0000000000000000000000000000000000000000..b1da877f62a5563cd52196b6f1a5732d929f0cf2 --- /dev/null +++ b/research/cv/FCOS/src/fcos.py @@ -0,0 +1,214 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""network""" +import mindspore +import mindspore.nn as nn +import mindspore.ops as ops +from src.head import ClsCntRegHead +from src.fpn_neck import FPN +from src.resnet import resnet50 +from src.config import DefaultConfig +from src.network_define import coords_fmap2orig + + + +class FCOS(nn.Cell): + def __init__(self, config=None, preckpt_path=None): + super().__init__() + if config is None: + config = DefaultConfig + self.backbone = resnet50(pretrained=config.pretrained, preckpt_path=preckpt_path) + self.fpn = FPN(config.fpn_out_channels, use_p5=config.use_p5) + self.head = ClsCntRegHead(config.fpn_out_channels, config.class_num, + config.use_GN_head, config.cnt_on_reg, config.prior) + self.config = config + + self.freeze() + + def train(self, mode=True): + """ + set module training mode, and frozen bn + """ + super().train(mode=True) + + def freeze_bn(module): + if isinstance(module, nn.BatchNorm2d): + module.eval() + classname = module.__class__.__name__ + if classname.find('BatchNorm') != -1: + for p in module.parameters(): + p.requires_grad = False + if self.config.freeze_bn: + self.apply(freeze_bn) + if self.config.freeze_stage_1: + self.backbone.freeze_stages(1) + + def flatten(self, nested): + try: + try: + nested + '' + except TypeError: + pass + else: + raise TypeError + + for sublist in nested: + for element in self.flatten(sublist): + yield element + except TypeError: + yield nested + + def freeze(self): + + for i in self.trainable_params(): + if i.name.find('bn') != -1 or i.name.find('down_sample_layer.1') != -1: + i.requires_grad = False + + self.backbone.freeze_stages(1) + + + + def construct(self, x): + """ + Returns + list [cls_logits,cnt_logits,reg_preds] + cls_logits list contains five [batch_size,class_num,h,w] + cnt_logits list contains five [batch_size,1,h,w] + reg_preds list contains five [batch_size,4,h,w] + """ + C3, C4, C5 = self.backbone(x) + all_P = self.fpn((C3, C4, C5)) + cls_logits, cnt_logits, reg_preds = self.head((all_P)) + return (cls_logits, cnt_logits, reg_preds) + + +class DetectHead(nn.Cell): + def __init__(self, score_threshold, nms_iou_threshold, max_detection_boxes_num, strides, config=None): + super().__init__() + self.score_threshold = score_threshold + self.nms_iou_threshold = nms_iou_threshold + self.max_detection_boxes_num = max_detection_boxes_num + self.strides = strides + if config is None: + self.config = DefaultConfig + else: + self.config = config + + def construct(self, inputs): + ''' + inputs list [cls_logits,cnt_logits,reg_preds] + cls_logits list contains five [batch_size,class_num,h,w] + cnt_logits list contains five [batch_size,1,h,w] + reg_preds list contains five [batch_size,4,h,w] + ''' + + cast = ops.Cast() + cls_logits, coords = self._reshape_cat_out(inputs[0], self.strides) # [batch_size,sum(_h*_w),class_num] + cnt_logits, _ = self._reshape_cat_out(inputs[1], self.strides) # [batch_size,sum(_h*_w),1] + reg_preds, _ = self._reshape_cat_out(inputs[2], self.strides) # [batch_size,sum(_h*_w),4]\ + + sigmoid = ops.Sigmoid() + + cls_preds = sigmoid(cls_logits) + cnt_preds = sigmoid(cnt_logits) + + cls_classes, cls_scores = ops.ArgMaxWithValue(axis=-1)(cls_preds) # [batch_size,sum(_h*_w)] + + cnt_preds = ops.Squeeze(axis=-1)(cnt_preds) + cls_scores = ops.Sqrt()(cls_scores * cnt_preds) + cls_classes = cls_classes + 1 # [batch_size,sum(_h*_w)] + + boxes = self._coords2boxes(coords, reg_preds) # [batch_size,sum(_h*_w),4] + if 
self.max_detection_boxes_num > cls_scores.shape[-1]: + max_num = cls_scores.shape[-1] + else: + max_num = self.max_detection_boxes_num + topk = ops.TopK(sorted=True) + topk_ind = topk(cls_scores, max_num)[1] # [batch_size,max_num] + + _cls_scores = () + _cls_classes = () + _boxes = () + stack = mindspore.ops.Stack(axis=0) + for batch in range(cls_scores.shape[0]): + topk_index = cast(topk_ind, mindspore.int32) + _cls_scores = _cls_scores + (cls_scores[batch][topk_index],) # [max_num] + _cls_classes = _cls_classes + (cls_classes[batch][topk_index],) # [max_num] + _boxes = _boxes + (boxes[batch][topk_index],) # [max_num,4] + cls_scores_topk = stack(_cls_scores)#[batch_size,max_num] + cls_classes_topk = stack(_cls_classes)#[batch_size,max_num] + boxes_topk = stack(_boxes)#[batch_size,max_num,4] + return cls_scores_topk, cls_classes_topk, boxes_topk + + + def _coords2boxes(self, coords, offsets): + ''' + Args + coords [sum(_h*_w),2] + offsets [batch_size,sum(_h*_w),4] ltrb + ''' + x1y1 = coords[None, :, :] - offsets[..., :2] + x2y2 = coords[None, :, :] + offsets[..., 2:] # [batch_size,sum(_h*_w),2] + concat = ops.Concat(axis=-1) + boxes = concat((x1y1, x2y2)) # [batch_size,sum(_h*_w),4] + return boxes + + def _reshape_cat_out(self, inputs, strides): + ''' + Args + inputs: list contains five [batch_size,c,_h,_w] + Returns + out [batch_size,sum(_h*_w),c] + coords [sum(_h*_w),2] + ''' + batch_size = inputs[0].shape[0] + c = inputs[0].shape[1] + out = () + coords = () + reshape = ops.Reshape() + transpose = ops.Transpose() + for pred, stride in zip(inputs, strides): + input_perm = (0, 2, 3, 1) + pred = transpose(pred, input_perm) + coord = coords_fmap2orig(pred, stride) + pred = reshape(pred, (batch_size, -1, c)) + out = out + (pred,) + coords = coords + (coord,) + return ops.Concat(axis=1)(out), ops.Concat(axis=0)(coords) + + +class FCOSDetector(nn.Cell): + def __init__(self, mode, config=None, preckpt_path=None): + super().__init__() + config = DefaultConfig + self.mode = mode + self.fcos_body = FCOS(config=config, preckpt_path=preckpt_path) + if mode == "training": + pass + elif mode == "inference": + self.detection_head = DetectHead(config.score_threshold, config.nms_iou_threshold, \ + config.max_detection_boxes_num, config.strides, config) + + def construct(self, input_imgs): + ''' + inputs + [training] list batch_imgs,batch_boxes,batch_classes + [inference] img + ''' + out = self.fcos_body(input_imgs) + if self.mode != "training": + scores, classes, boxes = self.detection_head(out) + out = (scores, classes, boxes) + return out diff --git a/research/cv/FCOS/src/fpn_neck.py b/research/cv/FCOS/src/fpn_neck.py new file mode 100644 index 0000000000000000000000000000000000000000..688d3c6eab07cfbb6885d64a0db8c82e9e8f629d --- /dev/null +++ b/research/cv/FCOS/src/fpn_neck.py @@ -0,0 +1,88 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""FPN""" +import mindspore.nn as nn +from mindspore import ops +from mindspore.ops import ResizeNearestNeighbor +import mindspore +from mindspore.common.initializer import initializer, HeUniform +from mindspore.common import dtype as mstype +from mindspore.common.tensor import Tensor +import numpy as np + + +def bias_init_zeros(shape): + """Bias init method.""" + return Tensor(np.array(np.zeros(shape).astype(np.float32))) + + +def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='pad'): + """Conv2D wrapper.""" + shape = (out_channels, in_channels, kernel_size, kernel_size) + weights = initializer(HeUniform(negative_slope=1), shape=shape, dtype=mstype.float32).to_tensor() + shape_bias = (out_channels,) + biass = bias_init_zeros(shape_bias) + return nn.Conv2d(in_channels, out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, + pad_mode=pad_mode, weight_init=weights, has_bias=True, bias_init=biass) + +class FPN(nn.Cell): + '''only for resnet50,101,152''' + + def __init__(self, features=256, use_p5=True): + super(FPN, self).__init__() + + self.prj_5 = _conv(2048, features, kernel_size=1, stride=1, pad_mode='valid') + self.prj_4 = _conv(1024, features, kernel_size=1, stride=1, pad_mode='valid') + self.prj_3 = _conv(512, features, kernel_size=1, pad_mode='valid') + self.conv_5 = _conv(features, features, kernel_size=3, pad_mode='pad', padding=1) + self.conv_4 = _conv(features, features, kernel_size=3, pad_mode='pad', padding=1) + self.conv_3 = _conv(features, features, kernel_size=3, pad_mode='pad', padding=1) + if use_p5: + self.conv_out6 = _conv(features, features, kernel_size=3, pad_mode='pad', padding=1, stride=2) + else: + self.conv_out6 = _conv(2048, features, kernel_size=3, pad_mode='pad', padding=1, stride=2) + self.conv_out7 = _conv(features, features, kernel_size=3, pad_mode='pad', padding=1, stride=2) + self.use_p5 = use_p5 + constant_init = mindspore.common.initializer.Constant(0) + constant_init(self.prj_5.bias) + constant_init(self.prj_4.bias) + constant_init(self.prj_3.bias) + constant_init(self.conv_5.bias) + constant_init(self.conv_4.bias) + constant_init(self.conv_3.bias) + constant_init(self.conv_out6.bias) + constant_init(self.conv_out7.bias) + + def upsamplelike(self, inputs): + src, target = inputs + resize = ResizeNearestNeighbor((target.shape[2], target.shape[3])) + return resize(src) + + def construct(self, x): + C3, C4, C5 = x + P5 = self.prj_5(C5) + P4 = self.prj_4(C4) + P3 = self.prj_3(C3) + P4 = P4 + self.upsamplelike((P5, C4)) + P3 = P3 + self.upsamplelike((P4, C3)) + P3 = self.conv_3(P3) + P4 = self.conv_4(P4) + P5 = self.conv_5(P5) + P5 = P5 if self.use_p5 else C5 + P6 = self.conv_out6(P5) + relu = ops.ReLU() + P7 = self.conv_out7(relu(P6)) + return (P3, P4, P5, P6, P7) diff --git a/research/cv/FCOS/src/head.py b/research/cv/FCOS/src/head.py new file mode 100644 index 0000000000000000000000000000000000000000..93cceb0eb93dc98e61eeb40d907c21fdd5fc4b3f --- /dev/null +++ b/research/cv/FCOS/src/head.py @@ -0,0 +1,94 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Head""" +import math +import mindspore +import mindspore.nn as nn +import mindspore.ops as ops +from mindspore.common.tensor import Tensor +from mindspore.common import dtype as mstype +from mindspore.common.initializer import initializer +import numpy as np + +class ScaleExp(nn.Cell): + def __init__(self, init_value=1.0): + super(ScaleExp, self).__init__() + self.scale = mindspore.Parameter(mindspore.Tensor([init_value], dtype=mindspore.float32)) + + def construct(self, x): + return ops.Exp()(x * self.scale) + + +def bias_init_zeros(shape): + """Bias init method.""" + return Tensor(np.array(np.zeros(shape).astype(np.float32))) + + +def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='pad', has_bias=True): + """Conv2D wrapper.""" + shape = (out_channels, in_channels, kernel_size, kernel_size) + weights = initializer("normal", shape=shape, dtype=mstype.float32).init_data() + shape_bias = (out_channels,) + biass = bias_init_zeros(shape_bias) + return nn.Conv2d(in_channels, out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, + pad_mode=pad_mode, weight_init=weights, has_bias=has_bias, bias_init=biass) + + +class ClsCntRegHead(nn.Cell): + def __init__(self, in_channel, class_num, GN=True, cnt_on_reg=True, prior=0.01): + super(ClsCntRegHead, self).__init__() + self.prior = prior + self.class_num = class_num + self.cnt_on_reg = cnt_on_reg + cls_branch = [] + reg_branch = [] + i = 4 + while i: + i -= 1 + cls_branch.append( + _conv(in_channel, in_channel, stride=1, pad_mode='pad', kernel_size=3, padding=1, has_bias=True)) + if GN: + cls_branch.append(nn.GroupNorm(32, in_channel)) + cls_branch.append(nn.ReLU()) + reg_branch.append( + _conv(in_channel, in_channel, stride=1, pad_mode='pad', kernel_size=3, padding=1, has_bias=True)) + if GN: + reg_branch.append(nn.GroupNorm(32, in_channel)) + reg_branch.append(nn.ReLU()) + self.cls_conv = nn.SequentialCell(cls_branch) + self.reg_conv = nn.SequentialCell(reg_branch) + self.cls_logits = _conv(in_channel, class_num, pad_mode='pad', kernel_size=3, padding=1, has_bias=True) + self.cnt_logits = _conv(in_channel, 1, pad_mode='pad', kernel_size=3, padding=1, has_bias=True) + self.reg_pred = _conv(in_channel, 4, pad_mode='pad', kernel_size=3, padding=1, has_bias=True) + constant_init = mindspore.common.initializer.Constant(-math.log((1 - prior) / prior)) + constant_init(self.cls_logits.bias) + self.scale_exp = nn.CellList([ScaleExp(1.0) for _ in range(5)]) + + def construct(self, inputs): + '''inputs:[P3~P7]''' + cls_logits = () + cnt_logits = () + reg_preds = () + for index, P in enumerate(inputs): + cls_conv_out = self.cls_conv(P) + reg_conv_out = self.reg_conv(P) + cls_logits = cls_logits + (self.cls_logits(cls_conv_out),) + if not self.cnt_on_reg: + cnt_logits = cnt_logits + (self.cnt_logits(cls_conv_out),) + else: + cnt_logits = cnt_logits + (self.cnt_logits(reg_conv_out),) + reg_preds = reg_preds + (self.scale_exp[index](self.reg_pred(reg_conv_out)),) + return cls_logits, cnt_logits, reg_preds diff --git 
a/research/cv/FCOS/src/network_define.py b/research/cv/FCOS/src/network_define.py new file mode 100644 index 0000000000000000000000000000000000000000..a54b5155bc66705b9dee293558c870f3cbf951ca --- /dev/null +++ b/research/cv/FCOS/src/network_define.py @@ -0,0 +1,460 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""network_define""" +import sys +import mindspore.nn as nn +import mindspore.common.dtype as mstype +import mindspore.numpy as mnp +from mindspore.ops import composite as C +from mindspore.ops import functional as F +from mindspore.nn.wrap.grad_reducer import DistributedGradReducer +import mindspore +import mindspore.ops as ops +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore.parallel._utils import (_get_device_num, _get_gradients_mean, _get_parallel_mode) +from mindspore.context import ParallelMode + +from src.config import DefaultConfig + +def coords_fmap2orig(feature, stride): + """ + transform one feature map coords to orig coords + Args + feature [batch_size,h,w,c] + stride int + Returns + coords [n,2] + """ + h, w = feature.shape[1:3] + shifts_x = mnp.arange(start=0, stop=w * stride, step=stride) + shifts_y = mnp.arange(start=0, stop=h * stride, step=stride) + shift_x, shift_y = mnp.meshgrid(shifts_x, shifts_y) + shift_x = mnp.reshape(shift_x, -1) + shift_y = mnp.reshape(shift_y, -1) + coords = mnp.stack((shift_x, shift_y), -1) + stride // 2 + return ops.Cast()(coords, mstype.float32) + + +class GenTargets(nn.Cell): + def __init__(self, strides, limit_range): + super().__init__() + self.strides = strides + self.limit_range = limit_range + def getTargets(self, inputs): + """ + inputs + [0]tuple (cls_logits,cnt_logits,reg_preds) + cls_logits list contains five [batch_size,class_num,h,w] + cnt_logits list contains five [batch_size,1,h,w] + reg_preds list contains five [batch_size,4,h,w] + [1]gt_boxes [batch_size,m,4] FloatTensor + [2]classes [batch_size,m] LongTensor + Returns + cls_targets:[batch_size,sum(_h*_w),1] + cnt_targets:[batch_size,sum(_h*_w),1] + reg_targets:[batch_size,sum(_h*_w),4] + """ + + cls_logits, cnt_logits, reg_preds = inputs[0] + gt_boxes = inputs[1] + classes = inputs[2] + cls_targets_all_level = () + cnt_targets_all_level = () + reg_targets_all_level = () + for level in range(len(cls_logits)): + level_out = (cls_logits[level], cnt_logits[level], reg_preds[level]) + level_targets = self._gen_level_targets(level_out, gt_boxes, classes, self.strides[level], + self.limit_range[level]) + cls_targets_all_level = cls_targets_all_level + (level_targets[0],) + cnt_targets_all_level = cnt_targets_all_level + (level_targets[1],) + reg_targets_all_level = reg_targets_all_level + (level_targets[2],) + + return ops.Concat(axis=1)(cls_targets_all_level), ops.Concat(axis=1)(cnt_targets_all_level), ops.Concat(axis=1)( + reg_targets_all_level) + + def _gen_level_targets(self, out, gt_boxes, classes, stride, 
limit_range, sample_radiu_ratio=1.5): + ''' + Args + out list contains [[batch_size,class_num,h,w],[batch_size,1,h,w],[batch_size,4,h,w]] + gt_boxes [batch_size,m,4] + classes [batch_size,m] + stride int + limit_range list [min,max] + Returns + cls_targets,cnt_targets,reg_targets + ''' + cls_logits, cnt_logits, reg_preds = out + batch_size = cls_logits.shape[0] + class_num = cls_logits.shape[1] + m = gt_boxes.shape[1] + transpose = ops.Transpose() + reshape = ops.Reshape() + cls_logits = transpose(cls_logits, (0, 2, 3, 1)) # [batch_size,h,w,class_num] + coords = coords_fmap2orig(cls_logits, stride) # [h*w,2] + cls_logits = reshape(cls_logits, (batch_size, -1, class_num)) # [batch_size,h*w,class_num] + cnt_logits = transpose(cnt_logits, (0, 2, 3, 1)) + cnt_logits = reshape(cnt_logits, (batch_size, -1, 1)) + reg_preds = transpose(reg_preds, (0, 2, 3, 1)) + reg_preds = reshape(reg_preds, (batch_size, -1, 4)) + x = coords[:, 0] + y = coords[:, 1] + l_off = x[None, :, None] - gt_boxes[..., 0][:, None, :] # [1,h*w,1]-[batch_size,1,m]-->[batch_size,h*w,m] + t_off = y[None, :, None] - gt_boxes[..., 1][:, None, :] + r_off = gt_boxes[..., 2][:, None, :] - x[None, :, None] + b_off = gt_boxes[..., 3][:, None, :] - y[None, :, None] + ltrb_off = ops.Stack(axis=-1)((l_off, t_off, r_off, b_off)) # [batch_size,h*w,m,4] + areas = (ltrb_off[..., 0] + ltrb_off[..., 2]) * (ltrb_off[..., 1] + ltrb_off[..., 3]) # [batch_size,h*w,m] + off_min = mnp.amin(ltrb_off, axis=-1) + off_max = mnp.amax(ltrb_off, axis=-1) + mask_in_gtboxes = off_min > 0 + tempmin = off_max > limit_range[0] + tempmax = off_max <= limit_range[1] + tempmin = ops.Cast()(tempmin, mindspore.int32) + tempmax = ops.Cast()(tempmax, mindspore.int32) + tempMask_in_level = ops.Mul()(tempmin, tempmax) + mask_in_level = ops.Cast()(tempMask_in_level, mindspore.bool_) + + radiu = stride * sample_radiu_ratio + gt_center_x = (gt_boxes[..., 0] + gt_boxes[..., 2]) / 2 + gt_center_y = (gt_boxes[..., 1] + gt_boxes[..., 3]) / 2 + c_l_off = x[None, :, None] - gt_center_x[:, None, :] # [1,h*w,1]-[batch_size,1,m]-->[batch_size,h*w,m] + c_t_off = y[None, :, None] - gt_center_y[:, None, :] + c_r_off = gt_center_x[:, None, :] - x[None, :, None] + c_b_off = gt_center_y[:, None, :] - y[None, :, None] + c_ltrb_off = ops.Stack(axis=-1)((c_l_off, c_t_off, c_r_off, c_b_off)) # [batch_size,h*w,m,4] + c_off_max = mnp.amax(c_ltrb_off, axis=-1) + mask_center = c_off_max < radiu + tempingtboxes = ops.Cast()(mask_in_gtboxes, mindspore.int32) + tempmaskinlevel = ops.Cast()(mask_in_level, mindspore.int32) + tempmaskcenter = ops.Cast()(mask_center, mindspore.int32) + mask_pos = ops.Mul()(ops.Mul()(tempingtboxes, tempmaskinlevel), tempmaskcenter) + mask_pos = ops.Cast()(mask_pos, mstype.bool_) + areas[~mask_pos] = 99999999 + tempareas = areas.reshape(-1, areas.shape[-1]) + areas_min_ind = P.ArgMinWithValue(-1)(tempareas) + x = mnp.arange(0, areas_min_ind[0].shape[0]).astype(mindspore.int32) + indices = P.Concat(-1)((P.ExpandDims()(x, -1), P.ExpandDims()(areas_min_ind[0], -1))) + reg_targets = P.GatherNd()(ltrb_off.reshape(-1, m, 4), indices) + reg_targets = ops.Reshape()(reg_targets, (batch_size, -1, 4)) + classes = mnp.broadcast_to(classes[:, None, :], areas.shape) + cls_targets = P.GatherNd()(classes.reshape(-1, m), indices) + cls_targets = ops.Reshape()(cls_targets, (batch_size, -1, 1)) + # [batch_size,h*w] + left_right_min = ops.Minimum()(reg_targets[..., 0], reg_targets[..., 2]) + left_right_max = ops.Maximum()(reg_targets[..., 0], reg_targets[..., 2]) + top_bottom_min = 
ops.Minimum()(reg_targets[..., 1], reg_targets[..., 3]) + top_bottom_max = ops.Maximum()(reg_targets[..., 1], reg_targets[..., 3]) + # [batch_size,h*w,1] + cnt_targets = ops.Sqrt()((left_right_min * top_bottom_min + 1e-8) / (left_right_max * top_bottom_max + 1e-8)) + cnt_targets = ops.ExpandDims()(cnt_targets, -1) + mask_pos_2 = ops.Cast()(mask_pos, mstype.float16) + mask_pos_2 = ops.ReduceSum()(mask_pos_2, -1) + mask_pos_2 = mask_pos_2 >= 1 + expand_dims = ops.ExpandDims() + mask_pos_2 = expand_dims(mask_pos_2, 2) + cls_targets[~mask_pos_2] = 0 # [batch_size,h*w,1] + cnt_targets[~mask_pos_2] = -1 + stack = ops.Stack(axis=2) + tempmask = () + i = 4 + while i: + i -= 1 + tempmask += (mask_pos_2,) + mask_pos_2 = stack(tempmask) + squeeze = ops.Squeeze(3) + mask_pos_2 = squeeze(mask_pos_2) + reg_targets[~mask_pos_2] = -1 + return cls_targets, cnt_targets, reg_targets + +def compute_cls_loss(preds, targets, mask, MIN, MAX): + ''' + Args + preds: list contains five level pred [batch_size,class_num,_h,_w] + targets: [batch_size,sum(_h*_w),1] + mask: [batch_size,sum(_h*_w)] + ''' + batch_size = targets.shape[0] + preds_reshape = () + class_num = preds[0].shape[1] + mask = ops.ExpandDims()(mask, -1) + # [batch_size,] + mask = ops.Cast()(mask, mstype.float32) + num_pos = ops.ReduceSum()(mask, (1, 2)) + ones = ops.Ones() + candidate = ones(num_pos.shape, mindspore.float32) + num_pos = mnp.where(num_pos == 0, candidate, num_pos) + num_pos = ops.Cast()(num_pos, mstype.float32) + for pred in preds: + pred = ops.Transpose()(pred, (0, 2, 3, 1)) + pred = ops.Reshape()(pred, (batch_size, -1, class_num)) + preds_reshape = preds_reshape + (pred,) + preds = ops.Concat(axis=1)(preds_reshape) + loss = () + for batch_index in range(batch_size): + pred_pos = preds[batch_index] # [sum(_h*_w),class_num] + target_pos = targets[batch_index] # [sum(_h*_w),1] + ar = mnp.arange(1, class_num + 1).astype(mstype.float32) + ar = ar[None, :] + target_pos = (ar == target_pos) + # sparse-->onehot + target_pos = ops.Cast()(target_pos, mstype.float32) + fl_result = focal_loss_from_logits(pred_pos, target_pos) + fl_result = ops.Reshape()(fl_result, (1,)) + loss = loss + (fl_result,) + # [batch_size,] + return ops.Concat()(loss) / num_pos + +def compute_cnt_loss(preds, targets, mask, MIN, MAX): + ''' + Args + preds: list contains five level pred [batch_size,1,_h,_w] + targets: [batch_size,sum(_h*_w),1] + mask: [batch_size,sum(_h*_w)]:Tensor(Bool) + ''' + batch_size = targets.shape[0] + c = targets.shape[-1] + preds_reshape = () + mask = ops.ExpandDims()(mask, -1) + mask = ops.Cast()(mask, mstype.float32) + num_pos = ops.ReduceSum()(mask, (1, 2)) + ones = ops.Ones() + candidate = ones(num_pos.shape, mindspore.float32) + num_pos = mnp.where(num_pos == 0, candidate, num_pos) + num_pos = ops.Cast()(num_pos, mstype.float32) + for pred in preds: + pred = P.Transpose()(pred, (0, 2, 3, 1)) + pred = P.Reshape()(pred, (batch_size, -1, c)) + preds_reshape = preds_reshape + (pred,) + preds = P.Concat(axis=1)(preds_reshape) + loss = () + for batch_index in range(batch_size): + pred_pos = preds[batch_index].flatten() + target_pos = targets[batch_index].flatten() + weight = P.Ones()(pred_pos.shape, mstype.float32) + pred_pos = P.Sigmoid()(pred_pos) + if pred_pos.shape[0] != 0: + bce_result = nn.BCELoss(weight=weight, reduction='none')(pred_pos, target_pos) + a = bce_result + b = ops.Squeeze(1)(mask[batch_index]) + c = ops.Mul()(a, b) + op = ops.ReduceSum() + bce_result = op(c) + else: + bce_result = mnp.zeros((1,), mindspore.float32) + 
bce_result = P.Reshape()(bce_result, (1,)) + loss += (bce_result,) + return P.Concat(axis=0)(loss) / num_pos + +def compute_reg_loss(preds, targets, mask, MIN, MAX, ZERO, ZB, mode='giou'): + ''' + Args + preds: list contains five level pred [batch_size,4,_h,_w] + targets: [batch_size,sum(_h*_w),4] + mask: [batch_size,sum(_h*_w)] + ''' + batch_size = targets.shape[0] + c = targets.shape[-1] + preds_reshape = () + mask = ops.Cast()(mask, mstype.float32) + num_pos = ops.ReduceSum()(mask, (1,)) + ones = ops.Ones() + candidate = ones(num_pos.shape, mindspore.float32) + num_pos = mnp.where(num_pos == 0, candidate, num_pos) + num_pos = ops.Cast()(num_pos, mstype.float32) + for pred in preds: + pred = ops.Transpose()(pred, (0, 2, 3, 1)) + pred = ops.Reshape()(pred, (batch_size, -1, c)) + preds_reshape = preds_reshape + (pred,) + preds = ops.Concat(axis=1)(preds_reshape) + loss = () + for batch_index in range(batch_size): + mask_index = mask[batch_index] + pred_pos = preds[batch_index] + target_pos = targets[batch_index] + if pred_pos.shape[0] != 0: + loss_result = giou_loss(pred_pos, target_pos, mask_index, ZERO, ZB, MAX) + else: + loss_result = mnp.zeros((1,), mindspore.float32) + loss_result = loss_result.reshape((1,)) + loss = loss + (loss_result,) + return ops.Concat()(loss) / num_pos + +def giou_loss(preds, targets, mask_index, ZERO, ZB, MAX): + ''' + Args: + preds: [n,4] ltrb + targets: [n,4] + ''' + minimum = ops.Minimum() + maximum = ops.Maximum() + lt_min = minimum(preds[:, :2], targets[:, :2]) + rb_min = minimum(preds[:, 2:], targets[:, 2:]) + wh_min = rb_min + lt_min + zeros = ops.Zeros() + candidate = zeros(wh_min.shape, mindspore.float32) + wh_min = mnp.where(wh_min < 0, candidate, wh_min) + overlap = wh_min[:, 0] * wh_min[:, 1] # [n] + area1 = (preds[:, 2] + preds[:, 0]) * (preds[:, 3] + preds[:, 1]) + area2 = (targets[:, 2] + targets[:, 0]) * (targets[:, 3] + targets[:, 1]) + union = (area1 + area2 - overlap) + iou = overlap / union + lt_max = maximum(preds[:, :2], targets[:, :2]) + rb_max = maximum(preds[:, 2:], targets[:, 2:]) + wh_max = rb_max + lt_max + zeros = ops.Zeros() + candidate = zeros(wh_max.shape, mindspore.float32) + wh_max = mnp.where(wh_max < 0, candidate, wh_max) + G_area = wh_max[:, 0] * wh_max[:, 1] # [n] + zeros = ops.Zeros() + candidate = zeros(G_area.shape, mindspore.float32) + 1e-10 + G_area = mnp.where(G_area <= 0, candidate, G_area) + giou = iou - (G_area - union) / G_area # back3 + loss = (1. - giou).reshape(1, -1) + mask_index = mask_index.reshape(-1, 1) + loss = ops.Cast()(loss, mstype.float32) + loss = ops.dot(loss, mask_index) + return loss + +def focal_loss_from_logits(preds, targets, gamma=2.0, alpha=0.25): + ''' + Args: + preds: [n,class_num] + targets: [n,class_num] + ''' + preds = ops.Sigmoid()(preds) + pt = preds * targets + (1.0 - preds) * (1.0 - targets) + w = alpha * targets + (1.0 - alpha) * (1.0 - targets) + loss = -w * ops.Pow()((1.0 - pt), gamma) * ops.Log()(pt) + return ops.ReduceSum()(loss) + +class LossNet(nn.Cell): + """loss method""" + def __init__(self, config=None): + super().__init__() + if config is None: + self.config = DefaultConfig + else: + self.config = config + self.min_value = Tensor(1.) + self.max_value = Tensor(sys.maxsize, mstype.float32) + self.zero = Tensor(0.) + self.zerobottom = Tensor(1e-10) + + def construct(self, inputs): + ''' + inputs list + [0]preds: .... 
+ [1]targets : [[batch_size,sum(_h*_w),1],[batch_size,sum(_h*_w),1],[batch_size,sum(_h*_w),4]] + ''' + preds, targets = inputs + cls_logits, cnt_logits, reg_preds = preds + cls_targets, cnt_targets, reg_targets = targets + mask_pos = ops.Squeeze(axis=-1)(cnt_targets > -1) # [batch_size,sum(_h*_w)] + mean = ops.ReduceMean() + cls_loss = mean(compute_cls_loss(cls_logits, cls_targets, mask_pos, self.min_value, self.max_value)) + cnt_loss = mean(compute_cnt_loss(cnt_logits, cnt_targets, mask_pos, self.min_value, self.max_value)) + reg_loss = mean(compute_reg_loss(reg_preds, reg_targets, mask_pos, self.min_value, self.max_value, \ + self.zero, self.zerobottom)) + cls_loss = ops.Reshape()(cls_loss, (1,)) + cnt_loss = ops.Reshape()(cnt_loss, (1,)) + reg_loss = ops.Reshape()(reg_loss, (1,)) + total_loss = cls_loss + cnt_loss + reg_loss + return total_loss + + +class WithLossCell(nn.Cell): + """ + Wrap the network with loss function to compute loss. + + Args: + backbone (Cell): The target network to wrap. + loss_fn (Cell): The loss function used to compute loss. + """ + def __init__(self, backbone, loss_fn): + super(WithLossCell, self).__init__(auto_prefix=False) + config = DefaultConfig + self._backbone = backbone + self._targets_fn = GenTargets(config.strides, config.limit_range) + self._loss_fn = loss_fn + + def construct(self, input_imgs, input_boxes, input_classes): + #preds + out = self._backbone(input_imgs) + # stop gradients + cls_logits = () + cnt_logits = () + reg_preds = () + temp_cls_logits, temp_cnt_logits, temp_reg_preds = out + for i in temp_cls_logits: + cls_logits = cls_logits + (ops.Zeros()(i.shape, i.dtype),) + for i in temp_cnt_logits: + cnt_logits = cnt_logits + (ops.Zeros()(i.shape, i.dtype),) + for i in temp_reg_preds: + reg_preds = reg_preds + (ops.Zeros()(i.shape, i.dtype),) + stop_out = (cls_logits, cnt_logits, reg_preds) + targets = self._targets_fn.getTargets((stop_out, input_boxes, input_classes)) + return self._loss_fn((out, targets)) + + @property + def backbone_network(self): + """ + Get the backbone network. + + Returns: + Cell, return backbone network. + """ + return self._backbone + +class TrainOneStepCell(nn.Cell): + """ + Network training package class. + + Append an optimizer to the training network after that the construct function + can be called to create the backward graph. + + Args: + network (Cell): The training network. + optimizer (Cell): Optimizer for updating the weights. + sens (Number): The adjust parameter. Default value is 1.0. + reduce_flag (bool): The reduce flag. Default value is False. + mean (bool): Allreduce method. Default value is False. + degree (int): Device number. Default value is None. 
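+            Note: reduce_flag, mean and degree are not accepted by __init__ in this implementation; they are derived automatically from the parallel context (_get_parallel_mode, _get_gradients_mean, _get_device_num).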
+ """ + def __init__(self, network, optimizer, sens=1.0): + super(TrainOneStepCell, self).__init__(auto_prefix=False) + self.network = network + self.network.set_grad() + self.optimizer = optimizer + self.weights = self.optimizer.parameters + self.grad = C.GradOperation(get_by_list=True, sens_param=True) + self.sens = sens + self.reducer_flag = False + self.grad_reducer = F.identity + self.parallel_mode = _get_parallel_mode() + self.reducer_flag = self.parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL) + if self.reducer_flag: + self.mean = _get_gradients_mean() + self.degree = _get_device_num() + self.grad_reducer = DistributedGradReducer(self.weights, self.mean, self.degree) + + def construct(self, input_imgs, input_boxes, input_classes): + loss = self.network(input_imgs, input_boxes, input_classes) + sens = F.fill(loss.dtype, loss.shape, self.sens) + grads = self.grad(self.network, self.weights)(input_imgs, input_boxes, input_classes, sens) + grads = C.clip_by_global_norm(grads, clip_norm=3.0) + if self.reducer_flag: + grads = self.grad_reducer(grads) + return F.depend(loss, self.optimizer(grads)) diff --git a/research/cv/FCOS/src/resnet.py b/research/cv/FCOS/src/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..17d2471918411a2d82df358a88661522b5a68a21 --- /dev/null +++ b/research/cv/FCOS/src/resnet.py @@ -0,0 +1,559 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""backbone""" +import math +import numpy as np +import mindspore.nn as nn +import mindspore.common.dtype as mstype + +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore.common.tensor import Tensor +from mindspore import load_checkpoint, load_param_into_net + +from scipy.stats import truncnorm +FROZEN = False + +def conv_variance_scaling_initializer(in_channel, out_channel, kernel_size): + fan_in = in_channel * kernel_size * kernel_size + scale = 1.0 + scale /= max(1., fan_in) + stddev = (scale ** 0.5) / .87962566103423978 + mu, sigma = 0, stddev + weight = truncnorm(-2, 2, loc=mu, scale=sigma).rvs(out_channel * in_channel * kernel_size * kernel_size) + weight = np.reshape(weight, (out_channel, in_channel, kernel_size, kernel_size)) + return Tensor(weight, dtype=mstype.float32) + + +def _weight_variable(shape, factor=0.01): + init_value = np.random.randn(*shape).astype(np.float32) * factor + return Tensor(init_value) + + +def calculate_gain(nonlinearity, param=None): + """calculate_gain""" + linear_fns = ['linear', 'conv1d', 'conv2d', 'conv3d', 'conv_transpose1d', 'conv_transpose2d', 'conv_transpose3d'] + res = 0 + if nonlinearity in linear_fns or nonlinearity == 'sigmoid': + res = 1 + elif nonlinearity == 'tanh': + res = 5.0 / 3 + elif nonlinearity == 'relu': + res = math.sqrt(2.0) + elif nonlinearity == 'leaky_relu': + if param is None: + neg_slope = 0.01 + elif not isinstance(param, bool) and isinstance(param, int) or isinstance(param, float): + neg_slope = param + else: + raise ValueError("neg_slope {} not a valid number".format(param)) + res = math.sqrt(2.0 / (1 + neg_slope ** 2)) + else: + raise ValueError("Unsupported nonlinearity {}".format(nonlinearity)) + return res + + +def _calculate_fan_in_and_fan_out(tensor): + """_calculate_fan_in_and_fan_out""" + dimensions = len(tensor) + if dimensions < 2: + raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions") + if dimensions == 2: # Linear + fan_in = tensor[1] + fan_out = tensor[0] + else: + num_input_fmaps = tensor[1] + num_output_fmaps = tensor[0] + receptive_field_size = 1 + if dimensions > 2: + receptive_field_size = tensor[2] * tensor[3] + fan_in = num_input_fmaps * receptive_field_size + fan_out = num_output_fmaps * receptive_field_size + return fan_in, fan_out + + +def _calculate_correct_fan(tensor, mode): + mode = mode.lower() + valid_modes = ['fan_in', 'fan_out'] + if mode not in valid_modes: + raise ValueError("Unsupported mode {}, please use one of {}".format(mode, valid_modes)) + fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) + return fan_in if mode == 'fan_in' else fan_out + + +def kaiming_normal(inputs_shape, a=0, mode='fan_in', nonlinearity='leaky_relu'): + fan = _calculate_correct_fan(inputs_shape, mode) + gain = calculate_gain(nonlinearity, a) + std = gain / math.sqrt(fan) + return np.random.normal(0, std, size=inputs_shape).astype(np.float32) + + +def kaiming_uniform(inputs_shape, a=0., mode='fan_in', nonlinearity='leaky_relu'): + fan = _calculate_correct_fan(inputs_shape, mode) + gain = calculate_gain(nonlinearity, a) + std = gain / math.sqrt(fan) + bound = math.sqrt(3.0) * std # Calculate uniform bounds from standard deviation + return np.random.uniform(-bound, bound, size=inputs_shape).astype(np.float32) + + +def _conv3x3(in_channel, out_channel, stride=1, use_se=False, res_base=False): + if use_se: + weight = 
conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=3) + else: + weight_shape = (out_channel, in_channel, 3, 3) + weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu')) + if res_base: + return nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, + padding=1, pad_mode='pad', weight_init=weight) + return nn.Conv2d(in_channel, out_channel, kernel_size=3, stride=stride, + padding=0, pad_mode='same', weight_init=weight) + + +def _conv1x1(in_channel, out_channel, stride=1, use_se=False, res_base=False): + if use_se: + weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=1) + else: + weight_shape = (out_channel, in_channel, 1, 1) + weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu')) + if res_base: + return nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride, + padding=0, pad_mode='pad', weight_init=weight) + return nn.Conv2d(in_channel, out_channel, kernel_size=1, stride=stride, + padding=0, pad_mode='same', weight_init=weight) + + +def _conv7x7(in_channel, out_channel, stride=1, use_se=False, res_base=False): + if use_se: + weight = conv_variance_scaling_initializer(in_channel, out_channel, kernel_size=7) + else: + weight_shape = (out_channel, in_channel, 7, 7) + weight = Tensor(kaiming_normal(weight_shape, mode="fan_out", nonlinearity='relu')) + if res_base: + return nn.Conv2d(in_channel, out_channel, + kernel_size=7, stride=stride, padding=3, pad_mode='pad', weight_init=weight) + return nn.Conv2d(in_channel, out_channel, + kernel_size=7, stride=stride, padding=0, pad_mode='same', weight_init=weight) + + +def _bn(channel, res_base=False, is_training=False): + if res_base: + return nn.BatchNorm2d(channel, eps=1e-5, momentum=0.1, + gamma_init=1, beta_init=0, use_batch_statistics=FROZEN).set_train(FROZEN) + return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, + gamma_init=1, beta_init=0, use_batch_statistics=FROZEN).set_train(FROZEN) + + +def _bn_last(channel, is_training=False): + return nn.BatchNorm2d(channel, eps=1e-4, momentum=0.9, + gamma_init=0, beta_init=0, use_batch_statistics=FROZEN).set_train(FROZEN) + + +def _fc(in_channel, out_channel, use_se=False): + if use_se: + weight = np.random.normal(loc=0, scale=0.01, size=out_channel * in_channel) + weight = Tensor(np.reshape(weight, (out_channel, in_channel)), dtype=mstype.float32) + else: + weight_shape = (out_channel, in_channel) + weight = Tensor(kaiming_uniform(weight_shape, a=math.sqrt(5))) + return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight, bias_init=0) + + +class ResidualBlock(nn.Cell): + """ + ResNet V1 residual block definition. + + Args: + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. Default: 1. + use_se (bool): Enable SE-ResNet50 net. Default: False. + se_block(bool): Use se block in SE-ResNet50 net. Default: False. + + Returns: + Tensor, output tensor. 
+ + Examples: + >>> ResidualBlock(3, 256, stride=2) + """ + expansion = 4 + + def __init__(self, + in_channel, + out_channel, + stride=1, + use_se=False, se_block=False): + super(ResidualBlock, self).__init__() + self.stride = stride + self.use_se = use_se + self.se_block = se_block + channel = out_channel // self.expansion + self.conv1 = _conv1x1(in_channel, channel, stride=1, use_se=self.use_se) + self.bn1 = nn.BatchNorm2d(channel, use_batch_statistics=FROZEN).set_train(FROZEN) + if self.use_se and self.stride != 1: + self.e2 = nn.SequentialCell([_conv3x3(channel, channel, stride=1, use_se=True), _bn(channel), + nn.ReLU(), nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='same')]) + else: + self.conv2 = _conv3x3(channel, channel, stride=stride, use_se=self.use_se) + self.bn2 = _bn(channel) + + self.conv3 = _conv1x1(channel, out_channel, stride=1, use_se=self.use_se) + self.bn3 = _bn(out_channel) + if self.se_block: + self.se_global_pool = P.ReduceMean(keep_dims=False) + self.se_dense_0 = _fc(out_channel, int(out_channel / 4), use_se=self.use_se) + self.se_dense_1 = _fc(int(out_channel / 4), out_channel, use_se=self.use_se) + self.se_sigmoid = nn.Sigmoid() + self.se_mul = P.Mul() + self.relu = nn.ReLU() + + self.down_sample = False + + if stride != 1 or in_channel != out_channel: + self.down_sample = True + self.down_sample_layer = None + + if self.down_sample: + if self.use_se: + if stride == 1: + self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, + stride, use_se=self.use_se), _bn(out_channel)]) + else: + self.down_sample_layer = nn.SequentialCell([nn.MaxPool2d(kernel_size=2, stride=2, pad_mode='same'), + _conv1x1(in_channel, out_channel, 1, + use_se=self.use_se), _bn(out_channel)]) + else: + self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride, + use_se=self.use_se), _bn(out_channel)]) + + def construct(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + if self.use_se and self.stride != 1: + out = self.e2(out) + else: + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + out = self.conv3(out) + out = self.bn3(out) + if self.se_block: + out_se = out + out = self.se_global_pool(out, (2, 3)) + out = self.se_dense_0(out) + out = self.relu(out) + out = self.se_dense_1(out) + out = self.se_sigmoid(out) + out = F.reshape(out, F.shape(out) + (1, 1)) + out = self.se_mul(out, out_se) + + if self.down_sample: + identity = self.down_sample_layer(identity) + + out = out + identity + out = self.relu(out) + + return out + + +class ResidualBlockBase(nn.Cell): + """ + ResNet V1 residual block definition. + + Args: + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. Default: 1. + use_se (bool): Enable SE-ResNet50 net. Default: False. + se_block(bool): Use se block in SE-ResNet50 net. Default: False. + res_base (bool): Enable parameter setting of resnet18. Default: True. + + Returns: + Tensor, output tensor. 
+ + Examples: + >>> ResidualBlockBase(3, 256, stride=2) + """ + + def __init__(self, + in_channel, + out_channel, + stride=1, + use_se=False, + se_block=False, + res_base=True): + super(ResidualBlockBase, self).__init__() + self.res_base = res_base + self.conv1 = _conv3x3(in_channel, out_channel, stride=stride, res_base=self.res_base) + self.bn1d = _bn(out_channel) + self.conv2 = _conv3x3(out_channel, out_channel, stride=1, res_base=self.res_base) + self.bn2d = _bn(out_channel) + self.relu = nn.ReLU() + + self.down_sample = False + if stride != 1 or in_channel != out_channel: + self.down_sample = True + + self.down_sample_layer = None + if self.down_sample: + self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride, + use_se=use_se, res_base=self.res_base), + _bn(out_channel, res_base)]) + + def construct(self, x): + identity = x + + out = self.conv1(x) + out = self.bn1d(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2d(out) + + if self.down_sample: + identity = self.down_sample_layer(identity) + + out = out + identity + out = self.relu(out) + + return out + + +class ResNet(nn.Cell): + """ + ResNet architecture. + + Args: + block (Cell): Block for network. + layer_nums (list): Numbers of block in different layers. + in_channels (list): Input channel in each layer. + out_channels (list): Output channel in each layer. + strides (list): Stride size in each layer. + num_classes (int): The number of classes that the training images are belonging to. + use_se (bool): Enable SE-ResNet50 net. Default: False. + se_block(bool): Use se block in SE-ResNet50 net in layer 3 and layer 4. Default: False. + res_base (bool): Enable parameter setting of resnet18. Default: False. + + Returns: + Tensor, output tensor. + + Examples: + >>> ResNet(ResidualBlock, + >>> [3, 4, 6, 3], + >>> [64, 256, 512, 1024], + >>> [256, 512, 1024, 2048], + >>> [1, 2, 2, 2], + >>> 10) + """ + + def __init__(self, + block, + layer_nums, + in_channels, + out_channels, + strides, + num_classes, + use_se=False, + res_base=False): + super(ResNet, self).__init__() + + if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: + raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!") + self.use_se = use_se + self.res_base = res_base + self.se_block = False + if self.use_se: + self.se_block = True + + if self.use_se: + self.conv1_0 = _conv3x3(3, 32, stride=2, use_se=self.use_se) + self.bn1_0 = _bn(32) + self.conv1_1 = _conv3x3(32, 32, stride=1, use_se=self.use_se) + self.bn1_1 = _bn(32) + self.conv1_2 = _conv3x3(32, 64, stride=1, use_se=self.use_se) + else: + self.conv1 = _conv7x7(3, 64, stride=2, res_base=self.res_base) + self.bn1 = nn.BatchNorm2d(64, use_batch_statistics=FROZEN).set_train(FROZEN) + self.relu = P.ReLU() + + if self.res_base: + self.pad = nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1))) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="valid") + else: + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") + + self.layer1 = self._make_layer(block, + layer_nums[0], + in_channel=in_channels[0], + out_channel=out_channels[0], + stride=strides[0], + use_se=self.use_se) + self.layer2 = self._make_layer(block, + layer_nums[1], + in_channel=in_channels[1], + out_channel=out_channels[1], + stride=strides[1], + use_se=self.use_se) + self.layer3 = self._make_layer(block, + layer_nums[2], + in_channel=in_channels[2], + out_channel=out_channels[2], + stride=strides[2], + use_se=self.use_se, + 
se_block=self.se_block) + self.layer4 = self._make_layer(block, + layer_nums[3], + in_channel=in_channels[3], + out_channel=out_channels[3], + stride=strides[3], + use_se=self.use_se, + se_block=self.se_block) + + self.mean = P.ReduceMean(keep_dims=True) + self.flatten = nn.Flatten() + self.end_point = _fc(out_channels[3], num_classes, use_se=self.use_se) + + def _make_layer(self, block, layer_num, in_channel, out_channel, stride, use_se=False, se_block=False): + """ + Make stage network of ResNet. + + Args: + block (Cell): Resnet block. + layer_num (int): Layer number. + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. + se_block(bool): Use se block in SE-ResNet50 net. Default: False. + Returns: + SequentialCell, the output layer. + + Examples: + >>> _make_layer(ResidualBlock, 3, 128, 256, 2) + """ + layers = [] + + resnet_block = block(in_channel, out_channel, stride=stride, use_se=use_se) + layers.append(resnet_block) + if se_block: + for _ in range(1, layer_num - 1): + resnet_block = block(out_channel, out_channel, stride=1, use_se=use_se) + layers.append(resnet_block) + resnet_block = block(out_channel, out_channel, stride=1, use_se=use_se, se_block=se_block) + layers.append(resnet_block) + else: + for _ in range(1, layer_num): + resnet_block = block(out_channel, out_channel, stride=1, use_se=use_se) + layers.append(resnet_block) + return nn.SequentialCell(layers) + + def construct(self, x): + if self.use_se: + x = self.conv1_0(x) + x = self.bn1_0(x) + x = self.relu(x) + x = self.conv1_1(x) + x = self.bn1_1(x) + x = self.relu(x) + x = self.conv1_2(x) + else: + x = self.conv1(x) + + x = self.bn1(x) + x = self.relu(x) + + if self.res_base: + x = self.pad(x) + c1 = self.maxpool(x) + + c2 = self.layer1(c1) + + + + c3 = self.layer2(c2) + + + + c4 = self.layer3(c3) + + + + c5 = self.layer4(c4) + + return c3, c4, c5 + + def freeze_stages(self, stage): + if stage >= 0: + print("FROZEN SUCCESSFUL") + for m in [self.conv1, self.bn1]: + for i in m.get_parameters(): + i.requires_grad = False + for i in range(1, stage + 1): + layer = getattr(self, 'layer{}'.format(i)) + for param in layer.get_parameters(): + param.requires_grad = False + + +def resnet18(class_num=10): + """ + Get ResNet18 neural network. + + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of ResNet18 neural network. + + Examples: + >>> net = resnet18(10) + """ + return ResNet(ResidualBlockBase, + [2, 2, 2, 2], + [64, 64, 128, 256], + [64, 128, 256, 512], + [1, 2, 2, 2], + class_num, + res_base=True) + +def resnet34(class_num=10): + """ + Get ResNet34 neural network. + + Args: + class_num (int): Class number. + + Returns: + Cell, cell instance of ResNet34 neural network. 
+ + Examples: + >>> net = resnet18(10) + """ + return ResNet(ResidualBlockBase, + [3, 4, 6, 3], + [64, 64, 128, 256], + [64, 128, 256, 512], + [1, 2, 2, 2], + class_num, + res_base=True) + +def resnet50(pretrained=False, preckpt_path=None, class_num=1001): + model = ResNet(ResidualBlock, [3, 4, 6, 3], [64, 256, 512, 1024], [256, 512, 1024, 2048], \ + [1, 2, 2, 2], class_num) + if pretrained: + if preckpt_path is not None: + param_dict = load_checkpoint(preckpt_path) + load_param_into_net(model, param_dict) + return model diff --git a/research/cv/FCOS/train.py b/research/cv/FCOS/train.py new file mode 100644 index 0000000000000000000000000000000000000000..e6417bc95a2d84e4a40d7be2735e3cef67a7fee5 --- /dev/null +++ b/research/cv/FCOS/train.py @@ -0,0 +1,108 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""FCOS TRAIN""" +import os +import argparse +import numpy as np +import mindspore.nn as nn +from mindspore import context, Tensor +from mindspore.context import ParallelMode +from mindspore.train.callback import TimeMonitor, CheckpointConfig, ModelCheckpoint, LossMonitor +from mindspore import Model +from mindspore.common import set_seed +from mindspore.communication.management import init, get_rank, get_group_size + +from src import COCO_dataset +from src.fcos import FCOSDetector +from src.network_define import WithLossCell, TrainOneStepCell, LossNet +from src.augment import Transforms + +set_seed(1) +parser = argparse.ArgumentParser() +parser.add_argument("--epochs", type=int, default=25, help="number of epochs") +parser.add_argument("--batch_size", type=int, default=2, help="size of each image batch") +parser.add_argument("--platform", type=str, default='GPU', help="run platform") +parser.add_argument("--device_num", type=int, default=8, help="device_number to run") +parser.add_argument("--device_id", type=int, default=0, help="DEVICE_ID to run ") +parser.add_argument("--train_path", type=str, default="/data2/dataset/coco2017/train2017") +parser.add_argument("--anno_path", type=str, default="/coco2017/annotations/instances_train2017.json") +parser.add_argument("--ckpt_save_path", type=str, default="/data1/FCOS", help='checkpoint save path') +parser.add_argument("--pretrain_ckpt_path", type=str, default="/data1/test3/src/backbone/s1.ckpt") +opt = parser.parse_args() + +def lr_func(_LR_INIT, _WARMUP_STEPS, _WARMUP_FACTOR, _TOTAL_STEPS, _lr_schedule): + lr_res = [] + for step in range(0, _TOTAL_STEPS): + _lr = _LR_INIT + if step < _WARMUP_STEPS: + alpha = float(step) / _WARMUP_STEPS + warmup_factor = _WARMUP_FACTOR * (1.0 - alpha) + alpha + _lr = _lr * warmup_factor + lr_res.append(_lr) + else: + for w in range(len(_lr_schedule)): + if step < _lr_schedule[w]: + lr_res.append(_lr) + break + _lr *= 0.1 + if step >= 160000: + lr_res.append(_lr) + return np.array(lr_res, dtype=np.float32) + +if __name__ == '__main__': + context.set_context(mode=context.GRAPH_MODE, 
device_target='GPU') + context.set_context(enable_graph_kernel=True, graph_kernel_flags="--enable_parallel_fusion") + dataset_dir = opt.train_path + annotation_file = opt.anno_path + BATCH_SIZE = opt.batch_size + EPOCHS = opt.epochs + tr = Transforms() + if opt.device_num == 1: + context.set_context(device_id=opt.device_id) + train_dataset, dataset_size = COCO_dataset.create_coco_dataset(dataset_dir, \ + annotation_file, BATCH_SIZE, shuffle=True, transform=tr) + rank_id = 0 + else: + init() + rank_id = get_rank() + device_num = get_group_size() + context.set_auto_parallel_context(device_num=device_num, gradients_mean=True, \ + parallel_mode=ParallelMode.DATA_PARALLEL) + train_dataset, dataset_size = COCO_dataset.create_coco_dataset(dataset_dir, annotation_file, BATCH_SIZE, \ + shuffle=True, transform=tr, num_parallel_workers=device_num, num_shards=device_num, shard_id=rank_id) + print("the size of the dataset is %d" % train_dataset.get_dataset_size()) + steps_per_epoch = dataset_size//BATCH_SIZE + TOTAL_STEPS = steps_per_epoch * EPOCHS + WARMUP_STEPS = 500 + WARMUP_FACTOR = 1.0 / 3.0 + GLOBAL_STEPS = 0 + LR_INIT = 0.01 + lr_schedule = [120000, 160000] + fcos = FCOSDetector(mode="training", preckpt_path=opt.pretrain_ckpt_path).set_train() + loss = LossNet() + lr = Tensor(lr_func(LR_INIT, WARMUP_STEPS, WARMUP_FACTOR, TOTAL_STEPS, lr_schedule)) + sgd_optimizer = nn.SGD(fcos.trainable_params(), learning_rate=lr, momentum=0.9, weight_decay=0.0001) + loss_cb = LossMonitor() + time_cb = TimeMonitor() + cb = [loss_cb, time_cb] + net_with_loss = WithLossCell(fcos, loss) + net = TrainOneStepCell(net_with_loss, sgd_optimizer) + ckptconfig = CheckpointConfig(save_checkpoint_steps=10000, keep_checkpoint_max=20) + save_checkpoint_path = os.path.join(opt.ckpt_save_path, "ckpt_{}/".format(rank_id)) + ckpt_cb = ModelCheckpoint(prefix='ms8p', directory=save_checkpoint_path, config=ckptconfig) + cb += [ckpt_cb] + model = Model(net) + print("successfully build model, and now train the model...") + model.train(EPOCHS, train_dataset=train_dataset, dataset_sink_mode=True, sink_size=7329, callbacks=cb)
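
The warmup-plus-step-decay schedule built by lr_func in train.py can be hard to read from the loop alone. Below is a minimal standalone sketch (not part of the patch) that reproduces the same shape of schedule with small illustrative step counts; lr_schedule_sketch and the milestone values 1200/1600 are assumptions for demonstration only, not values taken from the repository.

```python
import numpy as np

def lr_schedule_sketch(lr_init=0.01, warmup_steps=500, warmup_factor=1.0 / 3.0,
                       total_steps=2000, milestones=(1200, 1600)):
    """Linear warmup for warmup_steps, then a 10x decay at each milestone,
    mirroring the behaviour of lr_func in train.py (step counts here are illustrative)."""
    lrs = []
    for step in range(total_steps):
        if step < warmup_steps:
            # interpolate from warmup_factor * lr_init up to lr_init
            alpha = step / warmup_steps
            lrs.append(lr_init * (warmup_factor * (1.0 - alpha) + alpha))
        else:
            lr = lr_init
            for m in milestones:
                if step >= m:
                    lr *= 0.1
            lrs.append(lr)
    return np.array(lrs, dtype=np.float32)

sched = lr_schedule_sketch()
# step 0 is roughly lr_init / 3, the end of warmup approaches lr_init,
# then the rate steps down 0.01 -> 0.001 -> 0.0001 at the milestones
print(sched[0], sched[499], sched[500], sched[1200], sched[1600])
```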