diff --git a/research/cv/rfcn/README_CN.md b/research/cv/rfcn/README_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..8fd138fa869c772f45216751bb59e7b8636faf57
--- /dev/null
+++ b/research/cv/rfcn/README_CN.md
@@ -0,0 +1,273 @@
+# Contents
+
+- [Contents](#contents)
+- [RFCN Description](#rfcn-description)
+- [Model Architecture](#model-architecture)
+- [Dataset](#dataset)
+- [Environment Requirements](#environment-requirements)
+- [Quick Start](#quick-start)
+- [Script Description](#script-description)
+    - [Script and Sample Code](#script-and-sample-code)
+    - [Training Process](#training-process)
+        - [Usage](#usage)
+        - [Result](#result)
+    - [Evaluation Process](#evaluation-process)
+        - [Usage](#usage-1)
+        - [Result](#result-1)
+- [Model Description](#model-description)
+    - [Performance](#performance)
+        - [Training Performance](#training-performance)
+        - [Evaluation Performance](#evaluation-performance)
+- [ModelZoo Homepage](#modelzoo-homepage)
+
+<!-- /TOC -->
+
+# RFCN Description
+
+R-FCN (Region-based Fully Convolutional Network) is an object detection architecture proposed in 2016. Building on Faster R-CNN, it drops the fully connected head so that the detector can handle images of different sizes, and introduces position-sensitive score maps, which fold object location information into RoI pooling. The network stays fully convolutional, remains sensitive to object positions, and, because RoI pooling is placed after the last convolutional layer, all network weights are shared across regions.
+
+By adopting a fully convolutional design, R-FCN speeds up computation, and the position-sensitive score maps keep the fully convolutional network sensitive to object locations, which improves detection quality.
+
+[Paper](https://arxiv.org/pdf/1605.06409.pdf): Dai J, Li Y, He K, et al. R-FCN: Object Detection via Region-based Fully Convolutional Networks[C]// Advances in Neural Information Processing Systems. Curran Associates Inc. 2016.
+
+# Model Architecture
+
+Like Faster R-CNN, R-FCN is a two-stage object detector. It uses ResNet-101 as the backbone: the output of the C4 stage is used as the feature map on which the RPN is trained to produce RoIs, and position-sensitive RoI (PS RoI) pooling is then applied to the feature maps and candidate boxes to produce the final detection results.
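+
+The pooling rule behind the position-sensitive score maps can be sketched in a few lines of NumPy. This is only an illustration, not the operator the network actually uses: the helper name `ps_roi_pool`, the array layout (a separate leading axis for the k*k bins) and the sample RoI are assumptions made for readability, while k = 7 and 81 classes match the values in the provided configuration files.
+
+```python
+import numpy as np
+
+def ps_roi_pool(score_maps, roi, k=7):
+    """Pool one RoI with position-sensitive score maps (illustration only).
+
+    score_maps: array of shape (k*k, num_classes, H, W); bin (i, j) reads group i*k + j.
+    roi: (x1, y1, x2, y2) in feature-map coordinates, assumed to lie inside the map.
+    """
+    x1, y1, x2, y2 = roi
+    bin_h, bin_w = (y2 - y1) / k, (x2 - x1) / k
+    num_classes = score_maps.shape[1]
+    pooled = np.zeros((num_classes, k, k), dtype=np.float32)
+    for i in range(k):
+        for j in range(k):
+            ys = int(y1 + i * bin_h)
+            xs = int(x1 + j * bin_w)
+            ye = max(int(y1 + (i + 1) * bin_h), ys + 1)
+            xe = max(int(x1 + (j + 1) * bin_w), xs + 1)
+            # each spatial bin only reads its own group of score maps
+            pooled[:, i, j] = score_maps[i * k + j, :, ys:ye, xs:xe].mean(axis=(1, 2))
+    return pooled.mean(axis=(1, 2))  # per-class scores after voting over the k*k bins
+
+scores = ps_roi_pool(np.random.rand(49, 81, 48, 80).astype(np.float32), roi=(10, 8, 38, 30))
+print(scores.shape)  # (81,)
+```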
+
+# Dataset
+
+Dataset used: [COCO 2014](<https://pjreddie.com/projects/coco-mirror/>)
+
+- Dataset size: 19 GB
+    - Training set: 13 GB, 82,783 images
+    - Validation set: 6 GB, 40,504 images
+    - Annotations: 241 MB; instances, captions, person_keypoints, etc.
+- Data format: images and JSON files
+    - Note: the data is processed in dataset.py.
+
+# Environment Requirements
+
+- Hardware (GPU)
+
+    - Set up the hardware environment with GPU processors.
+
+- Install [MindSpore](https://www.mindspore.cn/install).
+
+- Download the COCO 2014 dataset.
+
+- This example uses COCO 2014 as the training dataset by default; you can also use your own dataset.
+
+    1. If you use the COCO dataset, **select COCO as the dataset when running the script.**
+       Install Cython, pycocotools and opencv-python.
+
+        ```shell
+        pip install Cython
+
+        pip install pycocotools
+
+        pip install opencv-python
+        ```
+
+        Change COCO_ROOT and the other settings you need in `default_config.yaml` according to how the model will run. The directory structure is as follows:
+
+        ```path
+        .
+        └─cocodataset
+          ├─annotations
+            ├─instance_train2014.json
+            └─instance_val2014.json
+          ├─val2014
+          └─train2014
+        ```
+
+    2. If you use your own dataset, **select other as the dataset when running the script.** Organize the dataset information into a TXT file, one line per image, as follows:
+
+        ```txt
+        train2014/0000001.jpg 0,259,401,459,7,0 35,28,324,201,2,0 0,30,59,80,2,0
+        ```
+
+        Each line is a space-separated annotation for one image: the first column is the relative path of the image, and each remaining column is a box in [xmin,ymin,xmax,ymax,class,is_crowd] format, i.e. the box coordinates, the class and whether the box marks a crowd of objects (see the parsing sketch below). Images are read from the path formed by joining `image_dir` (the dataset directory) with the relative paths listed in `anno_path` (the TXT file). `image_dir` and `anno_path` can be set in `default_config.yaml`.
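+
+        For reference, one such line can be parsed as follows. This is a minimal sketch with a made-up helper name (`parse_anno_line`); the repository's own parsing for this format is `create_train_data_from_txt` in `src/dataset.py`.
+
+        ```python
+        def parse_anno_line(line):
+            """Split one annotation line into the image path and its boxes (illustration only)."""
+            fields = line.strip().split(' ')
+            boxes = []
+            for box_str in fields[1:]:
+                xmin, ymin, xmax, ymax, cls_id, is_crowd = box_str.split(',')
+                boxes.append([float(xmin), float(ymin), float(xmax), float(ymax), int(cls_id), int(is_crowd)])
+            return fields[0], boxes
+
+        path, boxes = parse_anno_line("train2014/0000001.jpg 0,259,401,459,7,0 35,28,324,201,2,0")
+        # path == "train2014/0000001.jpg", boxes[0] == [0.0, 259.0, 401.0, 459.0, 7, 0]
+        ```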
+
+# Quick Start
+
+After installing MindSpore from the official website, you can follow the steps below for training and evaluation:
+
+Note:
+
+1. The first run generates the MindRecord files, which takes a long time.
+2. The pre-trained model is a ResNet-101 checkpoint trained on ImageNet 2012. You can train it with the ModelZoo [resnet101](https://gitee.com/mindspore/models/tree/master/official/cv/resnet) script and then use src/convert_checkpoint.py to convert the trained resnet101 weight file into a loadable weight file. You can also train with the [resnet101 pre-trained model](https://download.mindspore.cn/model_zoo/r1.3/resnet101_ascend_v130_imagenet2012_official_cv_bs32_acc78.58/) provided by MindSpore.
+3. BACKBONE_MODEL is the checkpoint trained with the ModelZoo [resnet101](https://gitee.com/mindspore/models/tree/master/official/cv/resnet) script. PRETRAINED_MODEL is the converted weight file. VALIDATION_JSON_FILE is the label file. CHECKPOINT_PATH is the checkpoint file produced by training.
+
+## Running on GPU
+
+```shell
+# convert the weight file
+python -m src.convert_checkpoint --ckpt_file=[BACKBONE_MODEL]
+
+# standalone training
+bash run_standalone_train_gpu.sh [DEVICE_ID] [PRETRAINED_PATH] [COCO_ROOT] [MINDRECORD_DIR](option)
+
+# distributed training
+bash run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_PATH] [COCO_ROOT] [MINDRECORD_DIR](option)
+
+# evaluation
+bash run_eval_gpu.sh [DEVICE_ID] [ANNO_PATH] [CHECKPOINT_PATH] [COCO_ROOT] [MINDRECORD_DIR](option)
+```
+
+# Script Description
+
+## Script and Sample Code
+
+```shell
+.
+└─rfcn
+  ├─README_CN.md                     // R-FCN documentation
+  ├─scripts
+    ├─run_standalone_train_gpu.sh    // GPU standalone training shell script
+    ├─run_distribute_train_gpu.sh    // GPU distributed training shell script
+    └─run_eval_gpu.sh                // GPU evaluation shell script
+  ├─src
+    ├─rfcn
+      ├─__init__.py                  // init file
+      ├─anchor_generator.py          // anchor generator
+      ├─bbox_assign_sample.py        // stage-1 sampler
+      ├─bbox_assign_sample_stage2.py // stage-2 sampler
+      ├─rfcn_resnet.py               // R-FCN network
+      ├─proposal_generator.py        // proposal generator
+      ├─rfcn_loss.py                 // R-FCN loss network
+      ├─resnet.py                    // backbone network
+      └─rpn.py                       // region proposal network
+    ├─dataset.py                     // create and process the dataset
+    ├─lr_schedule.py                 // learning rate generator
+    ├─network_define.py              // R-FCN network definition
+    ├─util.py                        // routine operations
+    ├─eval_util.py                   // helpers for computing precision
+    ├─detecteval.py                  // helpers used during evaluation
+    └─model_utils
+      ├─config.py                    // parse .yaml configuration parameters
+      ├─device_adapter.py            // get the cloud device id
+      ├─local_adapter.py             // get the local device id
+      └─moxing_adapter.py            // data preparation on the cloud
+  ├─default_config.yaml              // default training configuration
+  ├─config_standalone_gpu.yaml       // single-device training configuration
+  ├─config_distribute_gpu.yaml       // eight-device training configuration
+  ├─eval.py                          // evaluation script
+  └─train.py                         // training script
+```
+
+## Training Process
+
+### Usage
+
+#### Running on GPU
+
+```shell
+# standalone training on GPU
+bash run_standalone_train_gpu.sh [DEVICE_ID] [PRETRAINED_PATH] [COCO_ROOT] [MINDRECORD_DIR](option)
+
+# distributed training on GPU
+bash run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_PATH] [COCO_ROOT] [MINDRECORD_DIR](option)
+```
+
+### Result
+
+Training results are saved in the example path, in a folder whose name starts with "train" or "run_distribute_train". You can find the training information in the log, as shown below.
+
+```log
+# distributed training result (8P)
+epoch time: 2176560.423 ms, per step time: 420.673 ms
+epoch time: 2176562.112 ms, per step time: 420.673 ms
+epoch time: 2176555.964 ms, per step time: 420.672 ms
+epoch time: 2176560.564 ms, per step time: 420.673 ms
+epoch time: 2176562.216 ms, per step time: 420.673 ms
+epoch time: 2176560.212 ms, per step time: 420.673 ms
+epoch time: 2176561.430 ms, per step time: 420.673 ms
+epoch time: 2176530.907 ms, per step time: 420.667 ms
+```
+
+The real-time loss values are recorded in the corresponding loss_0.log, as shown below.
+
+```log
+56943 epoch: 26 step: 5168 total_loss: 0.36969
+56943 epoch: 26 step: 5169 total_loss: 0.47171
+56944 epoch: 26 step: 5170 total_loss: 0.44770
+56944 epoch: 26 step: 5171 total_loss: 0.51082
+56944 epoch: 26 step: 5172 total_loss: 0.64440
+56945 epoch: 26 step: 5173 total_loss: 0.61452
+56945 epoch: 26 step: 5174 total_loss: 0.24274
+```
+
+## Evaluation Process
+
+### Usage
+
+#### Running on GPU
+
+```shell
+# evaluation on GPU
+bash run_eval_gpu.sh [DEVICE_ID] [ANNO_PATH] [CHECKPOINT_PATH] [COCO_ROOT] [MINDRECORD_DIR](option)
+```
+
+> The first evaluation also generates the MindRecord files first, so please wait patiently.
+>
+> Checkpoints are generated during training.
+>
+> The number of images in the dataset must match the number of annotated entries in VALIDATION_JSON_FILE, otherwise the precision results may not be displayed properly.
+
+### Result
+
+Evaluation results are saved in the example path, in a folder named "eval". In this folder, you can find results similar to the following in the log file.
+
+```log
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.273
+ Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.489
+ Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.275
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.118
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.303
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.382
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.238
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.346
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.355
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.155
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.390
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.515
+```
+
+# Model Description
+
+## Performance
+
+### Training Performance
+
+| Parameters          | GPU                                                         |
+| ------------------- | ----------------------------------------------------------- |
+| Model Version       | V1                                                          |
+| Resource            | V100-PCIE 16G                                               |
+| Upload Date         | 2022/4/11                                                   |
+| MindSpore Version   | 1.6.0                                                       |
+| Dataset             | COCO 2014                                                   |
+| Training Parameters | epoch=26, batch_size=2                                      |
+| Optimizer           | SGD                                                         |
+| Loss Function       | Softmax cross entropy, Sigmoid cross entropy, SmoothL1Loss  |
+| Speed               | 1 pc: 420 ms/step; 8 pcs: 420 ms/step                       |
+| Total time          | 1 pc: 130 hours; 8 pcs: 15.71 hours                         |
+| Parameters (M)      | 670M                                                        |
+| Script              | [RFCN script](https://gitee.com/mindspore/models/tree/master/research/cv/rfcn) |
+
+### Evaluation Performance
+
+| Parameters          | GPU                 |
+| ------------------- | ------------------- |
+| Model Version       | V1                  |
+| Resource            | V100-PCIE 16G       |
+| Upload Date         | 2022/4/11           |
+| MindSpore Version   | 1.6.0               |
+| Dataset             | COCO2014            |
+| batch_size          | 2                   |
+| Output              | mAP                 |
+| Accuracy            | IoU=0.50:0.95 27.3% |
+| Model for inference | 670M (.ckpt file)   |
+
+# ModelZoo Homepage
+
+ Please check the official [homepage](https://gitee.com/mindspore/models).
diff --git a/research/cv/rfcn/config_distribute_gpu.yaml b/research/cv/rfcn/config_distribute_gpu.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..55835239b7db769683cbc8c75134a3914de2a13e
--- /dev/null
+++ b/research/cv/rfcn/config_distribute_gpu.yaml
@@ -0,0 +1,195 @@
+# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing)
+enable_modelarts: False
+data_url: ""
+train_url: ""
+checkpoint_url: ""
+mindrecord_dir: ""
+data_path: "/cache/data"
+output_path: "/cache/train"
+load_path: "/cache/checkpoint_path"
+device_target: GPU
+enable_profiling: False
+
+# ==============================================================================
+# config
+img_width: 1280
+img_height: 768
+keep_ratio: True
+flip_ratio: 0.5
+expand_ratio: 1.0
+
+# anchor
+anchor_scales: [4, 8, 16, 32, 64]
+anchor_ratios: [0.5, 1.0, 2.0]
+anchor_strides: [16]
+num_anchors: 15
+
+# resnet
+resnet_block: [3, 4, 23, 3]
+resnet_in_channels: [64, 256, 512, 1024]
+resnet_out_channels: [256, 512, 1024, 2048]
+
+# roi pooling
+k: 7
+group_size: 7
+n_cls_reg: 2
+roi_nums_test: 2000
+
+
+# rpn
+rpn_in_channels: 1024
+rpn_feat_channels: 1024
+rpn_loss_cls_weight: 1.0
+rpn_loss_reg_weight: 1.0
+rpn_cls_out_channels: 1
+rpn_target_means: [0., 0., 0., 0.]
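+# rpn_target_means / rpn_target_stds give the per-coordinate normalization (means and
+# standard deviations) applied to the RPN box-regression deltas; the R-FCN head uses the
+# separate rfcn_target_means / rfcn_target_stds defined further down in this file.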
+rpn_target_stds: [1.0, 1.0, 1.0, 1.0] + +# bbox_assign_sampler +neg_iou_thr: 0.3 +pos_iou_thr: 0.7 +min_pos_iou: 0.3 +num_gts: 128 +num_expected_neg: 256 +num_expected_pos: 128 + +# proposal +activate_num_classes: 2 +use_sigmoid_cls: True + +# bbox_assign_sampler_stage2 +neg_iou_thr_stage2: 0.5 +pos_iou_thr_stage2: 0.5 +min_pos_iou_stage2: 0.5 +num_bboxes_stage2: 2000 +num_expected_pos_stage2: 128 +num_expected_neg_stage2: 512 +num_expected_total_stage2: 512 + +# rfcn_loss +rfcn_loss_cls_weight: 1 +rfcn_loss_reg_weight: 1 +rfcn_target_means: [0., 0., 0., 0.] +rfcn_target_stds: [0.1, 0.1, 0.2, 0.2] + +# train proposal +rpn_proposal_nms_across_levels: False +rpn_proposal_nms_pre: 2000 +rpn_proposal_nms_post: 2000 +rpn_proposal_max_num: 2000 +rpn_proposal_nms_thr: 0.7 +rpn_proposal_min_bbox_size: 0 + +# test proposal +rpn_nms_across_levels: False +rpn_nms_pre: 1000 +rpn_nms_post: 1000 +rpn_max_num: 1000 +rpn_nms_thr: 0.7 +rpn_min_bbox_min_size: 0 +test_score_thr: 0.05 +test_iou_thr: 0.5 +test_max_per_img: 100 +test_batch_size: 2 + +rpn_head_use_sigmoid: True +rpn_head_weight: 1.0 + +# LR +base_lr: 0.01 +warmup_step: 500 +warmup_ratio: 0.0625 +sgd_step: [8, 11] +sgd_momentum: 0.9 + +# train +batch_size: 2 +loss_scale: 256 +momentum: 0.91 +weight_decay: 0.00001 +epoch_size: 26 +interval: 1 +save_checkpoint: True +save_checkpoint_epochs: 1 +keep_checkpoint_max: 5 +save_checkpoint_path: "./" + +# Number of threads used to process the dataset in parallel +num_parallel_workers: 8 +# Parallelize Python operations with multiple worker processes +python_multiprocessing: True +coco_root: "" +train_data_type: "train2014" +val_data_type: "val2014" +instance_set: "annotations/instances_{}.json" + +coco_classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', + 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', + 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', + 'kite', 'baseball bat', 'baseball glove', 'skateboard', + 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', + 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', + 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', + 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', + 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', + 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', + 'refrigerator', 'book', 'clock', 'vase', 'scissors', + 'teddy bear', 'hair drier', 'toothbrush'] +other_classes: [''] + +num_classes: 81 +prefix: "" + +# annotations file(json format or user defined text format) +anno_path: '' +image_dir: '' + +# train.py Rfcn training +run_distribute: False +dataset: "coco" +pre_trained: "" +device_id: 0 +device_num: 1 +rank_id: 0 +backbone: 'resnet_v1_101' + +# eval.py Rfcn evaluation +checkpoint_path: "" + +--- +# Config description for each option +enable_modelarts: 'Whether training on modelarts, default: False' +data_url: 'Dataset url for obs' +train_url: 'Training output url for obs' +data_path: 'Dataset path for local' +output_path: 'Training output path for local' +result_dir: "result files path." +label_dir: "image file path." + +device_target: "device where the code will be implemented, default is GPU" +file_name: "output file name." 
+dataset: "Dataset, either cifar10 or imagenet2012" +parameter_server: 'Run parameter server train' +width: 'input width' +height: 'input height' +enable_profiling: 'Whether enable profiling while training, default: False' +run_distribute: 'Run distribute, default is false.' +do_train: 'Do train or not, default is true.' +pre_trained: 'Pretrained checkpoint path' +device_id: 'Device id, default is 0.' +device_num: 'Use device nums, default is 1.' +rank_id: 'Rank id, default is 0.' +file_format: 'file format' +ann_file: "Ann file, default is val.json." +checkpoint_path: "Checkpoint file path." +result_path: "result file path." +backbone: "backbone network name, resnet_v1_101" +interval: "val interval" + +--- +device_target: ['Ascend', 'GPU', 'CPU'] +file_format: ["AIR", "ONNX", "MINDIR"] +dataset_name: ["cifar10", "imagenet2012"] diff --git a/research/cv/rfcn/config_standalone_gpu.yaml b/research/cv/rfcn/config_standalone_gpu.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbff6c3f59b4ddf656bfee6952c33e671d94fef6 --- /dev/null +++ b/research/cv/rfcn/config_standalone_gpu.yaml @@ -0,0 +1,194 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +data_url: "" +train_url: "" +checkpoint_url: "" +mindrecord_dir: "" +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path" +device_target: GPU +enable_profiling: False + +# ============================================================================== +# config +img_width: 1280 +img_height: 768 +keep_ratio: True +flip_ratio: 0.5 +expand_ratio: 1.0 + +# anchor +anchor_scales: [4, 8, 16, 32, 64] +anchor_ratios: [0.5, 1.0, 2.0] +anchor_strides: [16] +num_anchors: 15 + +# resnet +resnet_block: [3, 4, 23, 3] +resnet_in_channels: [64, 256, 512, 1024] +resnet_out_channels: [256, 512, 1024, 2048] + +# roi pooling +k: 7 +group_size: 7 +n_cls_reg: 2 +roi_nums_test: 2000 + +# rpn +rpn_in_channels: 1024 +rpn_feat_channels: 1024 +rpn_loss_cls_weight: 1.0 +rpn_loss_reg_weight: 1.0 +rpn_cls_out_channels: 1 +rpn_target_means: [0., 0., 0., 0.] +rpn_target_stds: [1.0, 1.0, 1.0, 1.0] + +# bbox_assign_sampler +neg_iou_thr: 0.3 +pos_iou_thr: 0.7 +min_pos_iou: 0.3 +num_gts: 128 +num_expected_neg: 256 +num_expected_pos: 128 + +# proposal +activate_num_classes: 2 +use_sigmoid_cls: True + +# bbox_assign_sampler_stage2 +neg_iou_thr_stage2: 0.5 +pos_iou_thr_stage2: 0.5 +min_pos_iou_stage2: 0.5 +num_bboxes_stage2: 2000 +num_expected_pos_stage2: 128 +num_expected_neg_stage2: 512 +num_expected_total_stage2: 512 + +# rfcn_loss +rfcn_loss_cls_weight: 1 +rfcn_loss_reg_weight: 1 +rfcn_target_means: [0., 0., 0., 0.] 
+rfcn_target_stds: [0.1, 0.1, 0.2, 0.2] + +# train proposal +rpn_proposal_nms_across_levels: False +rpn_proposal_nms_pre: 2000 +rpn_proposal_nms_post: 2000 +rpn_proposal_max_num: 2000 +rpn_proposal_nms_thr: 0.7 +rpn_proposal_min_bbox_size: 0 + +# test proposal +rpn_nms_across_levels: False +rpn_nms_pre: 1000 +rpn_nms_post: 1000 +rpn_max_num: 1000 +rpn_nms_thr: 0.7 +rpn_min_bbox_min_size: 0 +test_score_thr: 0.05 +test_iou_thr: 0.5 +test_max_per_img: 100 +test_batch_size: 2 + +rpn_head_use_sigmoid: True +rpn_head_weight: 1.0 + +# LR +base_lr: 0.001 +warmup_step: 500 +warmup_ratio: 0.0625 +sgd_step: [8, 11] +sgd_momentum: 0.9 + +# train +batch_size: 2 +loss_scale: 256 +momentum: 0.91 +weight_decay: 0.00001 +epoch_size: 26 +interval: 1 +save_checkpoint: True +save_checkpoint_epochs: 1 +keep_checkpoint_max: 5 +save_checkpoint_path: "./" + +# Number of threads used to process the dataset in parallel +num_parallel_workers: 8 +# Parallelize Python operations with multiple worker processes +python_multiprocessing: True +coco_root: "" +train_data_type: "train2014" +val_data_type: "val2014" +instance_set: "annotations/instances_{}.json" + +coco_classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', + 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', + 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', + 'kite', 'baseball bat', 'baseball glove', 'skateboard', + 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', + 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', + 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', + 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', + 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', + 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', + 'refrigerator', 'book', 'clock', 'vase', 'scissors', + 'teddy bear', 'hair drier', 'toothbrush'] +other_classes: [''] + +num_classes: 81 +prefix: "" + +# annotations file(json format or user defined text format) +anno_path: '' +image_dir: '' + +# train.py Rfcn training +run_distribute: False +dataset: "coco" +pre_trained: "" +device_id: 0 +device_num: 1 +rank_id: 0 +backbone: 'resnet_v1_101' + +# eval.py Rfcn evaluation +checkpoint_path: "" + +--- +# Config description for each option +enable_modelarts: 'Whether training on modelarts, default: False' +data_url: 'Dataset url for obs' +train_url: 'Training output url for obs' +data_path: 'Dataset path for local' +output_path: 'Training output path for local' +result_dir: "result files path." +label_dir: "image file path." + +device_target: "device where the code will be implemented, default is GPU" +file_name: "output file name." +dataset: "Dataset, either cifar10 or imagenet2012" +parameter_server: 'Run parameter server train' +width: 'input width' +height: 'input height' +enable_profiling: 'Whether enable profiling while training, default: False' +run_distribute: 'Run distribute, default is false.' +do_train: 'Do train or not, default is true.' +pre_trained: 'Pretrained checkpoint path' +device_id: 'Device id, default is 0.' +device_num: 'Use device nums, default is 1.' +rank_id: 'Rank id, default is 0.' +file_format: 'file format' +ann_file: "Ann file, default is val.json." +checkpoint_path: "Checkpoint file path." +result_path: "result file path." 
+backbone: "backbone network name, resnet_v1_101" +interval: "val interval" + +--- +device_target: ['Ascend', 'GPU', 'CPU'] +file_format: ["AIR", "ONNX", "MINDIR"] +dataset_name: ["cifar10", "imagenet2012"] diff --git a/research/cv/rfcn/default_config.yaml b/research/cv/rfcn/default_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fbff6c3f59b4ddf656bfee6952c33e671d94fef6 --- /dev/null +++ b/research/cv/rfcn/default_config.yaml @@ -0,0 +1,194 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +data_url: "" +train_url: "" +checkpoint_url: "" +mindrecord_dir: "" +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path" +device_target: GPU +enable_profiling: False + +# ============================================================================== +# config +img_width: 1280 +img_height: 768 +keep_ratio: True +flip_ratio: 0.5 +expand_ratio: 1.0 + +# anchor +anchor_scales: [4, 8, 16, 32, 64] +anchor_ratios: [0.5, 1.0, 2.0] +anchor_strides: [16] +num_anchors: 15 + +# resnet +resnet_block: [3, 4, 23, 3] +resnet_in_channels: [64, 256, 512, 1024] +resnet_out_channels: [256, 512, 1024, 2048] + +# roi pooling +k: 7 +group_size: 7 +n_cls_reg: 2 +roi_nums_test: 2000 + +# rpn +rpn_in_channels: 1024 +rpn_feat_channels: 1024 +rpn_loss_cls_weight: 1.0 +rpn_loss_reg_weight: 1.0 +rpn_cls_out_channels: 1 +rpn_target_means: [0., 0., 0., 0.] +rpn_target_stds: [1.0, 1.0, 1.0, 1.0] + +# bbox_assign_sampler +neg_iou_thr: 0.3 +pos_iou_thr: 0.7 +min_pos_iou: 0.3 +num_gts: 128 +num_expected_neg: 256 +num_expected_pos: 128 + +# proposal +activate_num_classes: 2 +use_sigmoid_cls: True + +# bbox_assign_sampler_stage2 +neg_iou_thr_stage2: 0.5 +pos_iou_thr_stage2: 0.5 +min_pos_iou_stage2: 0.5 +num_bboxes_stage2: 2000 +num_expected_pos_stage2: 128 +num_expected_neg_stage2: 512 +num_expected_total_stage2: 512 + +# rfcn_loss +rfcn_loss_cls_weight: 1 +rfcn_loss_reg_weight: 1 +rfcn_target_means: [0., 0., 0., 0.] 
+rfcn_target_stds: [0.1, 0.1, 0.2, 0.2] + +# train proposal +rpn_proposal_nms_across_levels: False +rpn_proposal_nms_pre: 2000 +rpn_proposal_nms_post: 2000 +rpn_proposal_max_num: 2000 +rpn_proposal_nms_thr: 0.7 +rpn_proposal_min_bbox_size: 0 + +# test proposal +rpn_nms_across_levels: False +rpn_nms_pre: 1000 +rpn_nms_post: 1000 +rpn_max_num: 1000 +rpn_nms_thr: 0.7 +rpn_min_bbox_min_size: 0 +test_score_thr: 0.05 +test_iou_thr: 0.5 +test_max_per_img: 100 +test_batch_size: 2 + +rpn_head_use_sigmoid: True +rpn_head_weight: 1.0 + +# LR +base_lr: 0.001 +warmup_step: 500 +warmup_ratio: 0.0625 +sgd_step: [8, 11] +sgd_momentum: 0.9 + +# train +batch_size: 2 +loss_scale: 256 +momentum: 0.91 +weight_decay: 0.00001 +epoch_size: 26 +interval: 1 +save_checkpoint: True +save_checkpoint_epochs: 1 +keep_checkpoint_max: 5 +save_checkpoint_path: "./" + +# Number of threads used to process the dataset in parallel +num_parallel_workers: 8 +# Parallelize Python operations with multiple worker processes +python_multiprocessing: True +coco_root: "" +train_data_type: "train2014" +val_data_type: "val2014" +instance_set: "annotations/instances_{}.json" + +coco_classes: ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', + 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', + 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', + 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', + 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', + 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', + 'kite', 'baseball bat', 'baseball glove', 'skateboard', + 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', + 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', + 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', + 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', + 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', + 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', + 'refrigerator', 'book', 'clock', 'vase', 'scissors', + 'teddy bear', 'hair drier', 'toothbrush'] +other_classes: [''] + +num_classes: 81 +prefix: "" + +# annotations file(json format or user defined text format) +anno_path: '' +image_dir: '' + +# train.py Rfcn training +run_distribute: False +dataset: "coco" +pre_trained: "" +device_id: 0 +device_num: 1 +rank_id: 0 +backbone: 'resnet_v1_101' + +# eval.py Rfcn evaluation +checkpoint_path: "" + +--- +# Config description for each option +enable_modelarts: 'Whether training on modelarts, default: False' +data_url: 'Dataset url for obs' +train_url: 'Training output url for obs' +data_path: 'Dataset path for local' +output_path: 'Training output path for local' +result_dir: "result files path." +label_dir: "image file path." + +device_target: "device where the code will be implemented, default is GPU" +file_name: "output file name." +dataset: "Dataset, either cifar10 or imagenet2012" +parameter_server: 'Run parameter server train' +width: 'input width' +height: 'input height' +enable_profiling: 'Whether enable profiling while training, default: False' +run_distribute: 'Run distribute, default is false.' +do_train: 'Do train or not, default is true.' +pre_trained: 'Pretrained checkpoint path' +device_id: 'Device id, default is 0.' +device_num: 'Use device nums, default is 1.' +rank_id: 'Rank id, default is 0.' +file_format: 'file format' +ann_file: "Ann file, default is val.json." +checkpoint_path: "Checkpoint file path." +result_path: "result file path." 
+backbone: "backbone network name, resnet_v1_101" +interval: "val interval" + +--- +device_target: ['Ascend', 'GPU', 'CPU'] +file_format: ["AIR", "ONNX", "MINDIR"] +dataset_name: ["cifar10", "imagenet2012"] diff --git a/research/cv/rfcn/eval.py b/research/cv/rfcn/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..5242a91a3a4ed95a4c963defe5316fb444e79ce7 --- /dev/null +++ b/research/cv/rfcn/eval.py @@ -0,0 +1,155 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Evaluation for RFCN""" +import os +import time +from collections import defaultdict + +import numpy as np +from pycocotools.coco import COCO +from mindspore import context +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.common import set_seed, Parameter +from src.dataset import data_to_mindrecord_byte_image, create_rfcn_dataset, parse_json_annos_from_txt +from src.util import coco_eval, bbox2result_1image, results2json +from src.model_utils.config import config +from src.model_utils.moxing_adapter import moxing_wrapper +from src.model_utils.device_adapter import get_device_id +from src.rfcn.rfcn_resnet import Rfcn_Resnet + +set_seed(1) +context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, device_id=get_device_id()) + +def rfcn_eval(dataset_path, ckpt_path, anno_path): + """Rfcn evaluation.""" + if not os.path.isfile(ckpt_path): + raise RuntimeError("CheckPoint file {} is not valid.".format(ckpt_path)) + ds = create_rfcn_dataset(config, dataset_path, batch_size=config.test_batch_size, is_training=False, + num_parallel_workers=config.num_parallel_workers) + net = Rfcn_Resnet(config) + param_dict = load_checkpoint(ckpt_path) + if config.device_target == "GPU": + for key, value in param_dict.items(): + tensor = value.asnumpy().astype(np.float32) + param_dict[key] = Parameter(tensor, key) + else: + raise RuntimeError("now, RFCN only support GPU.") + load_param_into_net(net, param_dict) + + net.set_train(False) + + eval_iter = 0 + total = ds.get_dataset_size() + outputs = [] + + if config.dataset != "coco": + dataset_coco = COCO() + dataset_coco.dataset, dataset_coco.anns, dataset_coco.cats, dataset_coco.imgs = dict(), dict(), dict(), dict() + dataset_coco.imgToAnns, dataset_coco.catToImgs = defaultdict(list), defaultdict(list) + dataset_coco.dataset = parse_json_annos_from_txt(anno_path, config) + dataset_coco.createIndex() + else: + dataset_coco = COCO(anno_path) + + print("\n========================================\n") + print("total images num: ", total) + print("Processing, please wait a moment.") + max_num = 128 + for data in ds.create_dict_iterator(num_epochs=1): + eval_iter = eval_iter + 1 + img_data = data['image'] + img_metas = data['image_shape'] + gt_bboxes = data['box'] + gt_labels = data['label'] + gt_num = data['valid_num'] + + start = time.time() + # run net + output = net(img_data, img_metas, 
gt_bboxes, gt_labels, gt_num) + end = time.time() + print("Iter {} cost time {}".format(eval_iter, end - start)) + + # output + all_bbox = output[0] + all_label = output[1] + all_mask = output[2] + + for j in range(config.test_batch_size): + all_bbox_squee = np.squeeze(all_bbox.asnumpy()[j, :, :]) + all_label_squee = np.squeeze(all_label.asnumpy()[j, :, :]) + all_mask_squee = np.squeeze(all_mask.asnumpy()[j, :, :]) + + all_bboxes_tmp_mask = all_bbox_squee[all_mask_squee, :] + all_labels_tmp_mask = all_label_squee[all_mask_squee] + + if all_bboxes_tmp_mask.shape[0] > max_num: + inds = np.argsort(-all_bboxes_tmp_mask[:, -1]) + inds = inds[:max_num] + all_bboxes_tmp_mask = all_bboxes_tmp_mask[inds] + all_labels_tmp_mask = all_labels_tmp_mask[inds] + + outputs_tmp = bbox2result_1image(all_bboxes_tmp_mask, all_labels_tmp_mask, config.num_classes) + + outputs.append(outputs_tmp) + + eval_types = ["bbox"] + result_files = results2json(dataset_coco, outputs, "./results.pkl") + coco_eval(config, result_files, eval_types, dataset_coco, single_result=False, plot_detect_result=False) + + +def modelarts_pre_process(): + pass + + +@moxing_wrapper(pre_process=modelarts_pre_process) +def eval_rfcn(): + """ eval_rfcn """ + if config.dataset == "coco": + prefix = "Rfcn_coco_eval.mindrecord" + else: + prefix = "Rfcn_other_eval.mindrecord" + + mindrecord_dir = config.mindrecord_dir + mindrecord_file = os.path.join(mindrecord_dir, prefix) + print("CHECKING MINDRECORD FILES ...") + + if not os.path.exists(mindrecord_file): + if not os.path.isdir(mindrecord_dir): + os.makedirs(mindrecord_dir) + if config.dataset == "coco": + if os.path.isdir(config.coco_root): + print("Create Mindrecord. It may take some time.") + data_to_mindrecord_byte_image(config, "coco", False, prefix, file_num=1) + print("Create Mindrecord Done, at {}".format(mindrecord_dir)) + else: + print("coco_root not exits.") + else: + if os.path.isdir(config.image_dir) and os.path.exists(config.anno_path): + print("Create Mindrecord. It may take some time.") + data_to_mindrecord_byte_image(config, "other", False, prefix, file_num=1) + print("Create Mindrecord Done, at {}".format(mindrecord_dir)) + else: + print("IMAGE_DIR or ANNO_PATH not exits.") + + print("CHECKING MINDRECORD FILES DONE!") + print("Start Eval!") + rfcn_eval(mindrecord_file, config.checkpoint_path, config.anno_path) + print("eval success.") + + + +if __name__ == '__main__': + eval_rfcn() diff --git a/research/cv/rfcn/requirements.txt b/research/cv/rfcn/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..dc51c2963b723e7359f04f85b161bc8624bec85b --- /dev/null +++ b/research/cv/rfcn/requirements.txt @@ -0,0 +1,5 @@ +Cython +pycocotools +numpy +opencv-python +pycocotools diff --git a/research/cv/rfcn/scripts/run_distribute_train_gpu.sh b/research/cv/rfcn/scripts/run_distribute_train_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..d57a7dde36401462bd0d57ad847d274435021336 --- /dev/null +++ b/research/cv/rfcn/scripts/run_distribute_train_gpu.sh @@ -0,0 +1,73 @@ +#!/bin/bash +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the script as: " +echo "sh run_distribute_train_gpu.sh DEVICE_NUM PRETRAINED_PATH COCO_ROOT MINDRECORD_DIR(option)" +echo "for example: bash run_distribute_train_gpu.sh 8 /path/pretrain.ckpt cocodataset mindrecord_dir(option)" +echo "It is better to use absolute path." +echo "==============================================================================================================" + +if [ $# -le 2 ] +then + echo "Usage: bash run_distribute_train_gpu.sh [DEVICE_NUM] [PRETRAINED_PATH] [COCO_ROOT] [MINDRECORD_DIR](option)" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +rm -rf run_distribute_train +mkdir run_distribute_train +cp -rf ../src/ ../train.py ../*.yaml ./run_distribute_train +cd run_distribute_train || exit + +export RANK_SIZE=$1 +PRETRAINED_PATH=$2 +PATH3=$3 + +mindrecord_dir=$PATH3/RFCN_MINDRECORD/ +if [ $# -eq 4 ] +then + mindrecord_dir=$(get_real_path $4) + if [ ! -d $mindrecord_dir ] + then + echo "error: mindrecord_dir=$mindrecord_dir is not a dir" + exit 1 + fi +fi +echo $mindrecord_dir + +BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd) +CONFIG_FILE="${BASE_PATH}/config_distribute_gpu.yaml" + +echo "start training on $RANK_SIZE devices" + +mpirun -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout \ + --allow-run-as-root \ + python train.py \ + --config_path=$CONFIG_FILE \ + --run_distribute=True \ + --device_target="GPU" \ + --device_num=$RANK_SIZE \ + --pre_trained=$PRETRAINED_PATH \ + --coco_root=$PATH3 \ + --mindrecord_dir=$mindrecord_dir > log 2>&1 & diff --git a/research/cv/rfcn/scripts/run_eval_gpu.sh b/research/cv/rfcn/scripts/run_eval_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..11dbab2c35e98e24a5681560dcbe1ac952078274 --- /dev/null +++ b/research/cv/rfcn/scripts/run_eval_gpu.sh @@ -0,0 +1,90 @@ +#!/bin/bash +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +if [ $# -le 3 ] +then + echo "Usage: sh run_eval_gpu.sh [DEVICE_ID] [ANNO_PATH] [CHECKPOINT_PATH] [COCO_ROOT] [MINDRECORD_DIR](option)" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} +PATH1=$(get_real_path $2) +PATH2=$(get_real_path $3) +PATH3=$(get_real_path $4) +echo $PATH1 +echo $PATH2 +echo $PATH3 + +if [ ! -f $PATH1 ] +then + echo "error: ANNO_PATH=$PATH1 is not a file" +exit 1 +fi + +if [ ! -f $PATH2 ] +then + echo "error: CHECKPOINT_PATH=$PATH2 is not a file" +exit 1 +fi + +if [ ! -d $PATH3 ] +then + echo "error: COCO_ROOT=$PATH3 is not a dir" +exit 1 +fi + +mindrecord_dir=$PATH3/RFCN_MINDRECORD/ +if [ $# -eq 5 ] +then + mindrecord_dir=$(get_real_path $5) + if [ ! -d $mindrecord_dir ] + then + echo "error: mindrecord_dir=$mindrecord_dir is not a dir" + exit 1 + fi +fi +echo $mindrecord_dir + +BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd) +CONFIG_FILE="${BASE_PATH}/../default_config.yaml" + + +export DEVICE_NUM=1 +export RANK_SIZE=$DEVICE_NUM +export DEVICE_ID=$1 +export RANK_ID=0 + +if [ -d "eval" ]; +then + rm -rf ./eval +fi +mkdir ./eval +cp ../*.py ./eval +cp ../*.yaml ./eval +cp *.sh ./eval +cp -r ../src ./eval +cd ./eval || exit +env > env.log +echo "start eval for device $DEVICE_ID" +python eval.py --config_path=$CONFIG_FILE --coco_root=$PATH3 --mindrecord_dir=$mindrecord_dir \ +--device_target="GPU" --device_id=$DEVICE_ID --anno_path=$PATH1 --checkpoint_path=$PATH2 &> log & +cd .. diff --git a/research/cv/rfcn/scripts/run_standalone_train_gpu.sh b/research/cv/rfcn/scripts/run_standalone_train_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..a3f7085d4e1caf75e00459570aeb1cbff5a471b2 --- /dev/null +++ b/research/cv/rfcn/scripts/run_standalone_train_gpu.sh @@ -0,0 +1,84 @@ +#!/bin/bash +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# -le 2 ] +then + echo "Usage: sh run_standalone_train_gpu.sh [DEVICE_ID] [PRETRAINED_PATH] [COCO_ROOT] [MINDRECORD_DIR](option)" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $2) +PATH2=$(get_real_path $3) +echo $PATH1 +echo $PATH2 + +if [ ! -f $PATH1 ] +then + echo "error: PRETRAINED_PATH=$PATH1 is not a file" +exit 1 +fi + +if [ ! -d $PATH2 ] +then + echo "error: COCO_ROOT=$PATH2 is not a dir" +exit 1 +fi + +mindrecord_dir=$PATH2/RFCN_MINDRECORD/ +if [ $# -eq 4 ] +then + mindrecord_dir=$(get_real_path $4) + if [ ! 
-d $mindrecord_dir ] + then + echo "error: mindrecord_dir=$mindrecord_dir is not a dir" + exit 1 + fi +fi +echo $mindrecord_dir + +BASE_PATH=$(cd ./"`dirname $0`" || exit; pwd) + +CONFIG_FILE="${BASE_PATH}/../config_standalone_gpu.yaml" + +ulimit -u unlimited +export DEVICE_NUM=1 +export DEVICE_ID=$1 +export RANK_ID=0 +export RANK_SIZE=1 + +if [ -d "train" ]; +then + rm -rf ./train +fi +mkdir ./train +cp ../*.py ./train +cp ../*.yaml ./train +cp *.sh ./train +cp -r ../src ./train +cd ./train || exit +echo "start training for device $DEVICE_ID" +env > env.log +python train.py --config_path=$CONFIG_FILE --coco_root=$PATH2 --mindrecord_dir=$mindrecord_dir \ +--device_id=$DEVICE_ID --pre_trained=$PATH1 --device_target="GPU" &> log & +cd .. diff --git a/research/cv/rfcn/src/convert_checkpoint.py b/research/cv/rfcn/src/convert_checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..46631da47bff28cf894ec776222e2687932cac37 --- /dev/null +++ b/research/cv/rfcn/src/convert_checkpoint.py @@ -0,0 +1,62 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# =========================================================================== +""" +convert resnet pretrain model to faster_rcnn backbone pretrain model +""" +from mindspore.train.serialization import load_checkpoint, save_checkpoint +from mindspore.common.parameter import Parameter +from mindspore.common.tensor import Tensor +import mindspore.common.dtype as mstype +from model_utils.config import config + + +def load_weights(model_path, use_fp16_weight): + """ + load resnet pretrain checkpoint file. + + Args: + model_path (str): resnet pretrain checkpoint file . + use_fp16_weight(bool): whether save weight into float16. + + Returns: + parameter list(list): pretrain model weight list. + """ + ms_ckpt = load_checkpoint(model_path) + weights = {} + for msname in ms_ckpt: + if msname.startswith("layer") or msname.startswith("conv1") or msname.startswith("bn"): + param_name = "backbone." 
+ msname + else: + param_name = msname + if "down_sample_layer.0" in param_name: + param_name = param_name.replace("down_sample_layer.0", "conv_down_sample") + if "down_sample_layer.1" in param_name: + param_name = param_name.replace("down_sample_layer.1", "bn_down_sample") + weights[param_name] = ms_ckpt[msname].data.asnumpy() + if use_fp16_weight: + dtype = mstype.float16 + else: + dtype = mstype.float32 + parameter_dict = {} + for name in weights: + parameter_dict[name] = Parameter(Tensor(weights[name], dtype), name=name) + param_list = [] + for key, value in parameter_dict.items(): + param_list.append({"name": key, "data": value}) + return param_list + +if __name__ == "__main__": + parameter_list = load_weights(config.ckpt_file, use_fp16_weight=False) + save_checkpoint(parameter_list, "resnet_backbone.ckpt") diff --git a/research/cv/rfcn/src/dataset.py b/research/cv/rfcn/src/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..a148b7fd3def37488b26ab4e9b7e869d1dd1d602 --- /dev/null +++ b/research/cv/rfcn/src/dataset.py @@ -0,0 +1,584 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Rfcn dataset""" +from __future__ import division + +import os +import numpy as np +from numpy import random + +import cv2 +from PIL import Image +import mindspore.dataset as de +import mindspore.dataset.vision.c_transforms as C +from mindspore.mindrecord import FileWriter + +def bbox_overlaps(bboxes1, bboxes2, mode='iou'): + """Calculate the ious between each bbox of bboxes1 and bboxes2. 
+ + Args: + bboxes1(ndarray): shape (n, 4) + bboxes2(ndarray): shape (k, 4) + mode(str): iou (intersection over union) or iof (intersection + over foreground) + + Returns: + ious(ndarray): shape (n, k) + """ + + assert mode in ['iou', 'iof'] + + bboxes1 = bboxes1.astype(np.float32) + bboxes2 = bboxes2.astype(np.float32) + rows = bboxes1.shape[0] + cols = bboxes2.shape[0] + ious = np.zeros((rows, cols), dtype=np.float32) + if rows * cols == 0: + return ious + exchange = False + if bboxes1.shape[0] > bboxes2.shape[0]: + bboxes1, bboxes2 = bboxes2, bboxes1 + ious = np.zeros((cols, rows), dtype=np.float32) + exchange = True + area1 = (bboxes1[:, 2] - bboxes1[:, 0] + 1) * (bboxes1[:, 3] - bboxes1[:, 1] + 1) + area2 = (bboxes2[:, 2] - bboxes2[:, 0] + 1) * (bboxes2[:, 3] - bboxes2[:, 1] + 1) + for i in range(bboxes1.shape[0]): + x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0]) + y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1]) + x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2]) + y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3]) + overlap = np.maximum(x_end - x_start + 1, 0) * np.maximum( + y_end - y_start + 1, 0) + if mode == 'iou': + union = area1[i] + area2 - overlap + else: + union = area1[i] if not exchange else area2 + ious[i, :] = overlap / union + if exchange: + ious = ious.T + return ious + + +class PhotoMetricDistortion: + """Photo Metric Distortion""" + def __init__(self, + brightness_delta=32, + contrast_range=(0.5, 1.5), + saturation_range=(0.5, 1.5), + hue_delta=18): + self.brightness_delta = brightness_delta + self.contrast_lower, self.contrast_upper = contrast_range + self.saturation_lower, self.saturation_upper = saturation_range + self.hue_delta = hue_delta + + def __call__(self, img, boxes, labels): + # random brightness + img = img.astype('float32') + + if random.randint(2): + delta = random.uniform(-self.brightness_delta, + self.brightness_delta) + img += delta + + # mode == 0 --> do random contrast first + # mode == 1 --> do random contrast last + mode = random.randint(2) + if mode == 1: + if random.randint(2): + alpha = random.uniform(self.contrast_lower, + self.contrast_upper) + img *= alpha + + # convert color from BGR to HSV + img = cv2.cvtColor(img, cv2.COLOR_BGR2HSV) + + # random saturation + if random.randint(2): + img[..., 1] *= random.uniform(self.saturation_lower, + self.saturation_upper) + + # random hue + if random.randint(2): + img[..., 0] += random.uniform(-self.hue_delta, self.hue_delta) + img[..., 0][img[..., 0] > 360] -= 360 + img[..., 0][img[..., 0] < 0] += 360 + + # convert color from HSV to BGR + img = cv2.cvtColor(img, cv2.COLOR_HSV2BGR) + + # random contrast + if mode == 0: + if random.randint(2): + alpha = random.uniform(self.contrast_lower, + self.contrast_upper) + img *= alpha + + # randomly swap channels + if random.randint(2): + img = img[..., random.permutation(3)] + + return img, boxes, labels + + +class Expand: + """expand image""" + def __init__(self, mean=(0, 0, 0), to_rgb=True, ratio_range=(1, 4)): + if to_rgb: + self.mean = mean[::-1] + else: + self.mean = mean + self.min_ratio, self.max_ratio = ratio_range + + def __call__(self, img, boxes, labels): + if random.randint(2): + return img, boxes, labels + + h, w, c = img.shape + ratio = random.uniform(self.min_ratio, self.max_ratio) + expand_img = np.full((int(h * ratio), int(w * ratio), c), + self.mean).astype(img.dtype) + left = int(random.uniform(0, w * ratio - w)) + top = int(random.uniform(0, h * ratio - h)) + expand_img[top:top + h, left:left + w] = img + img = expand_img + boxes += 
np.tile((left, top), 2) + return img, boxes, labels + + +def rescale_with_tuple(img, scale): + h, w = img.shape[:2] + scale_factor = min(max(scale) / max(h, w), min(scale) / min(h, w)) + new_size = int(w * float(scale_factor) + 0.5), int(h * float(scale_factor) + 0.5) + rescaled_img = cv2.resize(img, new_size, interpolation=cv2.INTER_LINEAR) + + return rescaled_img, scale_factor + + +def rescale_with_factor(img, scale_factor): + h, w = img.shape[:2] + new_size = int(w * float(scale_factor) + 0.5), int(h * float(scale_factor) + 0.5) + return cv2.resize(img, new_size, interpolation=cv2.INTER_NEAREST) + + +def rescale_column(img, img_shape, gt_bboxes, gt_label, gt_num, config): + """rescale operation for image""" + img_data, scale_factor = rescale_with_tuple(img, (config.img_width, config.img_height)) + if img_data.shape[0] > config.img_height: + img_data, scale_factor2 = rescale_with_tuple(img_data, (config.img_height, config.img_height)) + scale_factor = scale_factor*scale_factor2 + + gt_bboxes = gt_bboxes * scale_factor + gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_data.shape[1] - 1) + gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_data.shape[0] - 1) + + pad_h = config.img_height - img_data.shape[0] + pad_w = config.img_width - img_data.shape[1] + assert ((pad_h >= 0) and (pad_w >= 0)) + + pad_img_data = np.zeros((config.img_height, config.img_width, 3)).astype(img_data.dtype) + pad_img_data[0:img_data.shape[0], 0:img_data.shape[1], :] = img_data + + img_shape = (config.img_height, config.img_width, 1.0) + img_shape = np.asarray(img_shape, dtype=np.float32) + + return (pad_img_data, img_shape, gt_bboxes, gt_label, gt_num) + +def rescale_column_test(img, img_shape, gt_bboxes, gt_label, gt_num, config): + """rescale operation for image of eval""" + img_data, scale_factor = rescale_with_tuple(img, (config.img_width, config.img_height)) + if img_data.shape[0] > config.img_height: + img_data, scale_factor2 = rescale_with_tuple(img_data, (config.img_height, config.img_height)) + scale_factor = scale_factor*scale_factor2 + + pad_h = config.img_height - img_data.shape[0] + pad_w = config.img_width - img_data.shape[1] + assert ((pad_h >= 0) and (pad_w >= 0)) + + pad_img_data = np.zeros((config.img_height, config.img_width, 3)).astype(img_data.dtype) + pad_img_data[0:img_data.shape[0], 0:img_data.shape[1], :] = img_data + + img_shape = np.append(img_shape, (scale_factor, scale_factor)) + img_shape = np.asarray(img_shape, dtype=np.float32) + + return (pad_img_data, img_shape, gt_bboxes, gt_label, gt_num) + + +def resize_column(img, img_shape, gt_bboxes, gt_label, gt_num, config): + """resize operation for image""" + img_data = img + h, w = img_data.shape[:2] + img_data = cv2.resize( + img_data, (config.img_width, config.img_height), interpolation=cv2.INTER_LINEAR) + h_scale = config.img_height / h + w_scale = config.img_width / w + + scale_factor = np.array( + [w_scale, h_scale, w_scale, h_scale], dtype=np.float32) + img_shape = (config.img_height, config.img_width, 1.0) + img_shape = np.asarray(img_shape, dtype=np.float32) + + gt_bboxes = gt_bboxes * scale_factor + + gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) + gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) + + return (img_data, img_shape, gt_bboxes, gt_label, gt_num) + + +def resize_column_test(img, img_shape, gt_bboxes, gt_label, gt_num, config): + """resize operation for image of eval""" + img_data = img + h, w = img_data.shape[:2] + img_data = cv2.resize( + img_data, 
(config.img_width, config.img_height), interpolation=cv2.INTER_LINEAR) + h_scale = config.img_height / h + w_scale = config.img_width / w + + scale_factor = np.array( + [w_scale, h_scale, w_scale, h_scale], dtype=np.float32) + img_shape = np.append(img_shape, (h_scale, w_scale)) + img_shape = np.asarray(img_shape, dtype=np.float32) + + gt_bboxes = gt_bboxes * scale_factor + + gt_bboxes[:, 0::2] = np.clip(gt_bboxes[:, 0::2], 0, img_shape[1] - 1) + gt_bboxes[:, 1::2] = np.clip(gt_bboxes[:, 1::2], 0, img_shape[0] - 1) + + return (img_data, img_shape, gt_bboxes, gt_label, gt_num) + + +def impad_to_multiple_column(img, img_shape, gt_bboxes, gt_label, gt_num, config): + """impad operation for image""" + img_data = cv2.copyMakeBorder(img, + 0, config.img_height - img.shape[0], 0, config.img_width - img.shape[1], + cv2.BORDER_CONSTANT, + value=0) + img_data = img_data.astype(np.float32) + return (img_data, img_shape, gt_bboxes, gt_label, gt_num) + + +def imnormalize_column(img, img_shape, gt_bboxes, gt_label, gt_num): + """imnormalize operation for image""" + mean = np.asarray([123.675, 116.28, 103.53]) + std = np.asarray([58.395, 57.12, 57.375]) + img_data = img.copy().astype(np.float32) + cv2.cvtColor(img_data, cv2.COLOR_BGR2RGB, img_data) # inplace + cv2.subtract(img_data, np.float64(mean.reshape(1, -1)), img_data) # inplace + cv2.multiply(img_data, 1 / np.float64(std.reshape(1, -1)), img_data) # inplace + + img_data = img_data.astype(np.float32) + return (img_data, img_shape, gt_bboxes, gt_label, gt_num) + + +def flip_column(img, img_shape, gt_bboxes, gt_label, gt_num): + """flip operation for image""" + img_data = img + img_data = np.flip(img_data, axis=1) + flipped = gt_bboxes.copy() + _, w, _ = img_data.shape + + flipped[..., 0::4] = w - gt_bboxes[..., 2::4] - 1 + flipped[..., 2::4] = w - gt_bboxes[..., 0::4] - 1 + + return (img_data, img_shape, flipped, gt_label, gt_num) + + +def transpose_column(img, img_shape, gt_bboxes, gt_label, gt_num): + """transpose operation for image""" + img_data = img.transpose(2, 0, 1).copy() + img_data = img_data.astype(np.float32) + img_shape = img_shape.astype(np.float32) + gt_bboxes = gt_bboxes.astype(np.float32) + gt_label = gt_label.astype(np.int32) + gt_num = gt_num.astype(np.bool) + + return (img_data, img_shape, gt_bboxes, gt_label, gt_num) + + +def photo_crop_column(img, img_shape, gt_bboxes, gt_label, gt_num): + """photo crop operation for image""" + random_photo = PhotoMetricDistortion() + img_data, gt_bboxes, gt_label = random_photo(img, gt_bboxes, gt_label) + + return (img_data, img_shape, gt_bboxes, gt_label, gt_num) + + +def expand_column(img, img_shape, gt_bboxes, gt_label, gt_num): + """expand operation for image""" + expand = Expand() + img, gt_bboxes, gt_label = expand(img, gt_bboxes, gt_label) + + return (img, img_shape, gt_bboxes, gt_label, gt_num) + + +def preprocess_fn(image, box, is_training, config): + """Preprocess function for dataset.""" + def _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert): + image_shape = image_shape[:2] + input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert + + if config.keep_ratio: + input_data = rescale_column_test(*input_data, config=config) + else: + input_data = resize_column_test(*input_data, config=config) + input_data = imnormalize_column(*input_data) + + output_data = transpose_column(*input_data) + return output_data + + def _data_aug(image, box, is_training): + """Data augmentation function.""" + image_bgr = image.copy() + 
image_bgr[:, :, 0] = image[:, :, 2] + image_bgr[:, :, 1] = image[:, :, 1] + image_bgr[:, :, 2] = image[:, :, 0] + image_shape = image_bgr.shape[:2] + gt_box = box[:, :4] + gt_label = box[:, 4] + gt_iscrowd = box[:, 5] + + pad_max_number = 128 + gt_box_new = np.pad(gt_box, ((0, pad_max_number - box.shape[0]), (0, 0)), mode="constant", constant_values=0) + gt_label_new = np.pad(gt_label, ((0, pad_max_number - box.shape[0])), mode="constant", constant_values=-1) + gt_iscrowd_new = np.pad(gt_iscrowd, ((0, pad_max_number - box.shape[0])), mode="constant", constant_values=1) + gt_iscrowd_new_revert = (~(gt_iscrowd_new.astype(np.bool))).astype(np.int32) + + if not is_training: + return _infer_data(image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert) + + flip = (np.random.rand() < config.flip_ratio) + expand = (np.random.rand() < config.expand_ratio) + input_data = image_bgr, image_shape, gt_box_new, gt_label_new, gt_iscrowd_new_revert + + if expand: + input_data = expand_column(*input_data) + if config.keep_ratio: + input_data = rescale_column(*input_data, config=config) + else: + input_data = resize_column(*input_data, config=config) + input_data = imnormalize_column(*input_data) + if flip: + input_data = flip_column(*input_data) + + output_data = transpose_column(*input_data) + return output_data + + return _data_aug(image, box, is_training) + + +def create_coco_label(is_training, config): + """Get image path and annotation from COCO.""" + from pycocotools.coco import COCO + + coco_root = config.coco_root + data_type = config.val_data_type + if is_training: + data_type = config.train_data_type + + # Classes need to train or test. + train_cls = config.coco_classes + train_cls_dict = {} + for i, cls in enumerate(train_cls): + train_cls_dict[cls] = i + + anno_json = os.path.join(coco_root, config.instance_set.format(data_type)) + + coco = COCO(anno_json) + classs_dict = {} + cat_ids = coco.loadCats(coco.getCatIds()) + for cat in cat_ids: + classs_dict[cat["id"]] = cat["name"] + + image_ids = coco.getImgIds() + image_files = [] + image_anno_dict = {} + + for img_id in image_ids: + image_info = coco.loadImgs(img_id) + file_name = image_info[0]["file_name"] + image_path = os.path.join(coco_root, data_type, file_name) + if not os.path.isfile(image_path): + print(file_name + " is in annotations but not exist") + continue + anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=None) + anno = coco.loadAnns(anno_ids) + annos = [] + for label in anno: + bbox = label["bbox"] + class_name = classs_dict[label["category_id"]] + if class_name in train_cls: + x1, x2 = bbox[0], bbox[0] + bbox[2] + y1, y2 = bbox[1], bbox[1] + bbox[3] + annos.append([x1, y1, x2, y2] + [train_cls_dict[class_name]] + [int(label["iscrowd"])]) + + image_files.append(image_path) + if annos: + image_anno_dict[image_path] = np.array(annos) + else: + image_anno_dict[image_path] = np.array([0, 0, 0, 0, 0, 1]) + + return image_files, image_anno_dict + + +def parse_json_annos_from_txt(anno_file, config): + """for user defined annotations text file, parse it to json format data""" + if not os.path.isfile(anno_file): + raise RuntimeError("Evaluation annotation file {} is not valid.".format(anno_file)) + + annos = { + "images": [], + "annotations": [], + "categories": [] + } + + if config.dataset == "coco": + classes = config.coco_classes + else: + classes = config.other_classes + + # set categories field + for i, cls_name in enumerate(classes): + annos["categories"].append({"id": i, "name": cls_name}) + + with open(anno_file, 
"rb") as f: + lines = f.readlines() + + img_id = 1 + anno_id = 1 + for line in lines: + line_str = line.decode("utf-8").strip() + line_split = str(line_str).split(' ') + # set image field + file_name = line_split[0] + annos["images"].append({"file_name": file_name, "id": img_id}) + # set annotations field + for anno_info in line_split[1:]: + anno = anno_info.split(",") + x = float(anno[0]) + y = float(anno[1]) + w = float(anno[2]) - float(anno[0]) + h = float(anno[3]) - float(anno[1]) + category_id = int(anno[4]) + iscrowd = int(anno[5]) + annos["annotations"].append({"bbox": [x, y, w, h], + "area": w * h, + "category_id": category_id, + "iscrowd": iscrowd, + "image_id": img_id, + "id": anno_id}) + anno_id += 1 + img_id += 1 + + return annos + + +def create_train_data_from_txt(image_dir, anno_path): + """Filter valid image file, which both in image_dir and anno_path.""" + def anno_parser(annos_str): + """Parse annotation from string to list.""" + annos = [] + for anno_str in annos_str: + anno = anno_str.strip().split(",") + xmin, ymin, xmax, ymax = list(map(float, anno[:4])) + cls_id = int(anno[4]) + iscrowd = int(anno[5]) + annos.append([xmin, ymin, xmax, ymax, cls_id, iscrowd]) + return annos + image_files = [] + image_anno_dict = {} + if not os.path.isdir(image_dir): + raise RuntimeError("Path given is not valid.") + if not os.path.isfile(anno_path): + raise RuntimeError("Annotation file is not valid.") + + with open(anno_path, "rb") as f: + lines = f.readlines() + for line in lines: + line_str = line.decode("utf-8").strip() + line_split = str(line_str).split(' ') + file_name = line_split[0] + image_path = os.path.join(image_dir, file_name) + if os.path.isfile(image_path): + image_anno_dict[image_path] = anno_parser(line_split[1:]) + image_files.append(image_path) + return image_files, image_anno_dict + +def data_to_mindrecord_byte_image(config, dataset="coco", is_training=True, prefix="rfcn.mindrecord", file_num=8): + """Create MindRecord file.""" + mindrecord_dir = config.mindrecord_dir + mindrecord_path = os.path.join(mindrecord_dir, prefix) + writer = FileWriter(mindrecord_path, file_num) + if dataset == "coco": + image_files, image_anno_dict = create_coco_label(is_training, config=config) + else: + image_files, image_anno_dict = create_train_data_from_txt(config.image_dir, config.anno_train_path) + + rfcn_json = { + "image": {"type": "bytes"}, + "annotation": {"type": "int32", "shape": [-1, 6]}, + } + writer.add_schema(rfcn_json, "rfcn_json") + + i = 0 + total = len(image_files) + for image_name in image_files: + # Here try block is to find corrupted images (Coco2014 has corrupted images) + img = Image.open(image_name) + try: + img = np.asarray(img) + except RuntimeError: + print("image error锛� " + image_name) + continue + i = i + 1 + with open(image_name, 'rb') as f: + img = f.read() + annos = np.array(image_anno_dict[image_name], dtype=np.int32) + + row = {"image": img, "annotation": annos} + writer.write_raw_data([row]) + + if i % 100 == 0: + print(str(i) + "/" + str(total) + " write success") + print(str(i) + "/" + str(total) + " write success") + writer.commit() + + +def create_rfcn_dataset(config, mindrecord_file, batch_size=2, device_num=1, rank_id=0, is_training=True, + num_parallel_workers=8, python_multiprocessing=False): + """Create Rfcn dataset with MindDataset.""" + cv2.setNumThreads(0) + de.config.set_prefetch_size(8) + ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank_id, + 
num_parallel_workers=num_parallel_workers, shuffle=is_training) + decode = C.Decode() + ds = ds.map(input_columns=["image"], operations=decode) + compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training, config=config)) + + if is_training: + ds = ds.map(input_columns=["image", "annotation"], + output_columns=["image", "image_shape", "box", "label", "valid_num"], + column_order=["image", "image_shape", "box", "label", "valid_num"], + operations=compose_map_func, python_multiprocessing=python_multiprocessing, + num_parallel_workers=num_parallel_workers) + ds = ds.batch(batch_size, drop_remainder=True) + else: + ds = ds.map(input_columns=["image", "annotation"], + output_columns=["image", "image_shape", "box", "label", "valid_num"], + column_order=["image", "image_shape", "box", "label", "valid_num"], + operations=compose_map_func, + num_parallel_workers=num_parallel_workers) + ds = ds.batch(batch_size, drop_remainder=True) + return ds diff --git a/research/cv/rfcn/src/detecteval.py b/research/cv/rfcn/src/detecteval.py new file mode 100644 index 0000000000000000000000000000000000000000..e77e708fd78ebedeec6eb33d2139ffea6d147cdf --- /dev/null +++ b/research/cv/rfcn/src/detecteval.py @@ -0,0 +1,857 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""detect eval""" +from typing import List +import os +import csv +import warnings +import cv2 +import numpy as np + +from pycocotools.cocoeval import COCOeval +import matplotlib.pyplot as plt +from matplotlib import gridspec +import seaborn as sns + + +warnings.filterwarnings("ignore") +COLOR_MAP = [ + (0, 255, 255), + (0, 255, 0), + (255, 0, 0), + (0, 0, 255), + (255, 255, 0), + (255, 0, 255), + (0, 128, 128), + (0, 128, 0), + (128, 0, 0), + (0, 0, 128), + (128, 128, 0), + (128, 0, 128), +] + + +def write_list_to_csv(file_path, data_to_write, append=False): + print('Saving data into file [{}]...'.format(file_path)) + if append: + open_mode = 'a' + else: + open_mode = 'w' + with open(file_path, open_mode) as csvfile: + writer = csv.writer(csvfile) + writer.writerow(data_to_write) + + +def read_image(image_path): + image = cv2.imread(image_path) + if image is None: + return False, None + return True, image + + +def save_image(image_path, image): + return cv2.imwrite(image_path, image) + + +def draw_rectangle(image, pt1, pt2, label=None): + if label is not None: + map_index = label % len(COLOR_MAP) + color = COLOR_MAP[map_index] + else: + color = COLOR_MAP[0] + thickness = 5 + cv2.rectangle(image, pt1, pt2, color, thickness) + + +def draw_text(image, text, org, label=None): + if label is not None: + map_index = label % len(COLOR_MAP) + color = COLOR_MAP[map_index] + else: + color = COLOR_MAP[0] + font_face = cv2.FONT_HERSHEY_SIMPLEX + font_scale = 0.6 + thickness = 1 + cv2.putText(image, text, org, font_face, font_scale, color, thickness) + + +def draw_one_box(image, label, box, cat_id, line_thickness=None): + """draw_one_box""" + tl = line_thickness or round(0.002 * (image.shape[0] + image.shape[1]) / 2) + 1 + if cat_id is not None: + map_index = cat_id % len(COLOR_MAP) + color = COLOR_MAP[map_index] + else: + color = COLOR_MAP[0] + c1, c2 = (int(box[0]), int(box[1])), (int(box[2]), int(box[3])) + cv2.rectangle(image, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA) + + tf = max(tl - 1, 1) + t_size = cv2.getTextSize(label, 0, fontScale=tl / 6, thickness=tf // 2)[0] + c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3 + cv2.rectangle(image, c1, c2, color, -1, cv2.LINE_AA) + cv2.putText(image, label, (c1[0], c1[1] - 2), 0, tl / 6, [255, 255, 255], thickness=tf // 2, lineType=cv2.LINE_AA) + + +class DetectEval(COCOeval): + """DetectEval""" + def __init__(self, cocoGt=None, cocoDt=None, iouType="bbox"): + assert iouType == "bbox", "iouType only supported bbox" + + super().__init__(cocoGt, cocoDt, iouType) + if not self.cocoGt is None: + cat_infos = cocoGt.loadCats(cocoGt.getCatIds()) + self.params.labels = {} + for cat in cat_infos: + self.params.labels[cat["id"]] = cat["name"] + + # add new + def catId_summarize(self, catId, iouThr=None, areaRng="all", maxDets=100): + '''catId_summarize''' + p = self.params + aind = [i for i, aRng in enumerate(p.areaRngLbl) if aRng == areaRng] + mind = [i for i, mDet in enumerate(p.maxDets) if mDet == maxDets] + + s = self.eval["recall"] + if iouThr is not None: + iou = np.where(iouThr == p.iouThrs)[0] + s = s[iou] + + if isinstance(catId, int): + s = s[:, catId, aind, mind] + else: + s = s[:, :, aind, mind] + + not_empty = len(s[s > -1]) == 0 + if not_empty: + mean_s = -1 + else: + mean_s = np.mean(s[s > -1]) + return mean_s + + def compute_gt_dt_num(self): + '''compute_gt_dt_num''' + p = self.params + catIds_gt_num = {} + catIds_dt_num = {} + + for ids in p.catIds: + gts_cat_id = 
self.cocoGt.loadAnns(self.cocoGt.getAnnIds(catIds=[ids])) + dts_cat_id = self.cocoDt.loadAnns(self.cocoDt.getAnnIds(catIds=[ids])) + catIds_gt_num[ids] = len(gts_cat_id) + catIds_dt_num[ids] = len(dts_cat_id) + + return catIds_gt_num, catIds_dt_num + + def evaluate_ok_ng(self, img_id, catIds, iou_threshold=0.5): + """ + evaluate every if this image is ok銆乸recision_ng銆乺ecall_ng + img_id: int + cat_ids:list + iou_threshold:int + """ + p = self.params + img_id = int(img_id) + + # Save the results of precision_ng and recall_ng for each category on a picture + cat_id_result = {} + for cat_id in catIds: + gt = self._gts[img_id, cat_id] + dt = self._dts[img_id, cat_id] + ious = self.computeIoU(img_id, cat_id) + + # Sort dt in descending order, and only take the first 100 + inds = np.argsort([-d['score'] for d in dt], kind='mergesort') + dt = [dt[i] for i in inds] + + # p.maxDets must be set in ascending order + if len(dt) > p.maxDets[-1]: + dt = dt[0:p.maxDets[-1]] + + # The first case: gt, dt are both 0: skip + if not gt and not dt: + cat_id_result[cat_id] = (False, False) + continue + # The second case: gt = 0, dt !=0: precision_ng + if not gt and dt: + cat_id_result[cat_id] = (True, False) + continue + # The third case: gt != 0, dt = 0: recall_ng + if gt and not dt: + cat_id_result[cat_id] = (False, True) + continue + # The fourth case: gt and dt are matched in pairs + gtm = [0] * len(gt) + dtm = [0] * len(dt) + + for dind in range(len(dt)): + # dt:[a] gt [b] ious = [a*b] + iou = min([iou_threshold, 1 - 1e-10]) + # m records the position of the gt with the best match + m = -1 + for gind in range(len(gt)): + # If gt[gind] already matches, skip it. + if gtm[gind] > 0: + continue + # If the iou(dind, gind) is less than the threshold, traverse + if ious[dind, gind] < iou: + continue + iou = ious[dind, gind] + m = gind + if m == -1: + continue + dtm[dind] = 1 + gtm[m] = 1 + + # If gt is all matched, gtm is all 1 + precision_ng = sum(dtm) < len(dtm) + recall_ng = sum(gtm) < len(gtm) + cat_id_result[cat_id] = (precision_ng, recall_ng) + + # As long as the precision_ng in a class is True, the picture is precision_ng, and recall_ng is the same + # Subsequent development of NG pictures for each category can be saved + precision_result = False + recall_result = False + for ng in cat_id_result.values(): + precision_ng = ng[0] + recall_ng = ng[1] + if precision_ng: + precision_result = precision_ng + if recall_ng: + recall_result = recall_ng + return precision_result, recall_result + + def evaluate_every_class(self): + """ + compute every class's: + [label, tp_num, gt_num, dt_num, precision, recall] + """ + print("Evaluate every class's predision and recall") + p = self.params + cat_ids = p.catIds + labels = p.labels + result = [] + catIds_gt_num, catIds_dt_num = self.compute_gt_dt_num() + sum_gt_num = 0 + sum_dt_num = 0 + for value in catIds_gt_num.values(): + sum_gt_num += value + for value in catIds_dt_num.values(): + sum_dt_num += value + sum_tp_num = 0 + + for i, cat_id in enumerate(cat_ids): + # Here is hard-coded + stats = self.catId_summarize(catId=i) + recall = stats + gt_num = catIds_gt_num[cat_id] + tp_num = recall * gt_num + sum_tp_num += tp_num + dt_num = catIds_dt_num[cat_id] + if dt_num <= 0: + if gt_num == 0: + precision = -1 + else: + precision = 0 + else: + precision = tp_num / dt_num + label = labels[cat_id] + class_result = [label, int(round(tp_num)), gt_num, int(round(dt_num)), round(precision, 3), + round(recall, 3)] + result.append(class_result) + all_precision = sum_tp_num / 
sum_dt_num + all_recall = sum_tp_num / sum_gt_num + all_result = ["all", int(round(sum_tp_num)), sum_gt_num, int(round(sum_dt_num)), round(all_precision, 3), + round(all_recall, 3)] + result.append(all_result) + + print("Done") + return result + + def plot_pr_curve(self, eval_result_path): + + """ + precisions[T, R, K, A, M] + T: iou thresholds [0.5 : 0.05 : 0.95], idx from 0 to 9 + R: recall thresholds [0 : 0.01 : 1], idx from 0 to 100 + K: category, idx from 0 to ... + A: area range, (all, small, medium, large), idx from 0 to 3 + M: max dets, (1, 10, 100), idx from 0 to 2 + """ + print("Plot pr curve about every class") + precisions = self.eval["precision"] + p = self.params + cat_ids = p.catIds + labels = p.labels + + pr_dir = os.path.join(eval_result_path, "./pr_curve_image") + if not os.path.exists(pr_dir): + os.mkdir(pr_dir) + + for i, cat_id in enumerate(cat_ids): + pr_array1 = precisions[0, :, i, 0, 2] + x = np.arange(0.0, 1.01, 0.01) + # plot PR curve + plt.plot(x, pr_array1, label="iou=0.5," + labels[cat_id]) + plt.xlabel("recall") + plt.ylabel("precision") + plt.xlim(0, 1.0) + plt.ylim(0, 1.01) + plt.grid(True) + plt.legend(loc="lower left") + plt_path = os.path.join(pr_dir, "pr_curve_" + labels[cat_id] + ".png") + plt.savefig(plt_path) + plt.close(1) + print("Done") + + def save_images(self, config, eval_result_path, iou_threshold=0.5): + """ + save ok_images, precision_ng_images, recall_ng_images + Arguments: + config: dict, config about parameters + eval_result_path: str, path to save images + iou_threshold: int, iou_threshold + """ + print("Saving images of ok ng") + p = self.params + img_ids = p.imgIds + cat_ids = p.catIds if p.useCats else [-1] # list: [0,1,2,3....] + labels = p.labels + + dt = self.cocoDt.getAnnIds() + dts = self.cocoDt.loadAnns(dt) + + for img_id in img_ids: + img_id = int(img_id) + img_info = self.cocoGt.loadImgs(img_id) + + if config.dataset == "coco": + im_path_dir = os.path.join(config.coco_root, config.val_data_type) + elif config.dataset == "VOC": + im_path_dir = os.path.join(config.voc_root, 'eval', "JPEGImages") + + assert config.dataset in ("coco", "VOC") + + # Return whether the image is precision_ng or recall_ng + precision_ng, recall_ng = self.evaluate_ok_ng(img_id, cat_ids, iou_threshold) + # Save to ok_images + if not precision_ng and not recall_ng: + # origin image path + im_path = os.path.join(im_path_dir, img_info[0]['file_name']) + # output image path + im_path_out_dir = os.path.join(eval_result_path, 'ok_images') + if not os.path.exists(im_path_out_dir): + os.makedirs(im_path_out_dir) + im_path_out = os.path.join(im_path_out_dir, img_info[0]['file_name']) + + success, image = read_image(im_path) + assert success + + for obj in dts: + _id = obj["image_id"] + if _id == img_id: + bbox = obj["bbox"] + score = obj["score"] + category_id = obj["category_id"] + label = labels[category_id] + + xmin = int(bbox[0]) + ymin = int(bbox[1]) + width = int(bbox[2]) + height = int(bbox[3]) + xmax = xmin + width + ymax = ymin + height + + label = label + " " + str(round(score, 3)) + draw_one_box(image, label, (xmin, ymin, xmax, ymax), category_id) + save_image(im_path_out, image) + else: + # Save to precision_ng_images + if precision_ng: + # origin image path + im_path = os.path.join(im_path_dir, img_info[0]['file_name']) + # output image path + im_path_out_dir = os.path.join(eval_result_path, 'precision_ng_images') + if not os.path.exists(im_path_out_dir): + os.makedirs(im_path_out_dir) + im_path_out = os.path.join(im_path_out_dir, 
img_info[0]['file_name']) + + success, image = read_image(im_path) + assert success + + for obj in dts: + _id = obj["image_id"] + if _id == img_id: + bbox = obj["bbox"] + score = obj["score"] + category_id = obj["category_id"] + label = labels[category_id] + + xmin = int(bbox[0]) + ymin = int(bbox[1]) + width = int(bbox[2]) + height = int(bbox[3]) + xmax = xmin + width + ymax = ymin + height + + label = label + " " + str(round(score, 3)) + draw_one_box(image, label, (xmin, ymin, xmax, ymax), category_id) + save_image(im_path_out, image) + + # Save to recall_ng_images + if recall_ng: + # origin image path + im_path = os.path.join(im_path_dir, img_info[0]['file_name']) + # output image path + im_path_out_dir = os.path.join(eval_result_path, 'recall_ng_images') + if not os.path.exists(im_path_out_dir): + os.makedirs(im_path_out_dir) + + im_path_out = os.path.join(im_path_out_dir, img_info[0]['file_name']) + success, image = read_image(im_path) + if not success: + raise Exception('Failed reading image from [{}]'.format(im_path)) + for obj in dts: + _id = obj["image_id"] + if _id == img_id: + bbox = obj["bbox"] + score = obj["score"] + category_id = obj["category_id"] + label = labels[category_id] + + xmin = int(bbox[0]) + ymin = int(bbox[1]) + width = int(bbox[2]) + height = int(bbox[3]) + xmax = xmin + width + ymax = ymin + height + + label = label + " " + str(round(score, 3)) + draw_one_box(image, label, (xmin, ymin, xmax, ymax), category_id) + save_image(im_path_out, image) + + print("Done") + + def compute_precison_recall_f1(self, min_score=0.1): + '''compute_precison_recall_f1''' + + print('Compute precision, recall, f1...') + if not self.evalImgs: + print('Please run evaluate() first') + p = self.params + catIds = p.catIds if p.useCats == 1 else [-1] + labels = p.labels + + assert len(p.maxDets) == 1 + assert len(p.iouThrs) == 1 + assert len(p.areaRng) == 1 + + # get inds to evaluate + k_list = [n for n, k in enumerate(p.catIds)] + m_list = [m for n, m in enumerate(p.maxDets)] + a_list: List[int] = [n for n, a in enumerate(p.areaRng)] + i_list = [n for n, i in enumerate(p.imgIds)] + I0 = len(p.imgIds) + A0 = len(p.areaRng) + + # cat_pr_dict: + # {label1:[precision_li, recall_li, f1_li, score_li], label2:[precision_li, recall_li, f1_li, score_li]} + cat_pr_dict = {} + cat_pr_dict_origin = {} + + for k0 in k_list: + Nk = k0 * A0 * I0 + # areagRng + for a0 in a_list: + Na = a0 * I0 + # maxDet + for maxDet in m_list: + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if not E: + continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + # different sorting method generates slightly different results. + # mergesort is used to be consistent as Matlab implementation. 
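+ # mergesort is stable, so detections with equal scores keep their input order
+ # and the per-threshold precision/recall derived from the cumulative TP/FP
+ # sums below is reproducible across runs.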
+ inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:, 0:maxDet] for e in E], axis=1)[:, inds] + dtIg = np.concatenate([e['dtIgnore'][:, 0:maxDet] for e in E], axis=1)[:, inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg == 0) + if npig == 0: + continue + tps = np.logical_and(dtm, np.logical_not(dtIg)) + fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) + + # Ensure that iou has only one value + assert (tps.shape[0]) == 1 + assert (fps.shape[0]) == 1 + + tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float) + fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float) + ids = catIds[k0] + label = labels[ids] + + self.calculate_pr_dict(tp_sum, fp_sum, label, npig, dtScoresSorted, cat_pr_dict, cat_pr_dict_origin, + min_score=min_score) + print("Done") + return cat_pr_dict, cat_pr_dict_origin + + def calculate_pr_dict(self, tp_sum, fp_sum, label, npig, dtScoresSorted, cat_pr_dict, cat_pr_dict_origin, + min_score=0.1): + '''calculate_pr_dict''' + # iou + for (tp, fp) in zip(tp_sum, fp_sum): + tp = np.array(tp) + fp = np.array(fp) + rc = tp / npig + pr = tp / (fp + tp + np.spacing(1)) + + f1 = np.divide(2 * (rc * pr), pr + rc, out=np.zeros_like(2 * (rc * pr)), where=pr + rc != 0) + + conf_thres = [int(i) * 0.01 for i in range(10, 100, 10)] + dtscores_ascend = dtScoresSorted[::-1] + inds = np.searchsorted(dtscores_ascend, conf_thres, side='left') + pr_new = [0.0] * len(conf_thres) + rc_new = [0.0] * len(conf_thres) + f1_new = [0.0] * len(conf_thres) + pr_ascend = pr[::-1] + rc_ascend = rc[::-1] + f1_ascend = f1[::-1] + try: + for i, ind in enumerate(inds): + if conf_thres[i] >= min_score: + pr_new[i] = pr_ascend[ind] + rc_new[i] = rc_ascend[ind] + f1_new[i] = f1_ascend[ind] + else: + pr_new[i] = 0.0 + rc_new[i] = 0.0 + f1_new[i] = 0.0 + except IndexError: + pass + # Ensure that the second, third, and fourth for loops only enter once + if label not in cat_pr_dict.keys(): + cat_pr_dict_origin[label] = [pr[::-1], rc[::-1], f1[::-1], dtScoresSorted[::-1]] + cat_pr_dict[label] = [pr_new, rc_new, f1_new, conf_thres] + else: + break + + def compute_tp_fp_confidence(self): + '''compute_tp_fp_confidence''' + + print('Compute tp and fp confidences') + if not self.evalImgs: + print('Please run evaluate() first') + p = self.params + catIds = p.catIds if p.useCats == 1 else [-1] + labels = p.labels + + assert len(p.maxDets) == 1 + assert len(p.iouThrs) == 1 + assert len(p.areaRng) == 1 + + # get inds to evaluate + m_list = [m for n, m in enumerate(p.maxDets)] + k_list = list(range(len(p.catIds))) + a_list = list(range(len(p.areaRng))) + i_list = list(range(len(p.imgIds))) + + I0 = len(p.imgIds) + A0 = len(p.areaRng) + # cat_dict + correct_conf_dict = {} + incorrect_conf_dict = {} + + for k0 in k_list: + Nk = k0 * A0 * I0 + # areagRng + for a0 in a_list: + Na = a0 * I0 + # maxDet + for maxDet in m_list: + E = [self.evalImgs[Nk + Na + i] for i in i_list] + E = [e for e in E if not e is None] + if not E: + continue + dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E]) + + inds = np.argsort(-dtScores, kind='mergesort') + dtScoresSorted = dtScores[inds] + + dtm = np.concatenate([e['dtMatches'][:, 0:maxDet] for e in E], axis=1)[:, inds] + dtIg = np.concatenate([e['dtIgnore'][:, 0:maxDet] for e in E], axis=1)[:, inds] + gtIg = np.concatenate([e['gtIgnore'] for e in E]) + npig = np.count_nonzero(gtIg == 0) + if npig == 0: + continue + tps = np.logical_and(dtm, np.logical_not(dtIg)) + 
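+ # tps marks score-sorted detections matched to a non-ignored ground truth and
+ # fps (next line) marks the unmatched ones; their confidences are later
+ # collected into the correct/incorrect histograms used by plot_hist_curve.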
fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg)) + + # Ensure that iou has only one value + assert (tps.shape[0]) == 1 + assert (fps.shape[0]) == 1 + + tp_inds = np.where(tps) + fp_inds = np.where(fps) + + tp_confidence = dtScoresSorted[tp_inds[1]] + fp_confidence = dtScoresSorted[fp_inds[1]] + tp_confidence_li = tp_confidence.tolist() + fp_confidence_li = fp_confidence.tolist() + ids = catIds[k0] + label = labels[ids] + + # Ensure that the second and third for loops only enter once + if label not in correct_conf_dict.keys(): + correct_conf_dict[label] = tp_confidence_li + else: + print("maxDet:", maxDet, " ", "areagRng:", p.areagRng) + break + + if label not in incorrect_conf_dict.keys(): + incorrect_conf_dict[label] = fp_confidence_li + else: + print("maxDet:", maxDet, " ", "areagRng:", p.areagRng) + break + print("Done") + return correct_conf_dict, incorrect_conf_dict + + def write_best_confidence_threshold(self, cat_pr_dict, cat_pr_dict_origin, eval_result_path): + """ + write best confidence threshold + """ + print("Write best confidence threshold to csv") + result_csv = os.path.join(eval_result_path, "best_threshold.csv") + result = ["cat_name", "best_f1", "best_precision", "best_recall", "best_score"] + write_list_to_csv(result_csv, result, append=False) + return_result = [] + for cat_name, cat_info in cat_pr_dict.items(): + f1_li = cat_info[2] + score_li = cat_info[3] + max_f1 = [f1 for f1 in f1_li if abs(f1 - max(f1_li)) <= 0.001] + thre_ = [0.003] + [int(i) * 0.001 for i in range(10, 100, 10)] + [0.099] + # Find the best confidence threshold for 10 levels of confidence thresholds + if len(max_f1) == 1: + # max_f1 is on the far right + if f1_li.index(max_f1) == len(f1_li) - 1: + index = f1_li.index(max_f1) - 1 + # max_f1 is in the middle + elif f1_li.index(max_f1) != len(f1_li) - 1 and f1_li.index(max_f1) != 0: + index_a = f1_li.index(max_f1) - 1 + index_b = f1_li.index(max_f1) + 1 + if f1_li[index_a] >= f1_li[index_b]: + index = index_a + else: + index = f1_li.index(max_f1) + # max_f1 is on the far left + elif f1_li.index(max_f1) == 0: + index = f1_li.index(max_f1) + + best_thre = score_li[index] + second_thre = [best_thre + i for i in thre_] + + elif len(max_f1) > 1: + thre_pre = [index for (index, value) in enumerate(f1_li) if abs(value - max(f1_li)) <= 0.001] + best_thre = score_li[thre_pre[int((len(thre_pre) - 1) / 2)]] + second_thre = [best_thre + i for i in thre_] + + # Reduce the step unit to find the second confidence threshold + cat_info_origin = cat_pr_dict_origin[cat_name] + dtscores_ascend = cat_info_origin[3] + inds = np.searchsorted(dtscores_ascend, second_thre, side='left') + + pr_second = [0] * len(second_thre) + rc_second = [0] * len(second_thre) + f1_second = [0] * len(second_thre) + + try: + for i, ind in enumerate(inds): + if ind >= len(cat_info_origin[0]): + ind = len(cat_info_origin[0]) - 1 + pr_second[i] = cat_info_origin[0][ind] + rc_second[i] = cat_info_origin[1][ind] + f1_second[i] = cat_info_origin[2][ind] + except IndexError: + pass + + best_f1 = max(f1_second) + best_index = f1_second.index(best_f1) + best_precision = pr_second[best_index] + best_recall = rc_second[best_index] + best_score = second_thre[best_index] + result = [cat_name, best_f1, best_precision, best_recall, best_score] + return_result.append(result) + write_list_to_csv(result_csv, result, append=True) + return return_result + + def plot_mc_curve(self, cat_pr_dict, eval_result_path): + """ + plot matrix-confidence curve + cat_pr_dict:{"label_name":[precision, recall, f1, 
score]} + """ + print('Plot mc curve') + savefig_path = os.path.join(eval_result_path, 'pr_cofidence_fig') + if not os.path.exists(savefig_path): + os.mkdir(savefig_path) + + xlabel = "Confidence" + ylabel = "Metric" + for cat_name, cat_info in cat_pr_dict.items(): + precision = [round(p, 3) for p in cat_info[0]] + recall = [round(r, 3) for r in cat_info[1]] + f1 = [round(f, 3) for f in cat_info[2]] + score = [round(s, 3) for s in cat_info[3]] + plt.figure(figsize=(9, 9)) + gs = gridspec.GridSpec(4, 1) + + plt.subplot(gs[:3, 0]) + # 1.precision-confidence + plt.plot(score, precision, linewidth=2, color="deepskyblue", label="precision") + + # 2.recall-confidence + plt.plot(score, recall, linewidth=2, color="limegreen", label="recall") + + # 3.f1-confidence + plt.plot(score, f1, linewidth=2, color="tomato", label="f1_score") + + plt.xlabel(xlabel) + plt.ylabel(ylabel) + plt.title(cat_name, fontsize=15) + + plt.xlim(0, 1) + plt.xticks((np.arange(0, 1, 0.1))) + plt.ylim(0, 1.10) + plt.legend(loc="lower left") + + row_name = ["conf_threshold", "precision", "recall", "f1"] + plt.grid(True) + plt.subplot(gs[3, 0]) + plt.axis('off') + + colors = ["white", "deepskyblue", "limegreen", "tomato"] + plt.table(cellText=[score, precision, recall, f1], rowLabels=row_name, loc='center', cellLoc='center', + rowLoc='center', rowColours=colors) + + plt.subplots_adjust(left=0.2, bottom=0.2) + plt.savefig(os.path.join(savefig_path, cat_name) + '.png', dpi=250) + print("Done") + + def plot_hist_curve(self, input_data, eval_result_path): + '''plot_hist_curve''' + + correct_conf_dict, incorrect_conf_dict = input_data[0], input_data[1] + savefig_path = os.path.join(eval_result_path, 'hist_curve_fig') + if not os.path.exists(savefig_path): + os.mkdir(savefig_path) + for l in correct_conf_dict.keys(): + plt.figure(figsize=(7, 7)) + if l in incorrect_conf_dict.keys() and correct_conf_dict[l] and incorrect_conf_dict[l]: + gs = gridspec.GridSpec(4, 1) + plt.subplot(gs[:3, 0]) + correct_conf_dict[l].sort() + correct_conf_dict[l].reverse() + col_name_correct = ['number', 'mean', 'max', 'min', 'min99%', 'min99.9%'] + col_val_correct = [len(correct_conf_dict[l]), + ('%.2f' % np.mean(correct_conf_dict[l])), + ('%.2f' % max(correct_conf_dict[l])), ('%.2f' % min(correct_conf_dict[l])), + ('%.2f' % correct_conf_dict[l][int(len(correct_conf_dict[l]) * 0.99) - 1]), + ('%.2f' % correct_conf_dict[l][int(len(correct_conf_dict[l]) * 0.999) - 1])] + sns.set_palette('hls') + sns.distplot(correct_conf_dict[l], bins=50, kde_kws={'color': 'b', 'lw': 3}, + hist_kws={'color': 'b', 'alpha': 0.3}) + plt.xlim((0, 1)) + plt.xlabel(l) + plt.ylabel("numbers") + ax1 = plt.twinx() + incorrect_conf_dict[l].sort() + incorrect_conf_dict[l].reverse() + col_val_incorrect = [len(incorrect_conf_dict[l]), + ('%.2f' % np.mean(incorrect_conf_dict[l])), + ('%.2f' % max(incorrect_conf_dict[l])), ('%.2f' % min(incorrect_conf_dict[l])), + ('%.2f' % incorrect_conf_dict[l][int(len(incorrect_conf_dict[l]) * 0.99) - 1]), + ('%.2f' % incorrect_conf_dict[l][int(len(incorrect_conf_dict[l]) * 0.999) - 1])] + sns.distplot(incorrect_conf_dict[l], bins=50, kde_kws={'color': 'r', 'lw': 3}, + hist_kws={'color': 'r', 'alpha': 0.3}, ax=ax1) + plt.grid(True) + plt.subplot(gs[3, 0]) + plt.axis('off') + row_name = ['', 'correct', 'incorrect'] + table = plt.table(cellText=[col_name_correct, col_val_correct, col_val_incorrect], rowLabels=row_name, + loc='center', cellLoc='center', rowLoc='center') + table.auto_set_font_size(False) + table.set_fontsize(10) + table.scale(1, 1.5) + 
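+ # Save the per-class confidence histogram together with its summary table;
+ # the output file is named after the class label with a .jpg extension.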
plt.savefig(os.path.join(savefig_path, l) + '.jpg') + elif correct_conf_dict[l]: + gs = gridspec.GridSpec(4, 1) + plt.subplot(gs[:3, 0]) + correct_conf_dict[l].sort() + correct_conf_dict[l].reverse() + col_name_correct = ['number', 'mean', 'max', 'min', 'min99%', 'min99.9%'] + col_val_correct = [len(correct_conf_dict[l]), + ('%.4f' % np.mean(correct_conf_dict[l])), + ('%.4f' % max(correct_conf_dict[l])), ('%.2f' % min(correct_conf_dict[l])), + ('%.2f' % correct_conf_dict[l][int(len(correct_conf_dict[l]) * 0.99) - 1]), + ('%.2f' % correct_conf_dict[l][int(len(correct_conf_dict[l]) * 0.999) - 1])] + sns.set_palette('hls') + sns.distplot(correct_conf_dict[l], bins=50, kde_kws={'color': 'b', 'lw': 3}, + hist_kws={'color': 'b', 'alpha': 0.3}) + plt.xlim((0, 1)) + plt.xlabel(l) + plt.ylabel("numbers") + plt.grid(True) + plt.subplot(gs[3, 0]) + plt.axis('off') + row_name = ['', 'correct'] + table = plt.table(cellText=[col_name_correct, col_val_correct], rowLabels=row_name, + loc='center', cellLoc='center', rowLoc='center') + table.auto_set_font_size(False) + table.set_fontsize(10) + table.scale(1, 1.5) + plt.savefig(os.path.join(savefig_path, l) + '.jpg') + elif l in incorrect_conf_dict.keys() and incorrect_conf_dict[l]: + gs = gridspec.GridSpec(4, 1) + plt.subplot(gs[:3, 0]) + incorrect_conf_dict[l].sort() + incorrect_conf_dict[l].reverse() + col_name_correct = ['number', 'mean', 'max', 'min', 'min99%', 'min99.9%'] + col_val_correct = [len(incorrect_conf_dict[l]), + ('%.4f' % np.mean(incorrect_conf_dict[l])), + ('%.4f' % max(incorrect_conf_dict[l])), ('%.2f' % min(incorrect_conf_dict[l])), + ('%.2f' % incorrect_conf_dict[l][int(len(incorrect_conf_dict[l]) * 0.99) - 1]), + ('%.2f' % incorrect_conf_dict[l][int(len(incorrect_conf_dict[l]) * 0.999) - 1])] + sns.set_palette('hls') + sns.distplot(incorrect_conf_dict[l], bins=50, kde_kws={'color': 'b', 'lw': 3}, + hist_kws={'color': 'b', 'alpha': 0.3}) + plt.xlim((0, 1)) + plt.xlabel(l) + plt.grid(True) + plt.subplot(gs[3, 0]) + plt.axis('off') + row_name = ['', 'incorrect'] + table = plt.table(cellText=[col_name_correct, col_val_correct], rowLabels=row_name, + loc='center', cellLoc='center', rowLoc='center') + table.auto_set_font_size(False) + table.set_fontsize(10) + table.scale(1, 1.5) + plt.savefig(os.path.join(savefig_path, l) + '.jpg') + + +if __name__ == "__main__": + cocoeval = COCOeval_() diff --git a/research/cv/rfcn/src/eval_utils.py b/research/cv/rfcn/src/eval_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a4a3cc5c5a661a101a73388fcb8d93c8df8656c8 --- /dev/null +++ b/research/cv/rfcn/src/eval_utils.py @@ -0,0 +1,184 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Coco metrics utils""" + +import os +import json +from collections import defaultdict +import numpy as np +from pycocotools.coco import COCO +from pycocotools.cocoeval import COCOeval +import mindspore.common.dtype as mstype +from mindspore import context + +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.common import Parameter +from src.dataset import data_to_mindrecord_byte_image, create_rfcn_dataset, parse_json_annos_from_txt +from src.util import bbox2result_1image, results2json + +from src.dataset import create_voc_dataset_txt + + +def create_eval_mindrecord(config): + """ eval_rfcn """ + print("CHECKING MINDRECORD FILES ...") + if not os.path.exists(config.mindrecord_file): + if not os.path.isdir(config.mindrecord_dir): + os.makedirs(config.mindrecord_dir) + if config.dataset == "coco": + if os.path.isdir(config.coco_root): + print("Create Mindrecord. It may take some time.") + data_to_mindrecord_byte_image(config, "coco", False, config.prefix, file_num=1) + print("Create Mindrecord Done, at {}".format(config.mindrecord_dir)) + else: + print("coco_root not exits.") + elif config.dataset == "VOC": + file_path = create_voc_dataset_txt(voc_dataset_dir=config.voc_data_path, + txt_save_path=config.anno_path, + year=str(config.voc_year), + is_training=False) + if file_path is not False: + config.anno_path = file_path + else: + print("VOC dataset txt file create failed") + exit(-1) + # only create when mindrecord does not exist + print("Create Mindrecord. It may take some time.") + data_to_mindrecord_byte_image(config, "other", False, config.prefix, file_num=1) + print("Create Mindrecord Done, at {}".format(config.mindrecord_dir)) + + else: + if os.path.isdir(config.image_dir) and os.path.exists(config.anno_path): + print("Create Mindrecord. 
It may take some time.") + data_to_mindrecord_byte_image(config, "other", False, config.prefix, file_num=1) + print("Create Mindrecord Done, at {}".format(config.mindrecord_dir)) + else: + print("IMAGE_DIR or ANNO_PATH not exits.") + + +def apply_eval(net, config, dataset_path, ckpt_path, anno_path): + """Rfcn evaluation.""" + if not os.path.isfile(ckpt_path): + raise RuntimeError("CheckPoint file {} is not valid.".format(ckpt_path)) + ds = create_rfcn_dataset(config, dataset_path, batch_size=config.test_batch_size, is_training=False, + num_parallel_workers=config.num_parallel_workers) + + param_dict = load_checkpoint(ckpt_path) + if config.device_target == "GPU": + for key, value in param_dict.items(): + tensor = value.asnumpy().astype(np.float32) + param_dict[key] = Parameter(tensor, key) + load_param_into_net(net, param_dict) + + net.set_train(False) + device_type = "Ascend" if context.get_context("device_target") == "Ascend" else "Others" + if device_type == "Ascend": + net.to_float(mstype.float16) + + eval_iter = 0 + total = ds.get_dataset_size() + outputs = [] + + if config.dataset != "coco": + dataset_coco = COCO() + dataset_coco.dataset, dataset_coco.anns, dataset_coco.cats, dataset_coco.imgs = dict(), dict(), dict(), dict() + dataset_coco.imgToAnns, dataset_coco.catToImgs = defaultdict(list), defaultdict(list) + dataset_coco.dataset = parse_json_annos_from_txt(anno_path, config) + dataset_coco.createIndex() + else: + dataset_coco = COCO(anno_path) + + print("\n========================================\n") + print("total images num: ", total) + print("Processing, please wait a moment.") + max_num = 128 + for data in ds.create_dict_iterator(num_epochs=1): + eval_iter = eval_iter + 1 + + img_data = data['image'] + img_metas = data['image_shape'] + gt_bboxes = data['box'] + gt_labels = data['label'] + gt_num = data['valid_num'] + + # run net + output = net(img_data, img_metas, gt_bboxes, gt_labels, gt_num) + + # output + all_bbox = output[0] + all_label = output[1] + all_mask = output[2] + + for j in range(config.test_batch_size): + all_bbox_squee = np.squeeze(all_bbox.asnumpy()[j, :, :]) + all_label_squee = np.squeeze(all_label.asnumpy()[j, :, :]) + all_mask_squee = np.squeeze(all_mask.asnumpy()[j, :, :]) + + all_bboxes_tmp_mask = all_bbox_squee[all_mask_squee, :] + all_labels_tmp_mask = all_label_squee[all_mask_squee] + + if all_bboxes_tmp_mask.shape[0] > max_num: + inds = np.argsort(-all_bboxes_tmp_mask[:, -1]) + inds = inds[:max_num] + all_bboxes_tmp_mask = all_bboxes_tmp_mask[inds] + all_labels_tmp_mask = all_labels_tmp_mask[inds] + + outputs_tmp = bbox2result_1image(all_bboxes_tmp_mask, all_labels_tmp_mask, config.num_classes) + + outputs.append(outputs_tmp) + + eval_types = ["bbox"] + reslut_path = "./{}epoch_results.pkl".format(config.current_epoch) + result_files = results2json(dataset_coco, outputs, reslut_path) + + return metrics_map(result_files, eval_types, dataset_coco, single_result=False) + + +def metrics_map(result_files, result_types, coco, max_dets=(100, 300, 1000), single_result=False): + """coco eval for Rfcn""" + + anns = json.load(open(result_files['bbox'])) + if not anns: + return 0 + + if isinstance(coco, str): + coco = COCO(coco) + assert isinstance(coco, COCO) + + for res_type in result_types: + result_file = result_files[res_type] + assert result_file.endswith('.json') + + coco_dets = coco.loadRes(result_file) + det_img_ids = coco_dets.getImgIds() + gt_img_ids = coco.getImgIds() + iou_type = 'bbox' if res_type == 'proposal' else res_type + cocoEval = 
COCOeval(coco, coco_dets, iou_type) + if res_type == 'proposal': + cocoEval.params.useCats = 0 + cocoEval.params.maxDets = list(max_dets) + + tgt_ids = gt_img_ids if not single_result else det_img_ids + + if res_type == 'proposal': + cocoEval.params.useCats = 0 + cocoEval.params.maxDets = list(max_dets) + + cocoEval.params.imgIds = tgt_ids + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + return cocoEval.stats[0] diff --git a/research/cv/rfcn/src/lr_schedule.py b/research/cv/rfcn/src/lr_schedule.py new file mode 100644 index 0000000000000000000000000000000000000000..f97d2a695e4e9b9445f8098d33493ea30e6be85e --- /dev/null +++ b/research/cv/rfcn/src/lr_schedule.py @@ -0,0 +1,40 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""lr generator for rfcn""" +import math + +def linear_warmup_learning_rate(current_step, warmup_steps, base_lr, init_lr): + lr_inc = (float(base_lr) - float(init_lr)) / float(warmup_steps) + learning_rate = float(init_lr) + lr_inc * current_step + return learning_rate + +def a_cosine_learning_rate(current_step, base_lr, warmup_steps, decay_steps): + base = float(current_step - warmup_steps) / float(decay_steps) + learning_rate = (1 + math.cos(base * math.pi)) / 2 * base_lr + return learning_rate + +def dynamic_lr(config, steps_per_epoch): + """dynamic learning rate generator""" + base_lr = config.base_lr + total_steps = steps_per_epoch * (config.epoch_size + 1) + warmup_steps = int(config.warmup_step) + lr = [] + for i in range(total_steps): + if i < warmup_steps: + lr.append(linear_warmup_learning_rate(i, warmup_steps, base_lr, base_lr * config.warmup_ratio)) + else: + lr.append(a_cosine_learning_rate(i, base_lr, warmup_steps, total_steps)) + + return lr diff --git a/research/cv/rfcn/src/model_utils/__init__.py b/research/cv/rfcn/src/model_utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/research/cv/rfcn/src/model_utils/config.py b/research/cv/rfcn/src/model_utils/config.py new file mode 100644 index 0000000000000000000000000000000000000000..df621ca6f86f455d1da889ca4d6cb9593858bf9d --- /dev/null +++ b/research/cv/rfcn/src/model_utils/config.py @@ -0,0 +1,135 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Parse arguments""" + +import os +import ast +import argparse +from pprint import pprint, pformat +import yaml + +class Config: + """ + Configuration namespace. Convert dictionary to members. + """ + def __init__(self, cfg_dict): + for k, v in cfg_dict.items(): + if isinstance(v, (list, tuple)): + setattr(self, k, [Config(x) if isinstance(x, dict) else x for x in v]) + else: + setattr(self, k, Config(v) if isinstance(v, dict) else v) + + def __str__(self): + return pformat(self.__dict__) + + def __repr__(self): + return self.__str__() + + +def parse_cli_to_yaml(parser, cfg, helper=None, choices=None, cfg_path="default_config.yaml"): + """ + Parse command line arguments to the configuration according to the default yaml. + + Args: + parser: Parent parser. + cfg: Base configuration. + helper: Helper description. + cfg_path: Path to the default yaml config. + """ + parser = argparse.ArgumentParser(description="[REPLACE THIS at config.py]", + parents=[parser]) + helper = {} if helper is None else helper + choices = {} if choices is None else choices + for item in cfg: + if not isinstance(cfg[item], list) and not isinstance(cfg[item], dict): + help_description = helper[item] if item in helper else "Please reference to {}".format(cfg_path) + choice = choices[item] if item in choices else None + if isinstance(cfg[item], bool): + parser.add_argument("--" + item, type=ast.literal_eval, default=cfg[item], choices=choice, + help=help_description) + else: + parser.add_argument("--" + item, type=type(cfg[item]), default=cfg[item], choices=choice, + help=help_description) + args = parser.parse_args() + return args + + +def parse_yaml(yaml_path): + """ + Parse the yaml config file. + + Args: + yaml_path: Path to the yaml config. + """ + with open(yaml_path, 'r') as fin: + try: + cfgs = yaml.load_all(fin.read(), Loader=yaml.FullLoader) + cfgs = [x for x in cfgs] + if len(cfgs) == 1: + cfg_helper = {} + cfg = cfgs[0] + cfg_choices = {} + elif len(cfgs) == 2: + cfg, cfg_helper = cfgs + cfg_choices = {} + elif len(cfgs) == 3: + cfg, cfg_helper, cfg_choices = cfgs + else: + raise ValueError("At most 3 docs (config, description for help, choices) are supported in config yaml") + print(cfg_helper) + except: + raise ValueError("Failed to parse yaml") + return cfg, cfg_helper, cfg_choices + + +def merge(args, cfg): + """ + Merge the base config from yaml file and command line arguments. + + Args: + args: Command line arguments. + cfg: Base configuration. + """ + args_var = vars(args) + for item in args_var: + cfg[item] = args_var[item] + return cfg + + +def get_config(): + """ + Get Config according to the yaml file and cli arguments. 
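+
+ Besides the merged yaml/CLI values, the returned Config carries two derived
+ fields: feature_shapes, set to [[img_height // 16, img_width // 16]] for the
+ single stride-16 feature map, and num_bboxes, the number of anchors laid over
+ that feature map. The module-level config object below is created once at
+ import time; illustrative usage from other modules:
+
+ from src.model_utils.config import config
+ print(config.img_width, config.num_classes)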
+ """ + parser = argparse.ArgumentParser(description="default name", add_help=False) + current_dir = os.path.dirname(os.path.abspath(__file__)) + parser.add_argument("--config_path", type=str, default=os.path.join(current_dir, + "../../config_distribute_gpu.yaml"), + help="Config file path") + path_args, _ = parser.parse_known_args() + default, helper, choices = parse_yaml(path_args.config_path) + args = parse_cli_to_yaml(parser=parser, cfg=default, helper=helper, choices=choices, cfg_path=path_args.config_path) + default = Config(merge(args, default)) + default.feature_shapes = [ + [default.img_height // 16, default.img_width // 16] + ] + default.num_bboxes = default.num_anchors * sum([lst[0] * lst[1] for lst in default.feature_shapes]) + pprint(default) + print("Please check the above information for the configurations", flush=True) + + return default + + +config = get_config() diff --git a/research/cv/rfcn/src/model_utils/device_adapter.py b/research/cv/rfcn/src/model_utils/device_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..cb9653f59f94061447c0fe03d029966b06d1f369 --- /dev/null +++ b/research/cv/rfcn/src/model_utils/device_adapter.py @@ -0,0 +1,27 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Device adapter for ModelArts""" + +from .config import config + +if config.enable_modelarts: + from .moxing_adapter import get_device_id, get_device_num, get_rank_id, get_job_id +else: + from .local_adapter import get_device_id, get_device_num, get_rank_id, get_job_id + +__all__ = [ + "get_device_id", "get_device_num", "get_rank_id", "get_job_id" +] diff --git a/research/cv/rfcn/src/model_utils/local_adapter.py b/research/cv/rfcn/src/model_utils/local_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..0e7c529384330b0bf45a1a3f7c85a7244f3fdc3f --- /dev/null +++ b/research/cv/rfcn/src/model_utils/local_adapter.py @@ -0,0 +1,36 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Local adapter""" + +import os + +def get_device_id(): + device_id = os.getenv('DEVICE_ID', '0') + return int(device_id) + + +def get_device_num(): + device_num = os.getenv('RANK_SIZE', '1') + return int(device_num) + + +def get_rank_id(): + global_rank_id = os.getenv('RANK_ID', '0') + return int(global_rank_id) + + +def get_job_id(): + return "Local Job" diff --git a/research/cv/rfcn/src/model_utils/moxing_adapter.py b/research/cv/rfcn/src/model_utils/moxing_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..2f06c40592a20d3f667cc2a3453bc06d0bf13491 --- /dev/null +++ b/research/cv/rfcn/src/model_utils/moxing_adapter.py @@ -0,0 +1,122 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Moxing adapter for ModelArts""" + +import os +import functools +from mindspore import context +from mindspore.profiler import Profiler +from .config import config + +_global_sync_count = 0 + +def get_device_id(): + device_id = os.getenv('DEVICE_ID', '0') + return int(device_id) + + +def get_device_num(): + device_num = os.getenv('RANK_SIZE', '1') + return int(device_num) + + +def get_rank_id(): + global_rank_id = os.getenv('RANK_ID', '0') + return int(global_rank_id) + + +def get_job_id(): + job_id = os.getenv('JOB_ID') + job_id = job_id if job_id != "" else "default" + return job_id + +def sync_data(from_path, to_path): + """ + Download data from remote obs to local directory if the first url is remote url and the second one is local path + Upload data from local directory to remote obs in contrast. + """ + import moxing as mox + import time + global _global_sync_count + sync_lock = "/tmp/copy_sync.lock" + str(_global_sync_count) + _global_sync_count += 1 + + # Each server contains 8 devices as most. + if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock): + print("from path: ", from_path) + print("to path: ", to_path) + mox.file.copy_parallel(from_path, to_path) + print("===finish data synchronization===") + try: + os.mknod(sync_lock) + except IOError: + pass + print("===save flag===") + + while True: + if os.path.exists(sync_lock): + break + time.sleep(1) + + print("Finish sync data from {} to {}.".format(from_path, to_path)) + + +def moxing_wrapper(pre_process=None, post_process=None): + """ + Moxing wrapper to download dataset and upload outputs. 
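+
+ When config.enable_modelarts is set, data_url, checkpoint_url and train_url
+ are first synchronised to the local data_path, load_path and output_path, the
+ optional pre_process hook runs, run_func executes (under the profiler when
+ enable_profiling is set), and output_path is copied back to train_url
+ afterwards. Illustrative usage (the decorated function name is an example
+ only):
+
+ @moxing_wrapper(pre_process=None)
+ def run_train():
+ ...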
+ """ + def wrapper(run_func): + @functools.wraps(run_func) + def wrapped_func(*args, **kwargs): + # Download data from data_url + if config.enable_modelarts: + if config.data_url: + sync_data(config.data_url, config.data_path) + print("Dataset downloaded: ", os.listdir(config.data_path)) + if config.checkpoint_url: + sync_data(config.checkpoint_url, config.load_path) + print("Preload downloaded: ", os.listdir(config.load_path)) + if config.train_url: + sync_data(config.train_url, config.output_path) + print("Workspace downloaded: ", os.listdir(config.output_path)) + + context.set_context(save_graphs_path=os.path.join(config.output_path, str(get_rank_id()))) + config.device_num = get_device_num() + config.device_id = get_device_id() + if not os.path.exists(config.output_path): + os.makedirs(config.output_path) + + if pre_process: + pre_process() + + if config.enable_profiling: + profiler = Profiler() + + run_func(*args, **kwargs) + + if config.enable_profiling: + profiler.analyse() + + # Upload data to train_url + if config.enable_modelarts: + if post_process: + post_process() + + if config.train_url: + print("Start to copy output directory") + sync_data(config.output_path, config.train_url) + return wrapped_func + return wrapper diff --git a/research/cv/rfcn/src/network_define.py b/research/cv/rfcn/src/network_define.py new file mode 100644 index 0000000000000000000000000000000000000000..151e15caa265d3a53f07b5ee3f102f9a95d75bc2 --- /dev/null +++ b/research/cv/rfcn/src/network_define.py @@ -0,0 +1,153 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Rfcn training network wrapper.""" + +import time +import numpy as np +import mindspore.nn as nn +from mindspore.common.tensor import Tensor +from mindspore.ops import composite as C +from mindspore.ops import functional as F +from mindspore import ParameterTuple +from mindspore.train.callback import Callback +from mindspore.nn.wrap.grad_reducer import DistributedGradReducer + +time_stamp_init = False +time_stamp_first = 0 + +class LossCallBack(Callback): + """ + Monitor the loss in training. + + If the loss is NAN or INF terminating training. + + Note: + If per_print_times is 0 do not print loss. + + Args: + per_print_times (int): Print loss every times. Default: 1. 
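+ rank_id (int): Rank id used to name the per-device loss log file ("./loss_{rank_id}.log"). Default: 0.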
+ """ + + def __init__(self, per_print_times=1, rank_id=0): + super(LossCallBack, self).__init__() + if not isinstance(per_print_times, int) or per_print_times < 0: + raise ValueError("print_step must be int and >= 0.") + self._per_print_times = per_print_times + self.count = 0 + self.loss_sum = 0 + self.rank_id = rank_id + + global time_stamp_init, time_stamp_first + if not time_stamp_init: + time_stamp_first = time.time() + time_stamp_init = True + + self.total = 0 + + def step_end(self, run_context): + """step_end event show loss and some information""" + + cb_params = run_context.original_args() + loss = cb_params.net_outputs.asnumpy() + cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num + 1 + + self.count += 1 + self.loss_sum += float(loss) + + if self.count >= 1: + global time_stamp_first + time_stamp_current = time.time() + total_loss = self.loss_sum / self.count + loss_file = open("./loss_{}.log".format(self.rank_id), "a+") + loss_file.write("%lu epoch: %s step: %s total_loss: %.5f" % + (time_stamp_current - time_stamp_first, cb_params.cur_epoch_num, cur_step_in_epoch, + total_loss)) + loss_file.write("\n") + loss_file.close() + + self.count = 0 + self.loss_sum = 0 + + +class LossNet(nn.Cell): + """Rfcn loss method""" + def construct(self, x1, x2, x3, x4, x5, x6): + return x1 + x2 + + +class WithLossCell(nn.Cell): + """ + Wrap the network with loss function to compute loss. + + Args: + backbone (Cell): The target network to wrap. + loss_fn (Cell): The loss function used to compute loss. + """ + def __init__(self, backbone, loss_fn): + super(WithLossCell, self).__init__(auto_prefix=False) + self._backbone = backbone + self._loss_fn = loss_fn + + def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num): + loss1, loss2, loss3, loss4, loss5, loss6 = self._backbone(x, img_shape, gt_bboxe, gt_label, gt_num) + return self._loss_fn(loss1, loss2, loss3, loss4, loss5, loss6) + + @property + def backbone_network(self): + """ + Get the backbone network. + + Returns: + Cell, return backbone network. + """ + return self._backbone + + +class TrainOneStepCell(nn.Cell): + """ + Network training package class. + + Append an optimizer to the training network after that the construct function + can be called to create the backward graph. + + Args: + network (Cell): The training network. + optimizer (Cell): Optimizer for updating the weights. + sens (Number): The adjust parameter. Default value is 1.0. + reduce_flag (bool): The reduce flag. Default value is False. + mean (bool): Allreduce method. Default value is False. + degree (int): Device number. Default value is None. 
+ """ + def __init__(self, network, optimizer, sens=1.0, reduce_flag=False, mean=True, degree=None): + super(TrainOneStepCell, self).__init__(auto_prefix=False) + self.network = network + self.network.set_grad() + self.weights = ParameterTuple(network.trainable_params()) + self.optimizer = optimizer + self.grad = C.GradOperation(get_by_list=True, + sens_param=True) + self.sens = Tensor((np.ones((1,)) * sens).astype(np.float32)) + self.reduce_flag = reduce_flag + if reduce_flag: + self.grad_reducer = DistributedGradReducer(optimizer.parameters, mean, degree) + + def construct(self, x, img_shape, gt_bboxe, gt_label, gt_num): + weights = self.weights + loss = self.network(x, img_shape, gt_bboxe, gt_label, gt_num) + grads = self.grad(self.network, weights)(x, img_shape, gt_bboxe, gt_label, gt_num, self.sens) + if self.reduce_flag: + grads = self.grad_reducer(grads) + + return F.depend(loss, self.optimizer(grads)) diff --git a/research/cv/rfcn/src/rfcn/__init__.py b/research/cv/rfcn/src/rfcn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..77e230c1ab16910474b335c054f2a319f9f511c2 --- /dev/null +++ b/research/cv/rfcn/src/rfcn/__init__.py @@ -0,0 +1,28 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""FasterRcnn Init.""" + +from .resnet import ResNetFea, ResidualBlockUsing +from .bbox_assign_sample import BboxAssignSample +from .bbox_assign_sample_stage2 import BboxAssignSampleForLoss +from .proposal_generator import Proposal +from .rfcn_loss import Loss +from .rpn import RPN +from .anchor_generator import AnchorGenerator + +__all__ = [ + "ResNetFea", "BboxAssignSample", "BboxAssignSampleForLoss", "Proposal", "Loss", + "RPN", "AnchorGenerator", "ResidualBlockUsing" +] diff --git a/research/cv/rfcn/src/rfcn/anchor_generator.py b/research/cv/rfcn/src/rfcn/anchor_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..03431bd4c163cc40315032fffbe2de580266f7a7 --- /dev/null +++ b/research/cv/rfcn/src/rfcn/anchor_generator.py @@ -0,0 +1,84 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Rfcn anchor generator.""" + +import numpy as np + +class AnchorGenerator(): + """Anchor generator for Rfcn.""" + def __init__(self, base_size, scales, ratios, scale_major=True, ctr=None): + """Anchor generator init method.""" + self.base_size = base_size + self.scales = np.array(scales) + self.ratios = np.array(ratios) + self.scale_major = scale_major + self.ctr = ctr + self.base_anchors = self.gen_base_anchors() + + def gen_base_anchors(self): + """Generate a single anchor.""" + w = self.base_size + h = self.base_size + if self.ctr is None: + x_ctr = 0.5 * (w - 1) + y_ctr = 0.5 * (h - 1) + else: + x_ctr, y_ctr = self.ctr + + h_ratios = np.sqrt(self.ratios) + w_ratios = 1 / h_ratios + if self.scale_major: + ws = (w * w_ratios[:, None] * self.scales[None, :]).reshape(-1) + hs = (h * h_ratios[:, None] * self.scales[None, :]).reshape(-1) + else: + ws = (w * self.scales[:, None] * w_ratios[None, :]).reshape(-1) + hs = (h * self.scales[:, None] * h_ratios[None, :]).reshape(-1) + + base_anchors = np.stack( + [ + x_ctr - 0.5 * (ws - 1), y_ctr - 0.5 * (hs - 1), + x_ctr + 0.5 * (ws - 1), y_ctr + 0.5 * (hs - 1) + ], + axis=-1).round() + + return base_anchors + + def _meshgrid(self, x, y, row_major=True): + """Generate grid.""" + xx = np.repeat(x.reshape(1, len(x)), len(y), axis=0).reshape(-1) + yy = np.repeat(y, len(x)) + if row_major: + return xx, yy + + return yy, xx + + def grid_anchors(self, featmap_size, stride=16): + """Generate anchor list.""" + base_anchors = self.base_anchors + + feat_h, feat_w = featmap_size + shift_x = np.arange(0, feat_w) * stride + shift_y = np.arange(0, feat_h) * stride + shift_xx, shift_yy = self._meshgrid(shift_x, shift_y) + shifts = np.stack([shift_xx, shift_yy, shift_xx, shift_yy], axis=-1) + shifts = shifts.astype(base_anchors.dtype) + # first feat_w elements correspond to the first row of shifts + # add A anchors (1, A, 4) to K shifts (K, 1, 4) to get + # shifted anchors (K, A, 4), reshape to (K*A, 4) + + all_anchors = base_anchors[None, :, :] + shifts[:, None, :] + all_anchors = all_anchors.reshape(-1, 4) + + return all_anchors diff --git a/research/cv/rfcn/src/rfcn/bbox_assign_sample.py b/research/cv/rfcn/src/rfcn/bbox_assign_sample.py new file mode 100644 index 0000000000000000000000000000000000000000..25a3e2e2bc7ada9a0089975fdee155329a172892 --- /dev/null +++ b/research/cv/rfcn/src/rfcn/bbox_assign_sample.py @@ -0,0 +1,167 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Rfcn positive and negative sample screening for RPN.""" + +import numpy as np +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor +import mindspore.common.dtype as mstype + + +class BboxAssignSample(nn.Cell): + """ + Bbox assigner and sampler definition. + + Args: + config (dict): Config. + batch_size (int): Batchsize. 
+ num_bboxes (int): The anchor nums. + add_gt_as_proposals (bool): add gt bboxes as proposals flag. + + Returns: + Tensor, output tensor. + bbox_targets: bbox location, (batch_size, num_bboxes, 4) + bbox_weights: bbox weights, (batch_size, num_bboxes, 1) + labels: label for every bboxes, (batch_size, num_bboxes, 1) + label_weights: label weight for every bboxes, (batch_size, num_bboxes, 1) + + Examples: + BboxAssignSample(config, 2, 1024, True) + """ + + def __init__(self, config, batch_size, num_bboxes, add_gt_as_proposals): + super(BboxAssignSample, self).__init__() + cfg = config + self.dtype = np.float32 + self.ms_type = mstype.float32 + self.batch_size = batch_size + + self.neg_iou_thr = Tensor(cfg.neg_iou_thr, self.ms_type) + self.pos_iou_thr = Tensor(cfg.pos_iou_thr, self.ms_type) + self.min_pos_iou = Tensor(cfg.min_pos_iou, self.ms_type) + self.zero_thr = Tensor(0.0, self.ms_type) + + self.num_bboxes = num_bboxes + self.num_gts = cfg.num_gts + self.num_expected_pos = cfg.num_expected_pos + self.num_expected_neg = cfg.num_expected_neg + self.add_gt_as_proposals = add_gt_as_proposals + + if self.add_gt_as_proposals: + self.label_inds = Tensor(np.arange(1, self.num_gts + 1)) + + self.concat = P.Concat(axis=0) + self.max_gt = P.ArgMaxWithValue(axis=0) + self.max_anchor = P.ArgMaxWithValue(axis=1) + self.sum_inds = P.ReduceSum() + self.iou = P.IOU() + self.greaterequal = P.GreaterEqual() + self.greater = P.Greater() + self.select = P.Select() + self.gatherND = P.GatherNd() + self.squeeze = P.Squeeze() + self.cast = P.Cast() + self.logicaland = P.LogicalAnd() + self.less = P.Less() + self.random_choice_with_mask_pos = P.RandomChoiceWithMask(self.num_expected_pos) + self.random_choice_with_mask_neg = P.RandomChoiceWithMask(self.num_expected_neg) + self.reshape = P.Reshape() + self.equal = P.Equal() + self.bounding_box_encode = P.BoundingBoxEncode(means=(0.0, 0.0, 0.0, 0.0), stds=(1.0, 1.0, 1.0, 1.0)) + self.scatterNdUpdate = P.ScatterNdUpdate() + self.scatterNd = P.ScatterNd() + self.logicalnot = P.LogicalNot() + self.tile = P.Tile() + self.zeros_like = P.ZerosLike() + + self.assigned_gt_inds = Tensor(np.full(num_bboxes, -1, dtype=np.int32)) + self.assigned_gt_zeros = Tensor(np.array(np.zeros(num_bboxes), dtype=np.int32)) + self.assigned_gt_ones = Tensor(np.array(np.ones(num_bboxes), dtype=np.int32)) + self.assigned_gt_ignores = Tensor(np.full(num_bboxes, -1, dtype=np.int32)) + self.assigned_pos_ones = Tensor(np.array(np.ones(self.num_expected_pos), dtype=np.int32)) + + self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=np.bool)) + self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(self.dtype)) + self.check_gt_one = Tensor(np.full((self.num_gts, 4), -1, dtype=self.dtype)) + self.check_anchor_two = Tensor(np.full((self.num_bboxes, 4), -2, dtype=self.dtype)) + + def construct(self, gt_bboxes_i, gt_labels_i, valid_mask, bboxes, gt_valids): + """bbox_assign_sample construct""" + + gt_bboxes_i = self.select(self.cast(self.tile(self.reshape(self.cast(gt_valids, mstype.int32), \ + (self.num_gts, 1)), (1, 4)), mstype.bool_), gt_bboxes_i, self.check_gt_one) + bboxes = self.select(self.cast(self.tile(self.reshape(self.cast(valid_mask, mstype.int32), \ + (self.num_bboxes, 1)), (1, 4)), mstype.bool_), bboxes, self.check_anchor_two) + + overlaps = self.iou(bboxes, gt_bboxes_i) + + max_overlaps_w_gt_index, max_overlaps_w_gt = self.max_gt(overlaps) + _, max_overlaps_w_ac = self.max_anchor(overlaps) + + neg_sample_iou_mask = 
self.logicaland(self.greaterequal(max_overlaps_w_gt, self.zero_thr), \ + self.less(max_overlaps_w_gt, self.neg_iou_thr)) + assigned_gt_inds2 = self.select(neg_sample_iou_mask, self.assigned_gt_zeros, self.assigned_gt_inds) + + pos_sample_iou_mask = self.greaterequal(max_overlaps_w_gt, self.pos_iou_thr) + assigned_gt_inds3 = self.select(pos_sample_iou_mask, \ + max_overlaps_w_gt_index + self.assigned_gt_ones, assigned_gt_inds2) + assigned_gt_inds4 = assigned_gt_inds3 + for j in range(self.num_gts): + max_overlaps_w_ac_j = max_overlaps_w_ac[j:j+1:1] + overlaps_w_gt_j = self.squeeze(overlaps[j:j+1:1, ::]) + + pos_mask_j = self.logicaland(self.greaterequal(max_overlaps_w_ac_j, self.min_pos_iou), \ + self.equal(overlaps_w_gt_j, max_overlaps_w_ac_j)) + + assigned_gt_inds4 = self.select(pos_mask_j, self.assigned_gt_ones + j, assigned_gt_inds4) + + assigned_gt_inds5 = self.select(valid_mask, assigned_gt_inds4, self.assigned_gt_ignores) + + pos_index, valid_pos_index = self.random_choice_with_mask_pos(self.greater(assigned_gt_inds5, 0)) + + pos_check_valid = self.cast(self.greater(assigned_gt_inds5, 0), self.ms_type) + pos_check_valid = self.sum_inds(pos_check_valid, -1) + valid_pos_index = self.less(self.range_pos_size, pos_check_valid) + pos_index = pos_index * self.reshape(self.cast(valid_pos_index, mstype.int32), (self.num_expected_pos, 1)) + + pos_assigned_gt_index = self.gatherND(assigned_gt_inds5, pos_index) - self.assigned_pos_ones + pos_assigned_gt_index = pos_assigned_gt_index * self.cast(valid_pos_index, mstype.int32) + pos_assigned_gt_index = self.reshape(pos_assigned_gt_index, (self.num_expected_pos, 1)) + + neg_index, valid_neg_index = self.random_choice_with_mask_neg(self.equal(assigned_gt_inds5, 0)) + + num_pos = self.cast(self.logicalnot(valid_pos_index), self.ms_type) + num_pos = self.sum_inds(num_pos, -1) + unvalid_pos_index = self.less(self.range_pos_size, num_pos) + valid_neg_index = self.logicaland(self.concat((self.check_neg_mask, unvalid_pos_index)), valid_neg_index) + + pos_bboxes_ = self.gatherND(bboxes, pos_index) + pos_gt_bboxes_ = self.gatherND(gt_bboxes_i, pos_assigned_gt_index) + pos_gt_labels = self.gatherND(gt_labels_i, pos_assigned_gt_index) + + pos_bbox_targets_ = self.bounding_box_encode(pos_bboxes_, pos_gt_bboxes_) + + valid_pos_index = self.cast(valid_pos_index, mstype.int32) + valid_neg_index = self.cast(valid_neg_index, mstype.int32) + bbox_targets_total = self.scatterNd(pos_index, pos_bbox_targets_, (self.num_bboxes, 4)) + bbox_weights_total = self.scatterNd(pos_index, valid_pos_index, (self.num_bboxes,)) + labels_total = self.scatterNd(pos_index, pos_gt_labels, (self.num_bboxes,)) + total_index = self.concat((pos_index, neg_index)) + total_valid_index = self.concat((valid_pos_index, valid_neg_index)) + label_weights_total = self.scatterNd(total_index, total_valid_index, (self.num_bboxes,)) + + return bbox_targets_total, self.cast(bbox_weights_total, mstype.bool_), \ + labels_total, self.cast(label_weights_total, mstype.bool_) diff --git a/research/cv/rfcn/src/rfcn/bbox_assign_sample_stage2.py b/research/cv/rfcn/src/rfcn/bbox_assign_sample_stage2.py new file mode 100644 index 0000000000000000000000000000000000000000..005845ff33d58181682962f1540e66c2fb3a71a5 --- /dev/null +++ b/research/cv/rfcn/src/rfcn/bbox_assign_sample_stage2.py @@ -0,0 +1,206 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Rfcn positive and negative sample screening for Rcnn.""" + +import numpy as np +import mindspore.nn as nn +import mindspore.common.dtype as mstype +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor + + +class BboxAssignSampleForLoss(nn.Cell): + """ + Bbox assigner and sampler definition. + + Args: + config (dict): Config. + batch_size (int): Batchsize. + num_bboxes (int): The anchor nums. + add_gt_as_proposals (bool): add gt bboxes as proposals flag. + + Returns: + Tensor, output tensor. + bbox_targets: bbox location, (batch_size, num_bboxes, 4) + bbox_weights: bbox weights, (batch_size, num_bboxes, 1) + labels: label for every bboxes, (batch_size, num_bboxes, 1) + label_weights: label weight for every bboxes, (batch_size, num_bboxes, 1) + + Examples: + BboxAssignSampleForLoss(config, 2, 1024, True) + """ + + def __init__(self, config, batch_size, num_bboxes, add_gt_as_proposals): + super(BboxAssignSampleForLoss, self).__init__() + cfg = config + self.dtype = np.float32 + self.ms_type = mstype.float32 + self.batch_size = batch_size + self.neg_iou_thr = cfg.neg_iou_thr_stage2 + self.pos_iou_thr = cfg.pos_iou_thr_stage2 + self.min_pos_iou = cfg.min_pos_iou_stage2 + self.num_gts = cfg.num_gts + self.num_bboxes = num_bboxes + self.num_expected_pos = cfg.num_expected_pos_stage2 + self.num_expected_neg = cfg.num_expected_neg_stage2 + self.num_expected_total = cfg.num_expected_total_stage2 + + self.add_gt_as_proposals = add_gt_as_proposals + self.label_inds = Tensor(np.arange(1, self.num_gts + 1).astype(np.int32)) + self.add_gt_as_proposals_valid = Tensor(np.full(self.num_gts, self.add_gt_as_proposals, dtype=np.int32)) + + self.concat = P.Concat(axis=0) + self.max_gt = P.ArgMaxWithValue(axis=0) + self.max_anchor = P.ArgMaxWithValue(axis=1) + self.sum_inds = P.ReduceSum() + self.iou = P.IOU() + self.greaterequal = P.GreaterEqual() + self.greater = P.Greater() + self.select = P.Select() + self.gatherND = P.GatherNd() + self.squeeze = P.Squeeze() + self.cast = P.Cast() + self.logicaland = P.LogicalAnd() + self.less = P.Less() + self.random_choice_with_mask_pos = P.RandomChoiceWithMask(self.num_expected_pos) + self.random_choice_with_mask_neg = P.RandomChoiceWithMask(self.num_expected_neg) + self.reshape = P.Reshape() + self.equal = P.Equal() + self.bounding_box_encode = P.BoundingBoxEncode(means=(0.0, 0.0, 0.0, 0.0), stds=(0.1, 0.1, 0.2, 0.2)) + self.concat_axis1 = P.Concat(axis=1) + self.logicalnot = P.LogicalNot() + self.tile = P.Tile() + + # Check + self.check_gt_one = Tensor(np.full((self.num_gts, 4), -1, dtype=self.dtype)) + self.check_anchor_two = Tensor(np.full((self.num_bboxes, 4), -2, dtype=self.dtype)) + + # Init tensor + self.assigned_gt_inds = Tensor(np.full(num_bboxes, -1, dtype=np.int32)) + self.assigned_gt_zeros = Tensor(np.array(np.zeros(num_bboxes), dtype=np.int32)) + self.assigned_gt_ones = Tensor(np.array(np.ones(num_bboxes), dtype=np.int32)) + self.assigned_gt_ignores = Tensor(np.full(num_bboxes, -1, dtype=np.int32)) + self.assigned_pos_ones = 
Tensor(np.array(np.ones(self.num_expected_pos), dtype=np.int32)) + + self.gt_ignores = Tensor(np.full(self.num_gts, -1, dtype=np.int32)) + self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(self.dtype)) + self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=np.bool)) + self.bboxs_neg_mask = Tensor(np.zeros((self.num_expected_neg, 4), dtype=self.dtype)) + self.labels_neg_mask = Tensor(np.array(np.zeros(self.num_expected_neg), dtype=np.uint8)) + + self.reshape_shape_pos = (self.num_expected_pos, 1) + self.reshape_shape_neg = (self.num_expected_neg, 1) + + self.scalar_zero = Tensor(0.0, dtype=self.ms_type) + self.scalar_neg_iou_thr = Tensor(self.neg_iou_thr, dtype=self.ms_type) + self.scalar_pos_iou_thr = Tensor(self.pos_iou_thr, dtype=self.ms_type) + self.scalar_min_pos_iou = Tensor(self.min_pos_iou, dtype=self.ms_type) + + def construct(self, gt_bboxes_i, gt_labels_i, valid_mask, bboxes, gt_valids): + """bbox_assign_sample_stage2 construct""" + + # Convert the coordinates of gT_bboxes_i to -1 + gt_bboxes_i = self.select(self.cast(self.tile(self.reshape(self.cast(gt_valids, mstype.int32), \ + (self.num_gts, 1)), (1, 4)), mstype.bool_), \ + gt_bboxes_i, self.check_gt_one) + + # Replace the illegal coordinates in bboxes with -2 + bboxes = self.select(self.cast(self.tile(self.reshape(self.cast(valid_mask, mstype.int32), \ + (self.num_bboxes, 1)), (1, 4)), mstype.bool_), \ + bboxes, self.check_anchor_two) + + # Calculate the IOU value of the rPN-generated box and the real box + overlaps = self.iou(bboxes, gt_bboxes_i) + + # Match each RPN generated box with the label box that intersects the largest + max_overlaps_w_gt_index, max_overlaps_w_gt = self.max_gt(overlaps) + _, max_overlaps_w_ac = self.max_anchor(overlaps) + + # The mask of the negative sample in the box generated by RPN is calculated. 
Max_overlaps_w_gt greater than + # or equal to 0 and less than 0.5 is used as the negative sample + neg_sample_iou_mask = self.logicaland(self.greaterequal(max_overlaps_w_gt, + self.scalar_zero), + self.less(max_overlaps_w_gt, + self.scalar_neg_iou_thr)) + assigned_gt_inds2 = self.select(neg_sample_iou_mask, self.assigned_gt_zeros, self.assigned_gt_inds) + + # Find positive sample mask IOU greater than or equal to 0.5 + pos_sample_iou_mask = self.greaterequal(max_overlaps_w_gt, self.scalar_pos_iou_thr) + assigned_gt_inds3 = self.select(pos_sample_iou_mask, \ + max_overlaps_w_gt_index + self.assigned_gt_ones, assigned_gt_inds2) + + for j in range(self.num_gts): # 128 + max_overlaps_w_ac_j = max_overlaps_w_ac[j:j+1:1] + overlaps_w_ac_j = overlaps[j:j+1:1, ::] + temp1 = self.greaterequal(max_overlaps_w_ac_j, self.scalar_min_pos_iou) + temp2 = self.squeeze(self.equal(overlaps_w_ac_j, max_overlaps_w_ac_j)) + pos_mask_j = self.logicaland(temp1, temp2) + assigned_gt_inds3 = self.select(pos_mask_j, (j+1)*self.assigned_gt_ones, assigned_gt_inds3) + + + assigned_gt_inds5 = self.select(valid_mask, assigned_gt_inds3, self.assigned_gt_ignores) + + bboxes = self.concat((gt_bboxes_i, bboxes)) + label_inds_valid = self.select(gt_valids, self.label_inds, self.gt_ignores) + label_inds_valid = label_inds_valid * self.add_gt_as_proposals_valid + assigned_gt_inds5 = self.concat((label_inds_valid, assigned_gt_inds5)) + + # Get pos index + pos_index, valid_pos_index = self.random_choice_with_mask_pos(self.greater(assigned_gt_inds5, 0)) + pos_check_valid = self.cast(self.greater(assigned_gt_inds5, 0), self.ms_type) + pos_check_valid = self.sum_inds(pos_check_valid, -1) + valid_pos_index = self.less(self.range_pos_size, pos_check_valid) + pos_index = pos_index * self.reshape(self.cast(valid_pos_index, mstype.int32), (self.num_expected_pos, 1)) + + # Calculate the number of positive samples + num_pos = self.sum_inds(self.cast(self.logicalnot(valid_pos_index), self.ms_type), -1) + valid_pos_index = self.cast(valid_pos_index, mstype.int32) + pos_index = self.reshape(pos_index, self.reshape_shape_pos) + valid_pos_index = self.reshape(valid_pos_index, self.reshape_shape_pos) + # Get the index position of the positive sample + pos_index = pos_index * valid_pos_index + pos_assigned_gt_index = self.gatherND(assigned_gt_inds5, pos_index) - self.assigned_pos_ones + pos_assigned_gt_index = self.reshape(pos_assigned_gt_index, self.reshape_shape_pos) + pos_assigned_gt_index = pos_assigned_gt_index * valid_pos_index + pos_gt_labels = self.gatherND(gt_labels_i, pos_assigned_gt_index) + + # Get neg index + # 512 negative samples were randomly selected + neg_index, valid_neg_index = self.random_choice_with_mask_neg(self.equal(assigned_gt_inds5, 0)) + + unvalid_pos_index = self.less(self.range_pos_size, num_pos) + valid_neg_index = self.logicaland(self.concat((self.check_neg_mask, unvalid_pos_index)), valid_neg_index) + neg_index = self.reshape(neg_index, self.reshape_shape_neg) + + valid_neg_index = self.cast(valid_neg_index, mstype.int32) + valid_neg_index = self.reshape(valid_neg_index, self.reshape_shape_neg) + neg_index = neg_index * valid_neg_index + # compose + # Box of positive sample is selected according to pos_index + pos_bboxes_ = self.gatherND(bboxes, pos_index) + # Pick the box with negative samples according to neg_index + neg_bboxes_ = self.gatherND(bboxes, neg_index) + pos_assigned_gt_index = self.reshape(pos_assigned_gt_index, self.reshape_shape_pos) + pos_gt_bboxes_ = self.gatherND(gt_bboxes_i, 
pos_assigned_gt_index) + pos_bbox_targets_ = self.bounding_box_encode(pos_bboxes_, pos_gt_bboxes_) + + # Put the box with the positive sample and the box with the negative sample together + total_bboxes = self.concat((pos_bboxes_, neg_bboxes_)) + total_deltas = self.concat((pos_bbox_targets_, self.bboxs_neg_mask)) + total_labels = self.concat((pos_gt_labels, self.labels_neg_mask)) + valid_pos_index = self.reshape(valid_pos_index, self.reshape_shape_pos) + valid_neg_index = self.reshape(valid_neg_index, self.reshape_shape_neg) + total_mask = self.concat((valid_pos_index, valid_neg_index)) + return total_bboxes, total_deltas, total_labels, total_mask diff --git a/research/cv/rfcn/src/rfcn/proposal_generator.py b/research/cv/rfcn/src/rfcn/proposal_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..d45802abd60ae2309f2742fa4714a9239375812e --- /dev/null +++ b/research/cv/rfcn/src/rfcn/proposal_generator.py @@ -0,0 +1,200 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Rfcn proposal generator.""" + +import numpy as np +import mindspore.nn as nn +import mindspore.common.dtype as mstype +from mindspore.ops import operations as P +from mindspore import Tensor + + +class Proposal(nn.Cell): + """ + Proposal subnet. + + Args: + config (dict): Config. + batch_size (int): Batchsize. + num_classes (int) - Class number. + use_sigmoid_cls (bool) - Select sigmoid or softmax function. + target_means (tuple) - Means for encode function. Default: (.0, .0, .0, .0). + target_stds (tuple) - Stds for encode function. Default: (1.0, 1.0, 1.0, 1.0). + + Returns: + Tuple, tuple of output tensor,(proposal, mask). 
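+        Each proposal row is laid out as (x1, y1, x2, y2, score) after NMS, and the
+        mask marks which of the padded rows are valid.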
+ + Examples: + Proposal(config = config, batch_size = 1, num_classes = 81, use_sigmoid_cls = True, \ + target_means=(.0, .0, .0, .0), target_stds=(1.0, 1.0, 1.0, 1.0)) + """ + def __init__(self, + config, + batch_size, + num_classes, + use_sigmoid_cls, + target_means=(.0, .0, .0, .0), + target_stds=(1.0, 1.0, 1.0, 1.0) + ): + super(Proposal, self).__init__() + cfg = config + self.batch_size = batch_size + self.num_classes = num_classes + self.target_means = target_means + self.target_stds = target_stds + self.use_sigmoid_cls = use_sigmoid_cls + + if self.use_sigmoid_cls: + self.cls_out_channels = num_classes - 1 + self.activation = P.Sigmoid() + self.reshape_shape = (-1, 1) + else: + self.cls_out_channels = num_classes + self.activation = P.Softmax(axis=1) + self.reshape_shape = (-1, 2) + + if self.cls_out_channels <= 0: + raise ValueError('num_classes={} is too small'.format(num_classes)) + + self.num_pre = cfg.rpn_proposal_nms_pre + self.min_box_size = cfg.rpn_proposal_min_bbox_size + self.nms_thr = cfg.rpn_proposal_nms_thr + self.nms_post = cfg.rpn_proposal_nms_post + self.nms_across_levels = cfg.rpn_proposal_nms_across_levels + self.max_num = cfg.rpn_proposal_max_num + # rfcn only have one C4 level + self.num_levels = 1 + + # Op Define + self.squeeze = P.Squeeze() + self.reshape = P.Reshape() + self.cast = P.Cast() + + self.feature_shapes = cfg.feature_shapes + + self.transpose_shape = (1, 2, 0) + + self.decode = P.BoundingBoxDecode(max_shape=(cfg.img_height, cfg.img_width), \ + means=self.target_means, \ + stds=self.target_stds) + + self.nms = P.NMSWithMask(self.nms_thr) + self.concat_axis0 = P.Concat(axis=0) + self.concat_axis1 = P.Concat(axis=1) + self.split = P.Split(axis=1, output_num=5) + self.min = P.Minimum() + self.gatherND = P.GatherNd() + self.slice = P.Slice() + self.select = P.Select() + self.greater = P.Greater() + self.transpose = P.Transpose() + self.tile = P.Tile() + self.set_train_local(config, training=True) + + self.dtype = np.float32 + self.ms_type = mstype.float32 + self.multi_10 = Tensor(10.0, self.ms_type) + self.num_anchors = cfg.num_anchors + + def set_train_local(self, config, training=True): + """Set training flag.""" + self.training_local = training + + cfg = config + self.topK_stage1 = () + self.topK_shape = () + total_max_topk_input = 0 + if not self.training_local: + self.num_pre = cfg.rpn_nms_pre + self.min_box_size = cfg.rpn_min_bbox_min_size + self.nms_thr = cfg.rpn_nms_thr + self.nms_post = cfg.rpn_nms_post + self.nms_across_levels = cfg.rpn_nms_across_levels + self.max_num = cfg.rpn_max_num + + for shp in self.feature_shapes: + k_num = min(self.num_pre, (shp[0] * shp[1] * cfg.num_anchors)) + total_max_topk_input += k_num + self.topK_stage1 += (k_num,) + self.topK_shape += ((k_num, 1),) + + self.topKv2 = P.TopK(sorted=True) + self.topK_shape_stage2 = (self.max_num, 1) + self.min_float_num = -65500.0 + self.topK_mask = Tensor(self.min_float_num * np.ones(total_max_topk_input, np.float32)) + + def construct(self, rpn_cls_score_total, rpn_bbox_pred_total, anchor_list): + """proposal generator""" + + proposals_tuple = () + masks_tuple = () + for img_id in range(self.batch_size): + cls_score_list = () + bbox_pred_list = () + # Extract the box of each layer (RFCN only has one layer of C4) + for i in range(self.num_levels): + rpn_cls_score_i = self.squeeze(rpn_cls_score_total[i][img_id:img_id+1:1, ::, ::, ::]) + rpn_bbox_pred_i = self.squeeze(rpn_bbox_pred_total[i][img_id:img_id+1:1, ::, ::, ::]) + + cls_score_list = cls_score_list + (rpn_cls_score_i,) + 
bbox_pred_list = bbox_pred_list + (rpn_bbox_pred_i,) + + proposals, masks = self.get_bboxes_single(cls_score_list, bbox_pred_list, anchor_list) + proposals_tuple += (proposals,) + masks_tuple += (masks,) + return proposals_tuple, masks_tuple + + def get_bboxes_single(self, cls_scores, bbox_preds, mlvl_anchors): + """Get proposal boundingbox.""" + mlvl_proposals = () + mlvl_mask = () + for idx in range(self.num_levels): + rpn_cls_score = self.transpose(cls_scores[idx], self.transpose_shape) + rpn_bbox_pred = self.transpose(bbox_preds[idx], self.transpose_shape) + anchors = mlvl_anchors[idx] + + rpn_cls_score = self.reshape(rpn_cls_score, self.reshape_shape) + rpn_cls_score = self.activation(rpn_cls_score) + rpn_cls_score_process = self.cast(self.squeeze(rpn_cls_score[::, 0::]), self.ms_type) + + rpn_bbox_pred_process = self.cast(self.reshape(rpn_bbox_pred, (-1, 4)), self.ms_type) + + scores_sorted, topk_inds = self.topKv2(rpn_cls_score_process, self.topK_stage1[idx]) + + topk_inds = self.reshape(topk_inds, self.topK_shape[idx]) + + bboxes_sorted = self.gatherND(rpn_bbox_pred_process, topk_inds) + anchors_sorted = self.cast(self.gatherND(anchors, topk_inds), self.ms_type) + + proposals_decode = self.decode(anchors_sorted, bboxes_sorted) + + proposals_decode = self.concat_axis1((proposals_decode, self.reshape(scores_sorted, self.topK_shape[idx]))) + proposals, _, mask_valid = self.nms(proposals_decode) + + mlvl_proposals = mlvl_proposals + (proposals,) + mlvl_mask = mlvl_mask + (mask_valid,) + + proposals = self.concat_axis0(mlvl_proposals) + masks = self.concat_axis0(mlvl_mask) + + _, _, _, _, scores = self.split(proposals) + scores = self.squeeze(scores) + topk_mask = self.cast(self.topK_mask, self.ms_type) + scores_using = self.select(masks, scores, topk_mask) + _, topk_inds = self.topKv2(scores_using, self.max_num) + topk_inds = self.reshape(topk_inds, self.topK_shape_stage2) + proposals = self.gatherND(proposals, topk_inds) + masks = self.gatherND(masks, topk_inds) + return proposals, masks diff --git a/research/cv/rfcn/src/rfcn/resnet.py b/research/cv/rfcn/src/rfcn/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..3386ca21a11774c436838ea6f521a41aec5d84cb --- /dev/null +++ b/research/cv/rfcn/src/rfcn/resnet.py @@ -0,0 +1,262 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Resnet backbone.""" + +import numpy as np +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor +from mindspore.ops import functional as F + + +def weight_init_ones(shape): + """Weight init.""" + return Tensor(np.full(shape, 0.01).astype(np.float32)) + + +def _conv(in_channels, out_channels, kernel_size=3, stride=1, padding=0, pad_mode='pad'): + """Conv2D wrapper.""" + shape = (out_channels, in_channels, kernel_size, kernel_size) + weights = weight_init_ones(shape) + return nn.Conv2d(in_channels, out_channels, + kernel_size=kernel_size, stride=stride, padding=padding, + pad_mode=pad_mode, weight_init=weights, has_bias=False) + + +def _BatchNorm2dInit(out_chls, momentum=0.1, affine=True, use_batch_statistics=True): + """Batchnorm2D wrapper.""" + dtype = np.float32 + gamma_init = Tensor(np.array(np.ones(out_chls)).astype(dtype)) + beta_init = Tensor(np.array(np.ones(out_chls) * 0).astype(dtype)) + moving_mean_init = Tensor(np.array(np.ones(out_chls) * 0).astype(dtype)) + moving_var_init = Tensor(np.array(np.ones(out_chls)).astype(dtype)) + return nn.BatchNorm2d(out_chls, momentum=momentum, affine=affine, gamma_init=gamma_init, + beta_init=beta_init, moving_mean_init=moving_mean_init, + moving_var_init=moving_var_init, use_batch_statistics=use_batch_statistics) + + +class ResNetFea(nn.Cell): + """ + ResNet architecture. + + Args: + block (Cell): Block for network. + layer_nums (list): Numbers of block in different layers. + in_channels (list): Input channel in each layer. + out_channels (list): Output channel in each layer. + weights_update (bool): Weight update flag. + Returns: + Tensor, output tensor. + + Examples: + >>> ResNet(ResidualBlock, + >>> [3, 4, 6, 3], + >>> [64, 256, 512, 1024], + >>> [256, 512, 1024, 2048], + >>> False) + """ + def __init__(self, + block, + layer_nums, + in_channels, + out_channels, + weights_update=False): + super(ResNetFea, self).__init__() + + if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: + raise ValueError("the length of " + "layer_num, inchannel, outchannel list must be 4!") + + bn_training = False + self.conv1 = _conv(3, 64, kernel_size=7, stride=2, padding=3, pad_mode='pad') + self.bn1 = _BatchNorm2dInit(64, affine=bn_training, use_batch_statistics=bn_training) + self.relu = P.ReLU() + self.maxpool = P.MaxPool(kernel_size=3, strides=2, pad_mode="SAME") + self.weights_update = weights_update + + if not self.weights_update: + self.conv1.weight.requires_grad = False + + self.layer1 = self._make_layer(block, + layer_nums[0], + in_channel=in_channels[0], + out_channel=out_channels[0], + stride=1, + training=bn_training, + weights_update=self.weights_update) + self.layer2 = self._make_layer(block, + layer_nums[1], + in_channel=in_channels[1], + out_channel=out_channels[1], + stride=2, + training=bn_training, + weights_update=True) + self.layer3 = self._make_layer(block, + layer_nums[2], + in_channel=in_channels[2], + out_channel=out_channels[2], + stride=2, + training=bn_training, + weights_update=True) + self.layer4 = self._make_layer(block, + layer_nums[3], + in_channel=in_channels[3], + out_channel=out_channels[3], + stride=1, + training=bn_training, + weights_update=True) + + def _make_layer(self, block, layer_num, in_channel, out_channel, stride, training=False, weights_update=False): + """Make block layer.""" + layers = [] + down_sample = False + if stride != 1 or in_channel != out_channel: 
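+            # A projection shortcut is needed whenever the spatial stride or the
+            # channel count changes between the block input and output.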
+ down_sample = True + resblk = block(in_channel, + out_channel, + stride=stride, + down_sample=down_sample, + training=training, + weights_update=weights_update) + layers.append(resblk) + + for _ in range(1, layer_num): + resblk = block(out_channel, out_channel, stride=1, training=training, weights_update=weights_update) + layers.append(resblk) + + return nn.SequentialCell(layers) + + def construct(self, x): + """ + construct the ResNet Network + + Args: + x: input feature data. + + Returns: + Tensor, output tensor. + """ + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + c1 = self.maxpool(x) + + c2 = self.layer1(c1) + identity = c2 + if not self.weights_update: + identity = F.stop_gradient(c2) + c3 = self.layer2(identity) + c4 = self.layer3(c3) + c5 = self.layer4(c4) + + return c4, c5 + + +class ResidualBlockUsing(nn.Cell): + """ + ResNet V1 residual block definition. + + Args: + in_channels (int) - Input channel. + out_channels (int) - Output channel. + stride (int) - Stride size for the initial convolutional layer. Default: 1. + down_sample (bool) - If to do the downsample in block. Default: False. + momentum (float) - Momentum for batchnorm layer. Default: 0.1. + training (bool) - Training flag. Default: False. + weights_updata (bool) - Weights update flag. Default: False. + + Returns: + Tensor, output tensor. + + Examples: + ResidualBlock(3,256,stride=2,down_sample=True) + """ + expansion = 4 + + def __init__(self, + in_channels, + out_channels, + stride=1, + down_sample=False, + momentum=0.1, + training=False, + weights_update=False): + super(ResidualBlockUsing, self).__init__() + + self.affine = weights_update + + out_chls = out_channels // self.expansion + self.conv1 = _conv(in_channels, out_chls, kernel_size=1, stride=1, padding=0) + self.bn1 = _BatchNorm2dInit(out_chls, momentum=momentum, affine=self.affine, use_batch_statistics=training) + + self.conv2 = _conv(out_chls, out_chls, kernel_size=3, stride=stride, padding=1) + self.bn2 = _BatchNorm2dInit(out_chls, momentum=momentum, affine=self.affine, use_batch_statistics=training) + + self.conv3 = _conv(out_chls, out_channels, kernel_size=1, stride=1, padding=0) + self.bn3 = _BatchNorm2dInit(out_channels, momentum=momentum, affine=self.affine, use_batch_statistics=training) + + if training: + self.bn1 = self.bn1.set_train() + self.bn2 = self.bn2.set_train() + self.bn3 = self.bn3.set_train() + + if not weights_update: + self.conv1.weight.requires_grad = False + self.conv2.weight.requires_grad = False + self.conv3.weight.requires_grad = False + + self.relu = P.ReLU() + self.downsample = down_sample + if self.downsample: + self.conv_down_sample = _conv(in_channels, out_channels, kernel_size=1, stride=stride, padding=0) + self.bn_down_sample = _BatchNorm2dInit(out_channels, momentum=momentum, affine=self.affine, + use_batch_statistics=training) + if training: + self.bn_down_sample = self.bn_down_sample.set_train() + if not weights_update: + self.conv_down_sample.weight.requires_grad = False + self.add = P.Add() + + def construct(self, x): + """ + construct the ResNet V1 residual block + + Args: + x: input feature data. + + Returns: + Tensor, output tensor. 
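+
+        The block applies conv1-bn1-relu, conv2-bn2-relu and conv3-bn3, optionally
+        projects the identity through a 1x1 convolution when downsampling, then adds
+        the shortcut and applies a final ReLU.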
+ """ + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample: + identity = self.conv_down_sample(identity) + identity = self.bn_down_sample(identity) + + out = self.add(out, identity) + out = self.relu(out) + + return out diff --git a/research/cv/rfcn/src/rfcn/rfcn_loss.py b/research/cv/rfcn/src/rfcn/rfcn_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..d41fc7ee5ae8f0ced81e65ec8b994e10a6c4bbcc --- /dev/null +++ b/research/cv/rfcn/src/rfcn/rfcn_loss.py @@ -0,0 +1,89 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Rfcn Loss network.""" + +import numpy as np +import mindspore.numpy as np2 + +import mindspore.common.dtype as mstype +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor + +class Loss(nn.Cell): + """ + Rfcn Loss subnet. + + Args: + config (dict) - Config. + num_classes (int) - Class number. + + Returns: + Tuple, tuple of output tensor. + + Examples: + Loss(config=config, num_classes = 81) + """ + def __init__(self, + config, + num_classes + ): + super(Loss, self).__init__() + cfg = config + self.dtype = np.float32 + self.ms_type = mstype.float32 + self.rfcn_loss_cls_weight = Tensor(np.array(cfg.rfcn_loss_cls_weight).astype(self.dtype)) + self.rfcn_loss_reg_weight = Tensor(np.array(cfg.rfcn_loss_reg_weight).astype(self.dtype)) + self.num_classes = num_classes + self.logicaland = P.LogicalAnd() + self.loss_cls = P.SoftmaxCrossEntropyWithLogits() + self.loss_bbox = P.SmoothL1Loss(beta=1.0) + self.onehot = P.OneHot() + self.greater = P.Greater() + self.cast = P.Cast() + self.sum_loss = P.ReduceSum() + self.on_value = Tensor(1.0, mstype.float32) + self.off_value = Tensor(0.0, mstype.float32) + self.value = Tensor(1.0, self.ms_type) + + + def construct(self, x_cls, x_reg, bbox_targets, labels, mask): + """rfcn loss construct""" + if self.training: + labels = self.onehot(labels, self.num_classes, self.on_value, self.off_value) + loss, loss_cls, loss_reg, loss_print = self.loss(x_cls, x_reg, bbox_targets, labels, mask) + out = (loss, loss_cls, loss_reg, loss_print) + else: + out = (x_cls, (x_cls / self.value), x_reg, x_cls) + + return out + + + def loss(self, cls_score, bbox_pred, bbox_targets, labels, weights): + """Loss method.""" + loss_print = () + loss_cls, _ = self.loss_cls(cls_score, labels) + bbox_pred = bbox_pred[:, 4:8] + loss_reg = self.loss_bbox(bbox_pred, bbox_targets) + loss_loc = np2.sum(loss_reg, axis=1) / 4 + # compute total loss + weights = self.cast(weights, self.ms_type) + loss_cls = loss_cls * weights + loss_loc = loss_loc * weights + + loss = loss_loc * self.rfcn_loss_reg_weight + loss_cls * self.rfcn_loss_cls_weight + loss = np2.sum(loss) / self.sum_loss(weights, (0,)) + loss_print 
+= (loss_cls, loss_loc) + return loss, loss_cls, loss_reg, loss_print diff --git a/research/cv/rfcn/src/rfcn/rfcn_resnet.py b/research/cv/rfcn/src/rfcn/rfcn_resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..fe9665746e3f273d28b5d5afbec34b8b8c243cf9 --- /dev/null +++ b/research/cv/rfcn/src/rfcn/rfcn_resnet.py @@ -0,0 +1,481 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Rfcn based on ResNet.""" + +import numpy as np + +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore.common.tensor import Tensor +import mindspore.common.dtype as mstype +from mindspore.ops import functional as F +from mindspore.ops.operations import _inner_ops as inner +from .resnet import ResNetFea, ResidualBlockUsing +from .bbox_assign_sample_stage2 import BboxAssignSampleForLoss +from .proposal_generator import Proposal +from .rfcn_loss import Loss +from .rpn import RPN +from .anchor_generator import AnchorGenerator + + +class Rfcn_Resnet(nn.Cell): + """ + Rfcn Network. + + Note: + backbone = resnet + + Returns: + Tuple, tuple of output tensor. + rpn_loss: Scalar, Total loss of RPN subnet. + rfcn_loss: Scalar, Total loss of Loss subnet. + rpn_cls_loss: Scalar, Classification loss of RPN subnet. + rpn_reg_loss: Scalar, Regression loss of RPN subnet. + rfcn_cls_loss: Scalar, Classification loss of Loss subnet. + rfcn_reg_loss: Scalar, Regression loss of Loss subnet. 
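+
+        In inference mode (set_train(False)) the network instead returns the
+        detected bboxes, class labels and validity masks produced by multiclass NMS.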
+ + Examples: + net = Rfcn_Resnet() + """ + def __init__(self, config): + super(Rfcn_Resnet, self).__init__() + self.dtype = np.float32 + self.ms_type = mstype.float32 + self.train_batch_size = config.batch_size + self.num_classes = config.num_classes + self.anchor_scales = config.anchor_scales + self.anchor_ratios = config.anchor_ratios + self.anchor_strides = config.anchor_strides + self.target_means = tuple(config.rfcn_target_means) + self.target_stds = tuple(config.rfcn_target_stds) + + # Anchor generator + anchor_base_sizes = None + self.anchor_base_sizes = list( + self.anchor_strides) if anchor_base_sizes is None else anchor_base_sizes + + self.anchor_generators = [] + for anchor_base in self.anchor_base_sizes: + self.anchor_generators.append( + AnchorGenerator(anchor_base, self.anchor_scales, self.anchor_ratios)) + + self.num_anchors = len(self.anchor_ratios) * len(self.anchor_scales) + + featmap_sizes = config.feature_shapes + assert len(featmap_sizes) == len(self.anchor_generators) + + self.anchor_list = self.get_anchors(featmap_sizes) + + # Backbone resnet + self.backbone = ResNetFea(ResidualBlockUsing, + config.resnet_block, + config.resnet_in_channels, + config.resnet_out_channels, + False) + + # Rpn and rpn loss + self.gt_labels_stage1 = Tensor(np.ones((self.train_batch_size, config.num_gts)).astype(np.uint8)) + self.rpn_with_loss = RPN(config, + self.train_batch_size, + config.rpn_in_channels, + config.rpn_feat_channels, + config.num_anchors, + config.rpn_cls_out_channels) + + # Proposal + self.proposal_generator = Proposal(config, + self.train_batch_size, + config.activate_num_classes, + config.use_sigmoid_cls) + self.proposal_generator.set_train_local(config, True) + self.proposal_generator_test = Proposal(config, + config.test_batch_size, + config.activate_num_classes, + config.use_sigmoid_cls) + self.proposal_generator_test.set_train_local(config, False) + + # Assign and sampler stage two + self.bbox_assigner_sampler_for_loss = BboxAssignSampleForLoss(config, self.train_batch_size, + config.num_bboxes_stage2, True) + self.decode = P.BoundingBoxDecode(max_shape=(config.img_height, config.img_width), means=self.target_means, \ + stds=self.target_stds) + + # compute rfcn loss + self.loss = Loss(config, self.num_classes) + + # Op declare + self.squeeze = P.Squeeze() + self.cast = P.Cast() + + self.concat = P.Concat(axis=0) + self.concat_1 = P.Concat(axis=1) + self.concat_2 = P.Concat(axis=2) + self.reshape = P.Reshape() + self.select = P.Select() + self.greater = P.Greater() + self.transpose = P.Transpose() + + # Improve speed + self.concat_start = min(self.num_classes - 2, 55) + self.concat_end = (self.num_classes - 1) + + # Test mode + self.test_mode_init(config) + + # Init tensor + self.init_tensor(config) + + # for roi pooling + self.k = config.k + self.group_size = config.group_size + self.n_cls_reg = config.n_cls_reg + self.spatial_scale = 1.0 / self.anchor_strides[0] # 1 / 16 + self.roi_nums_test = config.roi_nums_test + self.num_classes = config.num_classes + + + self.resnet101_conv_new = nn.Conv2d(2048, 1024, kernel_size=(1, 1), has_bias=True) + self.generatePsScoreMap = nn.Conv2d(1024, self.k * self.k * self.num_classes, kernel_size=(1, 1), has_bias=True) + self.generateLocMap = nn.Conv2d(1024, self.k * self.k * self.n_cls_reg * 4, kernel_size=(1, 1), has_bias=True) + + + self.roi_nums = (config.num_expected_pos_stage2 + config.num_expected_neg_stage2) * config.batch_size + self.psRoI_score = inner.PsROIPooling(pooled_height=self.k, pooled_width=self.k, 
num_rois=self.roi_nums, + spatial_scale=self.spatial_scale, out_dim=self.num_classes, + group_size=self.group_size) + self.psRoI_loc = inner.PsROIPooling(pooled_height=self.k, pooled_width=self.k, num_rois=self.roi_nums, + spatial_scale=self.spatial_scale, out_dim=self.n_cls_reg * 4, + group_size=self.group_size) + + self.psRoI_score_test = inner.PsROIPooling(pooled_height=self.k, pooled_width=self.k, + num_rois=self.roi_nums_test, spatial_scale=self.spatial_scale, + out_dim=self.num_classes, group_size=self.group_size) + self.psRoI_loc_test = inner.PsROIPooling(pooled_height=self.k, pooled_width=self.k, + num_rois=self.roi_nums_test, spatial_scale=self.spatial_scale, + out_dim=self.n_cls_reg * 4, group_size=self.group_size) + + self.avg_pool_score = nn.AvgPool2d(kernel_size=self.k, stride=self.k) + self.avg_pool_loc = nn.AvgPool2d(kernel_size=self.k, stride=self.k) + + + def test_mode_init(self, config): + """ + Initialize test_mode from the config file. + + Args: + config (file): config file. + test_batch_size (int): Size of test batch. + rpn_max_num (int): max num of rpn. + test_score_thresh (float): threshold of test score. + test_iou_thr (float): threshold of test iou. + + Examples: + self.test_mode_init(config) + """ + self.test_batch_size = config.test_batch_size + self.split = P.Split(axis=0, output_num=self.test_batch_size) + self.split_shape = P.Split(axis=0, output_num=4) + self.split_scores = P.Split(axis=1, output_num=self.num_classes) + self.split_cls = P.Split(axis=0, output_num=self.num_classes-1) + self.tile = P.Tile() + self.gather = P.GatherNd() + + self.rpn_max_num = config.rpn_max_num + + self.zeros_for_nms = Tensor(np.zeros((self.rpn_max_num, 3)).astype(self.dtype)) + self.ones_mask = np.ones((self.rpn_max_num, 1)).astype(np.bool) + self.zeros_mask = np.zeros((self.rpn_max_num, 1)).astype(np.bool) + self.bbox_mask = Tensor(np.concatenate((self.ones_mask, self.zeros_mask, + self.ones_mask, self.zeros_mask), axis=1)) + self.nms_pad_mask = Tensor(np.concatenate((self.ones_mask, self.ones_mask, + self.ones_mask, self.ones_mask, self.zeros_mask), axis=1)) + + self.test_score_thresh = Tensor(np.ones((self.rpn_max_num, 1)).astype(self.dtype) * config.test_score_thr) + self.test_score_zeros = Tensor(np.ones((self.rpn_max_num, 1)).astype(self.dtype) * 0) + self.test_box_zeros = Tensor(np.ones((self.rpn_max_num, 4)).astype(self.dtype) * -1) + self.test_iou_thr = Tensor(np.ones((self.rpn_max_num, 1)).astype(self.dtype) * config.test_iou_thr) + self.test_max_per_img = config.test_max_per_img + self.nms_test = P.NMSWithMask(config.test_iou_thr) + self.softmax = P.Softmax(axis=1) + self.logicand = P.LogicalAnd() + self.oneslike = P.OnesLike() + self.test_topk = P.TopK(sorted=True) + self.test_num_proposal = self.test_batch_size * self.rpn_max_num + + def init_tensor(self, config): + + roi_index = [np.array(np.ones((config.num_expected_pos_stage2 + config.num_expected_neg_stage2, 1)) * i, + dtype=self.dtype) for i in range(self.train_batch_size)] + + roi_index_test = [np.array(np.ones((config.rpn_max_num, 1)) * i, dtype=self.dtype) \ + for i in range(self.test_batch_size)] + + self.roi_index_tensor = Tensor(np.concatenate(roi_index)) + self.roi_index_test_tensor = Tensor(np.concatenate(roi_index_test)) + + def construct(self, img_data, img_metas, gt_bboxes, gt_labels, gt_valids): + """ + construct the Rfcn Network. + + Args: + img_data: input image data. + img_metas: meta label of img. + gt_bboxes (Tensor): get the value of bboxes. + gt_labels (Tensor): get the value of labels. 
+ gt_valids (Tensor): get the valid part of bboxes. + + Returns: + Tuple,tuple of output tensor + """ + c4, c5 = self.backbone(img_data) + rpn_loss, cls_score, bbox_pred, rpn_cls_loss, rpn_reg_loss, _ = self.rpn_with_loss(c4, + img_metas, + self.anchor_list, + gt_bboxes, + self.gt_labels_stage1, + gt_valids) + + if self.training: + proposal, proposal_mask = self.proposal_generator(cls_score, bbox_pred, self.anchor_list) + else: + proposal, proposal_mask = self.proposal_generator_test(cls_score, bbox_pred, self.anchor_list) + + gt_labels = self.cast(gt_labels, mstype.int32) + gt_valids = self.cast(gt_valids, mstype.int32) + bboxes_tuple = () + deltas_tuple = () + labels_tuple = () + mask_tuple = () + if self.training: + for i in range(self.train_batch_size): + gt_bboxes_i = self.squeeze(gt_bboxes[i:i + 1:1, ::]) + + gt_labels_i = self.squeeze(gt_labels[i:i + 1:1, ::]) + gt_labels_i = self.cast(gt_labels_i, mstype.uint8) + + gt_valids_i = self.squeeze(gt_valids[i:i + 1:1, ::]) + gt_valids_i = self.cast(gt_valids_i, mstype.bool_) + + bboxes, deltas, labels, mask = self.bbox_assigner_sampler_for_loss(gt_bboxes_i, + gt_labels_i, + proposal_mask[i], + proposal[i][::, 0:4:1], + gt_valids_i) + bboxes_tuple += (bboxes,) + deltas_tuple += (deltas,) + labels_tuple += (labels,) + mask_tuple += (mask,) + + bbox_targets = self.concat(deltas_tuple) + rfcn_labels = self.concat(labels_tuple) + bbox_targets = F.stop_gradient(bbox_targets) + rfcn_labels = F.stop_gradient(rfcn_labels) + rfcn_labels = self.cast(rfcn_labels, mstype.int32) + else: + mask_tuple += proposal_mask + bbox_targets = proposal_mask + rfcn_labels = proposal_mask + for p_i in proposal: + bboxes_tuple += (p_i[::, 0:4:1],) + + if self.training: + if self.train_batch_size > 1: + bboxes_all = self.concat(bboxes_tuple) + else: + bboxes_all = bboxes_tuple[0] + rois = self.concat_1((self.roi_index_tensor, bboxes_all)) + else: + if self.test_batch_size > 1: + bboxes_all = self.concat(bboxes_tuple) + else: + bboxes_all = bboxes_tuple[0] + rois = self.concat_1((self.roi_index_test_tensor, bboxes_all)) + + rois = self.cast(rois, mstype.float32) + rois = F.stop_gradient(rois) + + # roi pooling + out_put = self.resnet101_conv_new(c5) + score_map = self.generatePsScoreMap(out_put) + loc_map = self.generateLocMap(out_put) + + if self.training: + score_pooling = self.psRoI_score(score_map, rois)[0] + loc_pooling = self.psRoI_loc(loc_map, rois)[0] + else: + score_pooling = self.psRoI_score_test(score_map, rois)[0] + loc_pooling = self.psRoI_loc_test(loc_map, rois)[0] + + roi_scores = self.avg_pool_score(score_pooling) + roi_locs = self.avg_pool_loc(loc_pooling) + roi_scores = self.squeeze(roi_scores) + roi_locs = self.squeeze(roi_locs) + + rfcn_masks = self.concat(mask_tuple) + rfcn_masks = F.stop_gradient(rfcn_masks) + rfcn_mask_squeeze = self.squeeze(self.cast(rfcn_masks, mstype.bool_)) + rfcn_loss, rfcn_cls_loss, rfcn_reg_loss, _ = self.loss(roi_scores, + roi_locs, + bbox_targets, + rfcn_labels, + rfcn_mask_squeeze) + output = () + if self.training: + output += (rpn_loss, rfcn_loss, rpn_cls_loss, rpn_reg_loss, rfcn_cls_loss, rfcn_reg_loss) + else: + output = self.get_det_bboxes(rfcn_cls_loss, rfcn_reg_loss, rfcn_masks, bboxes_all, img_metas) + + return output + + def get_det_bboxes(self, cls_logits, reg_logits, mask_logits, rois, img_metas): + """Get the actual detection box.""" + scores = self.softmax(cls_logits) + boxes_all = () + for i in range(self.num_classes): + reg_logits_i = self.squeeze(reg_logits[::, 4:8:1]) + out_boxes_i = self.decode(rois, 
reg_logits_i) + boxes_all += (out_boxes_i,) + img_metas_all = self.split(img_metas) + scores_all = self.split(scores) + mask_all = self.split(self.cast(mask_logits, mstype.int32)) + boxes_all_with_batchsize = () + for i in range(self.test_batch_size): + scale = self.split_shape(self.squeeze(img_metas_all[i])) + scale_h = scale[2] + scale_w = scale[3] + boxes_tuple = () + for j in range(self.num_classes): + boxes_tmp = self.split(boxes_all[j]) + out_boxes_h = boxes_tmp[i] / scale_h + out_boxes_w = boxes_tmp[i] / scale_w + boxes_tuple += (self.select(self.bbox_mask, out_boxes_w, out_boxes_h),) + boxes_tmp = self.split(boxes_all[0]) + out_boxes_h = boxes_tmp[i] / scale_h + out_boxes_w = boxes_tmp[i] / scale_w + boxes_tuple += (self.select(self.bbox_mask, out_boxes_w, out_boxes_h),) + + boxes_all_with_batchsize += (boxes_tuple,) + + output = self.multiclass_nms(boxes_all_with_batchsize, scores_all, mask_all) + return output + + def multiclass_nms(self, boxes_all, scores_all, mask_all): + """Multiscale postprocessing.""" + all_bboxes = () + all_labels = () + all_masks = () + + for i in range(self.test_batch_size): + bboxes = boxes_all[i] + scores = scores_all[i] + masks = self.cast(mask_all[i], mstype.bool_) + + res_boxes_tuple = () + res_labels_tuple = () + res_masks_tuple = () + for j in range(self.num_classes - 1): + k = j + 1 + _cls_scores = scores[::, k:k + 1:1] + _bboxes = self.squeeze(bboxes[k]) + _mask_o = self.reshape(masks, (self.rpn_max_num, 1)) + + cls_mask = self.greater(_cls_scores, self.test_score_thresh) + _mask = self.logicand(_mask_o, cls_mask) + + _reg_mask = self.cast(self.tile(self.cast(_mask, mstype.int32), (1, 4)), mstype.bool_) + + _bboxes = self.select(_reg_mask, _bboxes, self.test_box_zeros) + + _cls_scores = self.select(_mask, _cls_scores, self.test_score_zeros) + __cls_scores = self.squeeze(_cls_scores) + scores_sorted, topk_inds = self.test_topk(__cls_scores, self.rpn_max_num) + topk_inds = self.reshape(topk_inds, (self.rpn_max_num, 1)) + scores_sorted = self.reshape(scores_sorted, (self.rpn_max_num, 1)) + _bboxes_sorted = self.gather(_bboxes, topk_inds) + _mask_sorted = self.gather(_mask, topk_inds) + + scores_sorted = self.tile(scores_sorted, (1, 4)) + cls_dets = self.concat_1((_bboxes_sorted, scores_sorted)) + cls_dets = P.Slice()(cls_dets, (0, 0), (self.rpn_max_num, 5)) + + cls_dets, _index, _mask_nms = self.nms_test(cls_dets) + _index = self.reshape(_index, (self.rpn_max_num, 1)) + _mask_nms = self.reshape(_mask_nms, (self.rpn_max_num, 1)) + + _mask_n = self.gather(_mask_sorted, _index) + + _mask_n = self.logicand(_mask_n, _mask_nms) + cls_labels = self.oneslike(_index) * j + res_boxes_tuple += (cls_dets,) + res_labels_tuple += (cls_labels,) + res_masks_tuple += (_mask_n,) + + res_boxes_start = self.concat(res_boxes_tuple[:self.concat_start]) + res_labels_start = self.concat(res_labels_tuple[:self.concat_start]) + res_masks_start = self.concat(res_masks_tuple[:self.concat_start]) + + res_boxes_end = self.concat(res_boxes_tuple[self.concat_start:self.concat_end]) + res_labels_end = self.concat(res_labels_tuple[self.concat_start:self.concat_end]) + res_masks_end = self.concat(res_masks_tuple[self.concat_start:self.concat_end]) + + res_boxes = self.concat((res_boxes_start, res_boxes_end)) + res_labels = self.concat((res_labels_start, res_labels_end)) + res_masks = self.concat((res_masks_start, res_masks_end)) + + reshape_size = (self.num_classes - 1) * self.rpn_max_num + res_boxes = self.reshape(res_boxes, (1, reshape_size, 5)) + res_labels = 
self.reshape(res_labels, (1, reshape_size, 1)) + res_masks = self.reshape(res_masks, (1, reshape_size, 1)) + + all_bboxes += (res_boxes,) + all_labels += (res_labels,) + all_masks += (res_masks,) + + all_bboxes = self.concat(all_bboxes) + all_labels = self.concat(all_labels) + all_masks = self.concat(all_masks) + return all_bboxes, all_labels, all_masks + + def get_anchors(self, featmap_sizes): + """Get anchors according to feature map sizes. + + Args: + featmap_sizes (list[tuple]): Multi-level feature map sizes. + img_metas (list[dict]): Image meta info. + + Returns: + tuple: anchors of each image, valid flags of each image + """ + num_levels = len(featmap_sizes) + + # since feature map sizes of all images are the same, we only compute + # anchors for one time + multi_level_anchors = () + for i in range(num_levels): + anchors = self.anchor_generators[i].grid_anchors( + featmap_sizes[i], self.anchor_strides[i]) + multi_level_anchors += (Tensor(anchors.astype(self.dtype)),) + + return multi_level_anchors + +class Rfcn_Infer(nn.Cell): + def __init__(self, config): + super(Rfcn_Infer, self).__init__() + self.network = Rfcn_Resnet(config) + self.network.set_train(False) + + def construct(self, img_data, img_metas): + output = self.network(img_data, img_metas, None, None, None) + return output diff --git a/research/cv/rfcn/src/rfcn/rpn.py b/research/cv/rfcn/src/rfcn/rpn.py new file mode 100644 index 0000000000000000000000000000000000000000..3e2a5f4c59519d4eff7371931f5d301cac4a7479 --- /dev/null +++ b/research/cv/rfcn/src/rfcn/rpn.py @@ -0,0 +1,317 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""RPN for Rfcn""" +import numpy as np +import mindspore.nn as nn +import mindspore.common.dtype as mstype +from mindspore import context, Tensor +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore.common.initializer import initializer +from .bbox_assign_sample import BboxAssignSample + + +class RpnRegClsBlock(nn.Cell): + """ + Rpn reg cls block for rpn layer + + Args: + in_channels (int) - Input channels of shared convolution. + feat_channels (int) - Output channels of shared convolution. + num_anchors (int) - The anchor number. + cls_out_channels (int) - Output channels of classification convolution. + weight_conv (Tensor) - weight init for rpn conv. + bias_conv (Tensor) - bias init for rpn conv. + weight_cls (Tensor) - weight init for rpn cls conv. + bias_cls (Tensor) - bias init for rpn cls conv. + weight_reg (Tensor) - weight init for rpn reg conv. + bias_reg (Tensor) - bias init for rpn reg conv. + + Returns: + Tensor, output tensor. 
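+        The two outputs are the anchor classification score map (num_anchors *
+        cls_out_channels channels) and the anchor bbox regression map
+        (num_anchors * 4 channels).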
+ """ + def __init__(self, + in_channels, + feat_channels, + num_anchors, + cls_out_channels, + weight_conv, + bias_conv, + weight_cls, + bias_cls, + weight_reg, + bias_reg): + super(RpnRegClsBlock, self).__init__() + self.rpn_conv = nn.Conv2d(in_channels, feat_channels, kernel_size=3, stride=1, pad_mode='same', + has_bias=True, weight_init=weight_conv, bias_init=bias_conv) + self.relu = nn.ReLU() + + self.rpn_cls = nn.Conv2d(feat_channels, num_anchors * cls_out_channels, kernel_size=1, pad_mode='valid', + has_bias=True, weight_init=weight_cls, bias_init=bias_cls) + self.rpn_reg = nn.Conv2d(feat_channels, num_anchors * 4, kernel_size=1, pad_mode='valid', + has_bias=True, weight_init=weight_reg, bias_init=bias_reg) + + def construct(self, x): + x = self.relu(self.rpn_conv(x)) + + x1 = self.rpn_cls(x) + x2 = self.rpn_reg(x) + + return x1, x2 + + +class RPN(nn.Cell): + """ + ROI proposal network.. + + Args: + config (dict) - Config. + batch_size (int) - Batchsize. + in_channels (int) - Input channels of shared convolution. + feat_channels (int) - Output channels of shared convolution. + num_anchors (int) - The anchor number. + cls_out_channels (int) - Output channels of classification convolution. + + Returns: + Tuple, tuple of output tensor. + + Examples: + RPN(config=config, batch_size=2, in_channels=256, feat_channels=1024, + num_anchors=3, cls_out_channels=512) + """ + def __init__(self, + config, + batch_size, + in_channels, + feat_channels, + num_anchors, + cls_out_channels): + super(RPN, self).__init__() + cfg_rpn = config + self.dtype = np.float32 + self.ms_type = mstype.float32 + self.device_type = "Ascend" if context.get_context("device_target") == "Ascend" else "Others" + self.slice_index = () + self.feature_anchor_shape = () + self.slice_index += (0,) + index = 0 + for shape in cfg_rpn.feature_shapes: + self.slice_index += (self.slice_index[index] + shape[0] * shape[1] * num_anchors,) + self.feature_anchor_shape += (shape[0] * shape[1] * num_anchors * batch_size,) + index += 1 + + self.num_anchors = num_anchors + self.batch_size = batch_size + self.test_batch_size = cfg_rpn.test_batch_size + self.num_layers = 1 + self.real_ratio = Tensor(np.ones((1, 1)).astype(self.dtype)) + + self.rpn_convs_list = nn.layer.CellList(self._make_rpn_layer(self.num_layers, in_channels, feat_channels, + num_anchors, cls_out_channels)) + + self.transpose = P.Transpose() + self.reshape = P.Reshape() + self.concat = P.Concat(axis=0) + self.fill = P.Fill() + self.placeh1 = Tensor(np.ones((1,)).astype(self.dtype)) + + self.trans_shape = (0, 2, 3, 1) + + self.reshape_shape_reg = (-1, 4) + self.reshape_shape_cls = (-1,) + self.rpn_loss_reg_weight = Tensor(np.array(cfg_rpn.rpn_loss_reg_weight).astype(self.dtype)) + self.rpn_loss_cls_weight = Tensor(np.array(cfg_rpn.rpn_loss_cls_weight).astype(self.dtype)) + self.num_expected_total = Tensor(np.array(cfg_rpn.num_expected_neg * self.batch_size).astype(self.dtype)) + self.num_bboxes = cfg_rpn.num_bboxes + self.get_targets = BboxAssignSample(cfg_rpn, self.batch_size, self.num_bboxes, False) + self.CheckValid = P.CheckValid() + self.sum_loss = P.ReduceSum() + self.loss_cls = P.SigmoidCrossEntropyWithLogits() + self.loss_bbox = P.SmoothL1Loss(beta=1.0/9.0) + self.squeeze = P.Squeeze() + self.cast = P.Cast() + self.tile = P.Tile() + self.zeros_like = P.ZerosLike() + self.loss = Tensor(np.zeros((1,)).astype(self.dtype)) + self.clsloss = Tensor(np.zeros((1,)).astype(self.dtype)) + self.regloss = Tensor(np.zeros((1,)).astype(self.dtype)) + + def _make_rpn_layer(self, 
num_layers, in_channels, feat_channels, num_anchors, cls_out_channels): + """ + make rpn layer for rpn proposal network + + Args: + num_layers (int) - layer num. + in_channels (int) - Input channels of shared convolution. + feat_channels (int) - Output channels of shared convolution. + num_anchors (int) - The anchor number. + cls_out_channels (int) - Output channels of classification convolution. + + Returns: + List, list of RpnRegClsBlock cells. + """ + rpn_layer = [] + + shp_weight_conv = (feat_channels, in_channels, 3, 3) + shp_bias_conv = (feat_channels,) + weight_conv = initializer('Normal', shape=shp_weight_conv, dtype=self.ms_type).to_tensor() + bias_conv = initializer(0, shape=shp_bias_conv, dtype=self.ms_type).to_tensor() + + shp_weight_cls = (num_anchors * cls_out_channels, feat_channels, 1, 1) + shp_bias_cls = (num_anchors * cls_out_channels,) + weight_cls = initializer('Normal', shape=shp_weight_cls, dtype=self.ms_type).to_tensor() + bias_cls = initializer(0, shape=shp_bias_cls, dtype=self.ms_type).to_tensor() + + shp_weight_reg = (num_anchors * 4, feat_channels, 1, 1) + shp_bias_reg = (num_anchors * 4,) + weight_reg = initializer('Normal', shape=shp_weight_reg, dtype=self.ms_type).to_tensor() + bias_reg = initializer(0, shape=shp_bias_reg, dtype=self.ms_type).to_tensor() + + for i in range(num_layers): + rpn_reg_cls_block = RpnRegClsBlock(in_channels, feat_channels, num_anchors, cls_out_channels, \ + weight_conv, bias_conv, weight_cls, \ + bias_cls, weight_reg, bias_reg) + if self.device_type == "Ascend": + rpn_reg_cls_block.to_float(mstype.float16) + rpn_layer.append(rpn_reg_cls_block) + + for i in range(1, num_layers): + rpn_layer[i].rpn_conv.weight = rpn_layer[0].rpn_conv.weight + rpn_layer[i].rpn_cls.weight = rpn_layer[0].rpn_cls.weight + rpn_layer[i].rpn_reg.weight = rpn_layer[0].rpn_reg.weight + + rpn_layer[i].rpn_conv.bias = rpn_layer[0].rpn_conv.bias + rpn_layer[i].rpn_cls.bias = rpn_layer[0].rpn_cls.bias + rpn_layer[i].rpn_reg.bias = rpn_layer[0].rpn_reg.bias + + return rpn_layer + + def construct(self, inputs, img_metas, anchor_list, gt_bboxes, gt_labels, gt_valids): + """rpn construct""" + loss_print = () + rpn_cls_score = () + rpn_bbox_pred = () + rpn_cls_score_total = () + rpn_bbox_pred_total = () + + x1, x2 = self.rpn_convs_list[0](inputs) + + rpn_cls_score_total = rpn_cls_score_total + (x1,) + rpn_bbox_pred_total = rpn_bbox_pred_total + (x2,) + + x1 = self.transpose(x1, self.trans_shape) + x1 = self.reshape(x1, self.reshape_shape_cls) + + x2 = self.transpose(x2, self.trans_shape) + x2 = self.reshape(x2, self.reshape_shape_reg) + + rpn_cls_score = rpn_cls_score + (x1,) + rpn_bbox_pred = rpn_bbox_pred + (x2,) + + loss = self.loss + clsloss = self.clsloss + regloss = self.regloss + bbox_targets = () + bbox_weights = () + labels = () + label_weights = () + + output = () + if self.training: + for i in range(self.batch_size): + multi_level_flags = () + anchor_list_tuple = () + + for j in range(self.num_layers): + res = self.cast(self.CheckValid(anchor_list[j], self.squeeze(img_metas[i:i + 1:1, ::])), + mstype.int32) + multi_level_flags = multi_level_flags + (res,) + anchor_list_tuple = anchor_list_tuple + (anchor_list[j],) + + valid_flag_list = self.concat(multi_level_flags) + anchor_using_list = self.concat(anchor_list_tuple) + + gt_bboxes_i = self.squeeze(gt_bboxes[i:i + 1:1, ::]) + gt_labels_i = self.squeeze(gt_labels[i:i + 1:1, ::]) + gt_valids_i = self.squeeze(gt_valids[i:i + 1:1, ::]) + + bbox_target, bbox_weight, label, label_weight = 
self.get_targets(gt_bboxes_i, + gt_labels_i, + self.cast(valid_flag_list, + mstype.bool_), + anchor_using_list, gt_valids_i) + + bbox_target = self.cast(bbox_target, self.ms_type) + bbox_weight = self.cast(bbox_weight, self.ms_type) + label = self.cast(label, self.ms_type) + label_weight = self.cast(label_weight, self.ms_type) + + for j in range(self.num_layers): + begin = self.slice_index[j] + end = self.slice_index[j + 1] + stride = 1 + bbox_targets += (bbox_target[begin:end:stride, ::],) + bbox_weights += (bbox_weight[begin:end:stride],) + labels += (label[begin:end:stride],) + label_weights += (label_weight[begin:end:stride],) + + for i in range(self.num_layers): + bbox_target_using = () + bbox_weight_using = () + label_using = () + label_weight_using = () + + for j in range(self.batch_size): + bbox_target_using += (bbox_targets[i + (self.num_layers * j)],) + bbox_weight_using += (bbox_weights[i + (self.num_layers * j)],) + label_using += (labels[i + (self.num_layers * j)],) + label_weight_using += (label_weights[i + (self.num_layers * j)],) + + bbox_target_with_batchsize = self.concat(bbox_target_using) + bbox_weight_with_batchsize = self.concat(bbox_weight_using) + label_with_batchsize = self.concat(label_using) + label_weight_with_batchsize = self.concat(label_weight_using) + + # stop + bbox_target_ = F.stop_gradient(bbox_target_with_batchsize) + bbox_weight_ = F.stop_gradient(bbox_weight_with_batchsize) + label_ = F.stop_gradient(label_with_batchsize) + label_weight_ = F.stop_gradient(label_weight_with_batchsize) + + cls_score_i = self.cast(rpn_cls_score[i], self.ms_type) + reg_score_i = self.cast(rpn_bbox_pred[i], self.ms_type) + + loss_cls = self.loss_cls(cls_score_i, label_) + loss_cls_item = loss_cls * label_weight_ + loss_cls_item = self.sum_loss(loss_cls_item, (0,)) / self.num_expected_total + + loss_reg = self.loss_bbox(reg_score_i, bbox_target_) + bbox_weight_ = self.tile(self.reshape(bbox_weight_, (self.feature_anchor_shape[i], 1)), (1, 4)) + loss_reg = loss_reg * bbox_weight_ + loss_reg_item = self.sum_loss(loss_reg, (1,)) + loss_reg_item = self.sum_loss(loss_reg_item, (0,)) / self.num_expected_total + + loss_total = self.rpn_loss_cls_weight * loss_cls_item + self.rpn_loss_reg_weight * loss_reg_item + + loss += loss_total + loss_print += (loss_total, loss_cls_item, loss_reg_item) + clsloss += loss_cls_item + regloss += loss_reg_item + + output = (loss, rpn_cls_score_total, rpn_bbox_pred_total, clsloss, regloss, loss_print) + else: + output = (self.placeh1, rpn_cls_score_total, rpn_bbox_pred_total, self.placeh1, self.placeh1, self.placeh1) + + return output diff --git a/research/cv/rfcn/src/util.py b/research/cv/rfcn/src/util.py new file mode 100644 index 0000000000000000000000000000000000000000..cfda9b1ff979d3306a5679d21fdce552b07e88ab --- /dev/null +++ b/research/cv/rfcn/src/util.py @@ -0,0 +1,321 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""coco eval for rfcn""" + +import json +import os +import csv +import shutil +import numpy as np +from pycocotools.coco import COCO +from src.detecteval import DetectEval + +_init_value = np.array(0.0) +summary_init = { + 'Precision/mAP': _init_value, + 'Precision/mAP@.50IOU': _init_value, + 'Precision/mAP@.75IOU': _init_value, + 'Precision/mAP (small)': _init_value, + 'Precision/mAP (medium)': _init_value, + 'Precision/mAP (large)': _init_value, + 'Recall/AR@1': _init_value, + 'Recall/AR@10': _init_value, + 'Recall/AR@100': _init_value, + 'Recall/AR@100 (small)': _init_value, + 'Recall/AR@100 (medium)': _init_value, + 'Recall/AR@100 (large)': _init_value, +} + + +def write_list_to_csv(file_path, data_to_write, append=False): + if append: + open_mode = 'a' + else: + open_mode = 'w' + with open(file_path, open_mode) as csvfile: + writer = csv.writer(csvfile) + writer.writerow(data_to_write) + + +def coco_eval(config, result_files, result_types, coco, max_dets=(100, 300, 1000), single_result=False, + plot_detect_result=False): + """coco eval for rfcn""" + anns = json.load(open(result_files['bbox'])) + if not anns: + return summary_init + + if isinstance(coco, str): + coco = COCO(coco) + assert isinstance(coco, COCO) + + for res_type in result_types: + result_file = result_files[res_type] + assert result_file.endswith('.json') + + coco_dets = coco.loadRes(result_file) + gt_img_ids = coco.getImgIds() + det_img_ids = coco_dets.getImgIds() + iou_type = 'bbox' if res_type == 'proposal' else res_type + cocoEval = DetectEval(coco, coco_dets, iou_type) + if res_type == 'proposal': + cocoEval.params.useCats = 0 + cocoEval.params.maxDets = list(max_dets) + + tgt_ids = gt_img_ids if not single_result else det_img_ids + + if single_result: + res_dict = dict() + for id_i in tgt_ids: + cocoEval = DetectEval(coco, coco_dets, iou_type) + if res_type == 'proposal': + cocoEval.params.useCats = 0 + cocoEval.params.maxDets = list(max_dets) + + cocoEval.params.imgIds = [id_i] + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + res_dict.update({coco.imgs[id_i]['file_name']: cocoEval.stats[1]}) + + cocoEval = DetectEval(coco, coco_dets, iou_type) + if res_type == 'proposal': + cocoEval.params.useCats = 0 + cocoEval.params.maxDets = list(max_dets) + + cocoEval.params.imgIds = tgt_ids + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + + summary_metrics = { + 'Precision/mAP': cocoEval.stats[0], + 'Precision/mAP@.50IOU': cocoEval.stats[1], + 'Precision/mAP@.75IOU': cocoEval.stats[2], + 'Precision/mAP (small)': cocoEval.stats[3], + 'Precision/mAP (medium)': cocoEval.stats[4], + 'Precision/mAP (large)': cocoEval.stats[5], + 'Recall/AR@1': cocoEval.stats[6], + 'Recall/AR@10': cocoEval.stats[7], + 'Recall/AR@100': cocoEval.stats[8], + 'Recall/AR@100 (small)': cocoEval.stats[9], + 'Recall/AR@100 (medium)': cocoEval.stats[10], + 'Recall/AR@100 (large)': cocoEval.stats[11], + } + + print("summary_metrics: ") + print(summary_metrics) + + if plot_detect_result: + res = calcuate_pr_rc_f1(config, coco, coco_dets, tgt_ids, iou_type) + return res + + return summary_metrics + + +def calcuate_pr_rc_f1(config, coco, coco_dets, tgt_ids, iou_type): + """calcuate_pr_rc_f1""" + cocoEval = DetectEval(coco, coco_dets, iou_type) + cocoEval.params.imgIds = tgt_ids + cocoEval.evaluate() + cocoEval.accumulate() + cocoEval.summarize() + stats_all = cocoEval.stats + + eval_result_path = os.path.abspath("./eval_result") + if 
os.path.exists(eval_result_path): + shutil.rmtree(eval_result_path) + os.mkdir(eval_result_path) + + result_csv = os.path.join(eval_result_path, "statistics.csv") + eval_item = ["ap@0.5:0.95", "ap@0.5", "ap@0.75", "ar@0.5:0.95", "ar@0.5", "ar@0.75"] + write_list_to_csv(result_csv, eval_item, append=False) + eval_result = [round(stats_all[0], 3), round(stats_all[1], 3), round(stats_all[2], 3), round(stats_all[6], 3), + round(stats_all[7], 3), round(stats_all[8], 3)] + write_list_to_csv(result_csv, eval_result, append=True) + write_list_to_csv(result_csv, [], append=True) + # 1.2 plot_pr_curve + cocoEval.plot_pr_curve(eval_result_path) + + # 2 + E = DetectEval(coco, coco_dets, iou_type) + E.params.iouThrs = [0.5] + E.params.maxDets = [100] + E.params.areaRng = [[0 ** 2, 1e5 ** 2]] + E.evaluate() + # 2.1 plot hist_curve of every class's tp's confidence and fp's confidence + confidence_dict = E.compute_tp_fp_confidence() + E.plot_hist_curve(confidence_dict, eval_result_path) + + # 2.2 write best_threshold and p r to csv and plot + cat_pr_dict, cat_pr_dict_origin = E.compute_precison_recall_f1() + # E.write_best_confidence_threshold(cat_pr_dict, cat_pr_dict_origin, eval_result_path) + best_confidence_thres = E.write_best_confidence_threshold(cat_pr_dict, cat_pr_dict_origin, eval_result_path) + print("best_confidence_thres: ", best_confidence_thres) + E.plot_mc_curve(cat_pr_dict, eval_result_path) + + # 3 + # 3.1 compute every class's p r and save every class's p and r at iou = 0.5 + E = DetectEval(coco, coco_dets, iouType='bbox') + E.params.iouThrs = [0.5] + E.params.maxDets = [100] + E.params.areaRng = [[0 ** 2, 1e5 ** 2]] + E.evaluate() + E.accumulate() + result = E.evaluate_every_class() + print_info = ["class_name", "tp_num", "gt_num", "dt_num", "precision", "recall"] + write_list_to_csv(result_csv, print_info, append=True) + print("class_name", "tp_num", "gt_num", "dt_num", "precision", "recall") + for class_result in result: + print(class_result) + write_list_to_csv(result_csv, class_result, append=True) + + # 3.2 save ng / ok images + E.save_images(config, eval_result_path, 0.5) + + return stats_all[0] + + +def xyxy2xywh(bbox): + _bbox = bbox.tolist() + return [ + _bbox[0], + _bbox[1], + _bbox[2] - _bbox[0] + 1, + _bbox[3] - _bbox[1] + 1, + ] + + +def bbox2result_1image(bboxes, labels, num_classes): + """Convert detection results to a list of numpy arrays. 
+ + Args: + bboxes (Tensor): shape (n, 5) + labels (Tensor): shape (n, ) + num_classes (int): class number, including background class + + Returns: + list(ndarray): bbox results of each class + """ + if bboxes.shape[0] == 0: + result = [np.zeros((0, 5), dtype=np.float32) for i in range(num_classes - 1)] + else: + result = [bboxes[labels == i, :] for i in range(num_classes - 1)] + return result + + +def proposal2json(dataset, results): + """convert proposal to json mode""" + img_ids = dataset.getImgIds() + json_results = [] + dataset_len = dataset.get_dataset_size() * 2 + for idx in range(dataset_len): + img_id = img_ids[idx] + bboxes = results[idx] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = 1 + json_results.append(data) + return json_results + + +def det2json(dataset, results): + """convert det to json mode""" + cat_ids = dataset.getCatIds() + img_ids = dataset.getImgIds() + json_results = [] + dataset_len = len(img_ids) + for idx in range(dataset_len): + img_id = img_ids[idx] + + if idx == len(results): break + result = results[idx] + for label, result_label in enumerate(result): + bboxes = result_label + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = cat_ids[label] + json_results.append(data) + + return json_results + + +def segm2json(dataset, results): + """convert segm to json mode""" + bbox_json_results = [] + segm_json_results = [] + for idx in range(len(dataset)): + img_id = dataset.img_ids[idx] + det, seg = results[idx] + for label, det_label in enumerate(det): + # bbox results + bboxes = det_label + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['bbox'] = xyxy2xywh(bboxes[i]) + data['score'] = float(bboxes[i][4]) + data['category_id'] = dataset.cat_ids[label] + bbox_json_results.append(data) + + if len(seg) == 2: + segms = seg[0][label] + mask_score = seg[1][label] + else: + segms = seg[label] + mask_score = [bbox[4] for bbox in bboxes] + for i in range(bboxes.shape[0]): + data = dict() + data['image_id'] = img_id + data['score'] = float(mask_score[i]) + data['category_id'] = dataset.cat_ids[label] + segms[i]['counts'] = segms[i]['counts'].decode() + data['segmentation'] = segms[i] + segm_json_results.append(data) + return bbox_json_results, segm_json_results + + +def results2json(dataset, results, out_file): + """convert result convert to json mode""" + result_files = dict() + if isinstance(results[0], list): + json_results = det2json(dataset, results) + result_files['bbox'] = '{}.{}.json'.format(out_file, 'bbox') + result_files['proposal'] = '{}.{}.json'.format(out_file, 'bbox') + with open(result_files['bbox'], 'w') as fp: + json.dump(json_results, fp) + elif isinstance(results[0], tuple): + json_results = segm2json(dataset, results) + result_files['bbox'] = '{}.{}.json'.format(out_file, 'bbox') + result_files['proposal'] = '{}.{}.json'.format(out_file, 'bbox') + result_files['segm'] = '{}.{}.json'.format(out_file, 'segm') + with open(result_files['bbox'], 'w') as fp: + json.dump(json_results[0], fp) + with open(result_files['segm'], 'w') as fp: + json.dump(json_results[1], fp) + elif isinstance(results[0], np.ndarray): + json_results = proposal2json(dataset, results) + result_files['proposal'] = '{}.{}.json'.format(out_file, 'proposal') + with open(result_files['proposal'], 'w') 
as fp: + json.dump(json_results, fp) + else: + raise TypeError('invalid type of results') + + return result_files diff --git a/research/cv/rfcn/train.py b/research/cv/rfcn/train.py new file mode 100644 index 0000000000000000000000000000000000000000..332ea8e54728f678e081e8644533d924f36ee6b6 --- /dev/null +++ b/research/cv/rfcn/train.py @@ -0,0 +1,180 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""train RFCN and get checkpoint files.""" + +import os +import time +import numpy as np + +import mindspore.common.dtype as mstype +from mindspore import context, Tensor, Parameter +from mindspore.communication.management import init, get_rank, get_group_size +from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor +from mindspore.train import Model +from mindspore.context import ParallelMode +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.nn import SGD +from mindspore.common import set_seed + +from src.network_define import LossCallBack, WithLossCell, TrainOneStepCell, LossNet +from src.dataset import data_to_mindrecord_byte_image, create_rfcn_dataset +from src.lr_schedule import dynamic_lr +from src.model_utils.config import config +from src.model_utils.moxing_adapter import moxing_wrapper +from src.model_utils.device_adapter import get_device_id +from src.rfcn.rfcn_resnet import Rfcn_Resnet + +set_seed(1) +context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target, device_id=get_device_id()) + +context.set_context(reserve_class_name_in_scope=False) + +if config.device_target == "GPU": + context.set_context(enable_graph_kernel=True) +else: + raise RuntimeError("now, RFCN only support GPU.") + +if config.run_distribute: + init("nccl") + context.reset_auto_parallel_context() + rank = get_rank() + device_num = get_group_size() + context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL, + gradients_mean=True) +else: + rank = 0 + device_num = 1 + +def train_rfcn_(): + """ train_rfcn_ """ + print("Start create dataset!") + # It will generate mindrecord file in config.mindrecord_dir, + # and the file name is Rfcn_coco.mindrecord0, 1, ... file_num. + if config.dataset == "coco": + prefix = "Rfcn_coco.mindrecord" + else: + prefix = "Rfcn_other.mindrecord" + mindrecord_dir = config.mindrecord_dir + mindrecord_file = os.path.join(mindrecord_dir, prefix + "0") + print("CHECKING MINDRECORD FILES ...") + if rank == 0 and not os.path.exists(mindrecord_file): + if not os.path.isdir(mindrecord_dir): + os.makedirs(mindrecord_dir) + if config.dataset == "coco": + if os.path.isdir(config.coco_root): + if not os.path.exists(config.coco_root): + print("Please make sure config:coco_root is valid.") + raise ValueError(config.coco_root) + print("Create Mindrecord. 
It may take some time.")
+                data_to_mindrecord_byte_image(config, "coco", True, prefix)
+                print("Create Mindrecord Done, at {}".format(mindrecord_dir))
+            else:
+                print("coco_root does not exist.")
+        else:
+            if os.path.isdir(config.image_dir) and os.path.exists(config.anno_path):
+                if not os.path.exists(config.image_dir):
+                    print("Please make sure config:image_dir is valid.")
+                    raise ValueError(config.image_dir)
+                print("Create Mindrecord. It may take some time.")
+                data_to_mindrecord_byte_image(config, "other", True, prefix)
+                print("Create Mindrecord Done, at {}".format(mindrecord_dir))
+            else:
+                print("image_dir or anno_path does not exist.")
+
+    while not os.path.exists(mindrecord_file + ".db"):
+        time.sleep(5)
+
+    print("CHECKING MINDRECORD FILES DONE!")
+
+    # When creating MindDataset, use the first MindRecord file, such as Rfcn_coco.mindrecord0.
+    dataset = create_rfcn_dataset(config, mindrecord_file, batch_size=config.batch_size,
+                                  device_num=device_num, rank_id=rank,
+                                  num_parallel_workers=config.num_parallel_workers,
+                                  python_multiprocessing=config.python_multiprocessing)
+
+    dataset_size = dataset.get_dataset_size()
+    print("Create dataset done!")
+
+    return dataset_size, dataset
+
+def modelarts_pre_process():
+    config.save_checkpoint_path = config.output_path
+
+
+@moxing_wrapper(pre_process=modelarts_pre_process)
+def train_rfcn():
+    """ train_rfcn """
+    dataset_size, dataset = train_rfcn_()
+    net = Rfcn_Resnet(config=config)
+    net = net.set_train()
+
+    load_path = config.pre_trained
+    if load_path != "":
+        param_dict = load_checkpoint(load_path)
+
+        key_mapping = {'down_sample_layer.1.beta': 'bn_down_sample.beta',
+                       'down_sample_layer.1.gamma': 'bn_down_sample.gamma',
+                       'down_sample_layer.0.weight': 'conv_down_sample.weight',
+                       'down_sample_layer.1.moving_mean': 'bn_down_sample.moving_mean',
+                       'down_sample_layer.1.moving_variance': 'bn_down_sample.moving_variance',
+                       }
+        for oldkey in list(param_dict.keys()):
+            if not oldkey.startswith(('backbone', 'end_point', 'global_step', 'learning_rate', 'moments', 'momentum')):
+                data = param_dict.pop(oldkey)
+                newkey = 'backbone.' 
+ oldkey + param_dict[newkey] = data + oldkey = newkey + for k, v in key_mapping.items(): + if k in oldkey: + newkey = oldkey.replace(k, v) + param_dict[newkey] = param_dict.pop(oldkey) + break + for item in list(param_dict.keys()): + if not item.startswith('backbone'): + param_dict.pop(item) + + for key, value in param_dict.items(): + tensor = value.asnumpy().astype(np.float32) + param_dict[key] = Parameter(tensor, key) + load_param_into_net(net, param_dict) + + loss = LossNet() + lr = Tensor(dynamic_lr(config, dataset_size), mstype.float32) + + opt = SGD(params=net.trainable_params(), learning_rate=lr, momentum=config.momentum, + weight_decay=config.weight_decay, loss_scale=config.loss_scale) + net_with_loss = WithLossCell(net, loss) + if config.run_distribute: + net = TrainOneStepCell(net_with_loss, opt, sens=config.loss_scale, reduce_flag=True, + mean=True, degree=device_num) + else: + net = TrainOneStepCell(net_with_loss, opt, sens=config.loss_scale) + + time_cb = TimeMonitor(data_size=dataset_size) + loss_cb = LossCallBack(rank_id=rank) + cb = [time_cb, loss_cb] + if rank == 0 and config.save_checkpoint: + ckptconfig = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * dataset_size, + keep_checkpoint_max=config.keep_checkpoint_max) + save_checkpoint_path = os.path.join(config.save_checkpoint_path, "ckpt_" + str(rank) + "/") + ckpoint_cb = ModelCheckpoint(prefix='rfcn', directory=save_checkpoint_path, config=ckptconfig) + cb += [ckpoint_cb] + + model = Model(net) + model.train(config.epoch_size, dataset, callbacks=cb, dataset_sink_mode=False) + +if __name__ == '__main__': + train_rfcn()
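+
+# Usage sketch (illustrative, not an officially documented command line): assuming the
+# model_utils config parser accepts yaml keys as command-line overrides, a single-GPU run
+# could look like:
+#   python train.py --config_path=./default_config.yaml --coco_root=/path/to/cocodataset \
+#       --mindrecord_dir=/path/to/mindrecord --pre_trained=/path/to/converted_resnet101.ckpt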