diff --git a/research/cv/FCANet/README_CN.md b/research/cv/FCANet/README_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..edc4cabab03127344ba5c6e4cc0853690b289586
--- /dev/null
+++ b/research/cv/FCANet/README_CN.md
@@ -0,0 +1,226 @@
+# Contents
+
+- [Contents](#contents)
+- [FCANet Description](#fcanet-description)
+- [Model Architecture](#model-architecture)
+- [Dataset](#dataset)
+- [Environment Requirements](#environment-requirements)
+- [Script Description](#script-description)
+    - [Script Code](#script-code)
+    - [Script Parameters](#script-parameters)
+    - [Preparation](#preparation)
+    - [Training Process](#training-process)
+        - [Launch](#launch)
+        - [Result](#result)
+    - [Evaluation Process](#evaluation-process)
+        - [Launch](#launch-1)
+        - [Result](#result-1)
+- [Model Description](#model-description)
+    - [Model Performance](#model-performance)
+- [Description of Random Situation](#description-of-random-situation)
+- [ModelZoo](#modelzoo)
+
+<!-- /TOC -->
+
+# FCANet Description
+
+FCANet is an interactive segmentation network built around first-click attention. The user interactively clicks foreground and background points, the prediction is refined click by click, and a fine-grained segmentation result is finally obtained. (CVPR 2020)
+
+[Paper](https://openaccess.thecvf.com/content_CVPR_2020/papers/Lin_Interactive_Image_Segmentation_With_First_Click_Attention_CVPR_2020_paper.pdf): Zheng Lin, Zhao Zhang, Lin-Zhuo Chen, Ming-Ming Cheng, Shao-Ping Lu. Interactive Image Segmentation with First Click Attention. (CVPR 2020)
+
+# Model Architecture
+
+The overall network architecture of FCANet is described in the paper:
+
+[Link](https://openaccess.thecvf.com/content_CVPR_2020/papers/Lin_Interactive_Image_Segmentation_With_First_Click_Attention_CVPR_2020_paper.pdf)
+
+# Dataset
+
+Datasets used: preprocessed datasets in the standard interactive segmentation format (ISF)
+
+Augmented PASCAL [ [GoogleDrive](https://drive.google.com/file/d/1sQgd_H6m9TGRcPVFJYzGK6u77pKuPDls) | [BaiduYun](https://pan.baidu.com/s/1xshbtKxp1glLyoEmQZGBlg) pwd: **o8vi** ]
+
+GrabCut [ [GoogleDrive](https://drive.google.com/file/d/1CKzgFbk0guEBpewgpMUaWrM_-KSVSUyg/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1Sc3vcHrocYQr9PCvti1Heg) pwd: **2hi9** ]
+
+Berkeley [ [GoogleDrive](https://drive.google.com/file/d/16GD6Ko3IohX8OsSHvemKG8zqY07TIm_i/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/16kAidalC5UWy9payMvlTRA) pwd: **4w5g** ]
+
+We also provide scripts in `(./dataset/)` that convert the original datasets into this format.
+
+Augmented PASCAL [ original datasets [PASCAL VOC](http://host.robots.ox.ac.uk/pascal/VOC/voc2012/index.html) | [SBD](http://home.bharathh.info/pubs/codes/SBD/download.html) ]
+
+```shell
+python generate_dataset_pascal_sbd.py --src_pascal_path [source pascal path] --src_sbd_path [source sbd path]
+```
+
+GrabCut [ original dataset [GrabCut](https://github.com/saic-vul/fbrs_interactive_segmentation/releases/download/v1.0/GrabCut.zip) ]
+
+```shell
+python generate_dataset_grabcut.py --src_grabcut_path [source grabcut path]
+```
+
+Berkeley [ original dataset [Berkeley](https://github.com/saic-vul/fbrs_interactive_segmentation/releases/download/v1.0/Berkeley.zip) ]
+
+```shell
+python generate_dataset_berkeley.py --src_berkeley_path [source berkeley path]
+```
+
+# Environment Requirements
+
+- Hardware (Ascend)
+    - Prepare a hardware environment with Ascend processors.
+- Framework
+    - [MindSpore](https://www.mindspore.cn/install)
+- For more information, please check the resources below:
+    - [MindSpore Tutorials](https://www.mindspore.cn/tutorial/training/zh-CN/master/index.html)
+    - [MindSpore Python API](https://www.mindspore.cn/doc/api_python/zh-CN/master/index.html)
+- Third-party Python packages (requirements.txt)
+    - opencv-python
+    - scipy
+    - tqdm
+
+# Script Description
+
+## Script Code
+
+```text
+├── FCANet
+    ├── README_CN.md                             # model description
+    ├── dataset                                  # extract the downloaded datasets here
+    │   ├──PASCAL_SBD
+    │   ├──Berkeley
+    │   ├──GrabCut
+    │   ├──generate_dataset_pascal_sbd.py        # generate the standard augmented pascal dataset
+    │   ├──generate_dataset_berkeley.py          # generate the standard berkeley dataset
+    │   └──generate_dataset_grabcut.py           # generate the standard grabcut dataset
+    ├── scripts
+    │   ├──run_standalone_train.sh               # shell script for single-device training
+    │   └──run_eval.sh                           # shell script for evaluation
+    ├── src
+    │   ├──model                                 # model architecture
+    │   │   ├──fcanet.py                         # fcanet network
+    │   │   ├──res2net.py                        # res2net backbone network
+    │   │   └──res2net_pretrained_mindspore.ckpt # downloaded res2net pretrained model
+    │   ├──config.py                             # parameter configuration
+    │   ├──dataloader_cut.py                     # dataset loading
+    │   ├──helpers.py                            # helper functions
+    │   ├──my_custom_transforms.py               # data augmentation for training
+    │   └──trainer.py                            # trainer
+    ├── train.py                                 # training script
+    └── eval.py                                  # evaluation script
+```
+
+## Script Parameters
+
+The parameters used during model training and evaluation can be set in config.py:
+
+```python
+"dataset_path": "./dataset/",                                   # dataset location
+"backbone_pretrained": "./src/model/res2net_pretrained.ckpt",   # location of the pretrained res2net model
+"dataset_train": "PASCAL_SBD",                                  # training dataset
+"datasets_val": ["GrabCut", "Berkeley"],                        # validation datasets
+"epochs": 33,                                                   # number of training epochs
+"train_only_epochs": 32,                                        # number of epochs trained without validation
+"val_robot_interval": 1,                                        # validation interval (in epochs)
+"lr": 0.007,                                                    # initial learning rate
+"batch_size": 8,                                                # batch size
+"max_num": 0,                                                   # number of training images, 0 means all images
+"size": (384, 384),                                             # training image size
+"device": "Ascend",                                             # running device
+"num_workers": 4,                                               # number of data loading threads
+"itis_pro": 0.7,                                                # iterative training probability
+"max_point_num": 20,                                            # maximum number of clicks during evaluation
+"record_point_num": 5,                                          # number of clicks recorded during evaluation
+"pred_tsh": 0.5,                                                # threshold for binary segmentation
+"miou_target": [0.90, 0.90],                                    # target mIoU for each dataset in datasets_val during evaluation
+```
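+
+Because `config` is a plain Python dict, entries can also be overridden in code before building the trainer, following the same pattern `eval.py` uses to set `"resume"`. The snippet below is only a minimal illustrative sketch (the overridden values are assumptions, not recommended settings):
+
+```python
+# Illustrative sketch only: override a few config entries programmatically.
+from src.config import config
+from src.trainer import Trainer
+
+config["resume"] = "./snapshot/model-epoch-32.ckpt"   # checkpoint path, as saved by training
+config["datasets_val"] = ["GrabCut"]                  # assumed: validate on GrabCut only
+
+trainer = Trainer(config)
+# Same evaluation call as eval.py
+trainer.validation_robot(0, tsh=config["pred_tsh"], resize=config["size"][0])
+```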
+
+## Preparation
+
+- Download the datasets, create the folders, and place them in the corresponding locations, e.g. `(./dataset/)`. The path can be changed via the `"dataset_path"` entry in config.
+- Download the res2net101@imagenet pretrained model [ [GoogleDrive](https://drive.google.com/file/d/1xmbYJOiYvYCp1i_gmif0R8F4Nbl0YupX/view?usp=sharing) | [BaiduYun](https://pan.baidu.com/s/1E9a6PkZ7w_qnOa3iU3ragQ) pwd: **1t4n** ] and place it in the corresponding location `(./src/model/res2net_pretrained.ckpt)`. The path can be changed via the `"backbone_pretrained"` entry in config. The pretrained model can also be obtained by training with the [res2net code](https://gitee.com/mindspore/models/tree/master/research/cv/res2net).
+
+## Training Process
+
+### Launch
+
+You can use python or shell scripts for training.
+
+```shell
+# training example
+  python:
+      Ascend single-device training example: DEVICE_ID=[DEVICE_ID] python train.py
+
+  shell:
+      Ascend single-device training example: bash ./scripts/run_standalone_train.sh [DEVICE_ID]
+```
+
+### Result
+
+The ckpt files will be stored in the generated `./snapshot/` directory, and the training log will be written to `./train.log`. Part of the training log looks as follows:
+
+```shell
+Epoch [000]=> |-lr:0.0070000-|
+Training :
+Loss: 0.141: 100%|██████████████████████████████| 3229/3229 [37:54<00:00,  1.74it/s]
+```
+
+## Evaluation Process
+
+### Launch
+
+The training script performs one evaluation at the end. You can also evaluate with python or shell scripts. Place the trained model at `[PRETRAINED MODEL]`; if it was saved during training, the path is `./snapshot/model-epoch-32.ckpt`.
+
+```shell
+# evaluation example
+  python:
+      DEVICE_ID=[DEVICE_ID] python eval.py -r [PRETRAINED MODEL]
+
+  shell:
+      bash ./scripts/run_eval.sh [DEVICE_ID] [PRETRAINED MODEL]
+```
+
+### Result
+
+The evaluation results can be viewed in `./eval.log`.
+
+```shell
+Validation Robot: [GrabCut]
+(point_num_target_avg : 2.76)
+(pos_points_num_avg : 1.92) (neg_points_num_avg : 0.84)
+(point_num_miou_avg : [0.    0.785 0.877 0.916 0.933 0.946])
+
+Validation Robot: [Berkeley]
+(point_num_target_avg : 4.85)
+(pos_points_num_avg : 3.03) (neg_points_num_avg : 1.82)
+(point_num_miou_avg : [0.    0.745 0.86  0.895 0.912 0.922])
+```
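+
+In the log above, `point_num_miou_avg` is the mIoU curve over the number of clicks (index 0 means no clicks yet), and `point_num_target_avg` is the average number of clicks needed to reach the target mIoU (`miou_target`, 0.90 here), capped at `max_point_num`. As an illustration only (this function is not part of the repository, and the reported averages are computed per image before averaging, so they do not follow exactly from the averaged curve), a NoC-style value can be read off such a curve as follows:
+
+```python
+# Illustrative sketch: number-of-clicks (NoC) needed to reach a target mIoU on one curve.
+def noc_at_target(miou_per_click, target=0.90, max_clicks=20):
+    """Return the first click index whose mIoU reaches `target`, else `max_clicks`."""
+    for clicks, miou in enumerate(miou_per_click):
+        if miou >= target:
+            return clicks
+    return max_clicks
+
+# Sample curve from the GrabCut log above: 0.90 is first reached at click 3.
+print(noc_at_target([0.0, 0.785, 0.877, 0.916, 0.933, 0.946]))  # -> 3
+```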
+
+# Model Description
+
+## Model Performance
+
+| Parameters                   | Ascend                                 |
+| ---------------------------- | -------------------------------------- |
+| Model name                   | FCANet                                 |
+| Model version                | Res2Net version                        |
+| Running environment          | Ascend 910; CPU 2.60GHz, 192 cores; Memory 755G; OS Euler2.8 |
+| Upload date                  | 2021-12-18                             |
+| Dataset                      | Augmented PASCAL, GrabCut, Berkeley    |
+| Training parameters          | src/config.py                          |
+| Optimizer                    | SGD                                    |
+| Loss function                | CrossEntropyLoss                       |
+| Final loss                   | 0.082                                  |
+| Mean number of clicks (NoC)  | GrabCut (NoC@90=2.76), Berkeley (NoC@90=4.85) |
+| Total training time          | 24 h                                   |
+| Total evaluation time        | 10 min                                 |
+| Time per step                | 0.8 s/step                             |
+| Number of devices            | 1                                      |
+| Script                       | [Link](https://gitee.com/mindspore/models/tree/master/research/cv/FCANet) |
+
+# Description of Random Situation
+
+We set the random seed in the `trainer.py` script.
+
+# ModelZoo
+
+Please check the official [homepage](https://gitee.com/mindspore/models).
\ No newline at end of file
diff --git a/research/cv/FCANet/dataset/generate_dataset_berkeley.py b/research/cv/FCANet/dataset/generate_dataset_berkeley.py
new file mode 100644
index 0000000000000000000000000000000000000000..8ce0b5f926f10c2e4d101289a331c5cf86199951
--- /dev/null
+++ b/research/cv/FCANet/dataset/generate_dataset_berkeley.py
@@ -0,0 +1,80 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+""" generate the interactive segmentation format of berkeley dataset """
+import os
+import shutil
+import argparse
+from pathlib import Path
+import numpy as np
+from tqdm import tqdm
+from PIL import Image
+
+
+def save_image(path, img, if_pal=False):
+    """save image with palette"""
+    img = Image.fromarray(img)
+    if if_pal:
+        img.putpalette([0, 0, 0, 128, 0, 0] + [0, 0, 0] * 253 + [224, 224, 192])
+    img.save(path)
+
+
+def process(img_ids, split):
+    """ process dataset split"""
+    global berkeley_path, dataset_path
+    f_output = open(dataset_path / "list" / (split + ".txt"), "w")
+    for img_id in tqdm(img_ids):
+        shutil.copyfile(
+            berkeley_path / "images" / (img_id + ".jpg"),
+            dataset_path / "img" / (img_id + ".jpg"),
+        )
+        gt = np.array(Image.open(berkeley_path / "masks" / (img_id + ".png")))
+        gt_ins = (gt[:, :, 0] > 127).astype(np.uint8)
+        id_ins = img_id + "#001"
+        save_image(dataset_path / "gt" / (id_ins + ".png"), gt_ins, if_pal=True)
+        f_output.write(id_ins + "\n")
+    f_output.close()
+
+
+if __name__ == "__main__":
+    # parameters of datasets path
+    parser = argparse.ArgumentParser(description="Generate ISF Dataset Berkeley")
+    parser.add_argument(
+        "--dst_path",
+        type=str,
+        default="./",
+        help="destination path of generated dataset",
+    )
+    parser.add_argument(
+        "--src_berkeley_path",
+        type=str,
+        default="./path/to/source/berkeley/Berkeley",
+        help="source path of berkeley dataset",
+    )
+    args = parser.parse_args()
+
+    # create folder
+    dataset_path = Path(args.dst_path) / "Berkeley"
+    os.makedirs(dataset_path, exist_ok=True)
+    for folder in ["img", "gt", "list"]:
+        os.makedirs(dataset_path / folder, exist_ok=True)
+
+    # set original dataset path
+    berkeley_path = 
Path(args.src_berkeley_path) + + # get ids list + berkeley_val_ids = [t.stem for t in (berkeley_path / "masks").glob("*.png")] + + # process val split + process(berkeley_val_ids, "val") diff --git a/research/cv/FCANet/dataset/generate_dataset_grabcut.py b/research/cv/FCANet/dataset/generate_dataset_grabcut.py new file mode 100644 index 0000000000000000000000000000000000000000..0ad28d57eb9d459163708519511f0b9cde551fd3 --- /dev/null +++ b/research/cv/FCANet/dataset/generate_dataset_grabcut.py @@ -0,0 +1,80 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" generate the interactive segmentation format of grabcut dataset """ +import os +import argparse +from pathlib import Path +import numpy as np +from tqdm import tqdm +from PIL import Image + + +def save_image(path, img, if_pal=False): + """save image with palette""" + img = Image.fromarray(img) + if if_pal: + img.putpalette([0, 0, 0, 128, 0, 0] + [0, 0, 0] * 253 + [224, 224, 192]) + img.save(path) + + +def process(img_ids, split): + """ process dataset split""" + global grabcut_path, dataset_path + f_output = open(dataset_path / "list" / (split + ".txt"), "w") + for img_id in tqdm(img_ids): + img = np.array(Image.open(grabcut_path / "data_GT" / img_id.name)) + save_image(dataset_path / "img" / (img_id.stem + ".png"), img, if_pal=False) + gt = np.array(Image.open(grabcut_path / "boundary_GT" / (img_id.stem + ".bmp"))) + gt_ins = np.zeros_like(gt) + gt_ins[gt == 255] = 1 + gt_ins[gt == 128] = 255 + id_ins = img_id.stem + "#001" + save_image(dataset_path / "gt" / (id_ins + ".png"), gt_ins, if_pal=True) + f_output.write(id_ins + "\n") + + f_output.close() + + +if __name__ == "__main__": + # parameters of datasets path + parser = argparse.ArgumentParser(description="Generate ISF Dataset GrabCut") + parser.add_argument( + "--dst_path", + type=str, + default="./", + help="destination path of generated dataset", + ) + parser.add_argument( + "--src_grabcut_path", + type=str, + default="./path/to/source/grabcut/GrabCut", + help="source path of grabcut dataset", + ) + args = parser.parse_args() + + # create folder + dataset_path = Path(args.dst_path) / "GrabCut" + os.makedirs(dataset_path, exist_ok=True) + for folder in ["img", "gt", "list"]: + os.makedirs(dataset_path / folder, exist_ok=True) + + # set original dataset path + grabcut_path = Path(args.src_grabcut_path) + + # get ids list + grabcut_val_ids = [t for t in (grabcut_path / "data_GT").glob("*.*")] + + # process val split + process(grabcut_val_ids, "val") diff --git a/research/cv/FCANet/dataset/generate_dataset_pascal_sbd.py b/research/cv/FCANet/dataset/generate_dataset_pascal_sbd.py new file mode 100644 index 0000000000000000000000000000000000000000..d0add2dc76a09a9cae07f663599417266d879284 --- /dev/null +++ b/research/cv/FCANet/dataset/generate_dataset_pascal_sbd.py @@ -0,0 +1,126 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache 
License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" generate the interactive segmentation format of augmented pascal dataset """ +import os +import shutil +import argparse +from pathlib import Path +import numpy as np +from tqdm import tqdm +from PIL import Image +from scipy.io import loadmat + + +def save_image(path, img, if_pal=False): + """save image with palette""" + img = Image.fromarray(img) + if if_pal: + img.putpalette([0, 0, 0, 128, 0, 0] + [0, 0, 0] * 253 + [224, 224, 192]) + img.save(path) + + +def get_list_from_file(file): + """get list from txt file""" + with open(file) as f: + lines = f.read().splitlines() + return lines + + +def process(img_ids, split): + """ process dataset split""" + global pascal_all_ids, pascal_path, sbd_path, dataset_path + f_output = open(dataset_path / "list" / (split + ".txt"), "w") + for img_id in tqdm(img_ids): + if img_id in pascal_all_ids: + img_path = pascal_path / "JPEGImages" / (img_id + ".jpg") + gt_path = pascal_path / "SegmentationObject" / (img_id + ".png") + gt = np.array(Image.open(gt_path)) + else: + img_path = sbd_path / "img" / (img_id + ".jpg") + gt_path = sbd_path / "inst" / (img_id + ".mat") + gt = loadmat(gt_path)["GTinst"][0]["Segmentation"][0] + + shutil.copyfile(img_path, dataset_path / "img" / (img_id + ".jpg")) + + for i in set(gt.flat): + if i == 0 or i > 254: + continue + id_ins = img_id + "#" + str(i).zfill(3) + f_output.write(id_ins + "\n") + gt_ins = (gt == i).astype(np.uint8) + gt_ins[gt == 255] = 255 + save_image(dataset_path / "gt" / (id_ins + ".png"), gt_ins, if_pal=True) + + f_output.close() + + +if __name__ == "__main__": + # parameters of datasets path + parser = argparse.ArgumentParser(description="Generate ISF Dataset PASCAL_SBD") + parser.add_argument( + "--dst_path", + type=str, + default="./", + help="destination path of generated dataset", + ) + parser.add_argument( + "--src_pascal_path", + type=str, + default="./path/to/source/pascal/VOCdevkit/VOC2012", + help="source path of pascal dataset", + ) + parser.add_argument( + "--src_sbd_path", + type=str, + default="./path/to/source/sbd/benchmark_RELEASE/dataset", + help="source path of sbd dataset", + ) + args = parser.parse_args() + + # create folder + dataset_path = Path(args.dst_path) / "PASCAL_SBD" + os.makedirs(dataset_path, exist_ok=True) + for folder in ["img", "gt", "list"]: + os.makedirs(dataset_path / folder, exist_ok=True) + + # set original datasets path + pascal_path = Path(args.src_pascal_path) + sbd_path = Path(args.src_sbd_path) + + # get ids list + pascal_train_ids = get_list_from_file( + pascal_path / "ImageSets" / "Segmentation" / "train.txt" + ) + pascal_val_ids = get_list_from_file( + pascal_path / "ImageSets" / "Segmentation" / "val.txt" + ) + pascal_all_ids = pascal_train_ids + pascal_val_ids + sbd_train_ids = get_list_from_file(sbd_path / "train.txt") + sbd_val_ids = get_list_from_file(sbd_path / "val.txt") + sbd_all_ids = sbd_train_ids + sbd_val_ids + pasbd_train_ids = list( + ( + 
set(pascal_train_ids) + | set(pascal_val_ids) + | set(sbd_train_ids) + | set(sbd_val_ids) + ) + - set(pascal_val_ids) + ) + pasbd_val_ids = list(pascal_val_ids) + + # process two splits + process(pasbd_train_ids, "train") + process(pasbd_val_ids, "val") diff --git a/research/cv/FCANet/eval.py b/research/cv/FCANet/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..9ae064c29d9f981a6e45e9fb22c7b4d13026bdc9 --- /dev/null +++ b/research/cv/FCANet/eval.py @@ -0,0 +1,34 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" eval fcanet """ +import argparse +from src.config import config +from src.trainer import Trainer + +if __name__ == "__main__": + # set resume path + parser = argparse.ArgumentParser(description="Argparse for FCANet-Eval") + parser.add_argument("-r", "--resume", type=str, default="./fcanet_pretrained.pth") + args = parser.parse_args() + + # set config + p = config + p["resume"] = args.resume + + # set trainer + mine = Trainer(p) + + # eval + mine.validation_robot(0, tsh=p["pred_tsh"], resize=p["size"][0]) diff --git a/research/cv/FCANet/requirements.txt b/research/cv/FCANet/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..97728e436f3ae598813b5e739d25ae66a4a2b4e6 Binary files /dev/null and b/research/cv/FCANet/requirements.txt differ diff --git a/research/cv/FCANet/scripts/run_eval.sh b/research/cv/FCANet/scripts/run_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..a69c6b8f0e2be6672375b860570356603bacfc20 --- /dev/null +++ b/research/cv/FCANet/scripts/run_eval.sh @@ -0,0 +1,23 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +if [ -d "eval" ]; +then + rm -rf ./eval +fi +mkdir ./eval +cd ./eval || exit +DEVICE_ID=$1 python ../eval.py -r $2 > eval.log 2>&1 & +cd .. 
\ No newline at end of file diff --git a/research/cv/FCANet/scripts/run_standalone_train.sh b/research/cv/FCANet/scripts/run_standalone_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..7e500db1bd2d8fff80cbd7fa8698bfbdd3aa812b --- /dev/null +++ b/research/cv/FCANet/scripts/run_standalone_train.sh @@ -0,0 +1,24 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ -d "train" ]; +then + rm -rf ./train +fi +mkdir ./train +cd ./train || exit +DEVICE_ID=$1 python ../train.py > train.log 2>&1 & +cd .. \ No newline at end of file diff --git a/research/cv/FCANet/src/config.py b/research/cv/FCANet/src/config.py new file mode 100644 index 0000000000000000000000000000000000000000..8797a2d08d465895d23d8dec45a8839875d5a32f --- /dev/null +++ b/research/cv/FCANet/src/config.py @@ -0,0 +1,37 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +network config setting, will be used in train.py and eval.py +""" +config = { + "dataset_path": "./dataset/", + "backbone_pretrained": "./src/model/res2net_pretrained.ckpt", + "dataset_train": "PASCAL_SBD", + "datasets_val": ["GrabCut", "Berkeley"], + "epochs": 33, + "train_only_epochs": 32, + "val_robot_interval": 1, + "lr": 0.007, + "batch_size": 8, + "max_num": 0, + "size": (384, 384), + "device": "Ascend", + "num_workers": 4, + "itis_pro": 0.7, + "max_point_num": 20, + "record_point_num": 5, + "pred_tsh": 0.5, + "miou_target": [0.90, 0.90], +} diff --git a/research/cv/FCANet/src/dataloader_cut.py b/research/cv/FCANet/src/dataloader_cut.py new file mode 100644 index 0000000000000000000000000000000000000000..4caebc6be041de34bc62fd069529106313c58047 --- /dev/null +++ b/research/cv/FCANet/src/dataloader_cut.py @@ -0,0 +1,132 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Dataset loader, will be used in trainer.py +""" +import random +from pathlib import Path +import numpy as np +from PIL import Image + +class GeneralCutDataset(): + """ + Dataset loader + """ + def __init__( + self, + dataset_path, + datasets, + list_file="train.txt", + transform=None, + max_num=0, + batch_size=0, + suffix=None, + ): + super().__init__() + if not isinstance(datasets, list): + datasets = [datasets] + data_lists = [ + str(Path(dataset_path) / dataset / "list" / list_file) + for dataset in datasets + ] + # load ids + self.imgs_list, self.gts_list = [], [] + for data_list in data_lists: + with open(data_list) as f: + lines = f.read().splitlines() + if data_list.split("/")[-3] == "PASCAL_SBD": + lines = lines[:] + + if suffix is None: + img_suffix = ( + (Path(data_list.split("list")[0]) / "img") + .glob("{}.*".format(lines[0].split("#")[0])) + .__next__() + .suffix + ) + gt_suffix = ( + (Path(data_list.split("list")[0]) / "gt") + .glob("{}.*".format(lines[0])) + .__next__() + .suffix + ) + suffix_tmp = [img_suffix, gt_suffix] + else: + suffix_tmp = suffix + + for line in lines: + self.imgs_list.append( + data_list.split("list")[0] + + "img/" + + line.split("#")[0] + + suffix_tmp[0] + ) + self.gts_list.append( + data_list.split("list")[0] + "gt/" + line + suffix_tmp[1] + ) + + # set actual sample number, 0 means all + if max_num != 0 and len(self.imgs_list) > abs(max_num): + indices = ( + random.sample(range(len(self.imgs_list)), max_num) + if max_num > 0 + else range(abs(max_num)) + ) + self.imgs_list = [self.imgs_list[i] for i in indices] + self.gts_list = [self.gts_list[i] for i in indices] + + # set actual sample number according to batch size, 0 means no change, positive number means cutoff, positive number means completion + if batch_size > 0: + actual_num = (len(self.imgs_list) // batch_size) * batch_size + self.imgs_list = self.imgs_list[:actual_num] + self.gts_list = self.gts_list[:actual_num] + elif batch_size < 0: + if len(self.imgs_list) % abs(batch_size) != 0: + actual_num = ((len(self.imgs_list) // abs(batch_size)) + 1) * abs( + batch_size + ) + add_num = actual_num - len(self.imgs_list) + for add_i in range(add_num): + self.imgs_list.append(self.imgs_list[add_i]) + self.gts_list.append(self.gts_list[add_i]) + + self.ids_list = [t.split("/")[-1].split(".")[0] for t in self.gts_list] + + self.transform = transform + + def __len__(self): + return len(self.imgs_list) + + def __getitem__(self, index): + return ( + self.transform(self.get_sample(index)) + if self.transform is not None + else self.get_sample(index) + ) + + def get_sample(self, index): + """generate samples""" + img, gt = ( + np.array(Image.open(self.imgs_list[index])), + np.array(Image.open(self.gts_list[index])) + ) + gt = (gt == 1).astype(np.uint8) * 255 + sample = {"img": img, "gt": gt} + sample["meta"] = {"id": str(Path(self.gts_list[index]).stem), "id_num": index} + sample["meta"]["source_size"] = np.array(gt.shape[::-1]) + sample["meta"]["img_path"] = self.imgs_list[index] + sample["meta"]["gt_path"] = self.gts_list[index] + + return sample diff --git a/research/cv/FCANet/src/helpers.py b/research/cv/FCANet/src/helpers.py new file mode 100644 index 0000000000000000000000000000000000000000..934b80d84426f4c9378ddc839886eddd9ddf898d --- /dev/null +++ b/research/cv/FCANet/src/helpers.py @@ -0,0 +1,195 @@ +# 
Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Useful functions +""" +import random +import numpy as np +from scipy.ndimage.morphology import distance_transform_edt + +random.seed(10) + +########################################[ Scheduler ]######################################## + + +def poly_lr(epoch, epoch_max=30, lr=1e-4, power=0.9, cutoff_epoch=29): + return lr * (1 - (1.0 * min(epoch, cutoff_epoch) / epoch_max)) ** power + + +class PolyLR: + """polynomial learning rate scheduler""" + + def __init__(self, epoch_max=30, base_lr=1e-4, power=0.9, cutoff_epoch=29): + self.epoch, self.epoch_max, self.base_lr, self.power, self.cutoff_epoch = ( + 0, + epoch_max, + base_lr, + power, + cutoff_epoch, + ) + + def get_lr(self): + return ( + self.base_lr + * (1 - (1.0 * min(self.epoch, self.cutoff_epoch) / self.epoch_max)) + ** self.power + ) + + def step(self): + self.epoch = self.epoch + 1 + + +########################################[ General ]######################################## + + +def get_points_mask(size, points): + """ generate point mask from point list """ + mask = np.zeros(size[::-1]).astype(np.uint8) + if list(points): + points = np.array(points) + mask[points[:, 1], points[:, 0]] = 1 + return mask + + +def get_points_list(mask): + """ generate point list from point mask """ + pts_y, pts_x = np.where(mask == 1) + pts_xy = np.concatenate((pts_x[:, np.newaxis], pts_y[:, np.newaxis]), axis=1) + return pts_xy.tolist() + + +########################################[ Robot Strategy ]######################################## + + +def get_anno_point(pred, gt, anno_points): + """ get next click for robot user""" + fn_map, fp_map = ( + (gt == 1) & (pred == 0), + (gt == 0) & (pred == 1), + ) + + fn_map = np.pad(fn_map, ((1, 1), (1, 1)), "constant") + fndist_map = distance_transform_edt(fn_map) + fndist_map = fndist_map[1:-1, 1:-1] + + fp_map = np.pad(fp_map, ((1, 1), (1, 1)), "constant") + fpdist_map = distance_transform_edt(fp_map) + fpdist_map = fpdist_map[1:-1, 1:-1] + + if isinstance(anno_points, list): + for pt in anno_points: + fndist_map[pt[1], pt[0]] = fpdist_map[pt[1], pt[0]] = 0 + else: + fndist_map[anno_points == 1] = 0 + fpdist_map[anno_points == 1] = 0 + + if np.max(fndist_map) > np.max(fpdist_map): + usr_map, if_pos = fndist_map, True + else: + usr_map, if_pos = fpdist_map, False + + [y_mlist, x_mlist] = np.where(usr_map == np.max(usr_map)) + pt_new = (x_mlist[0], y_mlist[0]) + return pt_new, if_pos + + +########################################[ Train Sample Strategy ]######################################## + + +def get_pos_points_walk(gt, pos_point_num, step=0.2, margin=0.2): + """ sample random positive clicks""" + if pos_point_num == 0: + return [] + + pos_points = [] + choice_map_margin = (gt == 1).astype(np.int64) + choice_map_margin = np.pad(choice_map_margin, ((1, 1), (1, 1)), "constant") + dist_map_margin = 
distance_transform_edt(choice_map_margin)[1:-1, 1:-1] + + if isinstance(margin, list): + margin = random.choice(margin) + + if 0 < margin < 1.0: + margin = int(dist_map_margin.max() * margin) + + choice_map_margin = dist_map_margin > margin + + choice_map_step = np.ones_like(gt).astype(np.int64) + choice_map_step = np.pad(choice_map_step, ((1, 1), (1, 1)), "constant") + + if isinstance(step, list): + step = random.choice(step) + + if 0 < step < 1.0: + step = int(np.sqrt((gt == 1).sum() / np.pi) * 2 * step) + + for _ in range(pos_point_num): + dist_map_step = distance_transform_edt(choice_map_step)[1:-1, 1:-1] + pts_y, pts_x = np.where((choice_map_margin) & (dist_map_step > step)) + pts_xy = np.concatenate((pts_x[:, np.newaxis], pts_y[:, np.newaxis]), axis=1) + if not list(pts_xy): + break + pt_new = tuple(pts_xy[random.randint(0, len(pts_xy) - 1), :]) + pos_points.append(pt_new) + choice_map_step[pt_new[1] + 1, pt_new[0] + 1] = 0 + + return pos_points + + +def get_neg_points_walk(gt, neg_point_num, margin_min=0.06, margin_max=0.48, step=0.2): + """ sample random negative clicks""" + if neg_point_num == 0: + return [] + + neg_points = [] + + if isinstance(margin_min, list): + margin_min = random.choice(margin_min) + if isinstance(margin_max, list): + margin_max = random.choice(margin_max) + + if (0 < margin_min < 1.0) and (0 < margin_max < 1.0): + fg = (gt == 1).astype(np.int64) + fg = np.pad(fg, ((1, 1), (1, 1)), "constant") + dist_fg = distance_transform_edt(fg)[1:-1, 1:-1] + margin_max = min(max(int(dist_fg.max() * margin_min), 3), 10) * ( + margin_max / margin_min + ) + margin_min = min(max(int(dist_fg.max() * margin_min), 3), 10) + + choice_map_margin = (gt != 1).astype(np.int64) + dist_map_margin = distance_transform_edt(choice_map_margin) + choice_map_margin = (dist_map_margin > margin_min) & (dist_map_margin < margin_max) + + choice_map_step = np.ones_like(gt).astype(np.int64) + choice_map_step = np.pad(choice_map_step, ((1, 1), (1, 1)), "constant") + + if isinstance(step, list): + step = random.choice(step) + + if 0 < step < 1.0: + step = int(np.sqrt((gt == 1).sum() / np.pi) * 2 * step) + + for _ in range(neg_point_num): + dist_map_step = distance_transform_edt(choice_map_step)[1:-1, 1:-1] + pts_y, pts_x = np.where((choice_map_margin) & (dist_map_step > step)) + pts_xy = np.concatenate((pts_x[:, np.newaxis], pts_y[:, np.newaxis]), axis=1) + if not list(pts_xy): + break + pt_new = tuple(pts_xy[random.randint(0, len(pts_xy) - 1), :]) + neg_points.append(pt_new) + choice_map_step[pt_new[1] + 1, pt_new[0] + 1] = 0 + + return neg_points diff --git a/research/cv/FCANet/src/model/__init__.py b/research/cv/FCANet/src/model/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/research/cv/FCANet/src/model/fcanet.py b/research/cv/FCANet/src/model/fcanet.py new file mode 100644 index 0000000000000000000000000000000000000000..a5bb72ba44baabb0b4a93591eb7a2132b8258dd7 --- /dev/null +++ b/research/cv/FCANet/src/model/fcanet.py @@ -0,0 +1,451 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +fcanet network +""" +import numpy as np +import mindspore.nn as nn +from mindspore.ops import ReduceMean +from mindspore.context import ParallelMode +from mindspore.ops import operations as P +from mindspore.ops import composite as C +from mindspore.ops import functional as F +from mindspore.parallel._auto_parallel_context import auto_parallel_context +from mindspore import Parameter +from src.model.res2net import res2net101 + +ResizeFunc = P.ResizeBilinear + + +#######################################[ FCANet ]####################################### + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=1, stride=stride, weight_init="he_normal" + ) + + +def conv3x3(in_planes, out_planes, stride=1, dilation=1, padding=1): + """3x3 convolution""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + pad_mode="same", + padding=0, + dilation=dilation, + weight_init="he_normal", + ) + + +class ASPPConv(nn.Cell): + """ASPP convolution""" + + def __init__( + self, in_channels, out_channels, atrous_rate=1, use_batch_statistics=True + ): + super(ASPPConv, self).__init__() + if atrous_rate == 1: + conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size=1, + has_bias=False, + weight_init="he_normal", + ) + else: + conv = nn.Conv2d( + in_channels, + out_channels, + kernel_size=3, + pad_mode="pad", + padding=atrous_rate, + dilation=atrous_rate, + weight_init="he_normal", + ) + + bn = nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics) + relu = nn.ReLU() + self.aspp_conv = nn.SequentialCell([conv, bn, relu]) + + def construct(self, x): + out = self.aspp_conv(x) + return out + + +class ASPPPooling(nn.Cell): + """ASPP pooling""" + + def __init__(self, in_channels, out_channels, use_batch_statistics=True): + super(ASPPPooling, self).__init__() + self.conv = nn.SequentialCell( + [ + nn.Conv2d( + in_channels, out_channels, kernel_size=1, weight_init="he_normal" + ), + nn.BatchNorm2d(out_channels, use_batch_statistics=use_batch_statistics), + nn.ReLU(), + ] + ) + self.shape = P.Shape() + + def construct(self, x): + out = nn.AvgPool2d(x.shape[2:])(x) + out = self.conv(out) + out = ResizeFunc(x.shape[2:], True)(out) + return out + + +class ASPP(nn.Cell): + """ASPP module""" + + def __init__( + self, + atrous_rates, + in_channels=2048, + out_channels=256, + use_batch_statistics=True, + ): + super(ASPP, self).__init__() + self.aspp1 = ASPPConv( + in_channels, + out_channels, + atrous_rates[0], + use_batch_statistics=use_batch_statistics, + ) + self.aspp2 = ASPPConv( + in_channels, + out_channels, + atrous_rates[1], + use_batch_statistics=use_batch_statistics, + ) + self.aspp3 = ASPPConv( + in_channels, + out_channels, + atrous_rates[2], + use_batch_statistics=use_batch_statistics, + ) + self.aspp4 = ASPPConv( + in_channels, + out_channels, + atrous_rates[3], + use_batch_statistics=use_batch_statistics, + ) + self.aspp_pooling = ASPPPooling(in_channels, out_channels) + self.conv1 = 
nn.Conv2d( + out_channels * (len(atrous_rates) + 1), + out_channels, + kernel_size=1, + weight_init="he_normal", + ) + self.bn1 = nn.BatchNorm2d( + out_channels, use_batch_statistics=use_batch_statistics + ) + self.relu = nn.ReLU() + self.concat = P.Concat(axis=1) + + def construct(self, x): + """ASPP construct""" + x1 = self.aspp1(x) + x2 = self.aspp2(x) + x3 = self.aspp3(x) + x4 = self.aspp4(x) + x5 = self.aspp_pooling(x) + x = self.concat((x1, x2)) + x = self.concat((x, x3)) + x = self.concat((x, x4)) + x = self.concat((x, x5)) + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + return x + + +class Decoder(nn.Cell): + """decoder module""" + + def __init__(self, in_ch, side_ch, side_ch_reduce, out_ch, use_batch_statistics): + super(Decoder, self).__init__() + self.side_conv = conv1x1(side_ch, side_ch_reduce) + self.side_bn = nn.BatchNorm2d( + side_ch_reduce, use_batch_statistics=use_batch_statistics + ) + self.merge_conv1 = conv3x3(in_ch + side_ch_reduce, out_ch) + self.merge_bn1 = nn.BatchNorm2d( + out_ch, use_batch_statistics=use_batch_statistics + ) + self.merge_conv2 = conv3x3(out_ch, out_ch) + self.merge_bn2 = nn.BatchNorm2d( + out_ch, use_batch_statistics=use_batch_statistics + ) + self.relu = nn.ReLU() + self.shape = P.Shape() + self.concat = P.Concat(axis=1) + + def construct(self, x, side): + """Decoder construct""" + side = self.side_conv(side) + side = self.side_bn(side) + side = self.relu(side) + x = ResizeFunc(side.shape[2:], True)(x) + x = self.concat((x, side)) + x = self.merge_conv1(x) + x = self.merge_bn1(x) + x = self.relu(x) + x = self.merge_conv2(x) + x = self.merge_bn2(x) + x = self.relu(x) + return x + + +class PredDecoder(nn.Cell): + """predict module""" + + def __init__(self, in_ch, use_batch_statistics): + super(PredDecoder, self).__init__() + self.conv1 = conv3x3(in_ch, in_ch // 2) + self.bn1 = nn.BatchNorm2d(in_ch // 2, use_batch_statistics=use_batch_statistics) + self.conv2 = conv3x3(in_ch // 2, in_ch // 2) + self.bn2 = nn.BatchNorm2d(in_ch // 2, use_batch_statistics=use_batch_statistics) + self.conv3 = conv1x1(in_ch // 2, 1) + self.relu = nn.ReLU() + + def construct(self, x): + """predict construct""" + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.bn2(x) + x = self.relu(x) + x = self.conv3(x) + return x + + +class FcaModule(nn.Cell): + """first click attention module""" + + def __init__(self, in_ch, use_batch_statistics): + super(FcaModule, self).__init__() + self.conv1 = conv3x3(in_ch, 256, stride=2) + self.bn1 = nn.BatchNorm2d(256, use_batch_statistics=use_batch_statistics) + self.conv2 = conv3x3(256, 256) + self.bn2 = nn.BatchNorm2d(256, use_batch_statistics=use_batch_statistics) + self.conv3 = conv3x3(256, 256) + self.bn3 = nn.BatchNorm2d(256, use_batch_statistics=use_batch_statistics) + self.conv4 = conv3x3(256, 512, stride=2) + self.bn4 = nn.BatchNorm2d(512, use_batch_statistics=use_batch_statistics) + self.conv5 = conv3x3(512, 512) + self.bn5 = nn.BatchNorm2d(512, use_batch_statistics=use_batch_statistics) + self.conv6 = conv3x3(512, 512) + self.bn6 = nn.BatchNorm2d(512, use_batch_statistics=use_batch_statistics) + self.relu = nn.ReLU() + + def construct(self, x): + """first click attention module construct""" + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.conv2(x) + x = self.bn2(x) + x = self.relu(x) + x = self.conv3(x) + x = self.bn3(x) + x = self.relu(x) + x = self.conv4(x) + x = self.bn4(x) + x = self.relu(x) + x = self.conv5(x) + x = self.bn5(x) + x = self.relu(x) + x = 
self.conv6(x) + x = self.bn6(x) + x = self.relu(x) + return x + + +def get_mask_gauss(mask_dist_src, sigma): + """generate gauss mask from distance mask""" + return P.Exp()(-2.772588722 * (mask_dist_src ** 2) / (sigma ** 2)) + + +class FCANet(nn.Cell): + """ main network""" + + def __init__(self, size=512, backbone_pretrained=None): + super(FCANet, self).__init__() + use_batch_statistics = None + resnet = res2net101(input_channels=5) + if backbone_pretrained is not None: + resnet.load_pretrained_model(backbone_pretrained) + self.resnet = resnet + + self.aspp = ASPP( + [max(int(i * size / 512 + 0.5), 1) for i in [1, 6, 12, 18]], + 2048 + 512, + 256, + use_batch_statistics=use_batch_statistics, + ) + self.decoder = Decoder( + in_ch=256, + side_ch=256, + side_ch_reduce=48, + out_ch=256, + use_batch_statistics=use_batch_statistics, + ) + self.pred_decoder = PredDecoder( + in_ch=256, use_batch_statistics=use_batch_statistics + ) + self.first_conv = FcaModule(256 + 1, use_batch_statistics=use_batch_statistics) + self.first_pred_decoder = PredDecoder( + in_ch=512, use_batch_statistics=use_batch_statistics + ) + self.concat = P.Concat(axis=1) + self.shape = P.Shape() + + def construct(self, img, pos_mask_dist_src, neg_mask_dist_src, pos_mask_dist_first): + """ main network construct""" + img_with_anno = self.concat((img, get_mask_gauss(pos_mask_dist_src, 10))) + img_with_anno = self.concat( + (img_with_anno, get_mask_gauss(neg_mask_dist_src, 10)) + ) + l1, _, _, l4 = self.resnet(img_with_anno) + + first_map = ResizeFunc(l1.shape[2:], True)( + get_mask_gauss(pos_mask_dist_first, 30) + ) + l1_first = self.concat((l1, first_map)) + l1_first = self.first_conv(l1_first) + result_first = self.first_pred_decoder(l1_first) + result_first = ResizeFunc(img.shape[2:], True)(result_first) + + l4 = self.concat((l1_first, l4)) + x = self.aspp(l4) + x = self.decoder(x, l1) + x = self.pred_decoder(x) + x = ResizeFunc(img.shape[2:], True)(x) + return [x, result_first] + + +#######################################[ FCANet Training Module]####################################### + + +def _get_parallel_mode(): + """Get parallel mode.""" + return auto_parallel_context().get_parallel_mode() + + +class MyWithLossCell(nn.Cell): + """ network with loss""" + + def __init__(self, backbone, loss_fn, batch_size=8, size=384): + super(MyWithLossCell, self).__init__(auto_prefix=False) + self._backbone = backbone + self._loss_fn = loss_fn + self.minimum = P.Minimum() + self.maximum = P.Maximum() + self.assign = P.Assign() + self.out_tmp = Parameter( + np.ones([batch_size, 1, size, size], dtype=np.float32), + name="out", + requires_grad=False, + ) + + def construct( + self, + img, + pos_mask_dist_src, + neg_mask_dist_src, + pos_mask_dist_first, + gt, + click_loss_weight, + first_loss_weight, + ): + """ MyWithLossCell construct""" + out = self._backbone( + img, pos_mask_dist_src, neg_mask_dist_src, pos_mask_dist_first + ) + loss = ReduceMean(False)( + self._loss_fn(out[0], gt) * click_loss_weight + ) + ReduceMean(False)(self._loss_fn(out[1], gt) * first_loss_weight) + return F.depend(loss, self.assign(self.out_tmp, out[0])) + + +class MyTrainOneStepCell(nn.Cell): + """ cell for training one step """ + + def __init__(self, network_with_loss, network, criterion, optimizer, sens=1.0): + super(MyTrainOneStepCell, self).__init__(auto_prefix=False) + self.network_with_loss = network_with_loss + self.network = network + self.criterion = criterion + self.network.set_grad() + self.network.add_flags(defer_inline=True) + self.weights = 
optimizer.parameters + self.optimizer = optimizer + self.grad = C.GradOperation(get_by_list=True, sens_param=True) + self.sens = sens + self.reducer_flag = False + self.grad_reducer = None + parallel_mode = _get_parallel_mode() + if parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL): + self.reducer_flag = True + if self.reducer_flag: + mean = _get_mirror_mean() + degree = _get_device_num() + self.grad_reducer = DistributedGradReducer( + optimizer.parameters, mean, degree + ) + self.minimum = P.Minimum() + self.maximum = P.Maximum() + + def construct( + self, + img, + pos_mask_dist_src, + neg_mask_dist_src, + pos_mask_dist_first, + gt, + click_loss_weight, + first_loss_weight, + ): + """ MyTrainOneStepCell construct""" + weights = self.weights + loss = self.network_with_loss( + img, + pos_mask_dist_src, + neg_mask_dist_src, + pos_mask_dist_first, + gt, + click_loss_weight, + first_loss_weight, + ) + sens = P.Fill()(P.DType()(loss), P.Shape()(loss), self.sens) + grads = self.grad(self.network_with_loss, weights)( + img, + pos_mask_dist_src, + neg_mask_dist_src, + pos_mask_dist_first, + gt, + click_loss_weight, + first_loss_weight, + sens, + ) + if self.reducer_flag: + grads = self.grad_reducer(grads) + return F.depend(loss, self.optimizer(grads)) diff --git a/research/cv/FCANet/src/model/res2net.py b/research/cv/FCANet/src/model/res2net.py new file mode 100644 index 0000000000000000000000000000000000000000..a587756545adfe7555ed51956d85c8c9abbd319b --- /dev/null +++ b/research/cv/FCANet/src/model/res2net.py @@ -0,0 +1,385 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Res2Net Backbone""" +import math +import mindspore.nn as nn +from mindspore import Tensor +from mindspore.ops import operations as P +from mindspore.common.parameter import Parameter +from mindspore.train.serialization import load_checkpoint, load_param_into_net +import mindspore.common.dtype as mstype + + +def conv1x1(in_planes, out_planes, stride=1): + """1x1 convolution""" + return nn.Conv2d( + in_planes, out_planes, kernel_size=1, stride=stride, weight_init="he_normal" + ) + + +def conv3x3(in_planes, out_planes, stride=1, dilation=1, padding=1): + """3x3 convolution""" + return nn.Conv2d( + in_planes, + out_planes, + kernel_size=3, + stride=stride, + pad_mode="same", + padding=0, + dilation=dilation, + weight_init="he_normal", + ) + + +class Resnet(nn.Cell): + """ official resnet""" + + def __init__( + self, + block, + block_num, + output_stride=16, + use_batch_statistics=True, + input_channels=3, + ): + super(Resnet, self).__init__() + self.inplanes = 64 + self.conv1_0 = nn.Conv2d( + input_channels, + 32, + 3, + stride=2, + pad_mode="same", + padding=0, + weight_init="he_normal", + ) + self.bn1_0 = nn.BatchNorm2d( + 32, eps=1e-4, use_batch_statistics=use_batch_statistics + ) + + self.conv1_1 = nn.Conv2d( + 32, 32, 3, stride=1, pad_mode="same", padding=0, weight_init="he_normal" + ) + + self.bn1_1 = nn.BatchNorm2d( + 32, eps=1e-4, use_batch_statistics=use_batch_statistics + ) + self.conv1_2 = nn.Conv2d( + 32, + self.inplanes, + 3, + stride=1, + pad_mode="same", + padding=0, + weight_init="he_normal", + ) + + self.bn1 = nn.BatchNorm2d( + self.inplanes, eps=1e-4, use_batch_statistics=use_batch_statistics + ) + self.relu = nn.ReLU() + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") + self.layer1 = self._make_layer( + block, 64, block_num[0], use_batch_statistics=use_batch_statistics + ) + self.layer2 = self._make_layer( + block, + 128, + block_num[1], + stride=2, + use_batch_statistics=use_batch_statistics, + ) + + if output_stride == 16: + self.layer3 = self._make_layer( + block, + 256, + block_num[2], + stride=2, + use_batch_statistics=use_batch_statistics, + ) + self.layer4 = self._make_layer( + block, + 512, + block_num[3], + stride=1, + base_dilation=2, + grids=[1, 2, 4], + use_batch_statistics=use_batch_statistics, + ) + elif output_stride == 8: + self.layer3 = self._make_layer( + block, + 256, + block_num[2], + stride=1, + base_dilation=2, + use_batch_statistics=use_batch_statistics, + ) + self.layer4 = self._make_layer( + block, + 512, + block_num[3], + stride=1, + base_dilation=4, + grids=[1, 2, 4], + use_batch_statistics=use_batch_statistics, + ) + + def _make_layer( + self, + block, + planes, + blocks, + stride=1, + base_dilation=1, + grids=None, + use_batch_statistics=True, + ): + """ res2net make_layer""" + if stride != 1 or self.inplanes != planes * block.expansion: + if stride == 1: + downsample = nn.SequentialCell( + [ + conv1x1(self.inplanes, planes * block.expansion, stride), + nn.BatchNorm2d( + planes * block.expansion, + eps=1e-4, + use_batch_statistics=use_batch_statistics, + ), + ] + ) + else: + downsample = nn.SequentialCell( + [ + nn.MaxPool2d(kernel_size=2, stride=2, pad_mode="same"), + conv1x1(self.inplanes, planes * block.expansion, stride=1), + nn.BatchNorm2d( + planes * block.expansion, + eps=1e-4, + use_batch_statistics=use_batch_statistics, + ), + ] + ) + + if grids is None: + grids = [1] * blocks + + layers = [ + block( + self.inplanes, + planes, + 
stride, + downsample, + dilation=base_dilation * grids[0], + use_batch_statistics=use_batch_statistics, + stype="stage", + ) + ] + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append( + block( + self.inplanes, + planes, + dilation=base_dilation * grids[i], + use_batch_statistics=use_batch_statistics, + ) + ) + + return nn.SequentialCell(layers) + + def construct(self, x): + """ res2net construct""" + x = self.conv1_0(x) + x = self.bn1_0(x) + x = self.relu(x) + x = self.conv1_1(x) + x = self.bn1_1(x) + x = self.relu(x) + out = self.conv1_2(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.maxpool(out) + + l1 = self.layer1(out) + l2 = self.layer2(l1) + l3 = self.layer3(l2) + l4 = self.layer4(l3) + + return l1, l2, l3, l4 + + def load_pretrained_model(self, ckpt_file): + """ load res2net pretrained model""" + ms_ckpt = load_checkpoint(ckpt_file, net=None) + weights = {} + for msname in ms_ckpt: + param_name = msname + if "layer1" in param_name: + if "down_sample_layer.0" in param_name: + param_name = param_name.replace( + "down_sample_layer.0", "downsample.0" + ) + if "down_sample_layer.1" in param_name: + param_name = param_name.replace( + "down_sample_layer.1", "downsample.1" + ) + elif "layer4" in param_name: + if "down_sample_layer.1" in param_name: + param_name = param_name.replace( + "down_sample_layer.1", "downsample.0" + ) + if "down_sample_layer.2" in param_name: + param_name = param_name.replace( + "down_sample_layer.2", "downsample.1" + ) + else: + if "down_sample_layer.1" in param_name: + param_name = param_name.replace( + "down_sample_layer.1", "downsample.1" + ) + if "down_sample_layer.2" in param_name: + param_name = param_name.replace( + "down_sample_layer.2", "downsample.2" + ) + weights[param_name] = ms_ckpt[msname].data.asnumpy() + + parameter_dict = {} + for name in weights: + parameter_dict[name] = Parameter( + Tensor(weights[name], mstype.float32), name=name + ) + + tmp = self.conv1_0.weight + tmp[:, :3, :, :] = parameter_dict["conv1_0.weight"] + parameter_dict["conv1_0.weight"] = tmp + + param_not_load = load_param_into_net(self, parameter_dict) + + print( + "Load pretrained model from [{}]!([{}] not load!)".format( + ckpt_file, len(param_not_load) + ) + ) + + +class Bottle2neck(nn.Cell): + """ res2net block""" + + expansion = 4 + + def __init__( + self, + inplanes, + planes, + stride=1, + downsample=None, + dilation=1, + use_batch_statistics=True, + baseWidth=26, + scale=4, + stype="normal", + ): + super(Bottle2neck, self).__init__() + assert scale > 1, "Res2Net is ResNet when scale = 1" + width = int( + math.floor(planes * self.expansion // self.expansion * (baseWidth / 64.0)) + ) + channel = width * scale + self.conv1 = conv1x1(inplanes, channel) + self.bn1 = nn.BatchNorm2d( + channel, eps=1e-4, use_batch_statistics=use_batch_statistics + ) + + if stype == "stage": + self.pool = nn.AvgPool2d(kernel_size=3, stride=stride, pad_mode="same") + + self.convs = nn.CellList() + self.bns = nn.CellList() + for _ in range(scale - 1): + self.convs.append(conv3x3(width, width, stride, dilation, dilation)) + self.bns.append( + nn.BatchNorm2d( + width, eps=1e-4, use_batch_statistics=use_batch_statistics + ) + ) + + self.conv3 = conv1x1(channel, planes * self.expansion) + self.bn3 = nn.BatchNorm2d( + planes * self.expansion, eps=1e-4, use_batch_statistics=use_batch_statistics + ) + + self.relu = nn.ReLU() + self.downsample = downsample + + self.add = P.Add() + self.scale = scale + self.width = width + self.stride = stride + self.stype = 
stype + self.split = P.Split(axis=1, output_num=scale) + self.cat = P.Concat(axis=1) + + def construct(self, x): + """ bottle2neck construct""" + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + spx = self.split(out) + + sp = self.convs[0](spx[0]) + sp = self.relu(self.bns[0](sp)) + out = sp + + for i in range(1, self.scale - 1): + if self.stype == "stage": + sp = spx[i] + else: + sp = sp[:, :, :, :] + sp = sp + spx[i] + + sp = self.convs[i](sp) + sp = self.relu(self.bns[i](sp)) + out = self.cat((out, sp)) + + if self.stype == "normal": + out = self.cat((out, spx[self.scale - 1])) + elif self.stype == "stage": + out = self.cat((out, self.pool(spx[self.scale - 1]))) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out = self.add(out, identity) + out = self.relu(out) + return out + + +def res2net101(output_stride=16, use_batch_statistics=None, input_channels=3): + """ res2net101 """ + return Resnet( + Bottle2neck, + [3, 4, 23, 3], + output_stride=output_stride, + use_batch_statistics=use_batch_statistics, + input_channels=input_channels, + ) diff --git a/research/cv/FCANet/src/my_custom_transforms.py b/research/cv/FCANet/src/my_custom_transforms.py new file mode 100644 index 0000000000000000000000000000000000000000..07dfb3b6610b1637aa2b1c42f1c9db5f153f8a02 --- /dev/null +++ b/research/cv/FCANet/src/my_custom_transforms.py @@ -0,0 +1,492 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +custom transforms for our sample +""" +import random +import numpy as np +from PIL import Image +from scipy.ndimage.morphology import distance_transform_edt +import cv2 +from src import helpers + +########################################[ Function ]######################################## + + +def img_resize_point(img, size): + """ resize the point from mask to mask""" + (h, w) = img.shape + if not isinstance(size, tuple): + size = (int(w * size), int(h * size)) + M = np.array([[size[0] / w, 0, 0], [0, size[1] / h, 0]]) + + pts_y, pts_x = np.where(img == 1) + pts_xy = np.concatenate((pts_x[:, np.newaxis], pts_y[:, np.newaxis]), axis=1) + pts_xy_new = np.dot(np.insert(pts_xy, 2, 1, axis=1), M.T).astype(np.int64) + + img_new = np.zeros(size[::-1], dtype=np.uint8) + for pt in pts_xy_new: + img_new[pt[1], pt[0]] = 1 + return img_new + + +########################################[ General ]######################################## + +# Compose operations +class Compose: + """ compose multiple transforms""" + + def __init__(self, transforms): + self.transforms = transforms + + def __call__(self, img): + for t in self.transforms: + img = t(img) + return img + + +class Transfer: + """ transfer the data tyle of samples """ + + def __init__(self, if_div=True, elems_do=None, elems_undo=None): + self.if_div = if_div + if elems_undo is None: + elems_undo = [] + self.elems_do, self.elems_undo = elems_do, (["meta"] + elems_undo) + + def __call__(self, sample): + for elem in sample.keys(): + if self.elems_do is not None and elem not in self.elems_do: + continue + if elem in self.elems_undo: + continue + tmp = sample[elem] + tmp = tmp[np.newaxis, :, :] if tmp.ndim == 2 else tmp.transpose((2, 0, 1)) + tmp = tmp / 255 if self.if_div else tmp + tmp = tmp.astype(np.float32) + sample[elem] = tmp + return sample + + +########################################[ Basic Image Augmentation ]######################################## + + +class RandomFlip: + """ random flip operation """ + + def __init__( + self, direction=Image.FLIP_LEFT_RIGHT, p=0.5, elems_do=None, elems_undo=None + ): + self.direction, self.p = direction, p + if elems_undo is None: + elems_undo = [] + self.elems_do, self.elems_undo = elems_do, (["meta"] + elems_undo) + + def __call__(self, sample): + if random.random() < self.p: + for elem in sample.keys(): + if self.elems_do is not None and elem not in self.elems_do: + continue + if elem in self.elems_undo: + continue + sample[elem] = np.array( + Image.fromarray(sample[elem]).transpose(self.direction) + ) + sample["meta"]["flip"] = 1 + else: + sample["meta"]["flip"] = 0 + return sample + + +class Resize: + """ resize operation """ + + def __init__( + self, size, mode=None, elems_point=None, elems_do=None, elems_undo=None, + ): + self.size, self.mode = size, mode + + if elems_point is None: + elems_point = ["pos_points_mask", "neg_points_mask"] + self.elems_point = elems_point + + if elems_undo is None: + elems_undo = [] + self.elems_do, self.elems_undo = elems_do, (["meta"] + elems_undo) + + def __call__(self, sample): + for elem in sample.keys(): + if self.elems_do is not None and elem not in self.elems_do: + continue + if elem in self.elems_undo: + continue + + if elem in self.elems_point: + sample[elem] = img_resize_point(sample[elem], self.size) + continue + + if self.mode is None: + mode = ( + cv2.INTER_LINEAR + if len(sample[elem].shape) == 3 + else cv2.INTER_NEAREST + ) + sample[elem] = cv2.resize(sample[elem], 
self.size, interpolation=mode) + return sample + + +class Crop: + """ crop operation """ + + def __init__(self, x_range, y_range, elems_do=None, elems_undo=None): + self.x_range, self.y_range = x_range, y_range + if elems_undo is None: + elems_undo = [] + self.elems_do, self.elems_undo = elems_do, (["meta"] + elems_undo) + + def __call__(self, sample): + for elem in sample.keys(): + if self.elems_do is not None and elem not in self.elems_do: + continue + if elem in self.elems_undo: + continue + sample[elem] = sample[elem][ + self.y_range[0] : self.y_range[1], + self.x_range[0] : self.x_range[1], + ..., + ] + + sample["meta"]["crop_size"] = np.array( + (self.x_range[1] - self.x_range[0], self.y_range[1] - self.y_range[0]) + ) + sample["meta"]["crop_lt"] = np.array((self.x_range[0], self.y_range[0])) + return sample + + +########################################[ Interactive Segmentation ]######################################## + + +class MatchShortSideResize: + """ resize the samples with short side of fixed size """ + + def __init__(self, size, if_must=True, elems_do=None, elems_undo=None): + self.size, self.if_must = size, if_must + if elems_undo is None: + elems_undo = [] + self.elems_do, self.elems_undo = elems_do, (["meta"] + elems_undo) + + def __call__(self, sample): + src_size = sample["gt"].shape[::-1] + + if ( + (not self.if_must) + and (src_size[0] >= self.size) + and (src_size[1] >= self.size) + ): + return sample + + src_short_size = min(src_size[0], src_size[1]) + dst_size = ( + int(self.size * src_size[0] / src_short_size), + int(self.size * src_size[1] / src_short_size), + ) + assert dst_size[0] == self.size or dst_size[1] == self.size + Resize(size=dst_size)(sample) + return sample + + +class FgContainCrop: + """ random crop the sample with foreground of at least 1 pixels """ + + def __init__(self, crop_size, if_whole=False, elems_do=None, elems_undo=None): + self.crop_size, self.if_whole = crop_size, if_whole + if elems_undo is None: + elems_undo = [] + self.elems_do, self.elems_undo = elems_do, (["meta"] + elems_undo) + + def __call__(self, sample): + gt = sample["gt"] + src_size = gt.shape[::-1] + x_range, y_range = ( + [0, src_size[0] - self.crop_size[0]], + [0, src_size[1] - self.crop_size[1]], + ) + + if not (gt > 127).any(): + pass + elif self.if_whole: + bbox = cv2.boundingRect((gt > 127).astype(np.uint8)) + + if bbox[2] <= self.crop_size[0]: + x_range[1] = min(x_range[1], bbox[0]) + x_range[0] = max(x_range[0], bbox[0] + bbox[2] - self.crop_size[0]) + else: + x_range = [bbox[0], bbox[0] + bbox[2] - self.crop_size[0]] + + if bbox[3] <= self.crop_size[1]: + y_range[1] = min(y_range[1], bbox[1]) + y_range[0] = max(y_range[0], bbox[1] + bbox[3] - self.crop_size[1]) + else: + y_range = [bbox[1], bbox[1] + bbox[3] - self.crop_size[1]] + else: + pts_y, pts_x = np.where(gt > 127) + pts_xy = np.concatenate( + (pts_x[:, np.newaxis], pts_y[:, np.newaxis]), axis=1 + ) + sp_x, sp_y = pts_xy[random.randint(0, len(pts_xy) - 1)] + x_range[1], y_range[1] = min(x_range[1], sp_x), min(y_range[1], sp_y) + x_range[0], y_range[0] = ( + max(x_range[0], sp_x + 1 - self.crop_size[0]), + max(y_range[0], sp_y + 1 - self.crop_size[1]), + ) + + x_st = random.randint(x_range[0], x_range[1]) + y_st = random.randint(y_range[0], y_range[1]) + Crop( + x_range=(x_st, x_st + self.crop_size[0]), + y_range=(y_st, y_st + self.crop_size[1]), + )(sample) + return sample + + +########################################[ Interactive Segmentation (Points) ]######################################## + + +class 
CatPointMask: + """ cat the point mask into th input """ + + def __init__(self, mode="NO", if_repair=True): + self.mode, self.if_repair = mode, if_repair + + def __call__(self, sample): + gt = sample["gt"] + + if "pos_points_mask" in sample.keys() and self.if_repair: + sample["pos_points_mask"][gt <= 127] = 0 + if "neg_points_mask" in sample.keys() and self.if_repair: + sample["neg_points_mask"][gt > 127] = 0 + + if_gt_empty = not (gt > 127).any() + + if ( + (not if_gt_empty) + and (not sample["pos_points_mask"].any()) + and self.if_repair + ): + if gt[gt.shape[0] // 2, gt.shape[1] // 2] > 127: + sample["pos_points_mask"][gt.shape[0] // 2, gt.shape[1] // 2] = 1 + else: + pts_y, pts_x = np.where(gt > 127) + pts_xy = np.concatenate( + (pts_x[:, np.newaxis], pts_y[:, np.newaxis]), axis=1 + ) + pt_pos = pts_xy[random.randint(0, len(pts_xy) - 1)] + sample["pos_points_mask"][pt_pos[1], pt_pos[0]] = 1 + + pos_points_mask, neg_points_mask = ( + sample["pos_points_mask"], + sample["neg_points_mask"], + ) + + if self.mode == "DISTANCE_POINT_MASK_SRC": + max_dist = 255 + if if_gt_empty: + pos_points_mask_dist = np.ones(gt.shape).astype(np.float64) * max_dist + else: + pos_points_mask_dist = distance_transform_edt(1 - pos_points_mask) + pos_points_mask_dist = np.minimum(pos_points_mask_dist, max_dist) + + if not neg_points_mask.any(): + neg_points_mask_dist = np.ones(gt.shape).astype(np.float64) * max_dist + else: + neg_points_mask_dist = distance_transform_edt(1 - neg_points_mask) + neg_points_mask_dist = np.minimum(neg_points_mask_dist, max_dist) + + pos_points_mask_dist, neg_points_mask_dist = ( + pos_points_mask_dist * 255, + neg_points_mask_dist * 255, + ) + sample["pos_mask_dist_src"] = pos_points_mask_dist + sample["neg_mask_dist_src"] = neg_points_mask_dist + + elif self.mode == "DISTANCE_POINT_MASK_FIRST": + max_dist = 255 + if if_gt_empty: + pos_points_mask_dist = np.ones(gt.shape).astype(np.float64) * max_dist + else: + gt_tmp = (sample["gt"] > 127).astype(np.uint8) + pred = np.zeros_like(gt_tmp) + pt, _ = helpers.get_anno_point(pred, gt_tmp, []) + pos_points_mask = np.zeros_like(gt_tmp) + pos_points_mask[pt[1], pt[0]] = 1 + pos_points_mask_dist = distance_transform_edt(1 - pos_points_mask) + pos_points_mask_dist = np.minimum(pos_points_mask_dist, max_dist) + pos_points_mask_dist = pos_points_mask_dist * 255 + sample["pos_mask_dist_first"] = pos_points_mask_dist + return sample + + +class SimulatePoints: + """ simulate the clicks for training """ + + def __init__(self, mode="random", max_point_num=10, if_fixed=False): + self.mode = mode + self.max_point_num = max_point_num + self.if_fixed = if_fixed + + def __call__(self, sample): + if self.if_fixed: + object_id = sample["meta"]["id"] + str_seed = 0 + for c in object_id: + str_seed += ord(c) + str_seed = str_seed % 50 + random.seed(str_seed) + + pos_point_num, neg_point_num = random.randint(1, 10), random.randint(0, 10) + + gt = (sample["gt"] > 127).astype(np.uint8) + + if self.mode == "strategy#05": + + pos_points = np.array( + helpers.get_pos_points_walk( + gt, pos_point_num, step=[7, 10, 20], margin=[5, 10, 15, 20] + ) + ) + neg_points = np.array( + helpers.get_neg_points_walk( + gt, + neg_point_num, + margin_min=[15, 40, 60], + margin_max=[80], + step=[10, 15, 25], + ) + ) + + pos_points_mask, neg_points_mask = ( + np.zeros_like(sample["gt"]), + np.zeros_like(sample["gt"]), + ) + if list(pos_points): + pos_points_mask[pos_points[:, 1], pos_points[:, 0]] = 1 + if list(neg_points): + neg_points_mask[neg_points[:, 1], neg_points[:, 0]] = 1 
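+        # the sampled clicks are now rasterized as binary masks; store them in the sample for the distance-map transforms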
+ + sample["pos_points_mask"] = pos_points_mask + sample["neg_points_mask"] = neg_points_mask + + return sample + + +current_epoch = 0 +record_anno = {} +record_crop_lt = {} +record_if_flip = {} + + +class ITIS_Crop: + """ iterative training with crop""" + + def __init__(self, itis_pro=0, mode="random", crop_size=(384, 384)): + self.itis_pro = itis_pro + self.mode = mode + self.crop_size = crop_size + + def __call__(self, sample): + global current_epoch, record_anno, record_crop_lt, record_if_flip + + object_id = sample["meta"]["id"] + if (random.random() < self.itis_pro) and current_epoch != 0: + Crop( + x_range=( + record_crop_lt[object_id][0], + record_crop_lt[object_id][0] + self.crop_size[0], + ), + y_range=( + record_crop_lt[object_id][1], + record_crop_lt[object_id][1] + self.crop_size[1], + ), + )(sample) + RandomFlip(p=(1.5 if record_if_flip[object_id] == 1 else -1))(sample) + sample["pos_points_mask"] = helpers.get_points_mask( + sample["gt"].shape[::-1], record_anno[object_id][0] + ) + sample["neg_points_mask"] = helpers.get_points_mask( + sample["gt"].shape[::-1], record_anno[object_id][1] + ) + else: + FgContainCrop(crop_size=self.crop_size, if_whole=False)(sample) + RandomFlip(p=-1)(sample) + SimulatePoints(mode=self.mode)(sample) + + return sample + + +class Decouple: + """ decouple the sample items for mindspore""" + + def __init__(self, elems=None): + if elems is None: + elems = ["img", "gt", "id"] + self.elems = elems + + def __call__(self, sample): + return ( + sample["img"], + sample["gt"], + sample["pos_points_mask"], + sample["neg_points_mask"], + sample["pos_mask_dist_src"], + sample["neg_mask_dist_src"], + sample["pos_mask_dist_first"], + sample["click_loss_weight"], + sample["first_loss_weight"], + np.array(sample["meta"]["id_num"], dtype=np.int32), + np.array(sample["meta"]["crop_lt"]), + np.array(sample["meta"]["flip"]), + ) + + +class GeneLossWeight: + """ generate the loss weight""" + + def __init__(self): + pass + + def __call__(self, sample): + + pos_dist = sample["pos_mask_dist_src"] / 255.0 + neg_dist = sample["neg_mask_dist_src"] / 255.0 + first_dist = sample["pos_mask_dist_first"] / 255.0 + gt = (sample["gt"] > 127).astype(np.float64) + + tsh, low, high = 100, 0.8, 2.0 + pos_dist = np.minimum(pos_dist, np.ones_like(pos_dist) * tsh) + neg_dist = np.minimum(neg_dist, np.ones_like(neg_dist) * tsh) + pos_loss_weight = low + (1.0 - pos_dist / tsh) * (high - low) + neg_loss_weight = low + (1.0 - neg_dist / tsh) * (high - low) + pos_loss_weight[gt <= 0.5] = 0 + neg_loss_weight[gt > 0.5] = 0 + click_loss_weight = np.maximum(pos_loss_weight, neg_loss_weight) + + first_dist = np.minimum(first_dist, np.ones_like(first_dist) * tsh) + first_dist[gt <= 0.5] = tsh + first_loss_weight = low + (1.0 - first_dist / tsh) * (high - low) + + sample["click_loss_weight"] = click_loss_weight * 255.0 + sample["first_loss_weight"] = first_loss_weight * 255.0 + + return sample diff --git a/research/cv/FCANet/src/trainer.py b/research/cv/FCANet/src/trainer.py new file mode 100644 index 0000000000000000000000000000000000000000..85596f4d93297594357110e4dd31539fc2d63e7f --- /dev/null +++ b/research/cv/FCANet/src/trainer.py @@ -0,0 +1,366 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+the trainer class for train.py and eval.py
+"""
+from copy import deepcopy
+import random
+import cv2
+import numpy as np
+from tqdm import tqdm
+from PIL import Image
+from scipy.ndimage.morphology import distance_transform_edt
+
+import mindspore.dataset as ds
+import mindspore.nn as nn
+from mindspore.common import dtype as mstype
+from mindspore.ops.operations import SigmoidCrossEntropyWithLogits
+from mindspore import context, Tensor
+from mindspore.ops import operations as P
+from mindspore.train.serialization import save_checkpoint, load_checkpoint
+
+from src import helpers
+from src import my_custom_transforms as mtr
+from src.dataloader_cut import GeneralCutDataset
+from src.model import fcanet
+
+random.seed(10)
+
+class Trainer():
+    """ Trainer for training and eval"""
+    def __init__(self, p):
+        self.p = p
+        context.set_context(mode=context.GRAPH_MODE, device_target=p["device"])
+
+        # set train and eval data and dataloader
+        transform_train = mtr.Compose(
+            [
+                mtr.MatchShortSideResize(size=p["size"][0]),
+                mtr.ITIS_Crop(
+                    itis_pro=p["itis_pro"], mode="strategy#05", crop_size=p["size"]
+                ),
+                mtr.CatPointMask(mode="DISTANCE_POINT_MASK_SRC"),
+                mtr.CatPointMask(mode="DISTANCE_POINT_MASK_FIRST"),
+                mtr.GeneLossWeight(),
+                mtr.Transfer(),
+                mtr.Decouple(),
+            ]
+        )
+
+        self.train_set = GeneralCutDataset(
+            p["dataset_path"],
+            p["dataset_train"],
+            "train.txt",
+            transform=transform_train,
+            max_num=p["max_num"],
+            batch_size=-self.p["batch_size"],
+        )
+        self.train_loader = ds.GeneratorDataset(
+            self.train_set,
+            [
+                "img",
+                "gt",
+                "pos_points_mask",
+                "neg_points_mask",
+                "pos_mask_dist_src",
+                "neg_mask_dist_src",
+                "pos_mask_dist_first",
+                "click_loss_weight",
+                "first_loss_weight",
+                "id_num",
+                "crop_lt",
+                "flip",
+            ],
+            num_parallel_workers=p["num_workers"],
+        )
+
+        self.train_loader = self.train_loader.shuffle(buffer_size=5)
+        self.train_loader = self.train_loader.batch(
+            p["batch_size"], drop_remainder=True
+        )
+        self.train_loader = self.train_loader.repeat(1)
+
+        self.val_robot_sets = []
+        for dataset_val in p["datasets_val"]:
+            self.val_robot_sets.append(
+                GeneralCutDataset(
+                    p["dataset_path"],
+                    dataset_val,
+                    "val.txt",
+                    transform=None,
+                    max_num=p["max_num"],
+                    batch_size=0,
+                )
+            )
+
+        # set network
+        self.model = fcanet.FCANet(
+            size=p["size"][0], backbone_pretrained=p["backbone_pretrained"]
+        )
+
+        # set loss function and learning rate scheduler
+        self.criterion = SigmoidCrossEntropyWithLogits()
+        self.scheduler = helpers.PolyLR(
+            epoch_max=30, base_lr=p["lr"], power=0.9, cutoff_epoch=29
+        )
+        self.best_metric = [-1.0, -1.0]
+
+        # resume from checkpoint
+        if p["resume"] is not None:
+            load_checkpoint(p["resume"], net=self.model)
+            print("Load model from [{}]".format(p["resume"]))
+
+    def training(self, epoch):
+        """ train one epoch"""
+        print("Training :")
+        mtr.current_epoch = epoch
+        loss_total = 0
+
+        # set one training step
+        backbone_params = list(
+            filter(lambda x: "resnet" in x.name, self.model.trainable_params())
+        )
+        other_params = list(
+            filter(lambda 
x: "resnet" not in x.name, self.model.trainable_params()) + ) + + group_params = [ + {"params": backbone_params, "lr": self.scheduler.get_lr()}, + {"params": other_params, "lr": 1 * self.scheduler.get_lr()}, + {"order_params": self.model.trainable_params()}, + ] + + optimizer = nn.SGD( + group_params, + learning_rate=self.scheduler.get_lr(), + momentum=0.9, + weight_decay=5e-4, + nesterov=False, + ) + + tmptmp = fcanet.MyWithLossCell( + self.model, self.criterion, self.p["batch_size"], self.p["size"][0] + ) + trainonestep = fcanet.MyTrainOneStepCell( + tmptmp, self.model, self.criterion, optimizer + ) + trainonestep.set_train(True) + # train process + tbar = tqdm(total=self.train_loader.get_dataset_size()) + + i = 0 + + for i, sample_batched in enumerate(self.train_loader.create_dict_iterator()): + ( + img, + gt, + pos_mask_dist_src, + neg_mask_dist_src, + pos_mask_dist_first, + click_loss_weight, + first_loss_weight, + ) = [ + sample_batched[k] + for k in [ + "img", + "gt", + "pos_mask_dist_src", + "neg_mask_dist_src", + "pos_mask_dist_first", + "click_loss_weight", + "first_loss_weight", + ] + ] + + loss = trainonestep( + img, + pos_mask_dist_src, + neg_mask_dist_src, + pos_mask_dist_first, + gt, + click_loss_weight, + first_loss_weight, + ) + + tbar.update(1) + loss_total += loss.asnumpy() + tbar.set_description("Loss: %.3f" % (loss_total / (i + 1))) + + output = tmptmp.out_tmp + output = P.Sigmoid()(output) + + # record the previous result for iterative training + if self.p["itis_pro"] > 0: + current_batchsize = sample_batched["pos_points_mask"].shape[0] + for j in range(current_batchsize): + gt = (sample_batched["gt"].asnumpy()[j, 0, :, :] > 0.5).astype( + np.uint8 + ) + pos_points_mask = ( + sample_batched["pos_points_mask"].asnumpy()[j, 0, :, :] > 0.0001 + ).astype(np.uint8) + neg_points_mask = ( + sample_batched["neg_points_mask"].asnumpy()[j, 0, :, :] > 0.0001 + ).astype(np.uint8) + + result = output.asnumpy()[j, 0, :, :] + + pred = (result > 0.2).astype(np.uint8) + + pt, if_pos = helpers.get_anno_point( + pred, gt, np.maximum(pos_points_mask, neg_points_mask) + ) + if if_pos: + pos_points_mask[pt[1], pt[0]] = 1 + else: + neg_points_mask[pt[1], pt[0]] = 1 + + object_id = self.train_set.ids_list[sample_batched["id_num"].asnumpy()[j]] + + mtr.record_anno[object_id] = [ + helpers.get_points_list(pos_points_mask), + helpers.get_points_list(neg_points_mask), + ] + mtr.record_crop_lt[object_id] = list( + sample_batched["crop_lt"].asnumpy()[j] + ) + mtr.record_if_flip[object_id] = int(sample_batched["flip"].asnumpy()[j]) + + tbar.close() + print("Loss: %.3f" % (loss_total / (i + 1))) + + def validation_robot(self, epoch, tsh=0.5, resize=None): + """ validation with robot user""" + self.model.set_train(False) + print("+" * 79) + for index, val_robot_set in enumerate(self.val_robot_sets): + print("Validation Robot: [{}] ".format(self.p["datasets_val"][index])) + self.validation_robot_dataset(index, val_robot_set, tsh, resize) + + def validation_robot_dataset(self, index, val_robot_set, tsh, resize): + """ validation with robot user for each dataset """ + tbar = tqdm(val_robot_set) + img_num = len(val_robot_set) + point_num_target_sum, pos_points_num, neg_points_num = 0, 0, 0 + point_num_miou_sum = [0] * (self.p["record_point_num"] + 1) + for _, sample in enumerate(tbar): + gt = np.array(Image.open(sample["meta"]["gt_path"])) + pred = np.zeros_like(gt) + pos_points, neg_points = [], [] + if_get_target = False + for point_num in range(1, self.p["max_point_num"] + 1): + pt, if_pos = 
helpers.get_anno_point( + pred, gt, pos_points + neg_points + ) + if if_pos: + pos_points.append(pt) + if not if_get_target: + pos_points_num += 1 + else: + neg_points.append(pt) + if not if_get_target: + neg_points_num += 1 + + sample_cpy = deepcopy(sample) + sample_cpy["pos_points_mask"] = helpers.get_points_mask( + gt.shape[::-1], pos_points + ) + sample_cpy["neg_points_mask"] = helpers.get_points_mask( + gt.shape[::-1], neg_points + ) + + if resize is not None: + if isinstance(resize, int): + short_len = min(gt.shape[0], gt.shape[1]) + dsize = ( + int(gt.shape[1] * resize / short_len), + int(gt.shape[0] * resize / short_len), + ) + elif isinstance(resize, tuple): + dsize = resize + + mtr.Resize(dsize)(sample_cpy) + mtr.CatPointMask(mode="DISTANCE_POINT_MASK_SRC", if_repair=False)( + sample_cpy + ) + + if point_num == 1: + pos_mask_first = sample_cpy["pos_points_mask"].copy() + + sample_cpy["pos_mask_dist_first"] = ( + np.minimum(distance_transform_edt(1 - pos_mask_first), 255.0) + * 255.0 + ) + + mtr.Transfer()(sample_cpy) + + img = Tensor(sample_cpy["img"][None, :, :, :], mstype.float32) + pos_mask_dist_src = Tensor( + sample_cpy["pos_mask_dist_src"][None, :, :, :], mstype.float32 + ) + neg_mask_dist_src = Tensor( + sample_cpy["neg_mask_dist_src"][None, :, :, :], mstype.float32 + ) + pos_mask_dist_first = Tensor( + sample_cpy["pos_mask_dist_first"][None, :, :, :], mstype.float32 + ) + + output = self.model( + img, pos_mask_dist_src, neg_mask_dist_src, pos_mask_dist_first + ) + + output = output[0] + output = P.Sigmoid()(output) + + result = output.asnumpy()[0, 0, :, :] + + if resize is not None: + result = cv2.resize( + result, gt.shape[::-1], interpolation=cv2.INTER_LINEAR + ) + + pred = (result > tsh).astype(np.uint8) + miou = ((pred == 1) & (gt == 1)).sum() / ( + ((pred == 1) | (gt == 1)) & (gt != 255) + ).sum() + + if point_num <= self.p["record_point_num"]: + point_num_miou_sum[point_num] += miou + + if (not if_get_target) and ( + miou >= self.p["miou_target"][index] + or point_num == self.p["max_point_num"] + ): + point_num_target_sum += point_num + if_get_target = True + + if if_get_target and point_num >= self.p["record_point_num"]: + break + + print("(point_num_target_avg : {})".format(point_num_target_sum / img_num)) + print( + "(pos_points_num_avg : {}) (neg_points_num_avg : {})".format( + pos_points_num / img_num, neg_points_num / img_num + ) + ) + print( + "(point_num_miou_avg : {})\n".format( + np.array([round(i / img_num, 3) for i in point_num_miou_sum]) + ) + ) + + def save_model_ckpt(self, path): + """ save checkpoint""" + save_checkpoint(self.model, path) diff --git a/research/cv/FCANet/train.py b/research/cv/FCANet/train.py new file mode 100644 index 0000000000000000000000000000000000000000..a34f45a22bf0a9e81d58dfdc712ce9bd2daec567 --- /dev/null +++ b/research/cv/FCANet/train.py @@ -0,0 +1,59 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" train fcanet """ +import os +import time +from src.config import config +from src.trainer import Trainer + +if __name__ == "__main__": + # set config + p = config + p["resume"] = None + p["snapshot_path"] = "./snapshot" + os.makedirs(p["snapshot_path"], exist_ok=True) + split_line_num = 79 + # start + print( + "Start time : ", time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())) + ) + print("-" * split_line_num, "\ninfos : ", p, "\n" + "-" * split_line_num) + + # set trainer + mine = Trainer(p) + + for epoch in range(p["epochs"]): + lr_str = "{:.7f}".format(mine.scheduler.get_lr()) + print( + "-" * split_line_num + "\n" + "Epoch [{:03d}]=> |-lr:{}-| \n".format(epoch, lr_str) + ) + # training + if p["train_only_epochs"] >= 0: + mine.training(epoch) + mine.scheduler.step() + + if epoch < p["train_only_epochs"]: + continue + + # validation-robot + if (epoch + 1) % p["val_robot_interval"] == 0: + mine.save_model_ckpt( + "{}/model-epoch-{}.ckpt".format(p["snapshot_path"], str(epoch).zfill(3)) + ) + mine.validation_robot(epoch, tsh=p["pred_tsh"], resize=p["size"][0]) + print( + "-" * split_line_num + "\nEnd time : ", + time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())), + )