diff --git a/research/cv/DecoMR/README.md b/research/cv/DecoMR/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..4b7259e840557fe4f4d46779fa9b5b169e19def7
--- /dev/null
+++ b/research/cv/DecoMR/README.md
@@ -0,0 +1,301 @@
+# Contents
+
+<!-- TOC -->
+
+- [Contents](#contents)
+- [DecoMR Description](#decomr-description)
+    - [Model Architecture](#model-architecture)
+- [Dataset](#dataset)
+    - [Dataset Used](#dataset-used)
+    - [Data Organization](#data-organization)
+    - [Data Preprocessing](#data-preprocessing)
+    - [Additional Data](#additional-data)
+        - [Organization of the Additional Data](#organization-of-the-additional-data)
+- [Pretrained Models](#pretrained-models)
+- [Environment Requirements](#environment-requirements)
+- [Quick Start](#quick-start)
+- [Script Description](#script-description)
+    - [Scripts and Sample Code](#scripts-and-sample-code)
+    - [Script Parameters](#script-parameters)
+- [Training Process](#training-process)
+    - [Single-Device Training](#single-device-training)
+    - [Distributed Training](#distributed-training)
+- [Inference](#inference)
+    - [Inference Process](#inference-process)
+    - [Inference Results](#inference-results)
+- [Performance](#performance)
+    - [Training Performance](#training-performance)
+    - [Inference Performance](#inference-performance)
+- [Description of Random Situations](#description-of-random-situations)
+- [Other Notes](#other-notes)
+- [Contribution Guide](#contribution-guide)
+- [ModelZoo Homepage](#modelzoo-homepage)
+
+<!-- /TOC -->
+
+# DecoMR
+
+## DecoMR Description
+
+**3D Human Mesh Regression with Dense Correspondence**
+[Wang Zeng, Wanli Ouyang, Ping Luo, Wentao Liu, Xiaogang Wang]
+CVPR 2020. The paper can be downloaded from [DecoMR](https://openaccess.thecvf.com/content_CVPR_2020/papers/Zeng_3D_Human_Mesh_Regression_With_Dense_Correspondence_CVPR_2020_paper.pdf).
+
+### Model Architecture
+
+DecoMR is a model-free framework for 3D human mesh estimation. It explicitly establishes dense correspondences between the mesh and local image features in UV space (the 2D space used for texture mapping of 3D meshes). Experiments show that the proposed local feature alignment and continuous UV map outperform existing 3D-mesh-based methods on several public benchmarks.
+
+## Dataset
+
+### Dataset Used
+
+- [UP-3D](http://files.is.tuebingen.mpg.de/classner/up/): this dataset is used for both training and testing. The archive containing the training and test sets can be downloaded from [UP-3D zip](http://files.is.tuebingen.mpg.de/classner/up/datasets/up-3d.zip). After extracting it, configure the dataset path in config.py.
+
+### Data Organization
+
+```text
+├── up-3d
+│   ├── _image.png                    # up-3d images
+│   ├── _body.pkl                     # pose and shape annotations
+│   ├── _joints.npy                   # keypoint annotations
+│   ├── trainval.txt                  # ids of the training samples
+│   ├── test.txt                      # ids of the test samples
+```
+
+### Data Preprocessing
+
+After the dataset has been downloaded and extracted, run the following command to preprocess the up-3d dataset and generate the annotation files and gt_iuv images required by this project.
+
+```shell
+python preprocess_datasets.py --train_files --eval_files --gt_iuv
+```
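+
+Before moving on, it can be useful to sanity-check the extracted layout. The snippet below is a minimal sketch (not part of the official scripts) that inspects the annotations of a single UP-3D sample; the sample id `00001` and the relative path are placeholders.
+
+```python
+import pickle
+
+import cv2
+import numpy as np
+
+sample_id = '00001'  # hypothetical id taken from trainval.txt
+img = cv2.imread('up-3d/{}_image.png'.format(sample_id))       # H x W x 3 image
+joints = np.load('up-3d/{}_joints.npy'.format(sample_id))      # (3, 14): x, y, visibility of the 14 LSP joints
+with open('up-3d/{}_body.pkl'.format(sample_id), 'rb') as f:
+    body = pickle.load(f, encoding='iso-8859-1')                # SMPL annotations: 'pose', 'betas', 'rt'
+
+print(img.shape, joints.shape, body['pose'].shape, body['betas'].shape)
+```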
+### Additional Data
+
+Some additional data required by this project must be downloaded from the [data](https://drive.google.com/drive/folders/1xWBVfQa7OZ14VgT9BVO9Lj_kDqRAcQ-e) link given in the paper; after downloading, extract it to ./DecoMR. The SMPL template also needs to be downloaded from the [Unite the People repository](https://github.com/classner/up) with the following script:
+
+```shell
+wget https://github.com/classner/up/raw/master/models/3D/basicModel_neutral_lbs_10_207_0_v1.0.0.pkl --directory-prefix=data
+```
+
+In addition, download the gender-specific models from [male and female models](https://smpl.is.tue.mpg.de/) and extract them to the data directory.
+
+#### Organization of the Additional Data
+
+```text
+├── data
+    ├── uv_sampler
+    │   ├── paras_h0064_w0064_BF.npz                  # BF 64x64 parameters
+    │   ├── paras_h0064_w0064_SMPL.npz                # SMPL 64x64 parameters
+    │   ├── paras_h0128_w0128_BF.npz                  # BF 128x128 parameters
+    │   ├── smpl_boundry_free_template.obj            # boundary-free (BF) SMPL template
+    │   ├── smpl_fbx_template.obj                     # generic SMPL template
+    ├── basicmodel_f_lbs_10_207_0_v1.0.0.pkl          # female SMPL model parameters
+    ├── basicmodel_m_lbs_10_207_0_v1.0.0.pkl          # male SMPL model parameters
+    ├── basicmodel_neutral_lbs_10_207_0_v1.0.0.pkl    # neutral SMPL model parameters
+    ├── BF_ref_map_64.npy                             # BF reference map
+    ├── J_regressor_extra.npy                         # extra joint regressor
+    ├── namesUPlsp.txt                                # ids of the test images
+    ├── reference_mesh.obj                            # reference mesh
+    ├── segm_per_v_overlap.pkl                        # per-vertex segmentation with overlapping vertices
+    ├── SMPL_ref_map_64.npy                           # SMPL reference map
+    ├── vertex_texture.npy                            # vertex texture data
+    ├── weight_p24_h0128_w0128_BF.npy                 # BF 128x128 UV weight parameters
+```
+
+## Pretrained Models
+
+PyTorch pretrained model (resnet50)
+
+The ResNet backbone uses the ResNet50 structure, which has 50 layers in total counting the convolutional and fully connected layers; the fully connected layer is not used in this model. The network consists of 5 stages: the first stage preprocesses the input, and the following four stages contain 3, 4, 6 and 3 bottleneck blocks respectively.
+
+Download the [ResNet50 pretrained model](https://download.pytorch.org/models/resnet50-19c8e357.pth).
+
+MindSpore pretrained model
+
+Download the PyTorch pretrained model, then run the following script to obtain the corresponding MindSpore model and write it to the data folder. Note: running this script also requires a PyTorch environment (tested with version 1.3, CPU or GPU).
+
+```bash
+# MODEL_NAME: model name, vgg or resnet
+# PTH_FILE: absolute path of the model file to be converted
+# MSP_FILE: absolute path of the output model file
+bash convert_model.sh [MODEL_NAME] [PTH_FILE] [MSP_FILE]
+```
+
+## Environment Requirements
+
+- For the required Python third-party libraries, see the requirements.txt file.
+- Install dependencies: this project uses opendr to render the 3D mesh, so opendr must be installed; install its dependencies first:
+
+    ```shell
+    sudo apt-get install libglu1-mesa-dev freeglut3-dev mesa-common-dev
+    sudo apt-get install libosmesa6-dev
+    sudo apt-get install gfortran
+    pip install --force-reinstall pip==19
+    pip install -r requirements.txt
+    ```
+
+## Quick Start
+
+After installing MindSpore via the official website, you can follow the steps below for training and evaluation:
+
+- Enter the scripts folder and run:
+
+    ```bash
+    # run the single-device training example
+    bash ./run_train_standalone_gpu.sh up-3d 3 5 30 16 './ckpt'
+
+    # run the distributed training example on GPU
+    bash ./run_train_distribute_gpu.sh up-3d 8 5 30 16 './ckpt'
+
+    # run the evaluation example
+    bash ./run_eval.sh up-3d 16
+    ```
+
+## Script Description
+
+### Scripts and Sample Code
+
+```bash
+├── DecoMR
+    ├─ README.md                            # description of the model
+    ├─ preprocess_datasets.py               # preprocessing of the training/test images
+    ├─ preprocess_surreal.py                # preprocessing of the SURREAL training/test images
+    ├─ eval.py                              # evaluation script
+    ├─ train.py                             # training script
+    ├─ pretrained_model_convert
+    │  ├─ pth_to_msp.py                     # convert a pth file into a ckpt file
+    │  ├─ resnet_msp.py                     # structure of the resnet pretrained model in MindSpore
+    │  ├─ resnet_pth.py                     # structure of the resnet pretrained model in PyTorch
+    ├─ scripts
+    │  ├─ convert_model.sh                  # convert the pretrained model
+    │  ├─ run_eval.sh                       # launch evaluation
+    │  ├─ run_train_distribute_gpu.sh       # launch multi-device training
+    │  ├─ run_train_standalone_gpu.sh       # launch single-device training
+    ├─ datasets
+    │  ├─ preprocess
+    │  │  ├─ generate_gt_iuv.py             # generation of the gt_iuv images
+    │  │  ├─ surreal.py                     # SURREAL dataset preprocessing
+    │  │  └─ up_3d.py                       # up-3d dataset preprocessing
+    │  ├─ base_dataset.py                   # dataset loading
+    │  └─ surreal_dataset.py                # SURREAL dataset loading
+    ├─ models
+    │  ├─ dense_cnn.py                      # network definition
+    │  ├─ DMR.py                            # end-to-end combination of CNet and LNet
+    │  ├─ geometric_layers.py               # geometric transformation layers
+    │  ├─ grid_sample.py                    # grid_sample operator
+    │  ├─ layer.py                          # network layers
+    │  ├─ resnet.py                         # resnet network structure, resnet50 version
+    │  ├─ smpl.py                           # SMPL template
+    │  ├─ upsample.py                       # upsampling network
+    │  ├─ uv_generator.py                   # UV image generation
+    │  ├─ TrainOneStepDP.py                 # single training step of CNet
+    │  ├─ TrainOneStepEnd.py                # single training step of LNet
+    │  ├─ WithLossCellDP.py                 # CNet loss
+    │  └─ WithLossCellEnd.py                # LNet loss
+    ├─ utils
+    │  ├─ config.py                         # data paths
+    │  ├─ imutils.py                        # image processing functions
+    │  ├─ objfile.py                        # obj file reading
+    │  ├─ renderer.py                       # IUV image rendering
+    │  └─ train_options.py                  # training parameters
+```
+
+### Script Parameters
+
+For a description of the parameters and how to modify them, see the train_options file in utils.
+
+## Training Process
+
+### Single-Device Training
+
+- Run the following command for single-device training:
+
+    ```bash
+    python train.py --dataset=up-3d --device_id=3 --num_epochs_dp=5 --num_epochs_end=30 --batch_size=16 --ckpt_dir='./ckpt'
+    or:
+    bash ./run_train_standalone_gpu.sh up-3d 3 5 30 16 './ckpt'
+    ```
+
+### Distributed Training
+
+- Run the following command for distributed training:
+
+    ```bash
+    python train.py --run_distribute=True --ngpus=8 --dataset=up-3d --num_epochs_dp=5 --num_epochs_end=30 --batch_size=16 --ckpt_dir='./ckpt'
+    or:
+    bash ./run_train_distribute_gpu.sh up-3d 8 5 30 16 './ckpt'
+    ```
+
+## Inference
+
+### Inference Process
+
+- Run the following command for inference:
+
+    ```bash
+    python eval.py --dataset=up-3d --batch_size=16
+    or:
+    bash ./run_eval.sh up-3d 16
+    ```
+
+### Inference Results
+
+> *** Final Results ***
+> Shape Error: 202.17
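+
+The shape error reported above is a mean per-vertex error in millimetres: the predicted and ground-truth SMPL meshes are aligned at the pelvis and the per-vertex distances are averaged (see eval.py for the exact MindSpore implementation). The snippet below is a simplified NumPy sketch of such a metric, using the standard Euclidean per-vertex distance; the function and variable names are illustrative only.
+
+```python
+import numpy as np
+
+def mean_per_vertex_error(pred_vertices, gt_vertices, pred_pelvis, gt_pelvis):
+    """pred_vertices, gt_vertices: (B, 6890, 3) SMPL meshes in metres; *_pelvis: (B, 1, 3)."""
+    pred_aligned = pred_vertices - pred_pelvis    # remove the global translation
+    gt_aligned = gt_vertices - gt_pelvis
+    per_vertex = np.linalg.norm(pred_aligned - gt_aligned, axis=-1)  # (B, 6890)
+    return 1000.0 * per_vertex.mean()                                # metres -> millimetres
+```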
+## Performance
+
+### Training Performance
+
+The training performance is as follows:
+
+| Parameters                 | GPU                                                           |
+| -------------------------- | ------------------------------------------------------------- |
+| Model Version              | DecoMR                                                        |
+| Resource                   | PCIE 3090-24G                                                 |
+| Uploaded Date              | 06/28/2022 (month/day/year)                                   |
+| MindSpore Version          | 1.8.0                                                         |
+| Dataset                    | up-3d                                                         |
+| Training Parameters        | epoch_dp=5, epoch_end=30, steps per epoch=55, batch_size=128  |
+| Optimizer                  | Adam                                                          |
+| Loss Function              | MSE, BSE, L1                                                  |
+| Outputs                    | probability                                                   |
+| Loss                       |                                                               |
+| Speed                      | 2245 ms/step (8pcs)                                           |
+| Total time                 | 1.8h                                                          |
+| Parameters (M)             | 236.9                                                         |
+| Checkpoint for Fine tuning | (.ckpt file)                                                  |
+| Scripts                    |                                                               |
+
+### Inference Performance
+
+The inference performance is as follows:
+
+| Parameters          | GPU                         |
+|---------------------|-----------------------------|
+| Model Version       | DecoMR                      |
+| Resource            | GPU                         |
+| Uploaded Date       | 06/28/2022 (month/day/year) |
+| MindSpore Version   | 1.8.0                       |
+| Dataset             | up-3d                       |
+| batch_size          | 16                          |
+| Outputs             | probability                 |
+| Shape error         | 202.17                      |
+| Model for inference |                             |
+
+## Description of Random Situations
+
+A random seed is used in train.py.
+
+## Other Notes
+
+Since this project is fairly demanding on computing resources, most training failures are caused by too many data-loading workers or a batch size that is too large. The solution is to decrease num_workers in train_options, or to reduce batch_size.
+
+## Contribution Guide
+
+If you want to contribute to MindSpore, please read the [MindSpore Contribution Guide](https://gitee.com/mindspore/models/blob/master/CONTRIBUTING_CN.md) and [how_to_contribute](https://gitee.com/mindspore/models/tree/master/how_to_contribute).
+
+## ModelZoo Homepage
+
+Please check out the official [homepage](https://gitee.com/mindspore/models).
diff --git a/research/cv/DecoMR/datasets/base_dataset.py b/research/cv/DecoMR/datasets/base_dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..65913d4931e27b97402fd20b6e247e04bab4346a
--- /dev/null
+++ b/research/cv/DecoMR/datasets/base_dataset.py
@@ -0,0 +1,352 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import os
+from os.path import join
+import numpy as np
+from utils import config as cfg
+from utils.imutils import crop, flip_img, flip_pose, flip_kp, transform, rot_aa
+from mindspore import dataset
+from mindspore.dataset import GeneratorDataset
+import mindspore.dataset.vision as c_vision
+from datasets.surreal_dataset import SurrealDataset
+import cv2
+
+class BaseDataset:
+    """
+    Base Dataset Class - Handles data loading and augmentation.
+    Able to handle heterogeneous datasets (different annotations available for different datasets).
+    You need to update the path to each dataset in utils/config.py.
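+
+    Usage sketch (hypothetical, assuming cfg.DATASET_FILES / cfg.DATASET_FOLDERS already
+    point at the preprocessed up-3d annotations and images):
+
+        dataset = BaseDataset(options, 'up-3d', is_train=True, use_IUV=True)
+        sample = dataset[0]  # tuple: scale, center, orig_shape, img_orig, img, imgname, ...
+
+    The order of the returned tuple matches the column names used in create_dataset() below.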
+ """ + def __init__(self, options, dataset_name, use_augmentation=True, is_train=True, use_IUV=False): + super(BaseDataset, self).__init__() + self.dataset_name = dataset_name + self.is_train = is_train + self.options = options + self.img_dir = cfg.DATASET_FOLDERS[dataset_name] + self.normalize_img = c_vision.Normalize(mean=cfg.IMG_NORM_MEAN, std=cfg.IMG_NORM_STD) + self.data = np.load(cfg.DATASET_FILES[is_train][dataset_name]) + self.imgname = self.data['imgname'] + + # Get paths to gt masks, if available + try: + self.maskname = self.data['maskname'] + except KeyError: + pass + try: + self.partname = self.data['partname'] + except KeyError: + pass + + # Get gender data, if available + try: + gender = self.data['gender'] + self.gender = np.array([0 if str(g) == 'm' else 1 for g in gender]).astype(np.int32) + except KeyError: + self.gender = -1 * np.ones(len(self.imgname)).astype(np.int32) + + # Bounding boxes are assumed to be in the center and scale format + self.scale = self.data['scale'] + self.center = self.data['center'] + + # If False, do not do augmentation + self.use_augmentation = use_augmentation + + # Get gt SMPL parameters, if available + try: + self.pose = self.data['pose'].astype(float) + self.betas = self.data['shape'].astype(float) + self.has_smpl = np.ones(len(self.imgname)).astype(int) + if dataset == 'mpi-inf-3dhp': + self.has_smpl = self.data['has_smpl'].astype(int) + + except KeyError: + self.has_smpl = np.zeros(len(self.imgname)).astype(int) + + # Get gt 3D pose, if available + try: + self.pose_3d = self.data['S'] + self.has_pose_3d = 1 + except KeyError: + self.has_pose_3d = 0 + + # Get 2D keypoints + try: + self.keypoints = self.data['part'] + except KeyError: + self.keypoints = np.zeros((len(self.imgname), 24, 3)) + + self.length = self.scale.shape[0] + self.use_IUV = use_IUV + self.has_dp = np.zeros(len(self.imgname)) + + if self.use_IUV: + if self.dataset_name in ['h36m-train', 'up-3d', 'h36m-test', 'h36m-train-hmr']: + self.iuvname = self.data['iuv_names'] + self.has_dp = self.has_smpl + self.uv_type = options.uv_type + self.iuv_dir = join(self.img_dir, '{}_IUV_gt'.format(self.uv_type)) + + # Using fitted SMPL parameters from SPIN or not + if self.is_train and options.use_spin_fit and self.dataset_name in ['coco', 'lsp-orig', 'mpii',\ + 'lspet', 'mpi-inf-3dhp']: + fit_file = cfg.FIT_FILES[is_train][self.dataset_name] + fit_data = np.load(fit_file) + self.pose = fit_data['pose'].astype(float) + self.betas = fit_data['betas'].astype(float) + self.has_smpl = fit_data['valid_fit'].astype(int) + + if self.use_IUV: + self.uv_type = options.uv_type + self.iuvname = self.data['iuv_names'] + self.has_dp = self.has_smpl + self.fit_joint_error = self.data['fit_errors'].astype(np.float32) + self.iuv_dir = join(self.img_dir, '{}_IUV_SPIN_fit'.format(self.uv_type)) + + def augm_params(self): + """Get augmentation parameters.""" + flip = 0 # flipping + pn = np.ones(3) # per channel pixel-noise + rot = 0 # rotation + sc = 1 # scaling + if self.is_train: + if self.options.use_augmentation: + # We flip with probability 1/2 + if np.random.uniform() <= 0.5: + flip = 1 + + # Each channel is multiplied with a number + # in the area [1-opt.noiseFactor,1+opt.noiseFactor] + pn = np.random.uniform(1-self.options.noise_factor, 1+self.options.noise_factor, 3) + + # The rotation is a number in the area [-2*rotFactor, 2*rotFactor] + rot = min(2*self.options.rot_factor, + max(-2*self.options.rot_factor, np.random.randn()*self.options.rot_factor)) + + # The scale is multiplied with a number 
+ # in the area [1-scaleFactor,1+scaleFactor] + sc = min(1+self.options.scale_factor, + max(1-self.options.scale_factor, np.random.randn()*self.options.scale_factor+1)) + # but it is zero with probability 3/5 + if np.random.uniform() <= 0.6: + rot = 0 + + return flip, pn, rot, sc + + def rgb_processing(self, rgb_img, center, scale, rot, flip, pn): + """Process rgb image and do augmentation.""" + rgb_img = crop(rgb_img, center, scale, + [self.options.img_res, self.options.img_res], rot=rot) + + if flip: + rgb_img = flip_img(rgb_img) + + # in the rgb image we add pixel noise in a channel-wise manner + rgb_img[:, :, 0] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 0] * pn[0])) + rgb_img[:, :, 1] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 1] * pn[1])) + rgb_img[:, :, 2] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 2] * pn[2])) + + rgb_img = rgb_img / 255.0 + return rgb_img + + def j2d_processing(self, kp, center, scale, r, f): + """Process gt 2D keypoints and apply all augmentation transforms.""" + nparts = kp.shape[0] + for i in range(nparts): + kp[i, 0:2] = transform(kp[i, 0:2]+1, center, scale, + [self.options.img_res, self.options.img_res], rot=r) + # convert to normalized coordinates + kp[:, :-1] = 2.*kp[:, :-1]/self.options.img_res - 1. + # flip the x coordinates + if f: + kp = flip_kp(kp) + kp = kp.astype('float32') + return kp + + def j3d_processing(self, S, r, f): + """Process gt 3D keypoints and apply all augmentation transforms.""" + # in-plane rotation + rot_mat = np.eye(3) + if not r == 0: + rot_rad = -r * np.pi / 180 + sn, cs = np.sin(rot_rad), np.cos(rot_rad) + rot_mat[0, :2] = [cs, -sn] + rot_mat[1, :2] = [sn, cs] + S = np.einsum('ij,kj->ki', rot_mat, S) + # flip the x coordinates + if f: + S = flip_kp(S) + S = S.astype('float32') + return S + + def pose_processing(self, pose, r, f): + """Process SMPL theta parameters and apply all augmentation transforms.""" + # rotation or the pose parameters + pose[:3] = rot_aa(pose[:3], r) + # flip the pose parameters + if f: + pose = flip_pose(pose) + + pose = pose.astype('float32') + return pose + + def iuv_processing(self, IUV, center, scale, rot, flip, pn): + """Process rgb image and do augmentation.""" + IUV = crop(IUV, center, scale, + [self.options.img_res, self.options.img_res], rot=rot) + + if flip: + IUV = flip_img(IUV) + IUV = np.transpose(IUV.astype('float32'), (2, 0, 1)) + if self.uv_type == 'BF': + mask = (IUV[0] > 0).astype('float32') + IUV[1] = (255 - IUV[1]) * mask + else: + print('Flip augomentation for SMPL default UV map is not supported yet.') + else: + IUV = np.transpose(IUV.astype('float32'), (2, 0, 1)) + return IUV + + def __getitem__(self, index): + item = {} + scale = self.scale[index].copy() + center = self.center[index].copy() + + # Get augmentation parameters + flip, pn, rot, sc = self.augm_params() + + # Load image + imgname = join(self.img_dir, str(self.imgname[index])) + try: + img = cv2.imread(imgname)[:, :, ::-1].copy().astype(np.float32) + except TypeError: + print(imgname) + orig_shape = np.array(img.shape)[:2] + item['scale'] = (sc * scale).astype('float32') + item['center'] = center.astype('float32') + item['orig_shape'] = orig_shape.astype('int32') + + # Process image + img = self.rgb_processing(img, center, sc * scale, rot, flip, pn).astype('float32') + # Store image before normalization to use it in visualization + item['img_orig'] = img.copy() + item['img'] = np.transpose(self.normalize_img(img).astype('float32'), (2, 0, 1)) + item['imgname'] = imgname + + # Get SMPL parameters, if 
available + has_smpl = self.has_smpl[index] + item['has_smpl'] = has_smpl + if has_smpl: + pose = self.pose[index].copy() + betas = self.betas[index].copy() + else: + pose = np.zeros(72) + betas = np.zeros(10) + item['pose'] = self.pose_processing(pose, rot, flip) + item['betas'] = betas.astype('float32') + + # Get 3D pose, if available + item['has_pose_3d'] = self.has_pose_3d + if self.has_pose_3d: + S = self.pose_3d[index].copy() + St = self.j3d_processing(S.copy()[:, :-1], rot, flip) + S[:, :-1] = St + item['pose_3d'] = S + else: + item['pose_3d'] = np.zeros((24, 4)).astype('float32') + + # Get 2D keypoints and apply augmentation transforms + keypoints = self.keypoints[index].copy() + item['keypoints'] = self.j2d_processing(keypoints, center, sc * scale, rot, flip) + + # Get GT SMPL joints (For the compatibility with SURREAL dataset) + item['keypoints_smpl'] = np.zeros((24, 3)).astype('float32') + item['pose_3d_smpl'] = np.zeros((24, 4)).astype('float32') + item['has_pose_3d_smpl'] = 0 + + # Pass path to segmentation mask, if available + # Cannot load the mask because each mask has different size, so they cannot be stacked in one tensor + try: + item['maskname'] = self.maskname[index] + except AttributeError: + item['maskname'] = '' + try: + item['partname'] = self.partname[index] + except AttributeError: + item['partname'] = '' + item['gender'] = self.gender[index] + + if self.use_IUV: + IUV = np.zeros((3, img.shape[1], img.shape[2])).astype('float32') + iuvname = '' + has_dp = self.has_dp[index] + try: + fit_error = self.fit_joint_error[index] + except AttributeError: + fit_error = 0.0 # For the dataset with GT mesh, fit_error is set 0 + + if has_dp: + iuvname = join(self.iuv_dir, str(self.iuvname[index])) + if os.path.exists(iuvname): + IUV = cv2.imread(iuvname).copy() + IUV = self.iuv_processing(IUV, center, sc * scale, rot, flip, pn) # process UV map + else: + has_dp = 0 + print("GT IUV image: {} does not exist".format(iuvname)) + + item['gt_iuv'] = IUV + item['iuvname'] = iuvname + item['has_dp'] = has_dp + item['fit_joint_error'] = fit_error + + if self.use_IUV: + return item['scale'], item['center'], item['orig_shape'], item['img_orig'], item['img'],\ + item['imgname'], item['has_smpl'], item['pose'], item['betas'], item['has_pose_3d'],\ + item['pose_3d'], item['keypoints'], item['keypoints_smpl'], item['pose_3d_smpl'], \ + item['has_pose_3d_smpl'], item['maskname'], item['partname'], item['gender'], item['gt_iuv'],\ + item['iuvname'], item['has_dp'], item['fit_joint_error'] + + return item['scale'], item['center'], item['orig_shape'], item['img_orig'], item['img'], \ + item['imgname'], item['has_smpl'], item['pose'], item['betas'], item['has_pose_3d'], \ + item['pose_3d'], item['keypoints'], item['keypoints_smpl'], item['pose_3d_smpl'], \ + item['has_pose_3d_smpl'], item['maskname'], item['partname'], item['gender'] + + def __len__(self): + return len(self.imgname) + +def optional_dataset(dataset_name, options, is_train=True, use_IUV=False): + if dataset_name == 'up-3d': + return BaseDataset(options, 'up-3d', is_train=is_train, use_IUV=use_IUV) + if dataset_name == 'surreal': + return SurrealDataset(options, is_train=is_train, use_IUV=use_IUV) + + raise ValueError('Undefined dataset') + +def create_dataset(dataset_name, options, is_train=True, use_IUV=False): + mydataset = optional_dataset(dataset_name, options, is_train=is_train, use_IUV=use_IUV) + if use_IUV: + column = ["scale", "center", "orig_shape", "img_orig", "img", "imgname", "has_smpl", "pose", + "betas", 
"has_pose_3d", "pose_3d", "keypoints", "keypoints_smpl", + "pose_3d_smpl", "has_pose_3d_smpl", "maskname", "partname", "gender", + "gt_iuv", "iuvname", "has_dp", "fit_joint_error"] + else: + column = ["scale", "center", "orig_shape", "img_orig", "img", "imgname", "has_smpl", "pose", + "betas", "has_pose_3d", "pose_3d", "keypoints", "keypoints_smpl", + "pose_3d_smpl", "has_pose_3d_smpl", "maskname", "partname", "gender"] + + all_dataset = GeneratorDataset(mydataset, column_names=column, num_parallel_workers=options.num_workers, + shuffle=options.shuffle_train, num_shards=options.group_size, shard_id=options.rank) + + return all_dataset diff --git a/research/cv/DecoMR/datasets/preprocess/__init__.py b/research/cv/DecoMR/datasets/preprocess/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e47eda0ed3847987d95c14738c2ebb20bb8cdc42 --- /dev/null +++ b/research/cv/DecoMR/datasets/preprocess/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +from .up_3d import up_3d_extract +from .surreal import extract_surreal_eval +from .surreal import extract_surreal_train +from .generate_gt_iuv import process_dataset, process_surreal diff --git a/research/cv/DecoMR/datasets/preprocess/generate_gt_iuv.py b/research/cv/DecoMR/datasets/preprocess/generate_gt_iuv.py new file mode 100644 index 0000000000000000000000000000000000000000..081e7c6cc13f29c35e4b49ace192f2c88972a519 --- /dev/null +++ b/research/cv/DecoMR/datasets/preprocess/generate_gt_iuv.py @@ -0,0 +1,287 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os +from os.path import join, exists +import mindspore +from mindspore import ops, Tensor, nn +from utils.renderer import render_IUV +from utils import config as cfg +import numpy as np +import cv2 +from tqdm import tqdm +from models.smpl import SMPL + +def cal_cam(origin_2d, target_2d): + tmp_o = origin_2d - ops.ReduceMean(keep_dims=True)(origin_2d, 0) + tmp_t = target_2d - ops.ReduceMean(keep_dims=True)(target_2d, 0) + scale = (tmp_t * tmp_o).sum() / (tmp_o * tmp_o).sum() + trans = ops.ReduceMean(keep_dims=True)(target_2d, 0) / scale - ops.ReduceMean(keep_dims=True)(origin_2d, 0) + + err = (origin_2d + trans) * scale - target_2d + err = ops.ReduceMean(keep_dims=False)(nn.Norm(axis=1)(err)) + cam = ops.Zeros()(3, mindspore.float32) + + cam[0] = scale + cam[1:] = trans.T + return cam, err + +def process_image(img, joint, pose, beta, smpl, renderer, uv_type): + to_lsp = list(range(14)) + + H, W, _ = img.shape + pose = Tensor(pose, dtype=mindspore.float32) + beta = Tensor(beta, dtype=mindspore.float32) + joint = Tensor(joint, dtype=mindspore.float32) + vertices = smpl(ops.ExpandDims()(pose, 0), ops.ExpandDims()(beta, 0)) + img = img.astype('float') / 255 + + joint3d = smpl.get_joints(vertices)[0, to_lsp] + + origin_2d = joint3d[:, :2] + target_2d = joint[to_lsp, :2] + vis = joint[to_lsp, -1] + origin_2d = origin_2d.asnumpy() + target_2d = target_2d.asnumpy() + i = (vis > 0).asnumpy() + origin_2d = Tensor(origin_2d[i]) + target_2d = Tensor(target_2d[i]) + + target_2d[:, 0] = (2 * target_2d[:, 0] - W) / W + target_2d[:, 1] = (2 * target_2d[:, 1] - H) / W + + cam, _ = cal_cam(origin_2d, target_2d) + uv_tmp = render_IUV(img, vertices[0].asnumpy(), cam.asnumpy(), renderer) + uv_im = np.zeros(uv_tmp.shape) + uv_im[:, :, 0] = 1 - uv_tmp[:, :, 0] + uv_im[:, :, 1] = uv_tmp[:, :, 1] + mask_im = uv_im.max(axis=-1) > 0 + mask_im = mask_im[:, :, np.newaxis] + + uv_im_int = np.around(uv_im * 255).astype('uint8') + mask_im_int = mask_im.astype('uint8') + + iuv_im_out = np.concatenate((mask_im_int, uv_im_int), axis=-1) + return iuv_im_out + +def cal_projection_err(joint, pose, beta, smpl): + to_lsp = list(range(14)) + + pose = Tensor(pose) + beta = Tensor(beta) + joint = Tensor(joint) + vertices = smpl(ops.ExpandDims()(pose, 0), ops.ExpandDims()(beta, 0)) + joint3d = smpl.get_joints(vertices)[0, to_lsp] + + origin_2d = joint3d[:, :2] + target_2d = joint[to_lsp, :2] + vis = joint[to_lsp, -1] + origin_2d = origin_2d[vis > 0] + target_2d = target_2d[vis > 0] + size = (target_2d.max(dim=0)[0] - target_2d.min(dim=0)[0]).max() + _, err = cal_cam(origin_2d, target_2d) + normalized_err = err / (size + 1e-8) + return normalized_err.item() + +def process_dataset(dataset, is_train, uv_type, smpl, renderer): + dataset_file = cfg.DATASET_FILES[is_train][dataset] + data = np.load(dataset_file, allow_pickle=True) + imgnames = data['imgname'] + centers = data['center'] + + keypoints = data['part'] + + flag_fit = False + if dataset in ['coco', 'lsp-orig', 'mpii', 'lspet', 'mpi-inf-3dhp'] and is_train: + flag_fit = True + fit_file = cfg.FIT_FILES[is_train][dataset] + fit_data = np.load(fit_file) + poses = fit_data['pose'].astype(np.float) + betas = fit_data['betas'].astype(np.float) + has_smpl = fit_data['valid_fit'].astype(np.int) + else: + poses = data['pose'] + betas = data['shape'] + has_smpl = np.ones(poses.shape[0]) + + img_dir = cfg.DATASET_FOLDERS[dataset] + + iuv_dir = join(img_dir, '{}_IUV_gt'.format(uv_type)) + if flag_fit: + 
iuv_dir = join(img_dir, '{}_IUV_SPIN_fit'.format(uv_type)) + fit_errors = [] + iuvnames = [] + for i in tqdm(range(len(imgnames))): + + img_path = join(img_dir, imgnames[i]) + + center = np.round(centers[i]).astype('int') + + im_name = imgnames[i] + iuv_name = im_name[:-4] + '_{0}_{1}.png'.format(center[0], center[1]) + iuvnames.append(iuv_name) + + output_path = join(iuv_dir, iuv_name) + if not exists(os.path.dirname(output_path)): + os.makedirs(os.path.dirname(output_path)) + + if not exists(output_path) and has_smpl[i] > 0: + im = cv2.imread(img_path) + joint = keypoints[i] + pose = poses[i] + beta = betas[i] + gt_iuv = process_image(im, joint, pose, beta, smpl, renderer, uv_type) + cv2.imwrite(output_path, gt_iuv) + + if flag_fit: + projection_err = 1.0 + if has_smpl[i] > 0: + joint = keypoints[i] + pose = poses[i] + beta = betas[i] + projection_err = cal_projection_err(joint, pose, beta, smpl) + fit_errors.append(projection_err) + + save_data = dict(data) + save_data['iuv_names'] = iuvnames + if flag_fit: + save_data['fit_errors'] = fit_errors + + np.savez(dataset_file, **save_data) + + return 0 + +# The joint of SURREAL is different from other dataset,so the generation of +# IUV image is also a little different from other datasets. +def process_surreal(is_train, uv_type, renderer): + dataset_file = cfg.DATASET_FILES[is_train]['surreal'] + root_dir = cfg.DATASET_FOLDERS['surreal'] + iuv_dir = join(root_dir, '{}_IUV_gt'.format(uv_type), 'data', 'cmu', 'train') + + smpl_female = SMPL(cfg.FEMALE_SMPL_FILE) + smpl_male = SMPL(cfg.MALE_SMPL_FILE) + H = 240 + W = 320 + img_empty = np.zeros([H, W, 3]) + + data = np.load(dataset_file, allow_pickle=True) + shape_list = data['shape'] + pose_list = data['pose'] + gender_list = data['gender'] + part24_list = data['part_smpl'] + videoname_list = data['videoname'] + framenum_list = data['framenum'] + dataset_size = len(data['gender']) + iuvnames = [] + + for i in tqdm(range(dataset_size)): + + videoname = videoname_list[i] + framenum = framenum_list[i] + iuv_name = videoname[:-4] + '_{}.png'.format(framenum) + output_path = join(iuv_dir, iuv_name) + if not exists(os.path.dirname(output_path)): + os.makedirs(os.path.dirname(output_path)) + iuvnames.append(iuv_name) + + if not exists(output_path): + shape = shape_list[i] + pose = pose_list[i] + gender = gender_list[i] + part24 = part24_list[i, :, :-1] + + pose_t = Tensor.from_numpy(pose).astype('float32') + shape_t = Tensor.from_numpy(shape).astype('float32') + if gender == 'f': + vertices = smpl_female(pose_t.unsqueeze(0), shape_t.unsqueeze(0)) + joint3d = smpl_female.get_smpl_joints(vertices)[0] + else: + vertices = smpl_male(pose_t.unsqueeze(0), shape_t.unsqueeze(0)) + joint3d = smpl_male.get_smpl_joints(vertices)[0] + + origin_2d = joint3d[:, :2] + target_2d = Tensor(part24).astype('float32') + + target_2d[:, 0] = (2 * target_2d[:, 0] - W) / W + target_2d[:, 1] = (2 * target_2d[:, 1] - H) / W + cam, _ = cal_cam(origin_2d, target_2d) + + uv_tmp = render_IUV(img_empty, vertices[0].detach().cpu().numpy(), cam.detach().cpu().numpy(), renderer) + uv_im = np.zeros(uv_tmp.shape) + uv_im[:, :, 0] = 1 - uv_tmp[:, :, 0] + uv_im[:, :, 1] = uv_tmp[:, :, 1] + mask_im = uv_im.max(axis=-1) > 0 + mask_im = mask_im[:, :, np.newaxis] + + uv_im_int = np.around(uv_im * 255).astype('uint8') + mask_im_int = mask_im.astype('uint8') + + iuv_im_out = np.concatenate((mask_im_int, uv_im_int), axis=-1) + + flag_plt = False + if flag_plt: + import matplotlib.pyplot as plt + from skimage.draw import circle + from 
models.dense_cnn import warp_feature + from models.uv_generator import Index_UV_Generator + uv_sampler = Index_UV_Generator(128, uv_type=uv_type) + + video_dir = join(root_dir, 'data', 'cmu', 'train') + cap = cv2.VideoCapture(join(video_dir, videoname)) + cap.set(cv2.CAP_PROP_POS_FRAMES, framenum) + _, img = cap.read() + # the img should be flipped first + img = np.fliplr(img)[:, :, ::-1].copy().astype(np.float32) + + joint = part24 + for j2d in joint[:, :2]: + rr, cc = circle(j2d[1], j2d[0], 2, img.shape[0:2]) + img[rr, cc] = [255, 0, 0] + + plt.subplot(2, 2, 1) + plt.imshow(img[:, :, ::-1] / 255) + + plt.subplot(2, 2, 2) + tmp = iuv_im_out + plt.imshow(tmp[:, :, ::-1]) + + plt.subplot(2, 2, 3) + iuv = Tensor(iuv_im_out).astype('float32') + iuv[:, :, 1:] = iuv[:, :, 1:] / 255.0 + uv_map = warp_feature(ops.ExpandDims()(iuv.transpose(2, 0, 1), 0), + ops.ExpandDims()(Tensor(img).transpose(2, 0, 1), 0), 128) + uv_map = uv_map[0, :3].transpose(1, 2, 0).asnumpy() + plt.imshow(uv_map[:, :, ::-1] / 255) + + plt.subplot(2, 2, 4) + texture = uv_sampler.resample(ops.ExpandDims()(Tensor(uv_map), 0))[0] + vert = (vertices[0, :, :2].cpu() + cam[1:]) * cam[0] + vert[:, 0] = (vert[:, 0] * W + W) / 2 + vert[:, 1] = (vert[:, 1] * W + H) / 2 + + vert = vert.long() + back_img = texture.new_zeros(img.shape) + for v_i in range(vert.shape[0]): + back_img[vert[v_i, 1], vert[v_i, 0], :] = back_img[vert[v_i, 1], vert[v_i, 0], :] + texture[v_i, :] + + plt.imshow(uv_sampler.mask.cpu().numpy()) + plt.imshow(back_img.cpu().numpy()[:, :, ::-1] / 255) + + cv2.imwrite(output_path, iuv_im_out) + + save_data = dict(data) + save_data['iuv_names'] = iuvnames + np.savez(dataset_file, **save_data) + return 0 diff --git a/research/cv/DecoMR/datasets/preprocess/surreal.py b/research/cv/DecoMR/datasets/preprocess/surreal.py new file mode 100644 index 0000000000000000000000000000000000000000..b0cd1e72c6abcd1a7d33b7aeec3616758531dd99 --- /dev/null +++ b/research/cv/DecoMR/datasets/preprocess/surreal.py @@ -0,0 +1,252 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os +from os.path import join +import math +from scipy.io import loadmat +import numpy as np +import cv2 +import transforms3d + + +def rotateBody(RzBody, pelvisRotVec): + angle = np.linalg.norm(pelvisRotVec) + Rpelvis = transforms3d.axangles.axangle2mat(pelvisRotVec / angle, angle) + globRotMat = np.dot(RzBody, Rpelvis) + R90 = transforms3d.euler.euler2mat(np.pi / 2, 0, 0) + globRotAx, globRotAngle = transforms3d.axangles.mat2axangle(np.dot(R90, globRotMat)) + globRotVec = globRotAx * globRotAngle + return globRotVec + +# Extract SURREAL training dataset +def extract_surreal_train(dataset_path, out_path): + shapes_, poses_ = [], [] + scales_, centers_, parts_, S_ = [], [], [], [] + genders_ = [] + videonames_ = [] + framenums_ = [] + + # bbox expansion factor + scaleFactor = 1.2 + height = 240 + width = 320 + + train_path = join(dataset_path, 'cmu', 'train') + dirs1 = os.listdir(train_path) + dirs1.sort() + for dir1 in dirs1: + path_tmp1 = join(train_path, dir1) + dirs2 = os.listdir(path_tmp1) + dirs2.sort() + for dir2 in dirs2: + path_tmp2 = join(path_tmp1, dir2) + info_files = os.listdir(path_tmp2) + info_files.sort() + for info_file in info_files: + if info_file.endswith('_info.mat'): + file_path = join(path_tmp2, info_file) + info = loadmat(file_path) + seq_len = info['gender'].shape[0] + videoname = join(dir1, dir2, info_file.replace('_info.mat', '.mp4')) + print(videoname) + + ind = np.arrange(0, seq_len, 10) + # read GT data + shape = info['shape'][:, ind] + pose = info['pose'][:, ind] + part24 = info['joints2D'][:, :, ind].transpose(1, 0) + joint3d24 = info['joints3D'][:, :, ind].transpose(1, 0) + gender = info['gender'][ind, 0] + zrot = info['zrot'][ind, 0] + + # The video of SURREAL is mirrored, and the 2D joints location are consistent with the video. + # In order to get the image consistent with the SMPL parameters, + # we need to mirror the video and 2D joints. + + part24[:, 0] = width - 1 - part24[:, 0] # Mirror the 2D joints + + bbox = [min(part24[:, 0]), min(part24[:, 1]), + max(part24[:, 0]), max(part24[:, 1])] + center = [(bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2] + scale = scaleFactor * max(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 200 + + # Some frames in SURREAL contains no human, + # so we need to remove the frames where human is outside the image. 
+ if bbox[0] < 0 or bbox[1] < 0 or bbox[2] > width or bbox[3] > height: + continue + + # rotate 3D joint to align with camera + RBody = np.array([[0, 0, 1], + [0, 1, 0], + [-1, 0, 0]]) + joint3d24 = np.dot(RBody, joint3d24.T).T + + # rotate SMPL to align with camera + RzBody = np.array(((math.cos(zrot), -math.sin(zrot), 0), + (math.sin(zrot), math.cos(zrot), 0), + (0, 0, 1))) + pose[0:3] = rotateBody(RzBody, pose[0:3]) + aa = pose[:3] + per_rdg, _ = cv2.Rodrigues(aa) + resrot, _ = cv2.Rodrigues(np.dot(RBody, per_rdg)) + aa_new = (resrot.T)[0] + pose[:3] = aa_new + + # store data + part = np.ones([24, 3]) + part[:, :-1] = part24 + S = np.ones([24, 4]) + S[:, :-1] = joint3d24 + videonames_.append(videoname) + framenums_.append(ind) + genders_.append(gender) + centers_.append(center) + scales_.append(scale) + parts_.append(part) + shapes_.append(shape) + poses_.append(pose) + S_.append(S) + + # # store the data struct + # if not os.path.isdir(out_path): + os.makedirs(out_path) + out_file = os.path.join(out_path, 'surreal_train.npz') + np.savez(out_file, + gender=genders_, + videoname=videonames_, + framenum=framenums_, + center=centers_, + scale=scales_, + pose=poses_, + shape=shapes_, + part_smpl=parts_, + S_smpl=S_) + +# Extract the val dataset of SURREAL. +def extract_surreal_eval(dataset_path, out_path): + + eval_names = [] + with open(join(dataset_path, 'namescmu.txt'), 'r') as f: + for line in f: + tmp = line.split('val/')[1] + tmp = tmp.split('\t')[0] + eval_names.append(tmp) + + # Some val images contain no human body, so we only use the + # meaningful val images as BodyNet. + with open(join(dataset_path, 'valid_list.txt'), 'r') as f: + valid_list = f.readline() + valid_list = valid_list.split('[')[1].split(']')[0] + + valid_list = valid_list[1:-1].split(',') + valid_list = [int(ind) - 1 for ind in valid_list] + valid_eval_names = [eval_names[tmp] for tmp in valid_list] + + shapes_, poses_ = [], [] + scales_, centers_, parts_, S_ = [], [], [], [] + genders_ = [] + videonames_ = [] + framenums_ = [] + + # bbox expansion factor + scaleFactor = 1.2 + width = 320 + + val_path = join(dataset_path, 'cmu', 'val') + for videoname in valid_eval_names: + info_file = videoname[:-4] + '_info.mat' + file_path = join(val_path, info_file) + info = loadmat(file_path) + seq_len = info['gender'].shape[0] + print(videoname) + + if seq_len < 2: # ignore the video with only 1 frame. + continue + + ind = seq_len // 2 # choose the middle frame + + # read GT data + shape = info['shape'][:, ind] + pose = info['pose'][:, ind] + part24 = info['joints2D'][:, :, ind].transpose(1, 0) + joint3d24 = info['joints3D'][:, :, ind].transpose(1, 0) + gender = info['gender'][ind] + gender = 'f' if gender == 0 else 'm' # 0: female; 1: male + zrot = info['zrot'][ind, 0] + + # The video of SURREAL is mirrored, and the 2D joints location are consistent with the video. + # In order to get the image consistent with the SMPL parameters, + # we need to mirror the video and 2D joints. + + part24[:, 0] = width - 1 - part24[:, 0] # Mirror the 2D joints + + bbox = [min(part24[:, 0]), min(part24[:, 1]), + max(part24[:, 0]), max(part24[:, 1])] + center = [(bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2] + scale = scaleFactor * max(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 200 + + # Some frames in SURREAL contains no human, + # so we need to remove the frames where human is outside the image. 
+ if bbox[0] < 0 or bbox[1] < 0 or bbox[2] > width or bbox[3] > height: + continue + + # rotate 3D joint to align with camera + RBody = np.array([[0, 0, 1], + [0, 1, 0], + [-1, 0, 0]]) + joint3d24 = np.dot(RBody, joint3d24.T).T + + # rotate SMPL to align with camera + RzBody = np.array(((math.cos(zrot), -math.sin(zrot), 0), + (math.sin(zrot), math.cos(zrot), 0), + (0, 0, 1))) + pose[0:3] = rotateBody(RzBody, pose[0:3]) + aa = pose[:3] + per_rdg, _ = cv2.Rodrigues(aa) + resrot, _ = cv2.Rodrigues(np.dot(RBody, per_rdg)) + aa_new = (resrot.T)[0] + pose[:3] = aa_new + + # store data + part = np.ones([24, 3]) + part[:, :-1] = part24 + S = np.ones([24, 4]) + S[:, :-1] = joint3d24 + videonames_.append(videoname) + framenums_.append(ind) + genders_.append(gender) + centers_.append(center) + scales_.append(scale) + parts_.append(part) + shapes_.append(shape) + poses_.append(pose) + S_.append(S) + + # store the data struct + if not os.path.isdir(out_path): + os.makedirs(out_path) + out_file = os.path.join(out_path, 'surreal_val.npz') + + np.savez(out_file, + gender=genders_, + videoname=videonames_, + framenum=framenums_, + center=centers_, + scale=scales_, + part_smpl=parts_, + pose=poses_, + shape=shapes_, + S_smpl=S_) diff --git a/research/cv/DecoMR/datasets/preprocess/up_3d.py b/research/cv/DecoMR/datasets/preprocess/up_3d.py new file mode 100644 index 0000000000000000000000000000000000000000..bfaaa782b400374745a9ee276e9fa65172aeaec7 --- /dev/null +++ b/research/cv/DecoMR/datasets/preprocess/up_3d.py @@ -0,0 +1,97 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os +import pickle +import numpy as np +import scipy.misc +from tqdm import tqdm + + +def up_3d_extract(dataset_path, out_path, mode): + # bbox expansion factor + scaleFactor = 1.2 + + # structs we need + imgnames_, scales_, centers_, parts_ = [], [], [], [] + poses_, shapes_ = [], [] + + # training/test splits + if mode == 'trainval': + txt_file = os.path.join(dataset_path, 'trainval.txt') + elif mode == 'lsp_test': + txt_file = 'data/namesUPlsp.txt' + elif mode == 'train': + txt_file = os.path.join(dataset_path, 'train.txt') + elif mode == 'test': + txt_file = os.path.join(dataset_path, 'test.txt') + elif mode == 'val': + txt_file = os.path.join(dataset_path, 'val.txt') + + + file = open(txt_file, 'r') + txt_content = file.read() + imgs = txt_content.split('\n') + for img_i in tqdm(imgs): + # skip empty row in txt + if not img_i: + continue + + # image name + img_base = img_i[1:-10] + img_name = '%s_image.png' % img_base + + keypoints_file = os.path.join(dataset_path, '%s_joints.npy'%img_base) + keypoints = np.load(keypoints_file) + vis = keypoints[2] + keypoints = keypoints[:2].T + + part = np.zeros([24, 3]) + part[:14] = np.hstack([keypoints, np.vstack(vis)]) + + render_name = os.path.join(dataset_path, '%s_render_light.png' % img_base) + I = scipy.misc.imread(render_name) + ys, xs = np.where(np.min(I, axis=2) < 255) + bbox = np.array([np.min(xs), np.min(ys), np.max(xs) + 1, np.max(ys) + 1]) + center = [(bbox[2] + bbox[0]) / 2, (bbox[3] + bbox[1]) / 2] + scale = scaleFactor * max(bbox[2] - bbox[0], bbox[3] - bbox[1]) / 200. + + # pose and shape + pkl_file = os.path.join(dataset_path, '%s_body.pkl' % img_base) + pkl = pickle.load(open(pkl_file, 'rb'), encoding='iso-8859-1') + pose = pkl['pose'] + shape = pkl['betas'] + rt = pkl['rt'] + if max(rt) > 0: + print(rt) + + # store data + imgnames_.append(img_name) + centers_.append(center) + scales_.append(scale) + parts_.append(part) + poses_.append(pose) + shapes_.append(shape) + + # store the data struct + if not os.path.isdir(out_path): + os.makedirs(out_path) + out_file = os.path.join(out_path, 'up_3d_%s.npz' % mode) + np.savez(out_file, imgname=imgnames_, + center=centers_, + scale=scales_, + part=parts_, + pose=poses_, + shape=shapes_) diff --git a/research/cv/DecoMR/datasets/surreal_dataset.py b/research/cv/DecoMR/datasets/surreal_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..1175c113604003974f7a6ec975185f387b17b092 --- /dev/null +++ b/research/cv/DecoMR/datasets/surreal_dataset.py @@ -0,0 +1,309 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os +from os.path import join +import numpy as np +from utils import config as cfg +from utils.imutils import crop, flip_img, flip_pose, flip_kp, transform, rot_aa +import mindspore.dataset.vision.c_transforms as c_vision +import cv2 + + +def flip_smpl_kp(kp): + """Flip SMPL 24 keypoints.""" + flipped_parts = [0, 2, 1, 3, 5, 4, 6, 8, 7, 9, 11, 10, 12, 14, 13, 15, 17, 16, 19, 18, 21, 20, 23, 22] + kp = kp[flipped_parts] + kp[:, 0] = - kp[:, 0] + return kp + +class SurrealDataset: + """ + Base Dataset Class - Handles data loading and augmentation. + Able to handle heterogeneous datasets (different annotations available for different datasets). + You need to update the path to each dataset in utils/config.py. + """ + def __init__(self, options, use_augmentation=True, is_train=True, use_IUV=False): + super(SurrealDataset, self).__init__() + all_dataset = 'surreal' + + self.is_train = is_train + self.options = options + root_dir = cfg.DATASET_FOLDERS[all_dataset] + if self.is_train: + self.video_dir = join(root_dir, 'cmu/train') + else: + self.video_dir = join(root_dir, 'cmu/val') + + self.normalize_img = c_vision.Normalize(mean=cfg.IMG_NORM_MEAN, std=cfg.IMG_NORM_STD) + self.data = np.load(cfg.DATASET_FILES[is_train][all_dataset]) + + self.videoname = self.data['videoname'] + self.framenum = self.data['framenum'] + self.scale = self.data['scale'] + self.center = self.data['center'] + self.use_augmentation = use_augmentation + + # Get gender data, if available + try: + self.gender = self.data['gender'] + #self.gender = np.array([0 if str(g) == 'm' else 1 for g in gender]).astype(np.int32) + except KeyError: + self.gender = -1 * np.ones(len(self.imgname)).astype(np.int32) + + # Get gt SMPL parameters, if available + try: + self.pose = self.data['pose'].astype(float) + self.betas = self.data['shape'].astype(float) + self.has_smpl = np.ones(len(self.imgname)).astype(int) + except KeyError: + self.has_smpl = np.zeros(len(self.imgname)).astype(int) + + # Get gt 3D pose, if available + try: + self.pose_3d_smpl = self.data['S_smpl'] + self.has_pose_3d_smpl = 1 + except KeyError: + self.has_pose_3d_smpl = 0 + + # Get 2D keypoints + try: + self.keypoints_smpl = self.data['part_smpl'] + except KeyError: + self.keypoints_smpl = np.zeros((len(self.scale), 24, 3)) + + self.length = self.scale.shape[0] + + self.use_IUV = use_IUV + self.has_dp = np.zeros(len(self.scale)) + + if self.use_IUV and is_train: + self.iuvname = self.data['iuv_names'] + self.has_dp = self.has_smpl + self.uv_type = options.uv_type + self.iuv_dir = join(root_dir, '{}_IUV_gt'.format(self.uv_type), 'cmu/train') + + + def augm_params(self): + """Get augmentation parameters.""" + flip = 0 # flipping + pn = np.ones(3) # per channel pixel-noise + rot = 0 # rotation + sc = 1 # scaling + if self.is_train: + if self.options.use_augmentation: + # We flip with probability 1/2 + if np.random.uniform() <= 0.5: + flip = 1 + + # Each channel is multiplied with a number + # in the area [1-opt.noiseFactor,1+opt.noiseFactor] + pn = np.random.uniform(1-self.options.noise_factor, 1+self.options.noise_factor, 3) + + # The rotation is a number in the area [-2*rotFactor, 2*rotFactor] + rot = min(2*self.options.rot_factor, + max(-2*self.options.rot_factor, np.random.randn()*self.options.rot_factor)) + + # The scale is multiplied with a number + # in the area [1-scaleFactor,1+scaleFactor] + sc = min(1+self.options.scale_factor, + max(1-self.options.scale_factor, 
np.random.randn()*self.options.scale_factor+1)) + # but it is zero with probability 3/5 + if np.random.uniform() <= 0.6: + rot = 0 + + return flip, pn, rot, sc + + def rgb_processing(self, rgb_img, center, scale, rot, flip, pn): + """Process rgb image and do augmentation.""" + rgb_img = crop(rgb_img, center, scale, + [self.options.img_res, self.options.img_res], rot=rot) + + if flip: + rgb_img = flip_img(rgb_img) + + # in the rgb image we add pixel noise in a channel-wise manner + rgb_img[:, :, 0] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 0] * pn[0])) + rgb_img[:, :, 1] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 1] * pn[1])) + rgb_img[:, :, 2] = np.minimum(255.0, np.maximum(0.0, rgb_img[:, :, 2] * pn[2])) + + rgb_img = rgb_img / 255.0 + return rgb_img + + def smpl_j2d_processing(self, kp, center, scale, r, f): + """Process gt 2D keypoints and apply all augmentation transforms.""" + nparts = kp.shape[0] + for i in range(nparts): + kp[i, 0:2] = transform(kp[i, 0:2] + 1, center, scale, + [self.options.img_res, self.options.img_res], rot=r) + # convert to normalized coordinates + kp[:, :-1] = 2. * kp[:, :-1] / self.options.img_res - 1. + # flip the x coordinates + if f: + kp = flip_kp(kp) + kp = kp.astype('float32') + return kp + + def smpl_j3d_processing(self, S, r, f): + """Process gt 3D keypoints and apply all augmentation transforms.""" + # in-plane rotation + rot_mat = np.eye(3) + if not r == 0: + rot_rad = -r * np.pi / 180 + sn, cs = np.sin(rot_rad), np.cos(rot_rad) + rot_mat[0, :2] = [cs, -sn] + rot_mat[1, :2] = [sn, cs] + S = np.einsum('ij,kj->ki', rot_mat, S) + # flip the x coordinates + if f: + S = flip_smpl_kp(S) + S = S.astype('float32') + return S + + def pose_processing(self, pose, r, f): + """Process SMPL theta parameters and apply all augmentation transforms.""" + # rotation or the pose parameters + pose[:3] = rot_aa(pose[:3], r) + # flip the pose parameters + if f: + pose = flip_pose(pose) + # (72),float + pose = pose.astype('float32') + return pose + + def iuv_processing(self, IUV, center, scale, rot, flip, pn): + """Process rgb image and do augmentation.""" + IUV = crop(IUV, center, scale, + [self.options.img_res, self.options.img_res], rot=rot) + + if flip: + IUV = flip_img(IUV) + if self.uv_type == 'BF': + mask = (IUV[0] > 0).astype('float32') + IUV[1] = (255 - IUV[1]) * mask + else: + print('Flip augomentation for SMPL default UV map is not supported yet.') + IUV = np.transpose(IUV.astype('float32'), (2, 0, 1)) + return IUV + + def __getitem__(self, index): + item = {} + scale = self.scale[index].copy() + center = self.center[index].copy() + + # Get augmentation parameters + flip, pn, rot, sc = self.augm_params() + + # Extract the frame from the video + videoname = self.videoname[index] + frame = self.framenum[index] + cap = cv2.VideoCapture(join(self.video_dir, videoname)) + cap.set(cv2.CAP_PROP_POS_FRAMES, frame) + _, img = cap.read() + # The image should be mirrored first + img = np.fliplr(img)[:, :, ::-1].copy().astype(np.float32) + orig_shape = np.array(img.shape)[:2] + item['scale'] = (sc * scale).astype('float32') + item['center'] = center.astype('float32') + item['orig_shape'] = orig_shape.astype('int32') + + # Process image + img = self.rgb_processing(img, center, sc * scale, rot, flip, pn).astype('float32') + # Store image before normalization to use it in visualization + item['img_orig'] = img.copy() + item['img'] = np.transpose(self.normalize_img(img).astype('float32'), (2, 0, 1)) + item['imgname'] = videoname + '_frame_{}'.format('frame') + + # 
Get SMPL parameters, if available + has_smpl = self.has_smpl[index] + item['has_smpl'] = has_smpl + if has_smpl: + pose = self.pose[index].copy() + betas = self.betas[index].copy() + else: + pose = np.zeros(72) + betas = np.zeros(10) + item['pose'] = self.pose_processing(pose, rot, flip) + item['betas'] = betas.astype('float32') + + # Surreal dataset does NOT provide the ground truth of keypoints. + # The keypoints of SURREAL is the 24 joints defined by SMPL model. + item['keypoints'] = np.zeros((24, 3)).astype('float32') + item['pose_3d'] = np.zeros((24, 4)).astype('float32') + item['has_pose_3d'] = 0 + + # Get 3D and 2D GT SMPL joints (For the compatibility with SURREAL dataset) + item['has_pose_3d_smpl'] = self.has_pose_3d_smpl + if self.has_pose_3d_smpl: + S = self.pose_3d_smpl[index].copy() + St = self.smpl_j3d_processing(S.copy()[:, :-1], rot, flip) + S[:, :-1] = St + item['pose_3d_smpl'] = S.astype('float32') + else: + item['pose_3d_smpl'] = np.zeros((24, 4)).astype('float32') + + # Get 2D keypoints and apply augmentation transforms + keypoints = self.keypoints_smpl[index].copy() + item['keypoints_smpl'] = self.j2d_processing(keypoints, center, sc * scale, rot, flip) + + # Pass path to segmentation mask, if available + # Cannot load the mask because each mask has different size, so they cannot be stacked in one tensor + try: + item['maskname'] = self.maskname[index] + except AttributeError: + item['maskname'] = '' + try: + item['partname'] = self.partname[index] + except AttributeError: + item['partname'] = '' + item['gender'] = self.gender[index] + + if self.use_IUV: + IUV = np.zeros((3, img.shape[1], img.shape[2])).astype('float32') + iuvname = '' + has_dp = self.has_dp[index] + try: + fit_error = self.fit_joint_error[index] + except AttributeError: + fit_error = 0.0 # For the dataset with GT mesh, fit_error is set 0 + + if has_dp: + iuvname = join(self.iuv_dir, str(self.iuvname[index])) + if os.path.exists(iuvname): + IUV = cv2.imread(iuvname).copy() + IUV = self.iuv_processing(IUV, center, sc * scale, rot, flip, pn) # process UV map + else: + has_dp = 0 + print("GT IUV image: {} does not exist".format(iuvname)) + + item['gt_iuv'] = IUV + item['iuvname'] = iuvname + item['has_dp'] = has_dp + item['fit_joint_error'] = fit_error + + if self.use_IUV: + return item['scale'], item['center'], item['orig_shape'], item['img_orig'], item['img'],\ + item['imgname'], item['has_smpl'], item['pose'], item['betas'], item['has_pose_3d'],\ + item['pose_3d'], item['keypoints'], item['keypoints_smpl'], item['pose_3d_smpl'], \ + item['has_pose_3d_smpl'], item['maskname'], item['partname'], item['gender'], item['gt_iuv'],\ + item['iuvname'], item['has_dp'], item['fit_joint_error'] + + return item['scale'], item['center'], item['orig_shape'], item['img_orig'], item['img'], \ + item['imgname'], item['has_smpl'], item['pose'], item['betas'], item['has_pose_3d'], \ + item['pose_3d'], item['keypoints'], item['keypoints_smpl'], item['pose_3d_smpl'], \ + item['has_pose_3d_smpl'], item['maskname'], item['partname'], item['gender'] + + def __len__(self): + return len(self.imgname) diff --git a/research/cv/DecoMR/eval.py b/research/cv/DecoMR/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..d28dc19dce72c6e5fdafd01ed90c823f0e4c1baa --- /dev/null +++ b/research/cv/DecoMR/eval.py @@ -0,0 +1,133 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import os +import time +import numpy as np +import mindspore.numpy as msnp +from mindspore import Tensor +from mindspore import context, ops, load_checkpoint +from datasets.base_dataset import BaseDataset +from datasets.surreal_dataset import SurrealDataset +from datasets.base_dataset import create_dataset +from models import SMPL +from models import dense_cnn, DMR +from models.uv_generator import Index_UV_Generator +from utils import config as cfg +from utils import TrainOptions + + +context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU', device_id=0) + +# Define command-line arguments +options = TrainOptions().parse_args() +options.num_workers = 4 + +def run_evaluation(): + # Create SMPL model + dataset_name = options.dataset + log_freq = options.log_freq + smpl = SMPL() + if dataset_name == 'surreal': + smpl_male = SMPL(cfg.MALE_SMPL_FILE) + smpl_female = SMPL(cfg.FEMALE_SMPL_FILE) + + if dataset_name in ['up-3d', 'surreal']: + eval_shape = True + + CNet = dense_cnn.DPNet(warp_lv=options.warp_level, norm_type=options.norm_type) + LNet = dense_cnn.get_LNet(options) + DMR_model = DMR.DMR(CNet, LNet) + DMR_model.set_train(False) + options.group_size = options.group_size * 3 + batch_size = options.batch_size + + # Create dataloader for the dataset + if dataset_name == 'surreal': + all_dataset = create_dataset('surreal', options, is_train=False, use_IUV=False) + data = SurrealDataset(options, is_train=False, use_IUV=False) + else: + all_dataset = create_dataset('up-3d', options, is_train=False, use_IUV=False) + data = BaseDataset(options, 'up-3d', is_train=False, use_IUV=False) + + shape_err = np.zeros(len(data)) + dataset = all_dataset.batch(options.batch_size) + + print('data loader finish') + + iter1 = dataset.create_dict_iterator() + start_time = time.time() + shape_err_list = msnp.zeros((30)) + for i in range(30): + ckpt = os.path.join(options.eval_dir, 'dmr_{}.ckpt'.format(i+1)) + load_checkpoint(ckpt, net=DMR_model) + for step, batch in enumerate(iter1): + # Get ground truth annotations from the batch + gt_pose = batch['pose'] + gt_betas = batch['betas'] + gt_vertices = smpl(gt_pose, gt_betas) + images = batch['img'] + curr_batch_size = images.shape[0] + uv_res = options.uv_res + uv_type = options.uv_type + sampler = Index_UV_Generator(UV_height=uv_res, UV_width=-1, uv_type=uv_type) + _, pred_uv_map, _ = DMR_model(images) + pred_vertices = sampler.resample(pred_uv_map.astype("float32")).astype("float32") + + # Shape evaluation (Mean per-vertex error) + if eval_shape: + if dataset_name == 'surreal': + gender = batch['gender'] + gt_vertices = smpl_male(gt_pose, gt_betas) + gt_vertices_female = smpl_female(gt_pose, gt_betas) + temp = gt_vertices.asnumpy() + temp[gender == 1, :, :] = gt_vertices_female.asnumpy()[gender == 1, :, :] + gt_vertices = Tensor.from_numpy(temp) + + gt_pelvis_mesh = smpl.get_eval_joints(gt_vertices) + pred_pelvis_mesh = smpl.get_eval_joints(pred_vertices) + gt_pelvis_mesh = (gt_pelvis_mesh[:, [2]] + gt_pelvis_mesh[:, [3]]) / 2 + pred_pelvis_mesh = 
(pred_pelvis_mesh[:, [2]] + pred_pelvis_mesh[:, [3]]) / 2 + + opsum = ops.ReduceSum(keep_dims=True) + opmean = ops.ReduceMean(keep_dims=True) + sqrt = ops.Sqrt() + + se = sqrt(((pred_vertices - pred_pelvis_mesh - gt_vertices + gt_pelvis_mesh) ** 2)) + se = opsum(se, -1).squeeze() + se = opmean(se, -1).squeeze() + + shape_err[step * batch_size:step * batch_size + curr_batch_size] = se + + # Print intermediate results during evaluation + if step % log_freq == log_freq - 1: + if eval_shape: + print('Shape Error: ' + str(1000 * shape_err[:step * batch_size].mean())) + print() + + shape_err_list[i] = shape_err.mean() + + shape_err = min(shape_err_list) + # Print final results during evaluation + print('*** Final Results ***') + print() + if eval_shape: + print('Shape Error: ' + str(1000 * shape_err)) + print('Total_time: ', time.time() - start_time, 's') + print('Performance: ', (time.time() - start_time) * 1000 / (30 * dataset.get_dataset_size()), 'ms/step') + print() + +if __name__ == '__main__': + run_evaluation() diff --git a/research/cv/DecoMR/models/DMR.py b/research/cv/DecoMR/models/DMR.py new file mode 100644 index 0000000000000000000000000000000000000000..813a8e3cd66980cc2b5bbbe73f372b6b7e2c0481 --- /dev/null +++ b/research/cv/DecoMR/models/DMR.py @@ -0,0 +1,32 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +""" +The model of DMR +""" + +from mindspore import nn + +class DMR(nn.Cell): + def __init__(self, CNet, LNet): + super(DMR, self).__init__() + self.CNet = CNet + self.LNet = LNet + + def construct(self, images): + pred_dp, dp_feature, codes = self.CNet(images) + pred_uv_map, pred_camera = self.LNet(pred_dp, dp_feature, codes) + + return pred_dp, pred_uv_map, pred_camera diff --git a/research/cv/DecoMR/models/TrainOneStepDP.py b/research/cv/DecoMR/models/TrainOneStepDP.py new file mode 100644 index 0000000000000000000000000000000000000000..46c84f6370bc6d61b99ad93d5076890965b655a4 --- /dev/null +++ b/research/cv/DecoMR/models/TrainOneStepDP.py @@ -0,0 +1,62 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +from mindspore import ops +from mindspore import nn +from mindspore.parallel._utils import (_get_device_num, _get_gradients_mean, _get_parallel_mode) +from mindspore.context import ParallelMode +from mindspore.ops import composite as C +from mindspore.ops import functional as F +from mindspore.nn.wrap.grad_reducer import DistributedGradReducer + +class TrainOneStepDP(nn.Cell): + def __init__(self, network, optimizer, sens=1.0): + super(TrainOneStepDP, self).__init__(auto_prefix=False) + self.network = network + self.network.set_grad() + self.optimizer = optimizer + self.weights = self.optimizer.parameters + self.grad = C.GradOperation(get_by_list=True, sens_param=True) + self.sens = sens + self.reducer_flag = False + self.grad_reducer = F.identity + self.parallel_mode = _get_parallel_mode() + self.reducer_flag = self.parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL) + if self.reducer_flag: + self.mean = _get_gradients_mean() + self.degree = _get_device_num() + if isinstance(self.optimizer, (nn.AdaSumByGradWrapCell, nn.AdaSumByDeltaWeightWrapCell)): + from mindspore.communication.management import get_group_size, create_group, get_rank + group_number = get_group_size() // 8 + self.degree = int(self.degree / group_number) + group_list = [list(range(x * self.degree, (x + 1) * self.degree)) for x in range(group_number)] + current_index = get_rank() // 8 + server_group_name = "allreduce_" + str(current_index) + create_group(server_group_name, group_list[current_index]) + self.grad_reducer = DistributedGradReducer(self.weights, self.mean, self.degree, + group=server_group_name) + else: + self.grad_reducer = DistributedGradReducer(self.weights, self.mean, self.degree) + + def construct(self, *inputs, **kwargs): + out = self.network(*inputs) + loss, pred_dp, dp_feature, codes = out + sens_tuple = (ops.ones_like(loss) * self.sens,) + for i in range(1, len(out)): + sens_tuple += (ops.zeros_like(out[i]),) + grads = self.grad(self.network, self.weights)(*inputs, sens_tuple) + grads = self.grad_reducer(grads) + loss = F.depend(loss, self.optimizer(grads)) + return loss, pred_dp, dp_feature, codes diff --git a/research/cv/DecoMR/models/TrainOneStepEnd.py b/research/cv/DecoMR/models/TrainOneStepEnd.py new file mode 100644 index 0000000000000000000000000000000000000000..fa0b07c1b557edfa7b62d9b384f6a345e53e8376 --- /dev/null +++ b/research/cv/DecoMR/models/TrainOneStepEnd.py @@ -0,0 +1,62 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +from mindspore import nn +from mindspore import ops +from mindspore.parallel._utils import (_get_device_num, _get_gradients_mean, _get_parallel_mode) +from mindspore.context import ParallelMode +from mindspore.ops import composite as C +from mindspore.ops import functional as F +from mindspore.nn.wrap.grad_reducer import DistributedGradReducer + +class TrainOneStepEnd(nn.Cell): + def __init__(self, network, optimizer, sens=1.0): + super(TrainOneStepEnd, self).__init__(auto_prefix=False) + self.network = network + self.network.set_grad() + self.optimizer = optimizer + self.weights = self.optimizer.parameters + self.grad = C.GradOperation(get_by_list=True, sens_param=True) + self.sens = sens + self.reducer_flag = False + self.grad_reducer = F.identity + self.parallel_mode = _get_parallel_mode() + self.reducer_flag = self.parallel_mode in (ParallelMode.DATA_PARALLEL, ParallelMode.HYBRID_PARALLEL) + if self.reducer_flag: + self.mean = _get_gradients_mean() + self.degree = _get_device_num() + if isinstance(self.optimizer, (nn.AdaSumByGradWrapCell, nn.AdaSumByDeltaWeightWrapCell)): + from mindspore.communication.management import get_group_size, create_group, get_rank + group_number = get_group_size() // 8 + self.degree = int(self.degree / group_number) + group_list = [list(range(x * self.degree, (x + 1) * self.degree)) for x in range(group_number)] + current_index = get_rank() // 8 + server_group_name = "allreduce_" + str(current_index) + create_group(server_group_name, group_list[current_index]) + self.grad_reducer = DistributedGradReducer(self.weights, self.mean, self.degree, + group=server_group_name) + else: + self.grad_reducer = DistributedGradReducer(self.weights, self.mean, self.degree) + + def construct(self, *inputs, **kwargs): + out = self.network(*inputs) + loss_total, CLoss, LLoss = out + sens_tuple = (ops.ones_like(loss_total) * self.sens,) + for i in range(1, len(out)): + sens_tuple += (ops.zeros_like(out[i]),) + grads = self.grad(self.network, self.weights)(*inputs, sens_tuple) + grads = self.grad_reducer(grads) + loss_total = F.depend(loss_total, self.optimizer(grads)) + return loss_total, CLoss, LLoss diff --git a/research/cv/DecoMR/models/WithLossCellDP.py b/research/cv/DecoMR/models/WithLossCellDP.py new file mode 100644 index 0000000000000000000000000000000000000000..894a642f2bd74e65e3b2e07e38204af1d29a2e17 --- /dev/null +++ b/research/cv/DecoMR/models/WithLossCellDP.py @@ -0,0 +1,92 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import mindspore +from mindspore import ops +import mindspore.nn as nn + +class WithLossCellDP(nn.Cell): + def __init__(self, CNet, options, auto_prefix=False): + super(WithLossCellDP, self).__init__(auto_prefix=auto_prefix) + self.CNet = CNet + self.criterion_uv = nn.L1Loss() + self.criterion_mask = nn.BCELoss(reduction='mean') + self.options = options + self.lam_dp_uv = options.lam_dp_uv + self.lam_dp_mask = options.lam_dp_mask + self.adaptive_weight = options.adaptive_weight + self.expand_dims = ops.ExpandDims() + + def error_adaptive_weight(self, fit_joint_error): + weight = (1 - 10 * fit_joint_error) + weight[weight <= 0] = 0 + return weight + + def dp_loss(self, pred_dp, gt_dp, has_dp, weight=None): + + pred_dp_shape = pred_dp * has_dp.astype(pred_dp.dtype).mean() + gt_dp_shape = gt_dp * has_dp.astype(pred_dp.dtype).mean() + + if gt_dp_shape.shape[0] > 0: + gt_dp_shape_temp = self.expand_dims(gt_dp_shape[:, 0], 1) + gt_mask_shape = gt_dp_shape_temp > 0 + gt_mask_shape = gt_mask_shape.astype(pred_dp.dtype) + + gt_uv_shape = gt_dp_shape[:, 1:] + + pred_mask_shape = self.expand_dims(pred_dp_shape[:, 0], 1) + pred_uv_shape = pred_dp_shape[:, 1:] + + interpolate_bilinear = ops.ResizeBilinear((gt_dp.shape[2], gt_dp.shape[3])) + interpolate_nearest = ops.ResizeNearestNeighbor((gt_dp.shape[2], gt_dp.shape[3])) + pred_mask_shape = interpolate_bilinear(pred_mask_shape) + pred_uv_shape = interpolate_nearest(pred_uv_shape) + + if weight is not None: + weight = weight[:, None, None, None] * has_dp.astype(weight.dtype).mean() + else: + weight = 1.0 + + pred_mask_shape = ops.clip_by_value(pred_mask_shape, 0.0, 1.0) + + loss_mask = self.criterion_mask(pred_mask_shape, gt_mask_shape) + gt_uv_weight = (gt_uv_shape.abs().max(axis=1, keepdims=True) > 0).astype(pred_dp.dtype) + weight_ratio = gt_uv_weight.mean(axis=-1).mean(axis=-1)[:, :, None, None] + 1e-8 + gt_uv_weight = gt_uv_weight / weight_ratio + + loss_uv = self.criterion_uv(gt_uv_weight * pred_uv_shape, gt_uv_weight * gt_uv_shape) + loss_uv = (loss_uv * weight).mean() + + return loss_mask, loss_uv + return pred_dp.sum() * 0, pred_dp.sum() * 0 + + def construct(self, *inputs, **kwargs): + dtype = mindspore.float32 + has_dp, images, gt_dp_iuv, fit_joint_error = inputs + pred_dp, dp_feature, codes = self.CNet(images) + + if self.adaptive_weight: + ada_weight = self.error_adaptive_weight(fit_joint_error).astype(dtype) + else: + ada_weight = None + + #loss on dense pose result + loss_dp_mask, loss_dp_uv = self.dp_loss(pred_dp, gt_dp_iuv, has_dp, ada_weight) + loss_dp_mask = loss_dp_mask * self.lam_dp_mask + loss_dp_uv = loss_dp_uv * self.lam_dp_uv + + loss_total = loss_dp_mask + loss_dp_uv + + return loss_total, pred_dp, dp_feature, codes diff --git a/research/cv/DecoMR/models/WithLossCellEnd.py b/research/cv/DecoMR/models/WithLossCellEnd.py new file mode 100644 index 0000000000000000000000000000000000000000..3f157a9e938472e4d5115cb72dca123d1d237116 --- /dev/null +++ b/research/cv/DecoMR/models/WithLossCellEnd.py @@ -0,0 +1,338 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import mindspore +from mindspore import Tensor +import mindspore.nn as nn +from mindspore import ops +from mindspore.ops import constexpr +import mindspore.numpy as np +from models.grid_sample import grid_sample +from models.geometric_layers import orthographic_projection +from models import SMPL +from models.uv_generator import Index_UV_Generator + + +@constexpr +def generate_Tensor(temp): + return Tensor(temp, dtype=mindspore.float32) + +def generate_Tensor_Int(temp): + return Tensor(temp, dtype=mindspore.int32) + +class WithLossCellEnd(nn.Cell): + def __init__(self, DMR, options, uv_weight, tv_factor, auto_prefix=False): + super(WithLossCellEnd, self).__init__(auto_prefix=False) + + self.DMR = DMR + self.sampler = Index_UV_Generator(UV_height=options.uv_res, UV_width=-1, uv_type=options.uv_type) + self.uv_weight = uv_weight + self.tv_factor = tv_factor + + self.options = options + self.adaptive_weight = options.adaptive_weight + self.lam_dp_uv = options.lam_dp_uv + self.lam_dp_mask = options.lam_dp_mask + self.lam_uv = options.lam_uv + self.lam_tv = options.lam_tv + self.lam_mesh = options.lam_mesh + self.lam_key3d = options.lam_key3d + self.lam_key2d = options.lam_key2d + self.lam_key3d_smpl = options.lam_key3d_smpl + self.lam_key2d_smpl = options.lam_key2d_smpl + self.lam_con = options.lam_con + self.gtkey3d_from_mesh = options.gtkey3d_from_mesh + self.use_smpl_joints = options.use_smpl_joints + + self.criterion_mask = nn.BCELoss(reduction='mean') + self.criterion_shape = nn.L1Loss() + self.criterion_uv = nn.L1Loss() + self.criterion_keypoints = nn.MSELoss(reduction='none') + self.criterion_keypoints_3d = nn.L1Loss(reduction='none') + self.criterion_regr = nn.MSELoss() + self.expand_dims = ops.ExpandDims() + self.abs = ops.Abs() + self.sum = ops.ReduceSum() + self.ones = ops.Ones() + self.cat2 = ops.Concat(2) + self.meshgrid = ops.Meshgrid(indexing="ij") + self.stack = ops.Stack(axis=0) + self.grid_sample = grid_sample() + self.smpl = SMPL() + self.fill = ops.Fill() + + def error_adaptive_weight(self, fit_joint_error): + weight = (1 - 10 * fit_joint_error) + weight[weight <= 0] = 0 + return weight + + def dp_loss(self, pred_dp, gt_dp, has_dp, weight=None): + + pred_dp_shape = pred_dp * has_dp.astype(pred_dp.dtype).mean() + gt_dp_shape = gt_dp * has_dp.astype(pred_dp.dtype).mean() + + if gt_dp_shape.shape[0] > 0: + gt_dp_shape_temp = self.expand_dims(gt_dp_shape[:, 0], 1) + gt_mask_shape = gt_dp_shape_temp > 0 + gt_mask_shape = gt_mask_shape.astype(pred_dp.dtype) + + gt_uv_shape = gt_dp_shape[:, 1:] + + pred_mask_shape = self.expand_dims(pred_dp_shape[:, 0], 1) + pred_uv_shape = pred_dp_shape[:, 1:] + + interpolate_bilinear = ops.ResizeBilinear((gt_dp.shape[2], gt_dp.shape[3])) + interpolate_nearest = ops.ResizeNearestNeighbor((gt_dp.shape[2], gt_dp.shape[3])) + pred_mask_shape = interpolate_bilinear(pred_mask_shape) + pred_uv_shape = interpolate_nearest(pred_uv_shape) + + if weight is not None: + weight = weight[:, None, None, None] * has_dp.astype(weight.dtype).mean() + else: + weight = 1.0 + + 
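+            # Note: nn.BCELoss expects its input to be probabilities in [0, 1], so the
+            # predicted mask is clipped defensively here before the mask loss is computed.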
pred_mask_shape = ops.clip_by_value(pred_mask_shape, 0.0, 1.0) + + loss_mask = self.criterion_mask(pred_mask_shape, gt_mask_shape) + gt_uv_weight = (gt_uv_shape.abs().max(axis=1, keepdims=True) > 0).astype(pred_dp.dtype) + weight_ratio = gt_uv_weight.mean(axis=-1).mean(axis=-1)[:, :, None, None] + 1e-8 + gt_uv_weight = gt_uv_weight / weight_ratio + + loss_uv = self.criterion_uv(gt_uv_weight * pred_uv_shape, gt_uv_weight * gt_uv_shape) + loss_uv = (loss_uv * weight).mean() + + return loss_mask, loss_uv + + return pred_dp.sum() * 0, pred_dp.sum() * 0 + + def uv_loss(self, pred_uv_map, gt_uv_map, has_smpl, weight=None): + + uv_weight = self.uv_weight.astype(pred_uv_map.dtype) + pred_uv_map_shape = pred_uv_map * has_smpl.astype(pred_uv_map.dtype).mean() + gt_uv_map_with_shape = gt_uv_map * has_smpl.astype(pred_uv_map.dtype).mean() + if gt_uv_map_with_shape.shape[0] > 0: + if weight is not None: + ada_weight = weight[:, None, None, None] * has_smpl.astype(weight.dtype).mean() + else: + ada_weight = 1.0 + loss = self.criterion_uv(pred_uv_map_shape * uv_weight, gt_uv_map_with_shape * uv_weight) + loss = (loss * ada_weight).mean() + return loss + + return self.fill(mindspore.float32, (1,), 0) + + def tv_loss(self, uv_map): + tv = self.abs(uv_map[:, 0:-1, 0:-1, :] - uv_map[:, 0:-1, 1:, :]) \ + + self.abs(uv_map[:, 0:-1, 0:-1, :] - uv_map[:, 1:, 0:-1, :]) + return self.sum(tv) / self.tv_factor + + def shape_loss(self, pred_vertices, gt_vertices, has_smpl, weight=None): + + pred_vertices_with_shape = pred_vertices * has_smpl.astype(pred_vertices.type).mean() + gt_vertices_with_shape = gt_vertices * has_smpl.astype(pred_vertices.type).mean() + + if weight is not None: + weight = weight[:, None, None] * has_smpl.astype(weight.dtype).mean() + else: + weight = 1 + + if gt_vertices_with_shape.shape[0] > 0: + loss = self.criterion_shape(pred_vertices_with_shape, gt_vertices_with_shape) + loss = (loss * weight).mean() + return loss + + return self.fill(mindspore.float32, (1,), 0) + + + def keypoint_3d_loss(self, pred_keypoints_3d, gt_keypoints_3d, has_pose_3d, weight=None): + + if gt_keypoints_3d.shape[2] == 3: + tmp = self.ones((gt_keypoints_3d.shape[0], gt_keypoints_3d.shape[1], 1), gt_keypoints_3d.dtype) + gt_keypoints_3d = self.cat2((gt_keypoints_3d, tmp)) + + conf = self.expand_dims(gt_keypoints_3d[:, :, -1], -1).copy() + gt_keypoints_3d = gt_keypoints_3d[:, :, :-1].copy() + gt_keypoints_3d = gt_keypoints_3d * has_pose_3d.astype(gt_keypoints_3d.dtype).mean() + conf = conf * has_pose_3d.astype(conf.dtype).mean() + + if weight is not None: + weight = weight[:, None, None] * has_pose_3d.astype(weight.dtype).mean() + conf = conf * weight + + pred_keypoints_3d = pred_keypoints_3d * has_pose_3d.astype(pred_keypoints_3d.dtype).mean() + if gt_keypoints_3d.shape[0] > 0: + # Align the origin of the first 24 keypoints with the pelvis. 
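+            # In the 24-joint ordering used here, indices 2 and 3 correspond to the two hip
+            # joints, so the pelvis is approximated as their midpoint; subtracting it from both
+            # prediction and ground truth makes the 3D keypoint loss invariant to translation.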
+ gt_pelvis = (gt_keypoints_3d[:, 2, :] + gt_keypoints_3d[:, 3, :]) / 2 + pred_pelvis = (pred_keypoints_3d[:, 2, :] + pred_keypoints_3d[:, 3, :]) / 2 + gt_keypoints_3d = gt_keypoints_3d - gt_pelvis[:, None, :] + pred_keypoints_3d = pred_keypoints_3d - pred_pelvis[:, None, :] + + return (conf * self.criterion_keypoints_3d(pred_keypoints_3d, gt_keypoints_3d)).mean() + + return self.fill(mindspore.float32, (1,), 0) + + def smpl_keypoint_3d_loss(self, pred_keypoints_3d, gt_keypoints_3d, has_pose_3d, weight=None): + + if gt_keypoints_3d.shape[2] == 3: + tmp = self.ones((gt_keypoints_3d.shape[0], gt_keypoints_3d.shape[1], 1), gt_keypoints_3d.dtype) + gt_keypoints_3d = self.cat2((gt_keypoints_3d, tmp)) + + conf = self.expand_dims(gt_keypoints_3d[:, :, -1], -1).copy() + gt_keypoints_3d = gt_keypoints_3d[:, :, :-1].copy() + gt_keypoints_3d = gt_keypoints_3d * has_pose_3d.astype(gt_keypoints_3d.dtype).mean() + conf = conf * has_pose_3d.astype(conf.dtype).mean() + + if weight is not None: + weight = weight[:, None, None] * has_pose_3d.astype(weight.dtype).mean() + conf = conf * weight + + pred_keypoints_3d = pred_keypoints_3d * has_pose_3d.astype(pred_keypoints_3d.dtype).mean() + if gt_keypoints_3d.shape[0] > 0: + + gt_root_joint = gt_keypoints_3d[:, 0, :] + pred_root_joint = pred_keypoints_3d[:, 0, :] + gt_keypoints_3d = gt_keypoints_3d - gt_root_joint[:, None, :] + pred_keypoints_3d = pred_keypoints_3d - pred_root_joint[:, None, :] + + return (conf * self.criterion_keypoints_3d(pred_keypoints_3d, gt_keypoints_3d)).mean() + + return self.fill(mindspore.float32, (1,), 0) + + def keypoint_loss(self, pred_keypoints_2d, gt_keypoints_2d, weight=None): + + if gt_keypoints_2d.shape[2] == 3: + conf = self.expand_dims(gt_keypoints_2d[:, :, -1], -1).copy() + else: + conf = 1 + + if weight is not None: + weight = weight[:, None, None] + conf = conf * weight + + loss = (conf * self.criterion_keypoints(pred_keypoints_2d, gt_keypoints_2d[:, :, :-1])).mean() + return loss + + def consistent_loss(self, dp, uv_map, camera, weight=None): + + tmp = np.arange(0, dp.shape[-1], 1) / (dp.shape[-1] - 1) + tmp = generate_Tensor_Int(tmp) + tmp = tmp * 2 - 1 + loc_y, loc_x = self.meshgrid((tmp, tmp)) + + loc = ops.BroadcastTo((dp.shape[0], -1, -1, -1))(self.stack((loc_x, loc_y))) + dp_mask = self.expand_dims((dp[:, 0] > 0.5).astype(mindspore.float32), 1) + loc = dp_mask * loc + + dp_tmp = dp_mask * (dp[:, 1:] * 2 - 1) + + uv_map = uv_map[:, :, :, :-1] + camera = camera.view(-1, 1, 1, 3) + uv_map = uv_map + camera[:, :, :, 1:] + uv_map = uv_map * self.expand_dims(camera[:, :, :, 0], -1) + warp_loc = self.grid_sample(uv_map.transpose(0, 3, 1, 2), dp_tmp.transpose(0, 2, 3, 1))[:, :2] + warp_loc = warp_loc * dp_mask + + if weight is not None: + weight = weight[:, None, None, None] + dp_mask = dp_mask * weight + + loss_con = nn.MSELoss()(warp_loc * dp_mask, loc * dp_mask) + return loss_con + + + def construct(self, *inputs, **kwargs): + + imges, has_dp, has_smpl, has_pose_3d, has_pose_3d_smpl, gt_dp_iuv, gt_uv_map, gt_vertices, \ + fit_joint_error, gt_keypoints_2d, gt_keypoints_3d, gt_keypoints_2d_smpl, gt_keypoints_3d_smpl = inputs + + pred_dp, pred_uv_map, pred_camera = self.DMR(imges) + + sampled_vertices = self.sampler.resample(pred_uv_map.astype("float32")).astype("float32") + + if self.adaptive_weight: + ada_weight = self.error_adaptive_weight(fit_joint_error).astype("float32") + else: + ada_weight = None + + loss_dp_mask, loss_dp_uv = self.dp_loss(pred_dp, gt_dp_iuv, has_dp, ada_weight) + loss_dp_mask = loss_dp_mask * 
self.lam_dp_mask + loss_dp_uv = loss_dp_uv * self.lam_dp_uv + CLoss = loss_dp_mask + loss_dp_uv + + loss_uv = self.uv_loss(gt_uv_map.astype("float32"), pred_uv_map.astype("float32"), + has_smpl, ada_weight).astype("float32") * self.lam_uv + + loss_tv = 0.0 + if self.lam_tv > 0: + loss_tv = self.tv_loss(pred_uv_map) * self.lam_tv + + loss_mesh = 0.0 + #loss on mesh + if self.lam_mesh > 0: + loss_mesh = self.shape_loss(sampled_vertices, gt_vertices, + has_smpl, ada_weight) * self.lam_mesh + + batch_size = pred_dp.shape[0] + weight_key = self.ones((batch_size), mindspore.float32) + if self.gtkey3d_from_mesh: + if ada_weight is not None: + weight_key = ada_weight + has_pose_3d = self.ones((batch_size), mindspore.float32) + gt_keypoints_3d_mesh = self.smpl.get_train_joints(gt_vertices) + gt_keypoints_3d_mesh = ops.Concat(-1)((gt_keypoints_3d_mesh, + self.ones((batch_size, 24, 1), gt_keypoints_3d_mesh.dtype))) + gt_keypoints_3d = gt_keypoints_3d_mesh + + + sampled_joints_3d = self.smpl.get_train_joints(sampled_vertices) + loss_keypoints_3d = self.keypoint_3d_loss(sampled_joints_3d, gt_keypoints_3d, has_pose_3d, weight_key) + loss_keypoints_3d = loss_keypoints_3d * self.lam_key3d + + sampled_joints_2d = orthographic_projection(sampled_joints_3d, pred_camera)[:, :, :2] + loss_keypoints_2d = self.keypoint_loss(sampled_joints_2d, gt_keypoints_2d) * self.lam_key2d + + loss_keypoints_3d_smpl = 0.0 + loss_keypoints_2d_smpl = 0.0 + weight_key_smpl = self.ones((batch_size), mindspore.float32) + if self.gtkey3d_from_mesh: + if ada_weight is not None: + weight_key_smpl = ada_weight + has_pose_3d = self.ones((batch_size), mindspore.float32) + gt_keypoints_3d_mesh = self.smpl.get_train_joints(gt_vertices) + gt_keypoints_3d_mesh = ops.Concat(-1)((gt_keypoints_3d_mesh, + self.ones((batch_size, 24, 1), gt_keypoints_3d_mesh.dtype))) + gt_keypoints_3d_smpl = gt_keypoints_3d_mesh + + if self.use_smpl_joints: + sampled_joints_3d_smpl = self.smpl.get_smpl_joints(sampled_vertices) + loss_keypoints_3d_smpl = self.smpl_keypoint_3d_loss(sampled_joints_3d_smpl, + gt_keypoints_3d_smpl, has_pose_3d_smpl, weight_key_smpl) + loss_keypoints_3d_smpl = loss_keypoints_3d_smpl * self.lam_key3d_smpl + + sampled_joints_2d_smpl = orthographic_projection(sampled_joints_3d_smpl, pred_camera)[:, :, :2] + loss_keypoints_2d_smpl = self.keypoint_loss(sampled_joints_2d_smpl, gt_keypoints_2d_smpl) \ + *self.lam_key2d_smpl + + #consistent loss + loss_con = 0.0 + if not self.lam_con == 0: + loss_con = self.consistent_loss(gt_dp_iuv, pred_uv_map, pred_camera, ada_weight) * self.lam_con + + LLoss = loss_uv + loss_tv + loss_keypoints_3d + loss_keypoints_2d + loss_con + \ + loss_keypoints_3d_smpl + loss_keypoints_2d_smpl + loss_tv + loss_mesh + + loss_total = CLoss + LLoss + + return loss_total, CLoss, LLoss diff --git a/research/cv/DecoMR/models/__init__.py b/research/cv/DecoMR/models/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..cb863fbbd8267a5cef13195ba7c0e94f3e2a0959 --- /dev/null +++ b/research/cv/DecoMR/models/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +from .smpl import SMPL +from .geometric_layers import rodrigues diff --git a/research/cv/DecoMR/models/dense_cnn.py b/research/cv/DecoMR/models/dense_cnn.py new file mode 100644 index 0000000000000000000000000000000000000000..87ab5dae8b9e709aa2f44b9a85afbd1247e62c90 --- /dev/null +++ b/research/cv/DecoMR/models/dense_cnn.py @@ -0,0 +1,268 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import os +import mindspore +import mindspore.numpy as msnp +from mindspore import nn, Tensor, ops, load_checkpoint, Parameter +import numpy as np +from models.upsample import Upsample +from models.resnet import resnet50 +from models.layer import ConvBottleNeck, HgNet +from models.uv_generator import Index_UV_Generator +from utils.objfile import read_obj + +# Warp elements in image space to UV space. +def warp_feature(dp_out, feature_map, uv_res): + """ + C: channel number of the input feature map; H: height; W: width + + :param dp_out: IUV image in shape (batch_size, 3, H, W) + :param feature_map: Local feature map in shape (batch_size, C, H, W) + :param uv_res: The resolution of the transferred feature map in UV space. + + :return: warped_feature: Feature map in UV space with shape (batch_size, C+3, uv_res, uv_res) + The x, y coordinates in the image space and mask will be added as the last 3 channels + of the warped feature, so the channel number of warped feature is C+3. + """ + + expand_dims = ops.ExpandDims() + dp_mask = expand_dims(dp_out[:, 0], 1) # I channel, confidence of being foreground + dp_uv = dp_out[:, 1:] # UV channels, UV coordinates + thre = 0.5 # The threshold of foreground and background. + B, C, H, W = feature_map.shape + + # Get the sampling index of every pixel in batch_size dimension. + index_batch = msnp.arange(0, B, dtype=mindspore.int64)[:, None, None].repeat(H, 1).repeat(W, 2) + index_batch = index_batch.view(-1).astype("int64") + + # Get the sampling index of every pixel in H and W dimension. + tmp_x = msnp.arange(0, W, dtype=mindspore.int64) + tmp_y = msnp.arange(0, H, dtype=mindspore.int64) + + meshgrid = ops.Meshgrid(indexing="ij") + y, x = meshgrid((tmp_y, tmp_x)) + + y = ops.Tile()(y.view(-1), (1, B))[0] + x = ops.Tile()(x.view(-1), (1, B))[0] + + # Sample the confidence of every pixel, + # and only preserve the pixels belong to foreground. 
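+    # dp_mask has shape (B, 1, H, W); indexing it with the flattened batch/row/column
+    # index vectors gathers one confidence value per pixel of every image in the batch,
+    # giving a (B*H*W,) vector that is then thresholded into a foreground mask.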
+ conf = dp_mask[index_batch, 0, y, x] + valid = conf > thre + ind = valid.astype('int64') + + warped_feature = Tensor(msnp.zeros((B, uv_res, uv_res, C + 3))).transpose(0, 3, 1, 2) + if ind.sum() == 0: + warped_feature = warped_feature + + elif ind.sum() != 0: + index_batch = mindspore.Tensor(index_batch.asnumpy()) + y = mindspore.Tensor(y.asnumpy()) + x = mindspore.Tensor(x.asnumpy()) + uv = dp_uv[index_batch, :, y, x] + num_pixel = uv.shape[0] + # Get the corresponding location in UV space + uv = uv * (uv_res - 1) + m_round = ops.Round() + uv_round = m_round(uv).astype("int64").clip(xmin=0, xmax=uv_res-1) + + # We first process the transferred feature in shape (batch_size * H * W, C+3), + # so we need to get the location of each pixel in the two-dimension feature vector. + index_uv = (uv_round[:, 1] * uv_res + uv_round[:, 0]).copy() + index_batch * uv_res * uv_res + + # Sample the feature of foreground pixels + sampled_feature = feature_map[index_batch, :, y, x] + # Scale x,y coordinates to [-1, 1] and + # concatenated to the end of sampled feature as extra channels. + y = (2 * y.astype("float32") / (H - 1)) - 1 + x = (2 * x.astype("float32") / (W - 1)) - 1 + concat = ops.Concat(-1) + sampled_feature = concat([sampled_feature, x[:, None], y[:, None]]) + + zeros = ops.Zeros() + ones = ops.Ones() + # Multiple pixels in image space may be transferred to the same location in the UV space. + # warped_w is used to record the number of the pixels transferred to every location. + warped_w = zeros((B * uv_res * uv_res, 1), sampled_feature.dtype) + index_add = ops.IndexAdd(axis=0) + warped_w = index_add(warped_w, index_uv.astype("int32"), ones((num_pixel, 1), sampled_feature.dtype)) + + # Transfer the sampled feature to UV space. + # Feature vectors transferred to the sample location will be accumulated. + warped_feature = zeros((B * uv_res * uv_res, C + 2), sampled_feature.dtype) + warped_feature = index_add(warped_feature, index_uv.astype("int32"), sampled_feature) + + # Normalize the accumulated feature with the pixel number. + warped_feature = warped_feature/(warped_w + 1e-8) + # Concatenate the mask channel at the end. 
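+    # warped_w counts how many image pixels were transferred to each UV texel, so
+    # (warped_w > 0) is a binary visibility mask marking texels that received at least
+    # one pixel; it becomes the last of the C+3 channels of the warped feature.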
+ warped_feature = concat([warped_feature, (warped_w > 0).astype("float32")]) + # Reshape the shape to (batch_size, C+3, uv_res, uv_res) + warped_feature = warped_feature.reshape(B, uv_res, uv_res, C + 3).transpose(0, 3, 1, 2) + return warped_feature + +# DPNet:returns densepose result +class DPNet(nn.Cell): + def __init__(self, warp_lv=2, norm_type='BN'): + super(DPNet, self).__init__() + nl_layer = nn.ReLU() + self.warp_lv = warp_lv + # image encoder + self.resnet = resnet50(pretrained=True) + # dense pose line + dp_layers = [] + # correspond res[224, 112, 56, 28, 14, 7] + channel_list = [3, 64, 256, 512, 1024, 2048] + for i in range(warp_lv, 5): + in_channels = channel_list[i + 1] + out_channels = channel_list[i] + + dp_layers.append( + nn.SequentialCell( + Upsample(), + ConvBottleNeck(in_channels=in_channels, out_channels=out_channels, + nl_layer=nl_layer, norm_type=norm_type) + ) + ) + + self.dp_layers = nn.CellList(dp_layers) + self.dp_uv_end = nn.SequentialCell(ConvBottleNeck(channel_list[warp_lv], 32, nl_layer, norm_type=norm_type), + nn.Conv2d(32, 2, kernel_size=1, has_bias=True, bias_init='normal'), + nn.Sigmoid()) + + self.dp_mask_end = nn.SequentialCell(ConvBottleNeck(channel_list[warp_lv], 32, nl_layer, norm_type=norm_type), + nn.Conv2d(32, 1, kernel_size=1, has_bias=True, bias_init='normal'), + nn.Sigmoid()) + + def construct(self, image, UV=None): + codes, features = self.resnet(image) + # output densepose results + dp_feature = features[-1] + for i in range(len(self.dp_layers) - 1, -1, -1): + dp_feature = self.dp_layers[i](dp_feature) + dp_feature = dp_feature + features[i - 1 + len(features) - len(self.dp_layers)] + dp_uv = self.dp_uv_end(dp_feature) + dp_mask = self.dp_mask_end(dp_feature) + ops_cat = ops.Concat(1) + dp_out = ops_cat((dp_mask, dp_uv)) + + return dp_out, dp_feature, codes + +def Pretrained_DPNet(warp_level, norm_type, pretrained=False): + + model_file = "ckpt/rank0/CNet_5.ckpt" + model = DPNet(warp_lv=warp_level, norm_type=norm_type) + + if pretrained: + load_checkpoint(model_file, net=model) + return model + +def get_LNet(options): + if options.model == 'DecoMR': + uv_net = UVNet(uv_channels=options.uv_channels, + uv_res=options.uv_res, + warp_lv=options.warp_level, + uv_type=options.uv_type, + norm_type=options.norm_type) + return uv_net + +# UVNet returns location map +class UVNet(nn.Cell): + def __init__(self, uv_channels=64, uv_res=128, warp_lv=2, uv_type='SMPL', norm_type='BN'): + super(UVNet, self).__init__() + + nl_layer = nn.ReLU() + self.fc_head = nn.SequentialCell( + nn.Dense(2048, 512), + nn.BatchNorm1d(512), + nl_layer, + nn.Dense(512, 256) + ) + self.camera = nn.SequentialCell( + nn.Dense(2048, 512), + nn.BatchNorm1d(512), + nl_layer, + nn.Dense(512, 256), + nn.BatchNorm1d(256), + nn.LeakyReLU(), + nn.Dense(256, 3) + ) + + self.warp_lv = warp_lv + channel_list = [3, 64, 256, 512, 1024, 2048] + warp_channel = channel_list[warp_lv] + self.uv_res = uv_res + self.warp_res = int(256 // (2**self.warp_lv)) + + if uv_type == "SMPL": + ref_file = 'data/SMPL_ref_map_{0}.npy'.format(self.warp_res) + elif uv_type == 'BF': + ref_file = 'data/BF_ref_map_{0}.npy'.format(self.warp_res) + if not os.path.exists(ref_file): + sampler = Index_UV_Generator(UV_height=self.warp_res, uv_type=uv_type) + ref_vert, _ = read_obj('data/reference_mesh.obj') + ref_map = sampler.get_UV_map(Tensor.from_numpy(ref_vert).astype("float32")) + np.save(ref_file, ref_map.asnumpy()) + self.ref_map = Parameter(Tensor.from_numpy(np.load(ref_file)).astype("float32").transpose(0, 3, 1, 
2), + name="ref_map", requires_grad=False) + + self.uv_conv1 = nn.SequentialCell( + nn.Conv2d(256 + warp_channel + 3 + 3, 2 * warp_channel, kernel_size=1, has_bias=True, bias_init='normal'), + nl_layer, + nn.Conv2d(2 * warp_channel, 2 * warp_channel, kernel_size=1, has_bias=True, bias_init='normal'), + nl_layer, + nn.Conv2d(2 * warp_channel, warp_channel, kernel_size=1, has_bias=True, bias_init='normal')) + + uv_lv = 0 if uv_res == 256 else 1 + self.hg = HgNet(in_channels=warp_channel, level=5-warp_lv, nl_layer=nl_layer, norm_type=norm_type) + + cur = min(8, 2 ** (warp_lv - uv_lv)) + prev = cur + self.uv_conv2 = ConvBottleNeck(warp_channel, uv_channels * cur, nl_layer, norm_type=norm_type) + + layers = [] + for lv in range(warp_lv, uv_lv, -1): + cur = min(prev, 2 ** (lv - uv_lv - 1)) + layers.append( + nn.SequentialCell(Upsample(), + ConvBottleNeck(uv_channels * prev, uv_channels * cur, nl_layer, norm_type=norm_type)) + ) + prev = cur + self.decoder = nn.SequentialCell(layers) + self.uv_end = nn.SequentialCell(ConvBottleNeck(uv_channels, 32, nl_layer, norm_type=norm_type), + nn.Conv2d(32, 3, kernel_size=1, has_bias=True, bias_init='normal')) + + def construct(self, dp_out, dp_feature, codes): + n_batch = dp_out.shape[0] + local_feature = warp_feature(dp_out, dp_feature, self.warp_res) + + global_feature = self.fc_head(codes) + + shape_1 = (-1, -1, self.warp_res, self.warp_res) + global_feature = ops.BroadcastTo(shape_1)(global_feature[:, :, None, None]) + + self.ref_map = self.ref_map.astype(local_feature.dtype) + concat = ops.Concat(1) + shape_2 = (n_batch, -1, -1, -1) + uv_map = concat((local_feature, global_feature, ops.BroadcastTo(shape_2)(self.ref_map))) + + uv_map = self.uv_conv1(uv_map) + uv_map = self.hg(uv_map) + uv_map = self.uv_conv2(uv_map) + uv_map = self.decoder(uv_map) + uv_map = self.uv_end(uv_map).transpose(0, 2, 3, 1) + cam = self.camera(codes) + + return uv_map, cam diff --git a/research/cv/DecoMR/models/geometric_layers.py b/research/cv/DecoMR/models/geometric_layers.py new file mode 100644 index 0000000000000000000000000000000000000000..ab75b0cd36c29dbcf9ab67801aa1f3ed45bb57ab --- /dev/null +++ b/research/cv/DecoMR/models/geometric_layers.py @@ -0,0 +1,73 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import mindspore.nn as nn +from mindspore import ops + +def rodrigues(theta): + """Convert axis-angle representation to rotation matrix. + Args: + theta: size = [B, 3] + Returns: + Rotation matrix corresponding to the quaternion -- size = [B, 3, 3] + """ + net = nn.Norm(axis=1) + l1norm = net(theta + 1e-8) + angle = ops.ExpandDims()(l1norm, -1) + normalized = ops.Div()(theta, angle) + angle = angle*0.5 + v_cos = ops.Cos()(angle) + v_sin = ops.Sin()(angle) + quat = ops.Concat(axis=1)((v_cos, v_sin*normalized)) + return quat2mat(quat) + +def quat2mat(quat): + """Convert quaternion coefficients to rotation matrix. 
+ Args: + quat: size = [B, 4] 4 <===>(w, x, y, z) + Returns: + Rotation matrix corresponding to the quaternion -- size = [B, 3, 3] + """ + norm_quat = quat + y = nn.Norm(axis=1, keep_dims=True)(norm_quat) + x = norm_quat + + norm_quat = x / y + w, x, y, z = norm_quat[:, 0], norm_quat[:, 1], norm_quat[:, 2], norm_quat[:, 3] + + B = quat.shape[0] + + w2, x2, y2, z2 = ops.Pow()(w, 2), ops.Pow()(x, 2), ops.Pow()(y, 2), ops.Pow()(z, 2) + wx, wy, wz = w*x, w*y, w*z + xy, xz, yz = x*y, x*z, y*z + + rotMat = ops.Stack(axis=1)([w2 + x2 - y2 - z2, 2*xy - 2*wz, 2*wy + 2*xz, + 2*wz + 2*xy, w2 - x2 + y2 - z2, 2*yz - 2*wx, + 2*xz - 2*wy, 2*wx + 2*yz, w2 - x2 - y2 + z2]).view(B, 3, 3) + return rotMat + +def orthographic_projection(X, camera): + """Perform orthographic projection of 3D points X using the camera parameters + Args: + X: size = [B, N, 3] + camera: size = [B, 3] + Returns: + Projected 2D points -- size = [B, N, 2] + """ + camera = camera.view(-1, 1, 3) + X_trans = X[:, :, :2]+camera[:, :, 1:] + shape = X_trans.shape + X_2d = (camera[:, :, 0] * X_trans.view(shape[0], -1)).view(shape) + return X_2d diff --git a/research/cv/DecoMR/models/grid_sample.py b/research/cv/DecoMR/models/grid_sample.py new file mode 100644 index 0000000000000000000000000000000000000000..e81681c9db0cfb7dcc47ed55d86eea54b1322280 --- /dev/null +++ b/research/cv/DecoMR/models/grid_sample.py @@ -0,0 +1,61 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import mindspore as ms +from mindspore import ops, nn +from mindspore import numpy as msnp + + +def clip_coordinates(ind, clip_limit): + return ops.clip_by_value(ind, 0, clip_limit - 1) + +class grid_sample(nn.Cell): + def __init__(self): + super(grid_sample, self).__init__() + self.gather = ops.GatherNd() + self.concat = ops.Concat(1) + + def construct(self, input_tens, grid): + B, C, H, W = input_tens.shape + _, IH, IW, _ = grid.shape + B_ind = ops.cast(msnp.arange(B).repeat(C * IH * IW), ms.int32).reshape((-1, 1)) + C_ind = ops.cast(msnp.arange(C).repeat(IH * IW), ms.int32).reshape((-1, 1)) + C_ind = ops.Tile()(C_ind, (B, 1)) + + iy = ops.Tile()((((grid[..., 1] + 1) / 2) * (H - 1)).reshape((-1, 1)), (C, 1)) + ix = ops.Tile()((((grid[..., 0] + 1) / 2) * (W - 1)).reshape((-1, 1)), (C, 1)) + + ix_nw = clip_coordinates(ops.floor(ix), W) + iy_nw = clip_coordinates(ops.floor(iy), H) + ix_se = clip_coordinates(ix_nw + 1, W) + iy_se = clip_coordinates(iy_nw + 1, H) + + nw_ind = self.concat((B_ind, C_ind, ops.cast(iy_nw, ms.int32), ops.cast(ix_nw, ms.int32))) + nw = self.gather(input_tens, nw_ind) + ne_ind = self.concat((B_ind, C_ind, ops.cast(iy_nw, ms.int32), ops.cast(ix_se, ms.int32))) + ne = self.gather(input_tens, ne_ind) + sw_ind = self.concat((B_ind, C_ind, ops.cast(iy_se, ms.int32), ops.cast(ix_nw, ms.int32))) + sw = self.gather(input_tens, sw_ind) + se_ind = self.concat((B_ind, C_ind, ops.cast(iy_se, ms.int32), ops.cast(ix_se, ms.int32))) + se = self.gather(input_tens, se_ind) + + nw_w = ops.absolute(((ix_se - ix) * (iy_se - iy)).reshape((-1,))) + ne_w = ops.absolute(((ix - ix_nw) * (iy_se - iy)).reshape((-1,))) + sw_w = ops.absolute(((ix_se - ix) * (iy - iy_nw)).reshape((-1,))) + se_w = ops.absolute(((ix - ix_nw) * (iy - iy_nw)).reshape((-1,))) + + output = nw_w * nw + ne_w * ne + sw_w * sw + se_w * se + + return output.reshape((B, C, IH, IW)) diff --git a/research/cv/DecoMR/models/layer.py b/research/cv/DecoMR/models/layer.py new file mode 100644 index 0000000000000000000000000000000000000000..a5128d5f8773ea193a83eb8834c2a5466e69ee6e --- /dev/null +++ b/research/cv/DecoMR/models/layer.py @@ -0,0 +1,142 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +from __future__ import division +from models.upsample import Upsample +import mindspore.nn as nn + +class FCBlock(nn.Cell): + """Wrapper around nn.Linear that includes batch normalization and activation functions.""" + def __init__(self, in_size, out_size, batchnorm=True, activation=nn.ReLU(), dropout=False): + super(FCBlock, self).__init__() + module_list = [nn.Dense(in_size, out_size)] + if batchnorm: + module_list.append(nn.BatchNorm1d(out_size)) + if activation is not None: + module_list.append(activation) + if dropout: + module_list.append(dropout) + self.fc_block = nn.SequentialCell(*module_list) + + def construct(self, x): + return self.fc_block(x) + +class ConvBottleNeck(nn.Cell): + """ + the Bottleneck Residual Block in ResNet + """ + + def __init__(self, in_channels, out_channels, nl_layer=nn.ReLU(), norm_type='GN'): + super(ConvBottleNeck, self).__init__() + + self.nl_layer = nl_layer + self.in_channels = in_channels + self.out_channels = out_channels + self.conv1 = nn.Conv2d(in_channels, out_channels // 2, kernel_size=1, has_bias=True, bias_init='normal') + self.conv2 = nn.Conv2d(out_channels // 2, out_channels // 2, kernel_size=3, pad_mode="pad", padding=1, + has_bias=True, bias_init='normal') + self.conv3 = nn.Conv2d(out_channels // 2, out_channels, kernel_size=1, has_bias=True, bias_init='normal') + + if norm_type == 'BN': + affine = True + self.norm1 = nn.BatchNorm2d(out_channels // 2, affine=affine) + self.norm2 = nn.BatchNorm2d(out_channels // 2, affine=affine) + self.norm3 = nn.BatchNorm2d(out_channels, affine=affine) + else: + self.norm1 = nn.GroupNorm((out_channels // 2) // 8, (out_channels // 2)) + self.norm2 = nn.GroupNorm((out_channels // 2) // 8, (out_channels // 2)) + self.norm3 = nn.GroupNorm(out_channels // 8, out_channels) + + if in_channels != out_channels: + self.skip_conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, has_bias=True, bias_init='normal') + + def construct(self, x): + + residual = x + + y = self.conv1(x) + y = self.norm1(y) + y = self.nl_layer(y) + + y = self.conv2(y) + y = self.norm2(y) + y = self.nl_layer(y) + + y = self.conv3(y) + y = self.norm3(y) + + if self.in_channels != self.out_channels: + residual = self.skip_conv(residual) + y += residual + y = self.nl_layer(y) + return y + +# A net similar to hourglass, used in the UV net (Location Net) +class HgNet(nn.Cell): + def __init__(self, in_channels, level, nl_layer=nn.ReLU(), norm_type='GN'): + super(HgNet, self).__init__() + + down_layers = [] + up_layers = [] + + if norm_type == 'GN': + self.norm = nn.GroupNorm(in_channels // 8, in_channels) + else: + affine = True + self.norm = nn.BatchNorm2d(in_channels, affine=affine) + + for _ in range(level): + out_channels = in_channels * 2 + down_layers.append( + nn.SequentialCell( + ConvBottleNeck(in_channels=in_channels, out_channels=out_channels, nl_layer=nl_layer, + norm_type=norm_type), nn.MaxPool2d(kernel_size=2, stride=2) + ) + ) + up_layers.append( + nn.SequentialCell( + Upsample(), + ConvBottleNeck(in_channels=out_channels, out_channels=in_channels, + nl_layer=nl_layer, norm_type=norm_type) + ) + ) + in_channels = out_channels + self.down_layers = nn.CellList(down_layers) + self.up_layers = nn.CellList(up_layers) + + def construct(self, x): + + feature_list = [] + + y = x + for i in range(len(self.down_layers)): + feature_list.append(y) + y = self.down_layers[i](y) + for i in range(len(self.down_layers) - 1, -1, -1): + y = self.up_layers[i](y) + 
feature_list[i] + y = self.norm(y) + return y + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, has_bias=False) + + +def deconv3x3(in_planes, out_planes, stride=1): + return nn.Sequential( + Upsample(), + nn.ReflectionPad2d(1), + nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=1, pad_model="valid") + ) diff --git a/research/cv/DecoMR/models/resnet.py b/research/cv/DecoMR/models/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..21a929df935abf131748fb0c61aeca03a0e777a5 --- /dev/null +++ b/research/cv/DecoMR/models/resnet.py @@ -0,0 +1,254 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import mindspore as ms +import mindspore.nn as nn +from mindspore import load_checkpoint + +affine_par = True + + +def conv3x3(in_planes, out_planes, stride=1): + return nn.Conv2d(in_planes, out_planes, kernel_size=3, padding="same", stride=stride, has_bias=False) + + +class Bottleneck(nn.Cell): + """ + Bottleneck layer + """ + expansion = 4 + + def __init__(self, in_planes, planes, stride=1, dilation_=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, has_bias=False) + self.bn1 = nn.BatchNorm2d(planes, affine=affine_par) + padding = 1 + if dilation_ == 2: + padding = 2 + elif dilation_ == 4: + padding = 4 + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=padding, pad_mode="pad", has_bias=False, + dilation=dilation_) + + self.bn2 = nn.BatchNorm2d(planes, affine=affine_par) + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, has_bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4, affine=affine_par) + for i in self.bn3.get_parameters(): + i.requires_grad = False + self.relu = nn.ReLU() + self.downsample = downsample + self.stride = stride + + def construct(self, x): + """ + forword + """ + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Cell): + """ + resnet + """ + def __init__(self, block, layers): + self.in_planes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, pad_mode="pad", + has_bias=False) + self.bn1 = nn.BatchNorm2d(64, affine=affine_par) + self.relu = nn.ReLU() + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") # change + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, 
layers[3], stride=2, dilation=2) + self.avgpool = nn.AvgPool2d(7, stride=1) + + def _make_layer(self, block, planes, blocks, stride=1, dilation=1): + """ + make layer + """ + downsample = None + if stride != 1 or self.in_planes != planes * block.expansion or dilation == 2 or dilation == 4: + downsample = nn.SequentialCell( + nn.Conv2d(self.in_planes, planes * block.expansion, + kernel_size=1, stride=stride, has_bias=False), + nn.BatchNorm2d(planes * block.expansion, affine=affine_par), + ) + layers = [block(self.in_planes, planes, stride, dilation_=dilation, downsample=downsample)] + self.in_planes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.in_planes, planes, dilation_=dilation)) + + return nn.SequentialCell(*layers) + + def load_pretrained_model(self, model_file): + """ + load pretrained model + """ + load_checkpoint(model_file, net=self) + + def construct(self, x): + """ + forward + """ + feature_list = [] + feature_list.append(x) + + x1 = self.conv1(x) + + x1 = self.bn1(x1) + x1 = self.relu(x1) + feature_list.append(x1) + + x2 = self.maxpool(x1) + x2 = self.layer1(x2) + feature_list.append(x2) + + x3 = self.layer2(x2) + feature_list.append(x3) + + x4 = self.layer3(x3) + feature_list.append(x4) + + x5 = self.layer4(x4) + feature_list.append(x5) + y = self.avgpool(x5) + y = y.view(y.shape[0], -1) + + return y, feature_list + +class AdaptiveAvgPool2D(nn.Cell): + """ + AdaptiveAvgPool2D layer + """ + def __init__(self, output_size): + super(AdaptiveAvgPool2D, self).__init__() + self.adaptive_avg_pool = ms.ops.AdaptiveAvgPool2D(output_size) + + def construct(self, x): + """ + forward + :param x: + :return: + """ + return self.adaptive_avg_pool(x) + + +class ResNetLocate(nn.Cell): + """ + resnet for resnet101 + """ + def __init__(self, block, layers): + super(ResNetLocate, self).__init__() + self.resnet = ResNet(block, layers) + self.in_planes = 512 + self.out_planes = [512, 256, 256, 128] + + self.ppms_pre = nn.Conv2d(2048, self.in_planes, 1, 1, bias=False) + ppms, infos = [], [] + for ii in [1, 3, 5]: + ppms.append( + nn.SequentialCell(AdaptiveAvgPool2D(ii), + nn.Conv2d(self.in_planes, self.in_planes, 1, 1, has_bias=False), + nn.ReLU())) + self.ppms = nn.CellList(ppms) + + self.ppm_cat = nn.SequentialCell(nn.Conv2d(self.in_planes * 4, self.in_planes, 3, 1, 1, bias=False), + nn.ReLU()) + for ii in self.out_planes: + infos.append(nn.SequentialCell(nn.Conv2d(self.in_planes, ii, 3, 1, 1, bias=False), nn.ReLU())) + self.infos = nn.CellList(infos) + + self.resize_bilinear = nn.ResizeBilinear() + self.cat = ms.ops.Concat(axis=1) + + def load_pretrained_model(self, model): + self.resnet.load_state_dict(model) + + def construct(self, x): + """ + forward + """ + xs = self.resnet(x) + + xs_1 = self.ppms_pre(xs[-1]) + xls = [xs_1] + for k in range(len(self.ppms)): + xls.append(self.resize_bilinear(self.ppms[k](xs_1), xs_1.size()[2:], align_corners=True)) + xls = self.ppm_cat(self.cat(xls)) + top_score = None + + infos = [] + for k in range(len(self.infos)): + infos.append(self.infos[k]( + self.resize_bilinear(xls, xs[len(self.infos) - 1 - k].size()[2:], align_corners=True))) + + return xs, top_score, infos + + +def resnet50(pretrained=False): + """Constructs a ResNet-50 model. + """ + model_file = "data/ms_resnet50.ckpt" + model = ResNet(Bottleneck, [3, 4, 6, 3]) + if pretrained: + load_checkpoint(model_file, net=model) + return model + + +def resnet101(pretrained=False): + """Constructs a ResNet-101 model. 
+ """ + model_file = "" + model = ResNetLocate(Bottleneck, [3, 4, 23, 3]) + if pretrained: + load_checkpoint(model_file, net=model) + return model + + +if __name__ == "__main__": + name = "resnet50" + net = resnet50() + num_params = 0 + num_layers = 0 + for n, param in net.parameters_and_names(): + if "moving_" in n: + continue + num_params += param.size + num_layers += 1 + print(name) + print(net) + print(f"The number of layers: {num_layers}") + print(f"The number of parameters: {num_params}") diff --git a/research/cv/DecoMR/models/smpl.py b/research/cv/DecoMR/models/smpl.py new file mode 100644 index 0000000000000000000000000000000000000000..4fa71aae5ba319670fb0ce80634d91ce043017aa --- /dev/null +++ b/research/cv/DecoMR/models/smpl.py @@ -0,0 +1,266 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +from __future__ import division + +import mindspore as ms +from mindspore import ops, Tensor, nn, Parameter +from mindspore.ops import constexpr +import numpy as np +from scipy.sparse import coo_matrix + +try: + import cPickle as pickle +except ImportError: + import pickle + +from models.geometric_layers import rodrigues +import utils.config as cfg + +@constexpr +def generate_Tensor_att(): + return Tensor([0, 0, 0, 1], dtype=ms.float32) + +def generate_Tensor_np(temp): + return Tensor(temp) + +def generate_Tensor(temp): + return Tensor(temp, dtype=ms.float32) + +def generate_Tensor_int(temp): + return Tensor(temp, dtype=ms.int64) + +def generate_Tensor_at(temp): + return Tensor.from_numpy(temp.astype(np.int64)) + +def generate_Tensor_atf(temp): + return Tensor.from_numpy(temp.astype('float32')) + + +def sparse_to_dense(x, y, shape): + return coo_matrix((y, x), shape, dtype=np.float).todense() + +def to_int(temp): + return int(temp) + +def Einsum(x, y): + return Tensor(np.einsum('bik,ji->bjk', x, y), dtype=ms.float32) + +class SMPL(nn.Cell): + def __init__(self, model_file=cfg.SMPL_FILE): + super(SMPL, self).__init__() + with open(model_file, "rb") as f: + smpl_model = pickle.load(f, encoding="iso-8859-1") + J_regressor = smpl_model["J_regressor"].tocoo() + row = J_regressor.row + col = J_regressor.col + data = J_regressor.data + i = generate_Tensor([row, col]) + v = generate_Tensor(data) + J_regressor_shape = [24, 6890] + + a1 = i.asnumpy() + v1 = v.asnumpy() + J_regressor = sparse_to_dense(a1, v1, J_regressor_shape) + J_regressor = generate_Tensor(J_regressor) + + self.J_regressor = Parameter(J_regressor, name="J_regressor", requires_grad=False) + + weights = generate_Tensor(smpl_model['weights']) + self.weights = Parameter(weights, name="weights", requires_grad=False) + + posedirs = generate_Tensor(smpl_model['posedirs']) + self.posedirs = Parameter(posedirs, name="posedirs", requires_grad=False) + + v_template = generate_Tensor(smpl_model['v_template']) + self.v_template = Parameter(v_template, name="v_template", requires_grad=False) + + shapdirs = 
generate_Tensor(np.array(smpl_model['shapedirs'])) + self.shapedirs = Parameter(shapdirs, name="shapedirs", requires_grad=False) + + faces = generate_Tensor_at(smpl_model['f']) + self.faces = Parameter(faces, name="faces", requires_grad=False) + + kintree_table = generate_Tensor_at(smpl_model['kintree_table']) + self.kintree_table = Parameter(kintree_table, name="kintree_table", requires_grad=False) + + id_to_col = {to_int(self.kintree_table[1, i].asnumpy()): i for i in range(self.kintree_table.shape[1])} + + parent = generate_Tensor_int([id_to_col[to_int(kintree_table[0, it].asnumpy())] + for it in range(1, kintree_table.shape[1])]) + self.parent = Parameter(parent, name="parent", requires_grad=False) + + self.pose_shape = [24, 3] + self.beta_shape = [10] + self.translation_shape = [3] + + self.pose = ms.numpy.zeros(self.pose_shape) + self.beta = ms.numpy.zeros(self.beta_shape) + self.translation = ms.numpy.zeros(self.translation_shape) + + self.verts = None + self.J = None + self.R = None + + J_regressor_extra = generate_Tensor_atf(np.load(cfg.JOINT_REGRESSOR_TRAIN_EXTRA)) + self.J_regressor_extra = Parameter(J_regressor_extra, name="J_regressor_extra", requires_grad=False) + + self.joints_idx = cfg.JOINTS_IDX + + h36m_regressor_cmr = generate_Tensor(np.load(cfg.JOINT_REGRESSOR_H36M)) + self.h36m_regressor_cmr = Parameter(h36m_regressor_cmr, name="h36m_regressor_cmr", requires_grad=False) + + lsp_regressor_cmr = generate_Tensor(np.load(cfg.JOINT_REGRESSOR_H36M)) + self.lsp_regressor_cmr = Parameter(lsp_regressor_cmr[cfg.H36M_TO_J14], name="lsp_regressor_cmr", + requires_grad=False) + + lsp_regressor_eval = generate_Tensor(np.load(cfg.LSP_REGRESSOR_EVAL)) + self.lsp_regressor_eval = Parameter(lsp_regressor_eval.transpose(1, 0), name="lsp_regressor_eval", + requires_grad=False) + + # We hope the training and evaluation regressor for the lsp joints to be consistent, + # so we replace parts of the training regressor. 
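+        # The 24-joint training regressor is assembled by stacking the SMPL joint regressor with
+        # the extra joint regressor, picking the rows listed in cfg.JOINTS_IDX, and overwriting
+        # the rows of the 14 LSP joints (the `idx` list below) with the evaluation LSP regressor.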
+ train_regressor = ops.Concat(0)((self.J_regressor, self.J_regressor_extra)) + train_regressor = train_regressor[[cfg.JOINTS_IDX]].copy() + idx = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18] + train_regressor[idx] = self.lsp_regressor_eval + self.train_regressor = Parameter(train_regressor, name="train_regressor", requires_grad=False) + + def construct(self, pose, beta): + batch_size = pose.shape[0] + if batch_size == 0: + return ops.Zeros()((0, 6890, 3), pose.dtype) + v_template = self.v_template[None, :] + + broadcast_to = ops.BroadcastTo((batch_size, -1, -1)) + shapedirs = broadcast_to(self.shapedirs.view(-1, 10)[None, :]) + + beta = beta[:, :, None] + v_shaped = ops.matmul(shapedirs, beta).view(-1, 6890, 3) + v_template + + J = [] + for i in range(batch_size): + J.append(ops.matmul(self.J_regressor, v_shaped[i])) + J = ops.Stack(axis=0)(J) + + pose_cube = pose.view(-1, 3) + R = pose if (pose.ndim == 4) else rodrigues(pose_cube).view(batch_size, 24, 3, 3) + + I_cube = ops.Eye()(3, 3, pose.dtype)[None, None, :] + + lrotmin = (R[:, 1:, :] - I_cube).view(batch_size, -1) + broadcast_to = ops.BroadcastTo((batch_size, -1, -1)) + posedirs = broadcast_to(self.posedirs.view(-1, 207)[None, :]) + v_posed = v_shaped + ops.matmul(posedirs, lrotmin[:, :, None]).view(-1, 6890, 3) + + J_ = J.copy() + J_[:, 1:, :] = J[:, 1:, :] - J[:, self.parent, :] + G_ = ops.Concat(axis=-1)([R, J_[:, :, :, None]]) + + broadcast_too = ops.BroadcastTo((batch_size, 24, -1, -1)) + pad = generate_Tensor_att() + + pad_row = broadcast_too(pad.view(1, 1, 1, 4)) + + G_ = ops.Concat(axis=2)([G_, pad_row]) + G = G_.copy() + + for i in range(1, 24): + G[:, i, :, :] = ops.matmul(G[:, self.parent[i - 1], :, :], G_[:, i, :, :]) + + rest = ops.Concat(axis=2)((J, ops.Zeros()((batch_size, 24, 1), pose.dtype))).view(batch_size, 24, 4, 1) + + zeros = ops.Zeros()((batch_size, 24, 4, 3), pose.dtype) + rest = ops.Concat(axis=-1)((zeros, rest)) + rest = ops.matmul(G, rest) + G = G - rest + T = ops.matmul(self.weights, G.transpose(1, 0, 2, 3).view(24, -1)).\ + view(6890, batch_size, 4, 4).transpose(1, 0, 2, 3) + + rest_shape_h = ops.Concat(axis=-1)([v_posed, ops.OnesLike()(v_posed)[:, :, [0]]]) + v = ops.matmul(T, rest_shape_h[:, :, :, None])[:, :, :3, 0] + + return v + + # The function used for outputting the 24 training joints. + def get_joints(self, vertices): + """ + This method is used to get the joint locations from the SMPL mesh + Input: + vertices: size = (B, 6890, 3) + Output: + 3D joints: size = (B, 24, 3) + """ + joints = Einsum(vertices.asnumpy(), self.J_regressor.asnumpy()) + joints_extra = Einsum(vertices.asnumpy(), self.J_regressor_extra.asnumpy()) + joints = ops.Concat(axis=1)((joints, joints_extra)) + joints = joints[:, cfg.JOINTS_IDX] + return joints + + # The function used for getting 38 joints. + def get_full_joints(self, vertices): + """ + This method is used to get the joint locations from the SMPL mesh + Input: + vertices: size = (B, 6890, 3) + Output: + 3D joints: size = (B, 38, 3) + """ + + joints = Einsum(vertices, self.J_regressor_temp) + joints_extra = Einsum(vertices, self.J_regressor_extra_temp) + joints = ops.Concat(axis=1)((joints, joints_extra)) + return joints + + # Get 14 lsp joints use the joint regressor. + def get_lsp_joints(self, vertices): + joints = ops.matmul(self.lsp_regressor_cmr[None, :], vertices) + return joints + + # Get the joints defined by SMPL model. 
+ def get_smpl_joints(self, vertices): + """ + This method is used to get the SMPL model joint locations from the SMPL mesh + Input: + vertices: size = (B, 6890, 3) + Output: + 3D joints: size = (B, 24, 3)ijk,ikn->ijn + """ + joints = Einsum(vertices, self.J_regressor_temp) + return joints + + # Get 24 training joints using the evaluation LSP joint regressor. + def get_train_joints(self, vertices): + """ + This method is used to get the training 24 joint locations from the SMPL mesh + Input: + vertices: size = (B, 6890, 3) + Output: + 3D joints: size = (B, 24, 3) + """ + joints = ops.matmul(self.train_regressor[None, :], vertices) + return joints + + # Get 14 lsp joints for the evaluation. + def get_eval_joints(self, vertices): + """ + This method is used to get the 14 eval joint locations from the SMPL mesh + Input: + vertices: size = (B, 6890, 3) + Output: + 3D joints: size = (B, 14, 3) + """ + joints = ops.matmul(self.lsp_regressor_eval[None, :], vertices) + return joints diff --git a/research/cv/DecoMR/models/upsample.py b/research/cv/DecoMR/models/upsample.py new file mode 100644 index 0000000000000000000000000000000000000000..7f8a2731172f184d86de85c3d54a680bb4d34511 --- /dev/null +++ b/research/cv/DecoMR/models/upsample.py @@ -0,0 +1,26 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +from __future__ import division +import mindspore.nn as nn + +class Upsample(nn.Cell): + def __init__(self): + super(Upsample, self).__init__() + self.upsample = nn.ResizeBilinear() + + def construct(self, x): + y = self.upsample(x, scale_factor=2) + return y diff --git a/research/cv/DecoMR/models/uv_generator.py b/research/cv/DecoMR/models/uv_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..95689c0b446e1e02773920f3466c8d97f3528d84 --- /dev/null +++ b/research/cv/DecoMR/models/uv_generator.py @@ -0,0 +1,186 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os +from os.path import join +import pickle +import numpy as np +import mindspore +from mindspore import nn, Tensor +import mindspore.ops as ops +from mindspore.ops import constexpr +from models.grid_sample import grid_sample + +# ''' +# Index_UV_Generator is used to transform mesh and location map +# The verts is in shape (B * V *C) +# The UV map is in shape (B * H * W * C) +# B: batch size; V: vertex number; C: channel number +# H: height of uv map; W: width of uv map +# ''' + +@constexpr +def generate_Tensor_np(temp): + return Tensor(temp) + +def generate_Tensor(temp): + return Tensor(temp, dtype=mindspore.float32) + +def generate_Tensor_int64(temp): + return Tensor(temp, dtype=mindspore.int64) + +def generate_Tensor_int32(temp): + return Tensor(temp, dtype=mindspore.int32) + +def generate_Tensor_int8(temp): + return Tensor(temp, dtype=mindspore.int8) + + +class Index_UV_Generator(nn.Cell): + def __init__(self, UV_height, UV_width=-1, uv_type='BF', data_dir=None): + super(Index_UV_Generator, self).__init__() + + self.grid_sample = grid_sample() + if uv_type == "SMPL": + obj_file = "smpl_fbx_template.obj" + elif uv_type == "BF": + obj_file = "smpl_boundry_free_template.obj" + + self.uv_type = uv_type + + if data_dir is None: + d = os.path.dirname(__file__) + data_dir = os.path.join(d, "data", "uv_sampler") + data_dir = 'data/uv_sampler' + self.data_dir = data_dir + self.h = UV_height + self.w = self.h if UV_width < 0 else UV_width + self.obj_file = obj_file + self.para_file = 'paras_h{:04d}_w{:04d}_{}.npz'.format(self.h, self.w, self.uv_type) + + if not os.path.isfile(join(data_dir, self.para_file)): + self.process() + + para = np.load(join(data_dir, self.para_file)) + + self.v_index = generate_Tensor_int64(para["v_index"]) + self.bary_weights = generate_Tensor(para["bary_weights"]) + + self.vt2v = generate_Tensor_int32(para['vt2v']) + self.vt_count = generate_Tensor(para['vt_count']) + + + self.texcoords = generate_Tensor(para['texcoords']) + + self.mask = generate_Tensor_int8(para['mask'].astype('uint8')) + + def get_UV_map(self, verts): + self.bary_weights = self.bary_weights.astype(verts.dtype) + self.v_index = self.v_index + + if verts.ndim == 2: + expand_dims = ops.ExpandDims() + verts = expand_dims(verts, 0) + + im = verts[:, self.v_index, :] + bw = self.bary_weights[:, :, None, :] + + squeeze = ops.Squeeze(axis=3) + + im = squeeze(ops.matmul(bw, im)) + + return im + + def resample(self, uv_map): + batch_size, _, _, channel_num = uv_map.shape + v_num = self.vt_count.shape[0] + self.texcoords = self.texcoords.astype(uv_map.dtype) + self.vt2v = self.vt2v + self.vt_count = self.vt_count.astype(uv_map.dtype) + + shape = (batch_size, -1, -1, -1) + broadcast_to = ops.BroadcastTo(shape) + + uv_grid = broadcast_to(self.texcoords[None, None, :, :]) + + perm = (0, 3, 1, 2) + transpose = ops.Transpose() + uv_map = transpose(uv_map, perm) + vt = self.grid_sample(uv_map, uv_grid) + squeeze = ops.Squeeze(axis=2) + vt = squeeze(vt).transpose(0, 2, 1) + + zeros = ops.Zeros() + v = zeros((batch_size, v_num, channel_num), vt.dtype) + + index_add = ops.IndexAdd(axis=1) + v = index_add(v, self.vt2v, vt) + v = v / self.vt_count[None, :, None] + return v + + # just used for the generation of GT UVmaps + def forward(self, verts): + return self.get_UV_map(verts) + +# Compute the weight map in UV space according to human body parts. 
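+# Hard-to-regress parts (hands, feet, toes) get larger weights; the per-vertex part labels are
+# splatted onto the UV map through the sampler, and the resulting per-pixel weight map is
+# normalized to mean 1 and saved as the .npy file used as the UV loss weight during training.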
+def cal_uv_weight(sampler, out_path): + + with open('../data/segm_per_v_overlap.pkl', 'rb') as f: + tmp = pickle.load(f) + + part_names = ['hips', + 'leftUpLeg', + 'rightUpLeg', + 'spine', + 'leftLeg', + 'rightLeg', + 'spine1', + 'leftFoot', + 'rightFoot', + 'spine2', + 'leftToeBase', + 'rightToeBase', + 'neck', + 'leftShoulder', + 'rightShoulder', + 'head', + 'leftArm', + 'rightArm', + 'leftForeArm', + 'rightForeArm', + 'leftHand', + 'rightHand', + 'leftHandIndex1', + 'rightHandIndex1'] + + part_weight = [1, 5, 5, 1, 5, 5, 1, 25, 25, 1, 25, 25, 2, 1, 1, 2, 5, 5, 5, 5, 25, 25, 25, 25] + part_weight = generate_Tensor_int64(part_weight) + + zeros = ops.Zeros() + vert_part = zeros((6890, 24), mindspore.float32) + for i in range(24): + key = part_names[i] + verts = tmp[key] + vert_part[verts, i] = 1 + + + squeeze = ops.Squeeze(axis=0) + part_map = squeeze(sampler.get_UV_map(vert_part)) + part_map = part_map > 0 + weight_map = part_weight[None, None, :].astype("float32") * part_map.astype("float32") + weight_map = weight_map.max(axis=-1) + weight_map = weight_map / weight_map.mean() + + np.save(out_path, weight_map.asnumpy()) diff --git a/research/cv/DecoMR/preprocess_datasets.py b/research/cv/DecoMR/preprocess_datasets.py new file mode 100644 index 0000000000000000000000000000000000000000..4debb42f78e6c0355c2492584ff5770b94515f9e --- /dev/null +++ b/research/cv/DecoMR/preprocess_datasets.py @@ -0,0 +1,75 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import argparse +from utils import config as cfg +from utils import objfile +from utils.renderer import UVRenderer +from models import SMPL +import numpy as np + + +from datasets.preprocess import \ + up_3d_extract, \ + process_dataset + +import mindspore.context as context + +context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU', device_id=3) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--train_files', default=False, action='store_true', help='Extract files needed for training') + parser.add_argument('--eval_files', default=False, action='store_true', help='Extract files needed for evaluation') + parser.add_argument('--gt_iuv', default=True, action='store_true', help='Extract files needed for evaluation') + parser.add_argument('--uv_type', type=str, default='BF', choices=['BF', 'SMPL']) + + args = parser.parse_args() + + # define path to store extra files + out_path = cfg.DATASET_NPZ_PATH + + print("cfg.UP_3D_ROOT= ", cfg.UP_3D_ROOT) + print("out_path = ", out_path) + if args.train_files: + + # UP-3D dataset preprocessing (trainval set) + up_3d_extract(cfg.UP_3D_ROOT, out_path, 'trainval') + + + if args.eval_files: + + # UP-3D dataset preprocessing (lsp_test set) + up_3d_extract(cfg.UP_3D_ROOT, out_path, 'lsp_test') + + if args.gt_iuv: + smpl = SMPL(model_file=cfg.SMPL_FILE) + uv_type = args.uv_type + print("uv_type = ", uv_type) + uv_type = args.uv_type + + if uv_type == 'SMPL': + data = objfile.read_obj_full('data/uv_sampler/smpl_fbx_template.obj') + elif uv_type == 'BF': + data = objfile.read_obj_full('data/uv_sampler/smpl_boundry_free_template.obj') + + vt = np.array(data['texcoords']) + face = [f[0] for f in data['faces']] + face = np.array(face) - 1 + vt_face = [f[2] for f in data['faces']] + vt_face = np.array(vt_face) - 1 + renderer = UVRenderer(faces=face, tex=np.zeros([256, 256, 3]), vt=1 - vt, ft=vt_face) + for dataset_name in ['up-3d']: + process_dataset(dataset_name, is_train=True, uv_type=uv_type, smpl=smpl, renderer=renderer) diff --git a/research/cv/DecoMR/preprocess_surreal.py b/research/cv/DecoMR/preprocess_surreal.py new file mode 100644 index 0000000000000000000000000000000000000000..25293fc7260cbe61ddd9012048097e928b57c38a --- /dev/null +++ b/research/cv/DecoMR/preprocess_surreal.py @@ -0,0 +1,69 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import argparse +from utils import config as cfg +from utils.renderer import UVRenderer +from utils import objfile +from models import SMPL +import numpy as np +import mindspore.context as context + +from datasets.preprocess import \ + process_dataset, process_surreal,\ + extract_surreal_eval, extract_surreal_train + +context.set_context(mode=context.GRAPH_MODE, device_target='GPU', device_id=0) + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--train_files', default=True, action='store_true', help='Extract files needed for training') + parser.add_argument('--eval_files', default=True, action='store_true', help='Extract files needed for evaluation') + parser.add_argument('--gt_iuv', default=True, action='store_true', help='Extract files needed for evaluation') + parser.add_argument('--uv_type', type=str, default='BF', choices=['BF', 'SMPL']) + + args = parser.parse_args() + + # define path to store extra files + out_path = cfg.DATASET_NPZ_PATH + openpose_path = None + if args.train_files: + # SURREAL dataset preprocessing (training set) + extract_surreal_train(cfg.SURREAL_ROOT, out_path) + + if args.eval_files: + # SURREAL dataset preprocessing (validation set) + extract_surreal_eval(cfg.SURREAL_ROOT, out_path) + + if args.gt_iuv: + smpl = SMPL() + uv_type = args.uv_type + + if uv_type == 'SMPL': + data = objfile.read_obj_full('data/uv_sampler/smpl_fbx_template.obj') + elif uv_type == 'BF': + data = objfile.read_obj_full('data/uv_sampler/smpl_boundry_free_template.obj') + + vt = np.array(data['texcoords']) + face = [f[0] for f in data['faces']] + face = np.array(face) - 1 + vt_face = [f[2] for f in data['faces']] + vt_face = np.array(vt_face) - 1 + renderer = UVRenderer(faces=face, tex=np.zeros([256, 256, 3]), vt=1 - vt, ft=vt_face) + + process_surreal(is_train=True, uv_type=uv_type, renderer=renderer) + + for dataset_name in ['lspet', 'coco', 'lsp-orig', 'mpii', 'lspet', 'mpi-inf-3dhp']: + process_dataset(dataset_name, is_train=True, uv_type=uv_type, smpl=smpl, renderer=renderer) diff --git a/research/cv/DecoMR/pretrained_model_convert/pth_to_msp.py b/research/cv/DecoMR/pretrained_model_convert/pth_to_msp.py new file mode 100644 index 0000000000000000000000000000000000000000..a5a1f54e9e04daadc13c1752da92032f5c2cd662 --- /dev/null +++ b/research/cv/DecoMR/pretrained_model_convert/pth_to_msp.py @@ -0,0 +1,67 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import os.path +import argparse + +import resnet_pth +import torch.nn + +import resnet_msp +import mindspore.nn + + +def convert_resnet(pretrained_file, result): + resnet50_pth = resnet_pth.resnet50() + resnet50_msp = resnet_msp.resnet50() + if torch.cuda.is_available(): + resnet50_pth.load_state_dict(torch.load(pretrained_file), strict=False) + else: + resnet50_pth.load_state_dict(torch.load(pretrained_file, map_location=torch.device("cpu")), strict=False) + + p_pth_list = list() + for p_pth in resnet50_pth.parameters(): + p_pth_list.append(p_pth.cpu().detach().numpy()) + + bn_list = list() + for m in resnet50_pth.modules(): + if isinstance(m, torch.nn.BatchNorm2d): + bn_list.append(m.running_mean.cpu().numpy()) + bn_list.append(m.running_var.cpu().numpy()) + p_index = 0 + bn_index = 0 + for n_msp, p_msp in resnet50_msp.parameters_and_names(): + if "moving_" not in n_msp: + p_msp.set_data(mindspore.Tensor(p_pth_list[p_index])) + p_index += 1 + else: + p_msp.set_data(mindspore.Tensor(bn_list[bn_index])) + bn_index += 1 + mindspore.save_checkpoint(resnet50_msp, result) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument("--model", choices=["vgg", "resnet"], type=str) + parser.add_argument("--pth_file", type=str, default="data/resnet50-19c8e357.pth", help="input pth file") + parser.add_argument("--msp_file", type=str, default="data/resnet50_msp.ckpt", help="output msp file") + args = parser.parse_args() + if not os.path.exists(args.pth_file): + raise FileNotFoundError(args.pth_file) + if args.model == "resnet": + convert_resnet(args.pth_file, args.msp_file) + else: + print("unknown model") + print("success") diff --git a/research/cv/DecoMR/pretrained_model_convert/resnet_msp.py b/research/cv/DecoMR/pretrained_model_convert/resnet_msp.py new file mode 100644 index 0000000000000000000000000000000000000000..9856793a1f0efa4dca305300e38737645e0e1181 --- /dev/null +++ b/research/cv/DecoMR/pretrained_model_convert/resnet_msp.py @@ -0,0 +1,163 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Resnet model define""" + +import mindspore.nn as nn +from mindspore import load_checkpoint + +affine_par = True + + +def conv3x3(in_planes, out_planes, stride=1): + return nn.Conv2d(in_planes, out_planes, kernel_size=3, padding="same", stride=stride, has_bias=False) + + +class Bottleneck(nn.Cell): + """ + Bottleneck layer + """ + expansion = 4 + + def __init__(self, in_planes, planes, stride=1, dilation_=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, has_bias=False) + self.bn1 = nn.BatchNorm2d(planes, affine=affine_par, use_batch_statistics=False) + for i in self.bn1.get_parameters(): + i.requires_grad = False + padding = 1 + if dilation_ == 2: + padding = 2 + elif dilation_ == 4: + padding = 4 + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=padding, pad_mode="pad", has_bias=False, + dilation=dilation_) + + self.bn2 = nn.BatchNorm2d(planes, affine=affine_par, use_batch_statistics=False) + for i in self.bn2.get_parameters(): + i.requires_grad = False + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, has_bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4, affine=affine_par, use_batch_statistics=False) + for i in self.bn3.get_parameters(): + i.requires_grad = False + self.relu = nn.ReLU() + self.downsample = downsample + self.stride = stride + + def construct(self, x): + """ + forword + """ + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Cell): + """ + resnet + """ + + def __init__(self, block, layers): + self.in_planes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, pad_mode="pad", + has_bias=False) + self.bn1 = nn.BatchNorm2d(64, affine=affine_par, use_batch_statistics=False) + for i in self.bn1.get_parameters(): + i.requires_grad = False + self.relu = nn.ReLU() + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") # change + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation=2) + + def _make_layer(self, block, planes, blocks, stride=1, dilation=1): + """ + make layer + """ + downsample = None + if stride != 1 or self.in_planes != planes * block.expansion or dilation == 2 or dilation == 4: + downsample = nn.SequentialCell( + nn.Conv2d(self.in_planes, planes * block.expansion, + kernel_size=1, stride=stride, has_bias=False), + nn.BatchNorm2d(planes * block.expansion, affine=affine_par, use_batch_statistics=False), + ) + for i in downsample[1].get_parameters(): + i.requires_grad = False + layers = [block(self.in_planes, planes, stride, dilation_=dilation, downsample=downsample)] + self.in_planes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.in_planes, planes, dilation_=dilation)) + + return nn.SequentialCell(*layers) + + def load_pretrained_model(self, model_file): + """ + load pretrained model + """ + load_checkpoint(model_file, net=self) + + def construct(self, x): + """ + 
forward + """ + tmp_x = [] + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + tmp_x.append(x) + x = self.maxpool(x) + + x = self.layer1(x) + tmp_x.append(x) + x = self.layer2(x) + tmp_x.append(x) + x = self.layer3(x) + tmp_x.append(x) + x = self.layer4(x) + tmp_x.append(x) + + return tmp_x + + +# adding prefix "base" to parameter names for load_checkpoint(). +class Tmp(nn.Cell): + def __init__(self, base): + super(Tmp, self).__init__() + self.base = base + + +def resnet50(): + base = ResNet(Bottleneck, [3, 4, 6, 3]) + return Tmp(base) diff --git a/research/cv/DecoMR/pretrained_model_convert/resnet_pth.py b/research/cv/DecoMR/pretrained_model_convert/resnet_pth.py new file mode 100644 index 0000000000000000000000000000000000000000..c94ca4edfaa1c209b38d488ffadb409d7037b379 --- /dev/null +++ b/research/cv/DecoMR/pretrained_model_convert/resnet_pth.py @@ -0,0 +1,140 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + + +import torch.nn as nn + +affine_par = True + + +def conv3x3(in_planes, out_planes, stride=1): + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, + padding=1, bias=False) + + +class Bottleneck(nn.Module): + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, dilation_=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False) # change + self.bn1 = nn.BatchNorm2d(planes, affine=affine_par) + for i in self.bn1.parameters(): + i.requires_grad = False + padding = 1 + if dilation_ == 2: + padding = 2 + elif dilation_ == 4: + padding = 4 + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, # change + padding=padding, bias=False, dilation=dilation_) + self.bn2 = nn.BatchNorm2d(planes, affine=affine_par) + for i in self.bn2.parameters(): + i.requires_grad = False + self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False) + self.bn3 = nn.BatchNorm2d(planes * 4, affine=affine_par) + for i in self.bn3.parameters(): + i.requires_grad = False + self.relu = nn.ReLU(inplace=True) + self.downsample = downsample + self.stride = stride + + def forward(self, x): + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out += residual + out = self.relu(out) + + return out + + +class ResNet(nn.Module): + def __init__(self, block, layers): + self.inplanes = 64 + super(ResNet, self).__init__() + self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, + bias=False) + self.bn1 = nn.BatchNorm2d(64, affine=affine_par) + for i in self.bn1.parameters(): + i.requires_grad = False + self.relu = nn.ReLU(inplace=True) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) + self.layer1 = 
self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=1, dilation__=2) + + for m in self.modules(): + if isinstance(m, nn.Conv2d): + m.weight.data.normal_(0, 0.01) + elif isinstance(m, nn.BatchNorm2d): + m.weight.data.fill_(1) + m.bias.data.zero_() + + def _make_layer(self, block, planes, blocks, stride=1, dilation__=1): + downsample = None + if stride != 1 or self.inplanes != planes * block.expansion or dilation__ == 2 or dilation__ == 4: + downsample = nn.Sequential( + nn.Conv2d(self.inplanes, planes * block.expansion, + kernel_size=1, stride=stride, bias=False), + nn.BatchNorm2d(planes * block.expansion, affine=affine_par), + ) + for i in downsample[1].parameters(): + i.requires_grad = False + layers = [] + layers.append(block(self.inplanes, planes, stride, dilation_=dilation__, downsample=downsample)) + self.inplanes = planes * block.expansion + for i in range(1, blocks): + layers.append(block(self.inplanes, planes, dilation_=dilation__)) + + return nn.Sequential(*layers) + + def forward(self, x): + tmp_x = [] + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + tmp_x.append(x) + x = self.maxpool(x) + + x = self.layer1(x) + tmp_x.append(x) + x = self.layer2(x) + tmp_x.append(x) + x = self.layer3(x) + tmp_x.append(x) + x = self.layer4(x) + tmp_x.append(x) + + return tmp_x + + +def resnet50(): + model = ResNet(Bottleneck, [3, 4, 6, 3]) + return model diff --git a/research/cv/DecoMR/requirements.txt b/research/cv/DecoMR/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..c937576b65683bb2869f6c0a83759ef762737821 --- /dev/null +++ b/research/cv/DecoMR/requirements.txt @@ -0,0 +1,10 @@ +chumpy==0.69 +tqdm==4.42.0 +numpy==1.18.1 +spacepy==0.2.1 +opendr==0.78 +opencv_python==4.1.2.30 +h5py==2.10.0 +scipy==1.2.1 +mindspore==1.8.0 +ipdb==0.13.3 diff --git a/research/cv/DecoMR/scripts/convert_model.sh b/research/cv/DecoMR/scripts/convert_model.sh new file mode 100644 index 0000000000000000000000000000000000000000..2335598a66182d77a538b704af9b5ef0784bdd37 --- /dev/null +++ b/research/cv/DecoMR/scripts/convert_model.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +# The number of parameters transferred is not equal to the required number, print prompt information +if [ $# != 3 ] +then + echo "===============================================================================================================" + echo "Please run the script as: " + echo "bash convert_model.sh [MODEL_NAME] [PTH_FILE] [MSP_FILE]" + echo "for example: bash convert_model.sh data/resnet50-19c8e357.pth data/resnet50_msp.ckpt" + echo "===============================================================================================================" + exit 1 +fi + +# Get absolute path +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +# Get current script path +BASE_PATH=$(cd "`dirname $0`" || exit; pwd) +MODEL_NAME=$1 +PTH_FILE=$(get_real_path $2) +MSP_FILE=$(get_real_path $3) + +cd $BASE_PATH/.. +python pretrained_model_convert/pth_to_msp.py \ + --model=$MODEL_NAME \ + --pth_file="$PTH_FILE" \ + --msp_file="$MSP_FILE" diff --git a/research/cv/DecoMR/scripts/run_eval.sh b/research/cv/DecoMR/scripts/run_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..20a29b56c97b5a53435856841450e66380138aa1 --- /dev/null +++ b/research/cv/DecoMR/scripts/run_eval.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 2 ] +then + echo "==============================================================================================================" + echo "Please run the script as: " + echo "bash ./run_eval.sh [DATASET] [BATCH_SIZE]" + echo "for example: bash ./run_eval.sh up-3d 16" + echo "==============================================================================================================" + exit 1 +fi + +DATASET=$1 +BATCH_SIZE=$2 + +cd .. + +python eval.py \ + --dataset=$DATASET \ + --batch_size=$BATCH_SIZE > eval.log 2>&1 & diff --git a/research/cv/DecoMR/scripts/run_train_distribute_gpu.sh b/research/cv/DecoMR/scripts/run_train_distribute_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..03592671c59e11789a6daa38fe5bc537dd8fee5e --- /dev/null +++ b/research/cv/DecoMR/scripts/run_train_distribute_gpu.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +if [ $# != 6 ] +then + echo "==============================================================================================================" + echo "Please run the script as: " + echo "bash ./run_train_distribute_gpu.sh [DATASET] [NGPU] [EPOCHS_DP] [EPOCHS_END] [BATCH_SIZE] [CKPT_PATH] " + echo "for example: bash ./run_train_distribute_gpu.sh up-3d 8 5 30 16 './ckpt'" + echo "==============================================================================================================" + exit 1 +fi + +ulimit -u unlimited + +DATASET=$1 +NGPU=$2 +EPOCHS_DP=$3 +EPOCHS_END=$4 +BATCH_SIZE=$5 +CKPT_PATH=$6 + +cd .. + +mpirun --allow-run-as-root -n $NGPU python train.py --run_distribute==True \ + --dataset=$DATASET \ + --ngpu=$NGPU \ + --num_epochs_dp=$EPOCHS_DP \ + --num_epochs_end=$EPOCHS_END \ + --batch_size=$BATCH_SIZE \ + --ckpt_dir=$CKPT_PATH > trainx8.log 2>&1 & diff --git a/research/cv/DecoMR/scripts/run_train_standalone_gpu.sh b/research/cv/DecoMR/scripts/run_train_standalone_gpu.sh new file mode 100644 index 0000000000000000000000000000000000000000..ecd9aab0adc5446a3f1387de293ec42365f5382c --- /dev/null +++ b/research/cv/DecoMR/scripts/run_train_standalone_gpu.sh @@ -0,0 +1,44 @@ +#!/bin/bash +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 6 ] +then + echo "===============================================================================================================" + echo "Please run the script as: " + echo "bash ./run_train_standalone_gpu.sh [DATASET] [DEVICE_ID] [EPOCHS_DP] [EPOCHS_END] [BATCH_SIZE] [CKPT_PATH]" + echo "for example: bash ./run_train_standalone_gpu.sh up-3d 0 5 30 16 './ckpt'" + echo "===============================================================================================================" + exit 1 +fi + +ulimit -u unlimited + +DATASET=$1 +DEVICE_ID=$2 +EPOCHS_DP=$3 +EPOCHS_END=$4 +BATCH_SIZE=$5 +CKPT_PATH=$6 + +cd .. + +python train.py \ + --dataset=$DATASET \ + --device_id=$DEVICE_ID \ + --num_epochs_dp=$EPOCHS_DP \ + --num_epochs_end=$EPOCHS_END \ + --batch_size=$BATCH_SIZE \ + --ckpt_dir=$CKPT_PATH > trainx1.log 2>&1 & diff --git a/research/cv/DecoMR/train.py b/research/cv/DecoMR/train.py new file mode 100644 index 0000000000000000000000000000000000000000..00054fdac33d388a400f3fd74aa4cd92c109b1c0 --- /dev/null +++ b/research/cv/DecoMR/train.py @@ -0,0 +1,152 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import os +import time +from mindspore import context, nn, Tensor, save_checkpoint +from mindspore.context import ParallelMode +from mindspore.communication.management import init, get_rank, get_group_size +from mindspore.common import set_seed +from datasets.base_dataset import create_dataset +from models import dense_cnn +from models.WithLossCellDP import WithLossCellDP +from models.WithLossCellEnd import WithLossCellEnd +from models.TrainOneStepDP import TrainOneStepDP +from models.TrainOneStepEnd import TrainOneStepEnd +from models.uv_generator import Index_UV_Generator +from models.DMR import DMR +from models import SMPL +from utils import TrainOptions +import numpy as np + +set_seed(1) + +options = TrainOptions().parse_args() + +context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU') + +def train(): + if options.run_distribute: + + init() + context.reset_auto_parallel_context() + context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, device_num=get_group_size(), + gradients_mean=True) + options.rank = get_rank() + options.group_size = get_group_size() + options.ckpt_dir = os.path.join(options.ckpt_dir, 'rank{}'.format(options.rank)) + all_dataset = create_dataset(options.dataset, options, is_train=True, use_IUV=True) + dataset = all_dataset.batch(options.batch_size, drop_remainder=True) + else: + options.ckpt_dir = os.path.join(options.ckpt_dir, 'rank{}'.format(options.rank)) + context.set_context(device_id=options.device_id) + all_dataset = create_dataset(options.dataset, options, is_train=True, use_IUV=True) + dataset = all_dataset.batch(options.batch_size) + + smpl = SMPL() + + sampler = Index_UV_Generator(UV_height=options.uv_res, UV_width=-1, uv_type=options.uv_type) + + weight_file = 'data/weight_p24_h{:04d}_w{:04d}_{}.npy'.format(options.uv_res, options.uv_res, options.uv_type) + uv_weight = Tensor.from_numpy(np.load(weight_file)) + uv_weight = uv_weight * sampler.mask + uv_weight = uv_weight / uv_weight.mean() + uv_weight = uv_weight[None, :, :, None] + tv_factor = (options.uv_res - 1) * (options.uv_res - 1) + + + CNet = dense_cnn.DPNet(warp_lv=options.warp_level, norm_type=options.norm_type) + optimizer_CNet = nn.Adam(CNet.trainable_params(), learning_rate=options.lr, beta1=options.adam_beta1) + CNet_with_criterion = WithLossCellDP(CNet, options) + TrainOneStepCellDP = TrainOneStepDP(CNet_with_criterion, optimizer_CNet) + + TrainOneStepCellDP.set_train() + + if not os.path.exists(options.ckpt_dir): + os.makedirs(options.ckpt_dir) + + iter1 = dataset.create_dict_iterator(num_epochs=options.num_epochs_dp) + for epoch in range(options.num_epochs_dp): + start_epoch_time = time.time() + for i, data in enumerate(iter1): + input_data = data + has_dp = input_data['has_dp'] + images = input_data['img'] + gt_dp_iuv = input_data['gt_iuv'] + fit_joint_error = input_data['fit_joint_error'] + + gt_dp_iuv[:, 1:] = gt_dp_iuv[:, 1:] / 255.0 + + Cout = TrainOneStepCellDP(has_dp, images, gt_dp_iuv, fit_joint_error) + + print('stage_dp:', 'epoch', epoch, 'step', i, 
'CLoss', Cout[0]) + + print('stage_dp:', 'epoch', epoch, 'use time:', time.time() - start_epoch_time, 's') + print('stage_dp:', 'epoch', epoch, 'performance', (time.time() - start_epoch_time) + * 1000 / dataset.get_dataset_size(), 'ms/step') + if (epoch + 1) % 1 == 0 and options.rank == 0: + save_checkpoint(CNet, os.path.join(options.ckpt_dir, f"CNet_{epoch + 1}.ckpt")) + + Pretrained_CNet = dense_cnn.Pretrained_DPNet(options.warp_level, options.norm_type, pretrained=True) + LNet = dense_cnn.get_LNet(options) + + DMR_model = DMR(Pretrained_CNet, LNet) + optimizer_DMR_model = nn.Adam(DMR_model.trainable_params(), learning_rate=options.lr, beta1=options.adam_beta1) + DMR_model_with_criterion = WithLossCellEnd(DMR_model, options, uv_weight, tv_factor) + TrainOneStepCellEnd = TrainOneStepEnd(DMR_model_with_criterion, optimizer_DMR_model) + TrainOneStepCellEnd.set_train(True) + + iter2 = dataset.create_dict_iterator(num_epochs=options.num_epochs_end) + for epoch in range(options.num_epochs_end): + start_epoch_time = time.time() + for i, data in enumerate(iter2): + + input_data = data + gt_keypoints_2d = input_data['keypoints'] + gt_keypoints_3d = input_data['pose_3d'] + has_pose_3d = input_data['has_pose_3d'] + + gt_keypoints_2d_smpl = input_data['keypoints_smpl'] + gt_keypoints_3d_smpl = input_data['pose_3d_smpl'] + has_pose_3d_smpl = input_data['has_pose_3d_smpl'] + + gt_pose = input_data['pose'] + gt_betas = input_data['betas'] + has_smpl = input_data['has_smpl'] + has_dp = input_data['has_dp'] + images = input_data['img'] + + gt_dp_iuv = input_data['gt_iuv'] + fit_joint_error = input_data['fit_joint_error'] + + gt_dp_iuv[:, 1:] = gt_dp_iuv[:, 1:] / 255.0 + + gt_vertices = smpl(gt_pose, gt_betas) + + gt_uv_map = sampler.get_UV_map(gt_vertices) + + Lout = TrainOneStepCellEnd(images, has_dp, has_smpl, has_pose_3d, has_pose_3d_smpl, gt_dp_iuv, gt_uv_map, + gt_vertices, fit_joint_error, gt_keypoints_2d, gt_keypoints_3d, + gt_keypoints_2d_smpl, gt_keypoints_3d_smpl) + + print('stage_end:', 'epoch', epoch, 'step', i, 'CLoss', Lout[1], 'LLoss', Lout[2], 'total', Lout[0]) + print('stage_end:', 'epoch', epoch, 'use time:', time.time() - start_epoch_time, 's') + print('stage_end:', 'epoch', epoch, 'performance', (time.time() - start_epoch_time) + * 1000 / dataset.get_dataset_size(), 'ms/step') + if (epoch + 1) % 1 == 0 and options.rank == 0: + save_checkpoint(DMR_model, os.path.join(options.ckpt_dir, f"dmr_{epoch + 1}.ckpt")) + +if __name__ == '__main__': + train() diff --git a/research/cv/DecoMR/utils/__init__.py b/research/cv/DecoMR/utils/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b8c83b34a9e577c71bb71694c850971108060426 --- /dev/null +++ b/research/cv/DecoMR/utils/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +from .train_options import TrainOptions diff --git a/research/cv/DecoMR/utils/config.py b/research/cv/DecoMR/utils/config.py new file mode 100644 index 0000000000000000000000000000000000000000..ad8bbab14cd60af93b228abd905485921eb19372 --- /dev/null +++ b/research/cv/DecoMR/utils/config.py @@ -0,0 +1,142 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +from os.path import join + +H36M_ROOT = '' +H36M_ROOT_ORIGIN = '' + +LSP_ROOT = '' +LSP_ORIGINAL_ROOT = '' +UPI_S1H_ROOT = '' +MPII_ROOT = '' +COCO_ROOT = '' +UP_3D_ROOT = 'data/up-3d' +SURREAL_ROOT = '' +PW3D_ROOT = '' +MPI_INF_3DHP_ROOT = '' +LSPET_ROOT = '' + +# Output folder to save test/train npz files +DATASET_NPZ_PATH = 'data/DATASET_NPZ_PATH' + +# Path to test/train npz files +DATASET_FILES = [{'h36m-p1': join(DATASET_NPZ_PATH, 'h36m_valid_protocol1.npz'), + 'h36m-p2': join(DATASET_NPZ_PATH, 'h36m_valid_protocol2.npz'), + 'lsp': join(DATASET_NPZ_PATH, 'lsp_dataset_test.npz'), + 'lsp-orig-test': join(DATASET_NPZ_PATH, 'lsp_dataset_original_test.npz'), + 'up-3d': join(DATASET_NPZ_PATH, 'up_3d_lsp_test.npz'), + 'up-3d-test': join(DATASET_NPZ_PATH, 'up_3d_test.npz'), + 'surreal': join(DATASET_NPZ_PATH, 'surreal_val.npz'), + '3dpw': join(DATASET_NPZ_PATH, '3dpw_test.npz'), + 'mpi-inf-3dhp': join(DATASET_NPZ_PATH, 'mpi_inf_3dhp_valid.npz'), + }, + + {'h36m-train': join(DATASET_NPZ_PATH, 'h36m_train_new.npz'), + 'lsp-orig': join(DATASET_NPZ_PATH, 'lsp_dataset_original_train.npz'), + 'up-3d': join(DATASET_NPZ_PATH, 'up_3d_trainval.npz'), + 'mpii': join(DATASET_NPZ_PATH, 'mpii_train.npz'), + 'coco': join(DATASET_NPZ_PATH, 'coco_2014_train.npz'), + 'lspet': join(DATASET_NPZ_PATH, 'hr-lspet_train.npz'), + 'mpi-inf-3dhp': join(DATASET_NPZ_PATH, 'mpi_inf_3dhp_train.npz'), + 'surreal': join(DATASET_NPZ_PATH, 'surreal_train.npz'), + } + ] + +# Path to SPIN fitting result +FIT_FILES = [{}, + { + 'lsp-orig': join(DATASET_NPZ_PATH, 'spin_fits', 'lsp.npz'), + 'mpii': join(DATASET_NPZ_PATH, 'spin_fits', 'mpii.npz'), + 'coco': join(DATASET_NPZ_PATH, 'spin_fits', 'coco.npz'), + 'lspet': join(DATASET_NPZ_PATH, 'spin_fits', 'lspet.npz'), + 'mpi-inf-3dhp': join(DATASET_NPZ_PATH, 'spin_fits', 'mpi_inf_3dhp.npz'), + } + ] +DATASET_FOLDERS = {'h36m-train': H36M_ROOT, + 'h36m-p1': H36M_ROOT_ORIGIN, + 'h36m-p2': H36M_ROOT_ORIGIN, + 'lsp-orig': LSP_ORIGINAL_ROOT, + 'lsp': LSP_ROOT, + 'lsp-orig-test': LSP_ORIGINAL_ROOT, + 'upi-s1h': UPI_S1H_ROOT, + 'up-3d': UP_3D_ROOT, + 'up-3d-test': UP_3D_ROOT, + 'mpii': MPII_ROOT, + 'coco': COCO_ROOT, + 'surreal': SURREAL_ROOT, + '3dpw': PW3D_ROOT, + 'lspet': LSPET_ROOT, + 'mpi-inf-3dhp': MPI_INF_3DHP_ROOT, + } + +CUBE_PARTS_FILE = 'data/cube_parts.npy' +JOINT_REGRESSOR_TRAIN_EXTRA = 'data/J_regressor_extra.npy' +VERTEX_TEXTURE_FILE = 'data/vertex_texture.npy' +SMPL_FILE = 'data/basicmodel_neutral_lbs_10_207_0_v1.0.0.pkl' 
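+# Gender-specific variants of the neutral SMPL model above.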
+MALE_SMPL_FILE = 'data/basicmodel_m_lbs_10_207_0_v1.0.0.pkl' +FEMALE_SMPL_FILE = 'data/basicmodel_f_lbs_10_207_0_v1.0.0.pkl' + +JOINT_REGRESSOR_H36M = 'data/J_regressor_h36m.npy' +LSP_REGRESSOR_EVAL = 'data/smpl2lsp_j_regressor_nt_v2.npy' + + +""" +Each dataset uses different sets of joints. +We keep a superset of 24 joints such that we include all joints from every dataset. +If a dataset doesn't provide annotations for a specific joint, we simply ignore it. +The joints used here are: +0 - Right Ankle +1 - Right Knee +2 - Right Hip +3 - Left Hip +4 - Left Knee +5 - Left Ankle +6 - Right Wrist +7 - Right Elbow +8 - Right Shoulder +9 - Left Shoulder +10 - Left Elbow +11 - Left Wrist +12 - Neck (LSP definition) +13 - Top of Head (LSP definition) +14 - Pelvis (MPII definition) +15 - Thorax (MPII definition) +16 - Spine (Human3.6M definition) +17 - Jaw (Human3.6M definition) +18 - Head (Human3.6M definition) +19 - Nose +20 - Left Eye +21 - Right Eye +22 - Left Ear +23 - Right Ear +""" + +JOINTS_IDX = [8, 5, 29, 30, 4, 7, 21, 19, 17, 16, 18, 20, 31, 32, 33, 34, 35, 36, 37, 24, 26, 25, 28, 27] + +# Joint selectors +# Indices to get the 14 LSP joints from the 17 H36M joints +H36M_TO_J17 = [6, 5, 4, 1, 2, 3, 16, 15, 14, 11, 12, 13, 8, 10, 0, 7, 9] +H36M_TO_J14 = H36M_TO_J17[:14] +# Indices to get the 14 LSP joints from the ground truth joints +J24_TO_J17 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 18, 14, 16, 17] +J24_TO_J14 = J24_TO_J17[:14] + +FOCAL_LENGTH = 5000. +INPUT_RES = 224 + +# Mean and standard deviation for normalizing input image +IMG_NORM_MEAN = [0.485, 0.456, 0.406] +IMG_NORM_STD = [0.229, 0.224, 0.225] diff --git a/research/cv/DecoMR/utils/imutils.py b/research/cv/DecoMR/utils/imutils.py new file mode 100644 index 0000000000000000000000000000000000000000..75a92cbfa792dcfa4ad37216fc5a7b5f94effd35 --- /dev/null +++ b/research/cv/DecoMR/utils/imutils.py @@ -0,0 +1,165 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import scipy.misc +import cv2 + +def get_transform(center, scale, res, rot=0): + """Generate transformation matrix.""" + h = 200 * scale + t = np.zeros((3, 3)) + t[0, 0] = float(res[1]) / h + t[1, 1] = float(res[0]) / h + t[0, 2] = res[1] * (-float(center[0]) / h + .5) + t[1, 2] = res[0] * (-float(center[1]) / h + .5) + t[2, 2] = 1 + if rot != 0: + rot = -rot # To match direction of rotation from cropping + rot_mat = np.zeros((3, 3)) + rot_rad = rot * np.pi / 180 + sn, cs = np.sin(rot_rad), np.cos(rot_rad) + rot_mat[0, :2] = [cs, -sn] + rot_mat[1, :2] = [sn, cs] + rot_mat[2, 2] = 1 + # Need to rotate around center + t_mat = np.eye(3) + t_mat[0, 2] = -res[1]/2 + t_mat[1, 2] = -res[0]/2 + t_inv = t_mat.copy() + t_inv[:2, 2] *= -1 + t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t))) + return t + +def transform(pt, center, scale, res, invert=0, rot=0): + """Transform pixel location to different reference.""" + t = get_transform(center, scale, res, rot=rot) + if invert: + t = np.linalg.inv(t) + new_pt = np.array([pt[0]-1, pt[1]-1, 1.]).T + new_pt = np.dot(t, new_pt) + return new_pt[:2].astype(int)+1 + +def crop(img, center, scale, res, rot=0): + """Crop image according to the supplied bounding box.""" + # Upper left point + ul = np.array(transform([1, 1], center, scale, res, invert=1))-1 + # Bottom right point + br = np.array(transform([res[0]+1, + res[1]+1], center, scale, res, invert=1))-1 + + # Padding so that when rotated proper amount of context is included + pad = int(np.linalg.norm(br - ul) / 2 - float(br[1] - ul[1]) / 2) + if rot != 0: + ul -= pad + br += pad + + new_shape = [br[1] - ul[1], br[0] - ul[0]] + if len(img.shape) > 2: + new_shape += [img.shape[2]] + new_img = np.zeros(new_shape) + + # Range to fill new array + new_x = max(0, -ul[0]), min(br[0], len(img[0])) - ul[0] + new_y = max(0, -ul[1]), min(br[1], len(img)) - ul[1] + # Range to sample from original image + old_x = max(0, ul[0]), min(len(img[0]), br[0]) + old_y = max(0, ul[1]), min(len(img), br[1]) + new_img[new_y[0]:new_y[1], new_x[0]:new_x[1]] = \ + img[old_y[0]:old_y[1], old_x[0]:old_x[1]] + + if rot != 0: + # Remove padding + new_img = scipy.misc.imrotate(new_img, rot) + new_img = new_img[pad:-pad, pad:-pad] + + new_img = scipy.misc.imresize(new_img, res) + return new_img + +def uncrop(img, center, scale, orig_shape, rot=0, is_rgb=True): + """'Undo' the image cropping/resizing. + This function is used when evaluating mask/part segmentation. 
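+    The cropped image ``img`` is resized back to the size of the original crop window and pasted
+    into an empty array of ``orig_shape`` at the location given by ``center`` and ``scale``.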
+ """ + res = img.shape[:2] + # Upper left point + ul = np.array(transform([1, 1], center, scale, res, invert=1))-1 + # Bottom right point + br = np.array(transform([res[0]+1, res[1]+1], center, scale, res, invert=1))-1 + # size of cropped image + crop_shape = [br[1] - ul[1], br[0] - ul[0]] + + new_shape = [br[1] - ul[1], br[0] - ul[0]] + if len(img.shape) > 2: + new_shape += [img.shape[2]] + new_img = np.zeros(orig_shape, dtype=np.uint8) + # Range to fill new array + new_x = max(0, -ul[0]), min(br[0], orig_shape[1]) - ul[0] + new_y = max(0, -ul[1]), min(br[1], orig_shape[0]) - ul[1] + # Range to sample from original image + old_x = max(0, ul[0]), min(orig_shape[1], br[0]) + old_y = max(0, ul[1]), min(orig_shape[0], br[1]) + img = scipy.misc.imresize(img, crop_shape, interp='nearest') + new_img[old_y[0]:old_y[1], old_x[0]:old_x[1]] = img[new_y[0]:new_y[1], new_x[0]:new_x[1]] + return new_img + +def rot_aa(aa, rot): + """Rotate axis angle parameters.""" + # pose parameters + R = np.array([[np.cos(np.deg2rad(-rot)), -np.sin(np.deg2rad(-rot)), 0], + [np.sin(np.deg2rad(-rot)), np.cos(np.deg2rad(-rot)), 0], + [0, 0, 1]]) + # find the rotation of the body in camera frame + per_rdg, _ = cv2.Rodrigues(aa) + # apply the global rotation to the global orientation + resrot, _ = cv2.Rodrigues(np.dot(R, per_rdg)) + aa = (resrot.T)[0] + return aa + +def flip_img(img): + """Flip rgb images or masks. + channels come last, e.g. (256,256,3). + """ + img = np.fliplr(img) + return img + +def flip_kp(kp): + """Flip keypoints.""" + flipped_parts = [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13, 14, 15, 16, 17, 18, 19, 21, 20, 23, 22] + kp = kp[flipped_parts] + kp[:, 0] = - kp[:, 0] + return kp + +def flip_pose(pose): + """Flip pose. + The flipping is based on SMPL parameters. + """ + flippedParts = [0, 1, 2, 6, 7, 8, 3, 4, 5, 9, 10, 11, 15, 16, 17, 12, 13, + 14, 18, 19, 20, 24, 25, 26, 21, 22, 23, 27, 28, 29, 33, + 34, 35, 30, 31, 32, 36, 37, 38, 42, 43, 44, 39, 40, 41, + 45, 46, 47, 51, 52, 53, 48, 49, 50, 57, 58, 59, 54, 55, + 56, 63, 64, 65, 60, 61, 62, 69, 70, 71, 66, 67, 68] + pose = pose[flippedParts] + # we also negate the second and the third dimension of the axis-angle + pose[1::3] = -pose[1::3] + pose[2::3] = -pose[2::3] + return pose + +def flip_aa(aa): + """Flip axis-angle representation. + We negate the second and the third dimension of the axis-angle. + """ + aa[1] = -aa[1] + aa[2] = -aa[2] + return aa diff --git a/research/cv/DecoMR/utils/objfile.py b/research/cv/DecoMR/utils/objfile.py new file mode 100644 index 0000000000000000000000000000000000000000..f9e777e33e7a16ce778699f65c7bf902ed854aca --- /dev/null +++ b/research/cv/DecoMR/utils/objfile.py @@ -0,0 +1,99 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+
+"""
+This file includes the code to read and write .obj mesh files.
+"""
+
+import numpy as np
+
+
+def read_obj(filepath):
+    """Read vertex positions and face vertex indices from an .obj file."""
+    vertices = []
+    faces = []
+    for line in open(filepath, "r"):
+        if line.startswith('#'): continue
+        values = line.split()
+        if not values: continue
+        if values[0] == 'v':
+            v = [float(x) for x in values[1:4]]
+            vertices.append(v)
+        elif values[0] == 'f':
+            face = []
+            for v in values[1:]:
+                w = v.split('/')
+                face.append(int(w[0]))
+            faces.append(face)
+    vertices = np.array(vertices)
+    return vertices, faces
+
+
+def read_obj_full(filepath):
+    """Read vertices, normals, texture coordinates and faces from an .obj file.
+    Each face is stored as (vertex indices, normal indices, texcoord indices, material),
+    with 0 used when a normal or texcoord index is missing.
+    """
+    vertices = []
+    normals = []
+    vt_texcoords = []
+    faces = []
+    material = None
+    for line in open(filepath, "r"):
+        if line.startswith('#'): continue
+        values = line.split()
+        if not values: continue
+        if values[0] == 'v':
+            v = [float(x) for x in values[1:4]]
+            vertices.append(v)
+        elif values[0] == 'vn':
+            v = [float(x) for x in values[1:4]]
+            normals.append(v)
+        elif values[0] == 'vt':
+            v = [float(x) for x in values[1:3]]
+            vt_texcoords.append(v)
+        elif values[0] in ('usemtl', 'usemat'):
+            material = values[1]
+        elif values[0] == 'f':
+            face = []
+            texcoords = []
+            norms = []
+            for v in values[1:]:
+                w = v.split('/')
+                face.append(int(w[0]))
+                if len(w) >= 2 and w[1]:
+                    texcoords.append(int(w[1]))
+                else:
+                    texcoords.append(0)
+                if len(w) >= 3 and w[2]:
+                    norms.append(int(w[2]))
+                else:
+                    norms.append(0)
+            faces.append((face, norms, texcoords, material))
+    out_dict = {}
+    out_dict['vertices'] = vertices
+    out_dict['faces'] = faces
+    out_dict['texcoords'] = vt_texcoords
+    return out_dict
+
+
+def write_obj(filepath, vertices, faces):
+    """Write vertices and triangular faces to an .obj file."""
+    with open(filepath, 'w') as fp:
+        for v in vertices:
+            fp.write('v %f %f %f\n' % (v[0], v[1], v[2]))
+
+        for f in faces:
+            fp.write('f %d %d %d\n' % (f[0], f[1], f[2]))
diff --git a/research/cv/DecoMR/utils/renderer.py b/research/cv/DecoMR/utils/renderer.py
new file mode 100644
index 0000000000000000000000000000000000000000..f48e74d58d757fc2a6e6367a5beef95d3590119a
--- /dev/null
+++ b/research/cv/DecoMR/utils/renderer.py
@@ -0,0 +1,201 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+from chumpy.ch import Ch
+from opendr.camera import ProjectPoints
+from opendr.renderer import ColoredRenderer, TexturedRenderer
+from opendr.lighting import LambertianPointLight
+
+# Rotate the points by a specified angle around the Y axis.
+def rotateY(points, angle):
+    ry = np.array([[np.cos(angle), 0., np.sin(angle)], [0., 1., 0.], [-np.sin(angle), 0., np.cos(angle)]])
+    return np.dot(points, ry)
+
+class Renderer:
+    """
+    Render mesh using OpenDR for visualization.
+ """ + + def __init__(self, width=800, height=600, near=0.5, far=1000, faces=None): + self.colors = {'pink': [.9, .7, .7], 'light_blue': [0.65098039, 0.74117647, 0.85882353], + 'blue': [0.65098039, 0.74117647, 0.85882353], 'green': [180.0/255.0, 238.0/255.0, 180.0/255], + 'tan': [1.0, 218.0/255, 185.0/255]} + self.width = width + self.height = height + self.faces = faces + self.renderer = ColoredRenderer() + + def render(self, vertices, faces=None, img=None, + camera_t=np.zeros([3], dtype=np.float32), + camera_rot=np.zeros([3], dtype=np.float32), + camera_center=None, + use_bg=False, + bg_color=(0.0, 0.0, 0.0), + body_color=None, + focal_length=5000, + disp_text=False, + gt_keyp=None, + pred_keyp=None, + **kwargs): + if img is not None: + height, width = img.shape[:2] + else: + height, width = self.height, self.width + + if faces is None: + faces = self.faces + + if camera_center is None: + camera_center = np.array([width * 0.5, height * 0.5]) + + self.renderer.camera = ProjectPoints(rt=camera_rot, t=camera_t, f=focal_length * np.ones(2), + c=camera_center, k=np.zeros(5)) + + dist = np.abs(self.renderer.camera.t.r[2] - np.mean(vertices, axis=0)[2]) + far = dist + 20 + + self.renderer.frustum = {'near': 1.0, 'far': far, 'width': width, 'height': height} + + if img is not None: + if use_bg: + self.renderer.background_image = img + else: + self.renderer.background_image = np.ones_like(img) * np.array(bg_color) + + if body_color is None: + color = self.colors['blue'] + else: + color = self.colors[body_color] + + if isinstance(self.renderer, TexturedRenderer): + color = [1., 1., 1.] + + self.renderer.set(v=vertices, f=faces, vc=color, bgcolor=np.ones(3)) + albedo = self.renderer.vc + + # Construct Back Light (on back right corner) + yrot = np.radians(120) + + self.renderer.vc = LambertianPointLight(f=self.renderer.f, v=self.renderer.v, + num_verts=self.renderer.v.shape[0], + light_pos=rotateY(np.array([-200, -100, -100]), yrot), vc=albedo, + light_color=np.array([1, 1, 1])) + + # Construct Left Light + self.renderer.vc += LambertianPointLight(f=self.renderer.f, v=self.renderer.v, + num_verts=self.renderer.v.shape[0], + light_pos=rotateY(np.array([800, 10, 300]), yrot), vc=albedo, + light_color=np.array([1, 1, 1])) + + # Construct Right Light + self.renderer.vc += LambertianPointLight(f=self.renderer.f, v=self.renderer.v, + num_verts=self.renderer.v.shape[0], + light_pos=rotateY(np.array([-500, 500, 1000]), yrot), vc=albedo, + light_color=np.array([.7, .7, .7])) + + return self.renderer.r + + +def render_IUV(img, vertices, camera, renderer, color='pink', focal_length=1000): + """ + Draws vert with text. + Renderer is an instance of SMPLRenderer. + """ + # Fix a flength so i can render this with persp correct scale + res = img.shape[1] + camera_t = np.array([camera[1], camera[2], 2*focal_length/(res * camera[0] + 1e-9)]) + + rend_img = renderer.render(vertices, camera_t=camera_t, img=img, use_bg=True, + focal_length=focal_length, body_color=color) + + return rend_img + + +class UVRenderer: + """ + Render mesh using OpenDR for visualization. 
+ """ + + def __init__(self, width=800, height=600, near=0.5, far=1000, faces=None, tex=None, vt=None, ft=None): + self.colors = {'pink': [.9, .7, .7], 'blue': [0.65098039, 0.74117647, 0.85882353]} + self.width = width + self.height = height + self.faces = faces + self.tex = tex + self.vt = vt + self.ft = ft + self.renderer = TexturedRenderer() + + def render(self, vertices, faces=None, img=None, + camera_t=np.zeros([3], dtype=np.float32), + camera_rot=np.zeros([3], dtype=np.float32), + camera_center=None, use_bg=False, + bg_color=(0.0, 0.0, 0.0), body_color=None, + focal_length=5000, + disp_text=False, + gt_keyp=None, + pred_keyp=None, + **kwargs): + + if img is not None: + height, width = img.shape[:2] + else: + height, width = self.height, self.width + + if faces is None: + faces = self.faces + + if camera_center is None: + camera_center = np.array([width * 0.5, + height * 0.5]) + + self.renderer.camera = ProjectPoints(rt=camera_rot, t=camera_t, f=focal_length * np.ones(2), + c=camera_center, k=np.zeros(5)) + + dist = np.abs(self.renderer.camera.t.r[2] - np.mean(vertices, axis=0)[2]) + far = dist + 20 + + self.renderer.frustum = {'near': 1.0, + 'far': far, + 'width': width, + 'height': height} + + if img is not None: + if use_bg: + self.renderer.background_image = img + else: + self.renderer.background_image = np.ones_like(img) * np.array(bg_color) + + if body_color is None: + color = self.colors['blue'] + else: + color = self.colors[body_color] + + if isinstance(self.renderer, TexturedRenderer): + color = [1., 1., 1.] + + self.renderer.set(v=vertices, f=faces, vt=self.vt, ft=self.ft, + vc=color, bgcolor=np.ones(3), texture_image=self.tex) + + self.renderer.vc = Ch(np.ones([6890, 3])) + + _ = self.renderer.r + out = self.renderer.texcoord_image + return out diff --git a/research/cv/DecoMR/utils/train_options.py b/research/cv/DecoMR/utils/train_options.py new file mode 100644 index 0000000000000000000000000000000000000000..d645351e7e07cb6ef18be0c27e692a5eaacc93b2 --- /dev/null +++ b/research/cv/DecoMR/utils/train_options.py @@ -0,0 +1,167 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +import os +import json +import argparse +from collections import namedtuple +import numpy as np + + +class TrainOptions: + """Object that handles command line options.""" + def __init__(self): + self.parser = argparse.ArgumentParser() + + req = self.parser.add_argument_group('Required') + req.add_argument('--name', default='sample_dp', help='Name of the experiment') + + gen = self.parser.add_argument_group('General') + gen.add_argument('--device_id', type=int, default=1, help='device id') + gen.add_argument("--run_distribute", type=bool, default=False, help="Run distribute, default: false.") + gen.add_argument('--pretrained', default=True, action='store_true') + gen.add_argument('--time_to_run', type=int, default=np.inf, + help='Total time to run in seconds. 
Used for training in environments with timing constraints')
+        gen.add_argument('--resume', dest='resume', default=False, action='store_true',
+                         help='Resume from checkpoint (the latest checkpoint is used by default)')
+        gen.add_argument('--num_workers', type=int, default=8, help='Number of processes used for data loading')
+        gen.add_argument('--ngpu', type=int, default=1, help='Number of gpus used for training')
+        gen.add_argument('--rank', type=int, default=0, help='shard_id')
+        gen.add_argument('--group_size', type=int, default=1, help='group size')
+
+        io = self.parser.add_argument_group('io')
+        io.add_argument('--ckpt_dir', default='./ckpt', help='Directory to store checkpoints')
+        io.add_argument('--eval_dir', default='./ckpt/rank0', help='Directory of the checkpoints used for evaluation')
+        io.add_argument('--save_root', type=str, default='./results')
+        io.add_argument('--log_dir', default='./logs', help='Directory to store logs')
+        io.add_argument('--log_freq', default=20, type=int, help='Frequency of printing intermediate results')
+        io.add_argument('--checkpoint', default=None, help='Path to checkpoint')
+        io.add_argument('--from_json', default=None,
+                        help='Load options from json file instead of the command line')
+        io.add_argument('--pretrained_checkpoint', default='/logs/sample_dp/checkpoints/final.pt',
+                        help='Load a pretrained network when starting training')
+
+        arch = self.parser.add_argument_group('Architecture')
+        arch.add_argument('--model', default='DecoMR', choices=['DecoMR'])
+
+        arch.add_argument('--img_res', type=int, default=224,
+                          help='Rescale bounding boxes to size [img_res, img_res] before feeding them into the network')
+        arch.add_argument('--uv_res', type=int, default=128, choices=[128, 256],
+                          help='The resolution of the output location map')
+        arch.add_argument('--uv_type', default='BF', choices=['SMPL', 'BF'],
+                          help='The type of UV texture map: '
+                               'SMPL for the default SMPL UV map, '
+                               'BF (boundary-free) for our new UV map')
+
+        arch.add_argument('--uv_channels', type=int, default=128, help='Number of channels in uv_map')
+        arch.add_argument('--warp_level', type=int, default=2, help='The level of the feature warping process.')
+        arch.add_argument('--norm_type', default='GN', choices=['GN', 'BN'],
+                          help='Normalization layer of the LNet')
+
+        train = self.parser.add_argument_group('Training Options')
+        train.add_argument('--dataset', default='up-3d',
+                           choices=['itw', 'all', 'h36m', 'up-3d', 'mesh', 'spin', 'surreal'],
+                           help='Choose training dataset')
+
+        train.add_argument('--num_epochs_dp', type=int, default=5, help='Total number of training epochs in stage dp')
+        train.add_argument('--num_epochs_end', type=int, default=30,
+                           help='Total number of training epochs in stage end')
+        train.add_argument('--batch_size', type=int, default=16, help='Batch size')
+        train.add_argument('--summary_steps', type=int, default=100, help='Summary saving frequency')
+        train.add_argument('--checkpoint_steps', type=int, default=5000, help='Checkpoint saving frequency')
+        train.add_argument('--test_steps', type=int, default=10000, help='Testing frequency')
+        train.add_argument('--rot_factor', type=float, default=30,
+                           help='Random rotation in the range [-rot_factor, rot_factor]')
+        train.add_argument('--noise_factor', type=float, default=0.4,
+                           help='Randomly multiply pixel values by a factor in the range '
+                                '[1-noise_factor, 1+noise_factor]')
+        train.add_argument('--scale_factor', type=float, default=0.25,
+                           help='Rescale bounding boxes by a factor in [1-scale_factor, 1+scale_factor]')
+        train.add_argument('--no_augmentation', dest='use_augmentation', default=True, action='store_false',
+                           help='Don\'t do augmentation')
+        train.add_argument('--no_augmentation_rgb', dest='use_augmentation_rgb', default=True, action='store_false',
+                           help='Don\'t do color jittering during training')
+        train.add_argument('--no_flip', dest='use_flip', default=True, action='store_false', help='Don\'t flip images')
+        train.add_argument('--stage', default='dp', choices=['dp', 'end'],
+                           help='Training stage: '
+                                'dp: only train the CNet; '
+                                'end: end-to-end training.')
+
+        train.add_argument('--use_spin_fit', dest='use_spin_fit', default=False, action='store_true',
+                           help='Use the fitting result from SPIN as GT')
+        train.add_argument('--adaptive_weight', dest='adaptive_weight', default=False, action='store_true',
+                           help='Change the loss weight according to the fitting error of the SPIN fit results. '
+                                'Useful only if use_spin_fit = True.')
+        train.add_argument('--gtkey3d_from_mesh', dest='gtkey3d_from_mesh', default=False, action='store_true',
+                           help='For data without GT 3D keypoints but with fitted SMPL parameters, '
+                                'get the GT 3D keypoints from the mesh.')
+
+        shuffle_train = train.add_mutually_exclusive_group()
+        shuffle_train.add_argument('--shuffle_train', dest='shuffle_train', action='store_true',
+                                   help='Shuffle training data')
+        shuffle_train.add_argument('--no_shuffle_train', dest='shuffle_train', action='store_false',
+                                   help='Don\'t shuffle training data')
+        shuffle_train.set_defaults(shuffle_train=True)
+
+        optim = self.parser.add_argument_group('Optimization')
+        optim.add_argument('--adam_beta1', type=float, default=0.9, help='Value for Adam Beta 1')
+        optim.add_argument("--lr", type=float, default=2.5e-4, help="Learning rate")
+        optim.add_argument("--wd", type=float, default=0, help="Weight decay factor")
+        optim.add_argument("--lam_tv", type=float, default=1e-4, help='lambda of tv loss')
+        optim.add_argument("--lam_con", type=float, default=1, help='lambda of consistency loss')
+        optim.add_argument("--lam_dp_mask", type=float, default=0.2, help='lambda of densepose mask loss')
+        optim.add_argument("--lam_dp_uv", type=float, default=1, help='lambda of densepose uv loss')
+        optim.add_argument("--lam_mesh", type=float, default=0, help='lambda of mesh loss')
+        optim.add_argument("--lam_uv", type=float, default=1, help='lambda of location map loss')
+        optim.add_argument("--lam_key2d", type=float, default=1, help='lambda of 2D joint loss')
+        optim.add_argument("--lam_key3d", type=float, default=1, help='lambda of 3D joint loss')
+
+        train.add_argument('--use_smpl_joints', dest='use_smpl_joints', default=False, action='store_true',
+                           help='Use the 24 SMPL joints for supervision, '
+                                'should be set True when using data from the SURREAL dataset.')
+        optim.add_argument("--lam_key2d_smpl", type=float, default=1, help='lambda of 2D SMPL joint loss')
+        optim.add_argument("--lam_key3d_smpl", type=float, default=1, help='lambda of 3D SMPL joint loss')
+
+    def parse_args(self):
+        """Parse input arguments."""
+        self.args = self.parser.parse_args()
+        # If a config file is passed, load all arguments from it instead of the command line
+        if self.args.from_json is not None:
+            path_to_json = os.path.abspath(self.args.from_json)
+            with open(path_to_json, "r") as f:
+                json_args = json.load(f)
+                json_args = namedtuple("json_args", json_args.keys())(**json_args)
+                return json_args
+        else:
+            self.args.log_dir = os.path.join(os.path.abspath(self.args.log_dir), self.args.name)
+            self.args.summary_dir = os.path.join(self.args.log_dir, 'tensorboard')
+            if not os.path.exists(self.args.log_dir):
+                os.makedirs(self.args.log_dir)
+
+            self.args.checkpoint_dir = os.path.join(self.args.log_dir, 'checkpoints')
+            if not os.path.exists(self.args.checkpoint_dir):
+                os.makedirs(self.args.checkpoint_dir)
+
+            self.save_dump()
+            return self.args
+
+    def save_dump(self):
+        """Store all argument values to a json file.
+        The default location is logs/expname/config.json.
+        """
+        if not os.path.exists(self.args.log_dir):
+            os.makedirs(self.args.log_dir)
+        with open(os.path.join(self.args.log_dir, "config.json"), "w") as f:
+            json.dump(vars(self.args), f, indent=4)
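+
+
+# ----------------------------------------------------------------------------
+# Illustrative usage sketch only (not part of the original training entry
+# point): parse the options standalone and print a few of the resolved values.
+# Note that parse_args() also creates the log/checkpoint directories and
+# writes logs/<name>/config.json as a side effect (see save_dump above).
+if __name__ == '__main__':
+    opts = TrainOptions().parse_args()
+    print('experiment name:', opts.name)
+    print('dataset        :', opts.dataset)
+    print('uv type / res  :', opts.uv_type, opts.uv_res)
+    print('batch size     :', opts.batch_size)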