Add GPU implementation of CTPN

136fced5 · dessyang · rescue · a82682fd · 136fced5 · 136fced5
Commit 136fced5 authored 3 years ago by dessyang Committed by rescue 3 years ago
--- a/official/cv/ctpn/README.md
+++ b/official/cv/ctpn/README.md
@@ -76,8 +76,11 @@ Here we used 6 datasets for training, and 1 datasets for Evaluation.
  ├── scripts
  │   ├── eval_res.sh                       # calculate precision and recall
  │   ├── run_distribute_train_ascend.sh    # launch distributed training with ascend platform(8p)
+  │   ├── run_distribute_train_gpu.sh       # launch distributed training with gpu platform(8p)
  │   ├── run_eval_ascend.sh                # launch evaluating with ascend platform
-  │   ├──run_infer_310.sh                   # shell script for 310 inference
+  │   ├── run_eval_gpu.sh                   # launch evaluating with gpu platform
+  │   ├── run_infer_310.sh                  # shell script for 310 inference
+  │   ├── run_standalone_train_gpu.sh       # launch standalone training with gpu platform(1p)
  │   └── run_standalone_train_ascend.sh    # launch standalone training with ascend platform(1p)
  ├── src
  │   ├── CTPN
@@ -102,6 +105,7 @@ Here we used 6 datasets for training, and 1 datasets for Evaluation.
  │   ├── eval_callback.py                  # evaluation callback while training
  │   ├── eval_utils.py                     # evaluation function
  │   ├── lr_schedule.py                    # learning rate scheduler
+  │   ├── weight_init.py                    # lstm initialization
  │   ├── network_define.py                 # network definition
  │   └── text_connector
  │       ├── __init__.py                   # package init file
@@ -171,7 +175,8 @@ Modify the parameters according to the actual path
 ```bash
 # distribute training
 bash scripts/run_distribute_train_ascend.sh [RANK_TABLE_FILE] [TASK_TYPE] [PRETRAINED_PATH]
-# example: bash scripts/run_distribute_train_ascend.sh ~/hccl_8p.json Pretraining(or Finetune) /home/DataSet/ctpn_dataset/backbone/0-150_5004.ckpt
+# example: bash scripts/run_distribute_train_ascend.sh /home/hccl_8p_01234567_10.155.170.71.json Pretraining(or Finetune) \
+# /home/DataSet/ctpn_dataset/backbone/0-150_5004.ckpt

 # standalone training
 bash scrpits/run_standalone_train_ascend.sh [TASK_TYPE] [PRETRAINED_PATH] [DEVICE_ID]
@@ -183,6 +188,24 @@ bash scripts/run_eval_ascend.sh [IMAGE_PATH] [DATASET_PATH] [CHECKPOINT_PATH]
 # /home/DataSet/ctpn_dataset/ctpn_final_dataset/test/ctpn_test.mindrecord /home/model/cv/ctpn/train_parallel0/ckpt_0/
 ```

+- GPU:
+
+```bash
+# distribute training
+bash scripts/run_distribute_train_gpu.sh [TASK_TYPE] [PRETRAINED_PATH]
+# example: bash scripts/run_distribute_train_gpu.sh Pretraining(or Finetune) \
+# /home/DataSet/ctpn_dataset/backbone/0-150_5004.ckpt
+
+# standalone training
+bash scripts/run_standalone_train_gpu.sh [TASK_TYPE] [PRETRAINED_PATH] [DEVICE_ID]
+# example: bash scripts/run_standalone_train_gpu.sh Pretraining(or Finetune) /home/DataSet/ctpn_dataset/backbone/0-150_5004.ckpt 0
+
+# evaluation:
+bash scripts/run_eval_gpu.sh [IMAGE_PATH] [DATASET_PATH] [CHECKPOINT_PATH]
+# example: bash scripts/run_eval_gpu.sh /home/DataSet/ctpn_dataset/ICDAR2013/test \
+# /home/DataSet/ctpn_dataset/ctpn_final_dataset/test/ctpn_test.mindrecord /home/model/cv/ctpn/train_parallel0/ckpt_0/
+```
+
 The `pretrained_path` should be a checkpoint of vgg16 trained on Imagenet2012. The name of weight in dict should be totally the same, also the batch_norm should be enabled in the trainig of vgg16, otherwise fails in further steps.COCO_TEXT_PARSER_PATH coco_text.py can refer to [Link](https://github.com/andreasveit/coco-text).To get the vgg16 backbone, you can use the network structure defined in src/CTPN/vgg16.py.To train the backbone, copy the src/CTPN/vgg16.py under modelzoo/official/cv/vgg16/src/, and modify the vgg16/train.py to suit the new construction.You can fix it as below:

 ```python
@@ -225,11 +248,21 @@ ICDAR2013, SCUT-FORU to improve precision and recall, and when doing Finetune, w
    Ascend:
      # distribute training example(8p)
      bash run_distribute_train_ascend.sh [RANK_TABLE_FILE] [TASK_TYPE] [PRETRAINED_PATH]
-      # example: bash scripts/run_distribute_train_ascend.sh ~/hccl_8p.json Pretraining(or Finetune) /home/DataSet/ctpn_dataset/backbone/0-150_5004.ckpt
+      # example: bash scripts/run_distribute_train_ascend.sh /home/hccl_8p_01234567_10.155.170.71.json Pretraining(or Finetune) /home/DataSet/ctpn_dataset/backbone/0-150_5004.ckpt

      # standalone training
      bash run_standalone_train_ascend.sh [TASK_TYPE] [PRETRAINED_PATH]
      # example: bash scrpits/run_standalone_train_ascend.sh Pretraining(or Finetune) /home/DataSet/ctpn_dataset/backbone/0-150_5004.ckpt 0
+
+  shell:
+    GPU:
+      # distribute training example(8p)
+      bash run_distribute_train_gpu.sh [TASK_TYPE] [PRETRAINED_PATH]
+      # example: bash scripts/run_distribute_train_gpu.sh Pretraining(or Finetune) /home/DataSet/ctpn_dataset/backbone/0-150_5004.ckpt
+
+      # standalone training
+      bash run_standalone_train_gpu.sh [TASK_TYPE] [PRETRAINED_PATH] [DEVICE_ID]
+      # example: bash scripts/run_standalone_train_gpu.sh Pretraining(or Finetune) /home/DataSet/ctpn_dataset/backbone/0-150_5004.ckpt 0
 ```

 ### Result
@@ -314,6 +347,13 @@ You can start training using python or shell scripts. The usage of shell scripts
  # example: bash script/run_eval_ascend.sh /home/DataSet/ctpn_dataset/ICDAR2013/test /home/DataSet/ctpn_dataset/ctpn_final_dataset/test/ctpn_test.mindrecord /home/model/cv/ctpn/train_parallel0/ckpt_0/
 ```

+- GPU:
+
+```bash
+  bash run_eval_gpu.sh [IMAGE_PATH] [DATASET_PATH] [CHECKPOINT_PATH]
+  # example: bash scripts/run_eval_gpu.sh /home/DataSet/ctpn_dataset/ICDAR2013/test /home/DataSet/ctpn_dataset/ctpn_final_dataset/test/ctpn_test.mindrecord /home/model/cv/ctpn/train_parallel0/ckpt_0/
+```
+
 After eval, you can get serval archive file named submit_ctpn-xx_xxxx.zip, which contains the name of your checkpoint file.To evalulate it, you can use the scripts provided by the ICDAR2013 network, you can download the Deteval scripts from the [link](https://rrc.cvc.uab.es/?com=downloads&action=download&ch=2&f=aHR0cHM6Ly9ycmMuY3ZjLnVhYi5lcy9zdGFuZGFsb25lcy9zY3JpcHRfdGVzdF9jaDJfdDFfZTItMTU3Nzk4MzA2Ny56aXA=)
 After download the scripts, unzip it and put it under ctpn/scripts and use eval_res.sh to get the result.You will get files as below:

@@ -342,6 +382,12 @@ Evaluation result will be stored in the example path, you can find result like t
 {"precision": 0.90791, "recall": 0.86118, "hmean": 0.88393}
 ```

+Evaluation result on GPU will be as follows:
+
+```text
+{"precision": 0.9346, "recall": 0.8621, "hmean": 0.8969}
+```
+
 ## Model Export

 ```shell
@@ -412,34 +458,34 @@ Evaluation result will be stored in the example path, you can find result like t

 ### Training Performance

-| Parameters                 | Ascend                                                       |
-| -------------------------- | ------------------------------------------------------------ |
-| Model Version              | CTPN                                                     |
-| Resource                   | Ascend 910; cpu 2.60GHz, 192cores; memory 755G; OS Euler2.8                |
-| uploaded Date              | 02/06/2021                                                   |
-| MindSpore Version          | 1.1.1                                                        |
-| Dataset                    | 16930 images                                                 |
-| Batch_size                 | 2                                                            |
-| Training Parameters        | src/config.py                                                |
-| Optimizer                  | Momentum                                                     |
-| Loss Function              | SoftmaxCrossEntropyWithLogits for classification, SmoothL2Loss for bbox regression|
-| Loss                       | ~0.04                                                       |
-| Total time (8p)            | 6h                                                           |
-| Scripts                    | [ctpn script](https://gitee.com/mindspore/models/tree/master/official/cv/ctpn) |
+| Parameters                 | Ascend                                                       | GPU                                              |
+| -------------------------- | ------------------------------------------------------------ |------------------------------------------------------------ |
+| Model Version              | CTPN                                                         | CTPN                                                     |
+| Resource                   | Ascend 910; cpu 2.60GHz, 192cores; memory 755G; OS Euler2.8  | Tesla V100 PCIE 32GB; CPU 2.60GHz; 104cores; Memory 790G; EulerOS 2.0     |
+| uploaded Date              | 02/06/2021                                                   | 09/20/2021                                                   |
+| MindSpore Version          | 1.1.1                                                        | 1.5.0                                                        |
+| Dataset                    | 16930 images                                                 | 16930 images                                                 |
+| Batch_size                 | 2                                                            | 2                                                            |
+| Training Parameters        | src/config.py                                                | src/config.py                                                |
+| Optimizer                  | Momentum                                                     | Momentum                                                     |
+| Loss Function              | SoftmaxCrossEntropyWithLogits for classification, SmoothL2Loss for bbox regression| SoftmaxCrossEntropyWithLogits for classification, SmoothL2Loss for bbox regression|
+| Loss                       | ~0.04                                                        | ~0.04                                                       |
+| Total time (8p)            | 6h                                                           | 11h                                                           |
+| Scripts                    | [ctpn script](https://gitee.com/mindspore/models/tree/master/official/cv/ctpn) | [ctpn script](https://gitee.com/mindspore/models/tree/master/official/cv/ctpn)     |

 #### Inference Performance

-| Parameters          | Ascend                 |
-| ------------------- | --------------------------- |
-| Model Version       | CTPN                 |
-| Resource            | Ascend 910; cpu 2.60GHz, 192cores; memory 755G; OS Euler2.8         |
-| Uploaded Date       | 02/06/2020                 |
-| MindSpore Version   | 1.1.1              |
-| Dataset             | 229 images                  |
-| Batch_size          | 1                         |
-| Accuracy            | precision=0.9079, recall=0.8611 F-measure:0.8839 |
-| Total time          | 1 min                      |
-| Model for inference | 135M (.ckpt file)   |
+| Parameters          | Ascend                                        | GPU                 |
+| ------------------- | --------------------------------------------- | --------------------------- |
+| Model Version       | CTPN                                          | CTPN                 |
+| Resource            | Ascend 910; cpu 2.60GHz, 192cores; memory 755G; OS Euler2.8   | Tesla V100 PCIE 32GB; CPU 2.60GHz; 104cores; Memory 790G; EulerOS 2.0 |
+| Uploaded Date       | 02/06/2021                                    | 09/20/2021                 |
+| MindSpore Version   | 1.1.1                                         | 1.5.0              |
+| Dataset             | 229 images                                    |229 images                  |
+| Batch_size          | 1                                             |1                         |
+| Accuracy            | precision=0.9079, recall=0.8611 F-measure:0.8839 | precision=0.9346, recall=0.8621 F-measure:0.8969 |
+| Total time          | 1 min                                         |1 min                      |
+| Model for inference | 135M (.ckpt file)                             | 135M (.ckpt file)           |

 #### Training performance results

@@ -451,6 +497,14 @@ Evaluation result will be stored in the example path, you can find result like t
 | :--------: | :---------------: |
 |     8p     |     84 img/s     |

+| **GPU** | train performance |
+| :--------: | :---------------: |
+|     1p     |     6 img/s      |
+
+| **GPU** | train performance |
+| :--------: | :---------------: |
+|     8p     |     52 img/s     |
+
 # [Description of Random Situation](#contents)

 We set seed to 1 in train.py.

--- a/official/cv/ctpn/scripts/run_distribute_train_gpu.sh
+++ b/official/cv/ctpn/scripts/run_distribute_train_gpu.sh
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Require exactly two arguments: the task type and the pretrained checkpoint.
+# The script relies on bash-only syntax (${1:0:1}, &>), so it must be run with bash.
+if [ $# -ne 2 ]
+then
+    echo "Usage: bash scripts/run_distribute_train_gpu.sh [TASK_TYPE] [PRETRAINED_PATH]"
+    exit 1
+fi
+
+# Print the absolute form of a path; a relative path is resolved against $PWD.
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    realpath -m "$PWD/$1"
+  fi
+}
+
+TASK_TYPE=$1
+PATH2=$(get_real_path "$2")
+echo "$PATH2"
+if [ ! -f "$PATH2" ]
+then
+    echo "error: PRETRAINED_PATH=$PATH2 is not a file"
+    exit 1
+fi
+
+# Work from a freshly recreated copy of the sources in ./train_parallel.
+rm -rf ./train_parallel
+mkdir ./train_parallel
+cp ./*.py ./train_parallel
+cp ./*yaml ./train_parallel
+cp -r ./scripts ./train_parallel
+cp -r ./src ./train_parallel
+cd ./train_parallel || exit
+
+export DEVICE_NUM=8
+export RANK_SIZE=8
+
+echo "start training"
+# Start $RANK_SIZE training processes under mpirun in the background; output goes to ./train_parallel/log.
+mpirun --allow-run-as-root -n $RANK_SIZE python train.py --run_distribute=True --task_type="$TASK_TYPE" --pre_trained="$PATH2" --device_target="GPU" &> log &
+cd ..
--- a/official/cv/ctpn/scripts/run_eval_gpu.sh
+++ b/official/cv/ctpn/scripts/run_eval_gpu.sh
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Require exactly three arguments: image directory, dataset file, checkpoint directory.
+# The script relies on bash-only syntax (${1:0:1}, &>), so it must be run with bash.
+if [ $# -ne 3 ]
+then
+    echo "Usage: bash scripts/run_eval_gpu.sh [IMAGE_PATH] [DATASET_PATH] [CHECKPOINT_PATH]"
+    exit 1
+fi
+
+# Print the absolute form of a path; a relative path is resolved against $PWD.
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    realpath -m "$PWD/$1"
+  fi
+}
+IMAGE_PATH=$(get_real_path "$1")
+DATASET_PATH=$(get_real_path "$2")
+CHECKPOINT_PATH=$(get_real_path "$3")
+echo "$IMAGE_PATH"
+echo "$DATASET_PATH"
+echo "$CHECKPOINT_PATH"
+
+if [ ! -d "$IMAGE_PATH" ]
+then
+    echo "error: IMAGE_PATH=$IMAGE_PATH is not a directory"
+    exit 1
+fi
+
+if [ ! -f "$DATASET_PATH" ]
+then
+    echo "error: DATASET_PATH=$DATASET_PATH is not a file"
+    exit 1
+fi
+
+if [ ! -d "$CHECKPOINT_PATH" ]
+then
+    echo "error: CHECKPOINT_PATH=$CHECKPOINT_PATH is not a directory"
+    exit 1
+fi
+
+export DEVICE_NUM=1
+export RANK_SIZE=$DEVICE_NUM
+export RANK_ID=0
+# Evaluate every checkpoint in turn, each from a freshly recreated ./eval working copy.
+for file in "${CHECKPOINT_PATH}"/*.ckpt
+do
+    # An unmatched glob is left as the literal pattern; stop instead of evaluating a missing file.
+    if [ ! -e "$file" ]
+    then
+        echo "error: no checkpoint file (*.ckpt) found in $CHECKPOINT_PATH"
+        exit 1
+    fi
+    if [ -d "eval" ];
+    then
+        rm -rf ./eval
+    fi
+    mkdir ./eval
+    cp ./*.py ./eval
+    cp -r ./scripts ./eval
+    cp -r ./src ./eval
+    cp ./*yaml ./eval
+    cd ./eval || exit
+    env > env.log
+    CHECKPOINT_FILE_PATH=$file
+    echo "start eval for checkpoint file: ${CHECKPOINT_FILE_PATH}"
+    python eval.py --image_path="$IMAGE_PATH" --dataset_path="$DATASET_PATH" --checkpoint_path="$CHECKPOINT_FILE_PATH" --device_target="GPU" &> log
+    echo "end eval for checkpoint file: ${CHECKPOINT_FILE_PATH}"
+    # Presumably eval.py writes its *.txt results into ./submit (TODO confirm); zip them into the start directory.
+    cd ./submit || exit
+    file_base_name=$(basename "$file")
+    zip -r "../../submit_${file_base_name%.*}.zip" *.txt
+    cd ../../
+done
+
--- a/official/cv/ctpn/scripts/run_infer_310.sh
+++ b/official/cv/ctpn/scripts/run_infer_310.sh
--- a/official/cv/ctpn/scripts/run_standalone_train_gpu.sh
+++ b/official/cv/ctpn/scripts/run_standalone_train_gpu.sh
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Always print a short usage banner before validating the arguments.
+echo "=============================================================================================================="
+echo "Please run the script as: "
+echo "bash scripts/run_standalone_train_gpu.sh [TASK_TYPE] [PRETRAINED_PATH] [DEVICE_ID]"
+echo "for example: bash scripts/run_standalone_train_gpu.sh Pretraining /path/vgg16_backbone.ckpt 0"
+echo "when device id is occupied, choose for another one"
+echo "It is better to use absolute path."
+echo "=============================================================================================================="
+# Require exactly three arguments; the script uses bash-only syntax, so it must be run with bash.
+if [ $# -ne 3 ]
+then
+    echo "Usage: bash scripts/run_standalone_train_gpu.sh [TASK_TYPE] [PRETRAINED_PATH] [DEVICE_ID]"
+    exit 1
+fi
+
+# Print the absolute form of a path; a relative path is resolved against $PWD.
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    realpath -m "$PWD/$1"
+  fi
+}
+
+TASK_TYPE=$1
+PRETRAINED_PATH=$(get_real_path "$2")
+echo "$PRETRAINED_PATH"
+if [ ! -f "$PRETRAINED_PATH" ]
+then
+    echo "error: PRETRAINED_PATH=$PRETRAINED_PATH is not a file"
+    exit 1
+fi
+
+# Work from a freshly recreated copy of the sources in ./train.
+rm -rf ./train
+mkdir ./train
+cp ./*.py ./train
+cp ./*yaml ./train
+cp -r ./scripts ./train
+cp -r ./src ./train
+cd ./train || exit
+
+export DEVICE_NUM=1
+export DEVICE_ID=$3
+export RANK_ID=0
+export RANK_SIZE=1
+
+echo "start training for device $DEVICE_ID"
+# Restrict the training process to the requested GPU.
+export CUDA_VISIBLE_DEVICES=$DEVICE_ID
+env > env.log
+# Train in the background; output goes to ./train/log.
+python train.py --task_type="$TASK_TYPE" --pre_trained="$PRETRAINED_PATH" --device_target="GPU" &> log &
+cd ..
--- a/official/cv/ctpn/src/CTPN/bbox_assign_sample.py
+++ b/official/cv/ctpn/src/CTPN/bbox_assign_sample.py
@@ -16,12 +16,21 @@

 import numpy as np
 import mindspore.nn as nn
+from mindspore import context
 from mindspore.ops import operations as P
 from mindspore.common.tensor import Tensor
 import mindspore.common.dtype as mstype
 from src.CTPN.BoundingBoxEncode import BoundingBoxEncode


+# Compute dtype is fixed at import time: float16 on Ascend, float32 on any other target.
+mtype, nptype = (
+    (mstype.float16, np.float16)
+    if context.get_context("device_target") == "Ascend"
+    else (mstype.float32, np.float32)
+)
+
+
 class BboxAssignSample(nn.Cell):
    """
    Bbox assigner and sampler definition.
@@ -48,10 +57,10 @@ class BboxAssignSample(nn.Cell):
        cfg = config
        self.batch_size = batch_size

-        self.neg_iou_thr = Tensor(cfg.neg_iou_thr, mstype.float16)
-        self.pos_iou_thr = Tensor(cfg.pos_iou_thr, mstype.float16)
-        self.min_pos_iou = Tensor(cfg.min_pos_iou, mstype.float16)
-        self.zero_thr = Tensor(0.0, mstype.float16)
+        self.neg_iou_thr = Tensor(cfg.neg_iou_thr, mtype)
+        self.pos_iou_thr = Tensor(cfg.pos_iou_thr, mtype)
+        self.min_pos_iou = Tensor(cfg.min_pos_iou, mtype)
+        self.zero_thr = Tensor(0.0, mtype)

        self.num_bboxes = num_bboxes
        self.num_gts = cfg.num_gts
@@ -93,10 +102,9 @@ class BboxAssignSample(nn.Cell):
        self.assigned_pos_ones = Tensor(np.array(np.ones(self.num_expected_pos), dtype=np.int32))

        self.check_neg_mask = Tensor(np.array(np.ones(self.num_expected_neg - self.num_expected_pos), dtype=np.bool))
-        self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(np.float16))
-        self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=np.float16))
-        self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=np.float16))
-
+        self.range_pos_size = Tensor(np.arange(self.num_expected_pos).astype(nptype))
+        self.check_gt_one = Tensor(np.array(-1 * np.ones((self.num_gts, 4)), dtype=nptype))
+        self.check_anchor_two = Tensor(np.array(-2 * np.ones((self.num_bboxes, 4)), dtype=nptype))

    def construct(self, gt_bboxes_i, gt_labels_i, valid_mask, bboxes, gt_valids):
        gt_bboxes_i = self.select(self.cast(self.tile(self.reshape(self.cast(gt_valids, mstype.int32), \
@@ -122,7 +130,7 @@ class BboxAssignSample(nn.Cell):
            assigned_gt_inds4 = self.select(pos_mask_j, self.assigned_gt_ones + j, assigned_gt_inds4)
        assigned_gt_inds5 = self.select(valid_mask, assigned_gt_inds4, self.assigned_gt_ignores)
        pos_index, valid_pos_index = self.random_choice_with_mask_pos(self.greater(assigned_gt_inds5, 0))
-        pos_check_valid = self.cast(self.greater(assigned_gt_inds5, 0), mstype.float16)
+        pos_check_valid = self.cast(self.greater(assigned_gt_inds5, 0), mtype)
        pos_check_valid = self.sum_inds(pos_check_valid, -1)
        valid_pos_index = self.less(self.range_pos_size, pos_check_valid)
        pos_index = pos_index * self.reshape(self.cast(valid_pos_index, mstype.int32), (self.num_expected_pos, 1))
@@ -131,7 +139,7 @@ class BboxAssignSample(nn.Cell):
        pos_assigned_gt_index = self.reshape(pos_assigned_gt_index, (self.num_expected_pos, 1))
        neg_index, valid_neg_index = self.random_choice_with_mask_neg(self.equal(assigned_gt_inds5, 0))

-        num_pos = self.cast(self.logicalnot(valid_pos_index), mstype.float16)
+        num_pos = self.cast(self.logicalnot(valid_pos_index), mtype)
        num_pos = self.sum_inds(num_pos, -1)
        unvalid_pos_index = self.less(self.range_pos_size, num_pos)
        valid_neg_index = self.logicaland(self.concat((self.check_neg_mask, unvalid_pos_index)), valid_neg_index)

--- a/official/cv/ctpn/src/CTPN/proposal_generator.py
+++ b/official/cv/ctpn/src/CTPN/proposal_generator.py
@@ -18,9 +18,17 @@ import numpy as np
 import mindspore.nn as nn
 import mindspore.common.dtype as mstype
 from mindspore.ops import operations as P
-from mindspore import Tensor
+from mindspore import Tensor, context
 from src.CTPN.BoundingBoxDecode import BoundingBoxDecode

+
+# Compute dtype is fixed at import time: float16 on Ascend, float32 on any other target.
+mtype, nptype = (
+    (mstype.float16, np.float16)
+    if context.get_context("device_target") == "Ascend"
+    else (mstype.float32, np.float32)
+)
+
 class Proposal(nn.Cell):
    """
    Proposal subnet.
@@ -99,7 +107,7 @@ class Proposal(nn.Cell):
        self.tile = P.Tile()
        self.set_train_local(config, training=True)

-        self.multi_10 = Tensor(10.0, mstype.float16)
+        self.multi_10 = Tensor(10.0, mtype)

    def set_train_local(self, config, training=False):
        """Set training flag."""
@@ -122,7 +130,7 @@ class Proposal(nn.Cell):
        self.topKv2 = P.TopK(sorted=True)
        self.topK_shape_stage2 = (self.max_num, 1)
        self.min_float_num = -65536.0
-        self.topK_mask = Tensor(self.min_float_num * np.ones(total_max_topk_input, np.float16))
+        self.topK_mask = Tensor(self.min_float_num * np.ones(total_max_topk_input, nptype))
        self.shape = P.Shape()

    def construct(self, rpn_cls_score_total, rpn_bbox_pred_total, anchor_list):
@@ -152,18 +160,18 @@ class Proposal(nn.Cell):
        rpn_cls_score = self.reshape(rpn_cls_score, self.reshape_shape)
        rpn_cls_score = self.activation(rpn_cls_score)
        if self.use_sigmoid_cls:
-            rpn_cls_score_process = self.cast(self.squeeze(rpn_cls_score), mstype.float16)
+            rpn_cls_score_process = self.cast(self.squeeze(rpn_cls_score), mtype)
        else:
-            rpn_cls_score_process = self.cast(self.squeeze(rpn_cls_score[::, 1]), mstype.float16)
+            rpn_cls_score_process = self.cast(self.squeeze(rpn_cls_score[::, 1]), mtype)

-        rpn_bbox_pred_process = self.cast(self.reshape(rpn_bbox_pred, (-1, 4)), mstype.float16)
+        rpn_bbox_pred_process = self.cast(self.reshape(rpn_bbox_pred, (-1, 4)), mtype)

        scores_sorted, topk_inds = self.topKv2(rpn_cls_score_process, self.num_pre)

        topk_inds = self.reshape(topk_inds, self.topK_shape)

        bboxes_sorted = self.gatherND(rpn_bbox_pred_process, topk_inds)
-        anchors_sorted = self.cast(self.gatherND(anchors, topk_inds), mstype.float16)
+        anchors_sorted = self.cast(self.gatherND(anchors, topk_inds), mtype)

        proposals_decode = self.decode(anchors_sorted, bboxes_sorted)

@@ -178,7 +186,7 @@ class Proposal(nn.Cell):

        _, _, _, _, scores = self.split(proposals)
        scores = self.squeeze(scores)
-        topk_mask = self.cast(self.topK_mask, mstype.float16)
+        topk_mask = self.cast(self.topK_mask, mtype)
        scores_using = self.select(masks, scores, topk_mask)

        _, topk_inds = self.topKv2(scores_using, self.max_num)

--- a/official/cv/ctpn/src/CTPN/rpn.py
+++ b/official/cv/ctpn/src/CTPN/rpn.py
@@ -17,10 +17,18 @@ import numpy as np
 import mindspore.nn as nn
 import mindspore.common.dtype as mstype
 from mindspore.ops import operations as P
-from mindspore import Tensor
+from mindspore import Tensor, context
 from mindspore.ops import functional as F
 from src.CTPN.bbox_assign_sample import BboxAssignSample

+
+# Compute dtype is fixed at import time: float16 on Ascend, float32 on any other target.
+mtype, nptype = (
+    (mstype.float16, np.float16)
+    if context.get_context("device_target") == "Ascend"
+    else (mstype.float32, np.float32)
+)
+
 class RpnRegClsBlock(nn.Cell):
    """
       Rpn reg cls block for rpn layer
@@ -46,9 +54,9 @@ class RpnRegClsBlock(nn.Cell):
        self.shape = P.Shape()
        self.reshape = P.Reshape()
        self.shape = (-1, 2*config.hidden_size)
-        self.lstm_fc = nn.Dense(2*config.hidden_size, 512).to_float(mstype.float16)
-        self.rpn_cls = nn.Dense(in_channels=512, out_channels=num_anchors * cls_out_channels).to_float(mstype.float16)
-        self.rpn_reg = nn.Dense(in_channels=512, out_channels=num_anchors * 4).to_float(mstype.float16)
+        self.lstm_fc = nn.Dense(2*config.hidden_size, 512).to_float(mtype)
+        self.rpn_cls = nn.Dense(in_channels=512, out_channels=num_anchors * cls_out_channels).to_float(mtype)
+        self.rpn_reg = nn.Dense(in_channels=512, out_channels=num_anchors * 4).to_float(mtype)
        self.shape1 = (-1, config.num_step, config.rnn_batch_size)
        self.shape2 = (config.batch_size, -1, config.rnn_batch_size, config.num_step)
        self.transpose = P.Transpose()
@@ -105,7 +113,7 @@ class RPN(nn.Cell):
        self.batch_size = batch_size
        self.test_batch_size = cfg_rpn.test_batch_size
        self.num_layers = 1
-        self.real_ratio = Tensor(np.ones((1, 1)).astype(np.float16))
+        self.real_ratio = Tensor(np.ones((1, 1)).astype(nptype))
        self.use_sigmoid_cls = config.use_sigmoid_cls
        if config.use_sigmoid_cls:
            self.reshape_shape_cls = (-1,)
@@ -121,15 +129,15 @@ class RPN(nn.Cell):
        self.reshape = P.Reshape()
        self.concat = P.Concat(axis=0)
        self.fill = P.Fill()
-        self.placeh1 = Tensor(np.ones((1,)).astype(np.float16))
+        self.placeh1 = Tensor(np.ones((1,)).astype(nptype))

        self.trans_shape = (0, 2, 3, 1)

        self.reshape_shape_reg = (-1, 4)
        self.softmax = nn.Softmax()
-        self.rpn_loss_reg_weight = Tensor(np.array(cfg_rpn.rpn_loss_reg_weight).astype(np.float16))
-        self.rpn_loss_cls_weight = Tensor(np.array(cfg_rpn.rpn_loss_cls_weight).astype(np.float16))
-        self.num_expected_total = Tensor(np.array(cfg_rpn.num_expected_neg * self.batch_size).astype(np.float16))
+        self.rpn_loss_reg_weight = Tensor(np.array(cfg_rpn.rpn_loss_reg_weight).astype(nptype))
+        self.rpn_loss_cls_weight = Tensor(np.array(cfg_rpn.rpn_loss_cls_weight).astype(nptype))
+        self.num_expected_total = Tensor(np.array(cfg_rpn.num_expected_neg * self.batch_size).astype(nptype))
        self.num_bboxes = cfg_rpn.num_bboxes
        self.get_targets = BboxAssignSample(cfg_rpn, self.batch_size, self.num_bboxes, False)
        self.CheckValid = P.CheckValid()
@@ -139,9 +147,9 @@ class RPN(nn.Cell):
        self.cast = P.Cast()
        self.tile = P.Tile()
        self.zeros_like = P.ZerosLike()
-        self.loss = Tensor(np.zeros((1,)).astype(np.float16))
-        self.clsloss = Tensor(np.zeros((1,)).astype(np.float16))
-        self.regloss = Tensor(np.zeros((1,)).astype(np.float16))
+        self.loss = Tensor(np.zeros((1,)).astype(nptype))
+        self.clsloss = Tensor(np.zeros((1,)).astype(nptype))
+        self.regloss = Tensor(np.zeros((1,)).astype(nptype))

    def _make_rpn_layer(self, num_layers, in_channels, feat_channels, num_anchors, cls_out_channels):
        """
@@ -190,8 +198,8 @@ class RPN(nn.Cell):
                                                                                 self.cast(valid_flag_list,
                                                                                           mstype.bool_),
                                                                                 anchor_list, gt_valids_i)
-                bbox_weight = self.cast(bbox_weight, mstype.float16)
-                label_weight = self.cast(label_weight, mstype.float16)
+                bbox_weight = self.cast(bbox_weight, mtype)
+                label_weight = self.cast(label_weight, mtype)
                bbox_targets += (bbox_target,)
                bbox_weights += (bbox_weight,)
                labels += (label,)

--- a/official/cv/ctpn/src/ctpn.py
+++ b/official/cv/ctpn/src/ctpn.py
@@ -15,6 +15,7 @@
 """CPTN network definition."""

 import numpy as np
+from mindspore import context
 import mindspore.nn as nn
 from mindspore import Tensor, Parameter
 from mindspore.common import dtype as mstype
@@ -23,6 +24,16 @@ from src.CTPN.rpn import RPN
 from src.CTPN.anchor_generator import AnchorGenerator
 from src.CTPN.proposal_generator import Proposal
 from src.CTPN.vgg16 import VGG16FeatureExtraction
+from src.weight_init import lstm_default_state
+
+# Resolved once at import time from the configured device target.
+# Ascend runs the network in float16; every other target is labelled "GPU"
+# and runs in float32.
+device_target, mtype, nptype = (
+    ("Ascend", mstype.float16, np.float16)
+    if context.get_context("device_target") == "Ascend"
+    else ("GPU", mstype.float32, np.float32)
+)

 class BiLSTM(nn.Cell):
    """
@@ -68,7 +79,7 @@ class BiLSTM(nn.Cell):
    def construct(self, x):
        if self.use_dropout:
            x = self.dropout(x)
-        x = self.cast(x, mstype.float16)
+        x = self.cast(x, mtype)
        bw_x = self.reverse_seq(x)
        y1, _, _, _, _, _, _, _ = self.rnn1(x, self.w1, self.b1, None, self.h1, self.c1)
        y1_bw, _, _, _, _, _, _, _ = self.rnn_bw(bw_x, self.w1_bw, self.b1_bw, None, self.h1_bw, self.c1_bw)
@@ -92,9 +103,14 @@ class CTPN(nn.Cell):
        self.num_step = config.num_step
        self.input_size = config.input_size
        self.hidden_size = config.hidden_size
-        self.vgg16_feature_extractor = VGG16FeatureExtraction().to_float(mstype.float16)
-        self.conv = nn.Conv2d(512, 512, kernel_size=3, padding=0, pad_mode='same').to_float(mstype.float16)
-        self.rnn = BiLSTM(self.config, batch_size=self.batch_size).to_float(mstype.float16)
+        self.vgg16_feature_extractor = VGG16FeatureExtraction().to_float(mtype)
+        self.conv = nn.Conv2d(512, 512, kernel_size=3, padding=0, pad_mode='same').to_float(mtype)
+        self.rnn = BiLSTM(self.config, batch_size=self.batch_size).to_float(mtype)
+        self.rnn2 = nn.LSTM(input_size=self.input_size,
+                            hidden_size=self.hidden_size,
+                            bidirectional=True).to_float(mtype)
+        self.h, self.c = lstm_default_state(self.batch_size * config.rnn_batch_size,
+                                            self.hidden_size, bidirectional=True)
        self.reshape = P.Reshape()
        self.transpose = P.Transpose()
        self.cast = P.Cast()
@@ -115,14 +131,18 @@ class CTPN(nn.Cell):
                                                config.activate_num_classes,
                                                config.use_sigmoid_cls)
        self.proposal_generator_test.set_train_local(config, False)
+
    def construct(self, img_data, gt_bboxes, gt_labels, gt_valids, img_metas=None):
        x = self.vgg16_feature_extractor(img_data)
        x = self.conv(x)
-        x = self.cast(x, mstype.float16)
+        x = self.cast(x, mtype)
        x = self.transpose(x, (0, 2, 1, 3))
        x = self.reshape(x, (-1, self.input_size, self.num_step))
        x = self.transpose(x, (2, 0, 1))
-        x = self.rnn(x)
+        if device_target == "Ascend":
+            x = self.rnn(x)
+        else:
+            x, _ = self.rnn2(x, (self.h, self.c))
        rpn_loss, cls_score, bbox_pred, rpn_cls_loss, rpn_reg_loss = self.rpn_with_loss(x,
                                                                                        img_metas,
                                                                                        self.anchor_list,
@@ -136,9 +156,17 @@ class CTPN(nn.Cell):

    def get_anchors(self, featmap_size):
        anchors = self.anchor_generator.grid_anchors(featmap_size)
-        return Tensor(anchors, mstype.float16)
+        return Tensor(anchors, mtype)
+

 class CTPN_Infer(nn.Cell):
+    """
+     Define CTPN_Infer network
+
+     Args:
+        config(EasyDict): config for ctpn network
+        batch_size(int): batch size of input data, only support 1
+     """
    def __init__(self, config, batch_size):
        super(CTPN_Infer, self).__init__()
        self.network = CTPN(config, batch_size=batch_size, is_training=False)

--- a/official/cv/ctpn/src/dataset.py
+++ b/official/cv/ctpn/src/dataset.py
@@ -24,6 +24,10 @@ import mindspore.dataset.transforms.c_transforms as CC
 import mindspore.common.dtype as mstype
 from src.model_utils.config import config

+# Dtype handed to the `type_cast1` TypeCast in create_ctpn_dataset:
+# half precision when the configured target is Ascend, float32 otherwise.
+# Read from `config` once, when this module is imported.
+mtype = mstype.float16 if config.device_target == "Ascend" else mstype.float32

 class PhotoMetricDistortion:
    """Photo Metric Distortion"""
@@ -286,7 +290,7 @@ def create_ctpn_dataset(mindrecord_file, batch_size=1, repeat_num=1, device_num=
    hwc_to_chw = C.HWC2CHW()
    normalize_op = C.Normalize((123.675, 116.28, 103.53), (58.395, 57.12, 57.375))
    type_cast0 = CC.TypeCast(mstype.float32)
-    type_cast1 = CC.TypeCast(mstype.float16)
+    type_cast1 = CC.TypeCast(mtype)
    type_cast2 = CC.TypeCast(mstype.int32)
    type_cast3 = CC.TypeCast(mstype.bool_)
    if is_training:

--- a/official/cv/ctpn/src/weight_init.py
+++ b/official/cv/ctpn/src/weight_init.py
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+weights initialization
+"""
+import numpy as np
+from mindspore import Tensor
+
+
+def lstm_default_state(batch_size, hidden_size, bidirectional, num_layers=1):
+    """
+    Build zero-filled initial hidden and cell states for an LSTM.
+
+    Args:
+        batch_size(int): size of the batch dimension of the states
+        hidden_size(int): size of the last dimension of the states
+        bidirectional(bool): if True, two directions are allocated per layer
+        num_layers(int): number of stacked layers, default 1
+
+    Returns:
+        tuple, (h, c): two separate float32 Tensors of shape
+        (num_layers * num_directions, batch_size, hidden_size).
+    """
+    num_directions = 2 if bidirectional else 1
+    state_shape = (num_layers * num_directions, batch_size, hidden_size)
+    # Allocate float32 directly instead of building a float64 array and
+    # converting it with astype().
+    h = Tensor(np.zeros(state_shape, dtype=np.float32))
+    c = Tensor(np.zeros(state_shape, dtype=np.float32))
+    return h, c
--- a/official/cv/ctpn/train.py
+++ b/official/cv/ctpn/train.py
@@ -17,9 +17,10 @@
 import os
 import ast
 import operator
+import numpy as np
 import mindspore.common.dtype as mstype
-from mindspore import context, Tensor
-from mindspore.communication.management import init
+from mindspore import context, Tensor, Parameter
+from mindspore.communication.management import init, get_group_size, get_rank
 from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor
 from mindspore.train import Model
 from mindspore.context import ParallelMode
@@ -34,13 +35,14 @@ from src.eval_utils import eval_for_ctpn, get_eval_result
 from src.eval_callback import EvalCallBack
 from src.model_utils.config import config
 from src.model_utils.moxing_adapter import moxing_wrapper
-from src.model_utils.device_adapter import get_device_num, get_device_id, get_rank_id
+from src.model_utils.device_adapter import get_device_id


 set_seed(1)


-context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=get_device_id(), save_graphs=True)
+context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target,
+                    device_id=get_device_id(), save_graphs=True)


 binOps = {
@@ -89,16 +91,17 @@ def train():
    config.num_step = config.img_width // 16
    config.rnn_batch_size = config.img_height // 16
    config.weight_decay = arithmeticeval(config.weight_decay)
-
    if config.run_distribute:
-        rank = get_rank_id()
-        device_num = get_device_num()
+        init()
+        context.reset_auto_parallel_context()
+        rank = get_rank()
+        device_num = get_group_size()
        context.set_auto_parallel_context(device_num=device_num, parallel_mode=ParallelMode.DATA_PARALLEL,
                                          gradients_mean=True)
-        init()
    else:
        rank = 0
        device_num = 1
+
    if config.task_type == "Pretraining":
        print("Start to do pretraining")
        mindrecord_file = config.pretraining_dataset_file
@@ -132,6 +135,12 @@ def train():
        for item in list(param_dict.keys()):
            if not item.startswith('vgg16_feature_extractor'):
                param_dict.pop(item)
+
+        if config.device_target == "GPU":
+            print("Converting pretrained checkpoint from fp16 to fp32.")
+            for key, value in param_dict.items():
+                tensor = value.asnumpy().astype(np.float32)
+                param_dict[key] = Parameter(tensor, key)
        load_param_into_net(net, param_dict)
    else:
        if load_path != "":