Unverified Commit caf80873 authored by i-robot, committed by Gitee

!2971 ONNX:EDSR

Merge pull request !2971 from cyx/edsr
parents adbb3878 8ed346df
DIV2K_config.yaml

@@ -22,6 +22,7 @@ epoch_size: 6000
 eval_epoch_frq: 20
 self_ensemble: True
 save_sr: True
+eval_type: ""

 # Adam opt options
 opt_type: Adam
README_CN.md

@@ -21,6 +21,7 @@
 - [Inference](#推理)
     - [Inference on Ascend 310 with the DIV2K dataset](#在昇腾310上使用DIV2K数据集进行推理)
     - [Inference on Ascend 310 with other datasets](#在昇腾310上使用其他数据集进行推理)
+    - [ONNX Inference](#进行onnx推理)
 - [Model Description](#模型描述)
     - [Performance](#性能)
         - [Training Performance](#训练性能)
@@ -218,7 +219,7 @@ EDSR consists of multiple optimized residual blocks in series; compared with the original r
 ## Script and Sample Code

-```bash
+```text
 ├── model_zoo
     ├── README.md // descriptions of all models
     ├── EDSR
@@ -229,6 +230,7 @@ EDSR consists of multiple optimized residual blocks in series; compared with the original r
 │ ├──run_train.sh // shell script for distributed training on Ascend
 │ ├──run_eval.sh // shell script for evaluation on Ascend
 │ ├──run_infer_310.sh // shell script for Ascend 310 inference
+│ └── run_eval_onnx.sh // shell script for ONNX evaluation
 ├── src
 │ ├──dataset.py // dataset creation
 │ ├──edsr.py // EDSR network architecture
@@ -237,7 +239,8 @@ EDSR consists of multiple optimized residual blocks in series; compared with the original r
 │ ├──utils.py // code shared by train.py and eval.py
 ├── train.py // training script
 ├── eval.py // evaluation script
-├── export.py // export checkpoint to air/mindir
+├── eval_onnx.py // ONNX evaluation script
+├── export.py // export checkpoint to onnx/air/mindir
 ├── preprocess.py // data preprocessing script for Ascend 310 inference
 ├── ascend310_infer
 │ ├──src // source code for Ascend 310 inference
@@ -310,7 +313,8 @@ EDSR consists of multiple optimized residual blocks in series; compared with the original r
 ## Export

-The model must be exported before running inference. AIR models can only be exported on Ascend 910; MINDIR can be exported in any environment. Only batch_size 1 is supported.
+The model must be exported before running inference. AIR models can only be exported on Ascend 910; MINDIR/ONNX can be exported in any environment. Only batch_size 1 is supported.
+Note: to export ONNX, change file_format = 'MINDIR' to file_format = 'ONNX' in export.py.

 ### Export Script
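For context, the note above amounts to roughly the following sketch, assuming the network and checkpoint are constructed exactly as in export.py; the tiny Conv2d cell, the checkpoint name, and the 1x3x1020x1020 input shape are placeholders so the snippet stays self-contained, not values fixed by the repository:

```python
# Hypothetical sketch of exporting a fixed-size checkpoint to ONNX (cf. the note above).
# In practice `net` is the EDSR network built as in export.py.
import numpy as np
import mindspore as ms
from mindspore import nn

net = nn.Conv2d(3, 3, 3)                                        # placeholder for the EDSR net
# ms.load_checkpoint("edsr_x2.ckpt", net=net)                   # load trained weights into the real net
dummy_lr = ms.Tensor(np.zeros((1, 3, 1020, 1020), np.float32))  # batch_size=1, fixed padded LR size
ms.export(net, dummy_lr, file_name="EDSR_x2", file_format="ONNX")
```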
@@ -356,14 +360,44 @@ python export.py --config_path DIV2K_config.yaml --output_path [dir to save mode
 - Inference procedure

 ```bash
 # (1) Prepare the dataset: pad all LR images to one fixed size. See preprocess.py.
 # (2) Export the model for that fixed size. See export.py.
 # (3) Build the inference program with build.sh inside ascend310_infer to obtain ascend310_infer/out/main.
 # (4) Configure the dataset image path, model path, output path, etc., then run main to produce the super-resolved images.
 ./ascend310_infer/out/main --mindir_path=[model] --dataset_path=[read_data_path] --device_id=[device_id] --save_dir=[save_data_path]
 # (5) Post-process the images to remove the padded regions, then compute metrics together with the HR images. See preprocess.py.
 ```

+#### ONNX Inference
+
+- Inference procedure
+
+```bash
+# (1) Prepare the dataset: pad all LR images to one fixed size. See preprocess.py.
+# (2) Export the model for that fixed size. See export.py.
+# (3) Run the inference script.
+```
+
+- Run the ONNX evaluation in a GPU environment
+
+```bash
+# Run the X2 evaluation example (EDSR(x2) in the paper)
+bash scripts/run_eval_onnx.sh ./DIV2K_config.yaml 2 DIV2K_path output_path pre_trained_model_path ONNX
+# Run the X3 evaluation example (EDSR(x3) in the paper)
+bash scripts/run_eval_onnx.sh ./DIV2K_config.yaml 3 DIV2K_path output_path pre_trained_model_path ONNX
+# Run the X4 evaluation example (EDSR(x4) in the paper)
+bash scripts/run_eval_onnx.sh ./DIV2K_config.yaml 4 DIV2K_path output_path pre_trained_model_path ONNX
+```
+
+The evaluation runs in the background; results can be checked in eval_onnx.log. Accuracy on the test dataset is as follows:
+
+```text
+.....
+[100/100] rank = 0 result = {'psnr': 29.297856984107398, 'num_sr': 100.0, 'time': 5.842652082443237}
+evaluation result = {'psnr': 29.297856984107398, 'num_sr': 100.0, 'time': 2905.9808044433594}
+eval success
+```
+
 # Model Description
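The flow added above is wrapped by eval_onnx.py (shown next) using the project's dataset and metric code. As a rough standalone illustration of how the exported model is driven through onnxruntime, here is a sketch; the model file name, the padded input size, and the HR size are placeholders, not values fixed by the repository:

```python
# Minimal onnxruntime sketch: run the exported EDSR model on one padded LR image and
# crop the result back to the HR size, the same idea as unpadding() in eval_onnx.py.
import numpy as np
import onnxruntime as ort

session = ort.InferenceSession("EDSR_x2.onnx", providers=["CUDAExecutionProvider"])
input_name = session.get_inputs()[0].name

lr = np.zeros((1, 3, 1020, 1020), dtype=np.float32)  # placeholder padded LR batch (NCHW)
sr = session.run(None, {input_name: lr})[0]          # padded SR output (for x2: 1x3x2040x2040)
hr_h, hr_w = 2040, 2040                              # placeholder HR target size
sr = sr[:, :, :hr_h, :hr_w]                          # drop the padded border
print(sr.shape)
```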
eval_onnx.py (new file)

# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
#################evaluate EDSR example on DIV2K########################
"""
import os
import time

import mindspore
from mindspore.common import set_seed
from mindspore import Tensor
import onnxruntime as ort

from src.metric import PSNR, SaveSrHr
from src.utils import init_env, init_dataset
from model_utils.config import config

set_seed(2021)


def create_session(checkpoint_path, target_device):
    """Create an ONNX Runtime session and return it together with its input names."""
    if target_device == 'GPU':
        providers = ['CUDAExecutionProvider']
    elif target_device in ('CPU', 'Ascend'):
        providers = ['CPUExecutionProvider']
    else:
        raise ValueError(f"Unsupported target device '{target_device}'. Expected one of: 'CPU', 'GPU', 'Ascend'")
    session = ort.InferenceSession(checkpoint_path, providers=providers)
    input_names = [x.name for x in session.get_inputs()]
    return session, input_names


def unpadding(img, target_shape):
    """Crop the padded SR output (NCHW) back to the HR target shape."""
    h, w = target_shape[2], target_shape[3]
    _, _, img_h, img_w = img.shape
    if img_h > h:
        img = img[:, :, :h, :]
    if img_w > w:
        img = img[:, :, :, :w]
    return img


def do_eval(session, input_names, ds_val, metrics, cur_epoch=None):
    """
    do eval for psnr and save hr, sr
    """
    total_step = ds_val.get_dataset_size()
    setw = len(str(total_step))
    begin = time.time()
    step_begin = time.time()
    rank_id = 0
    for i, (lr, hr) in enumerate(ds_val):
        # Run the ONNX model on the LR input and crop the padded output to the HR size.
        input_data = [lr.asnumpy()]
        sr = session.run(None, dict(zip(input_names, input_data)))
        sr = Tensor(unpadding(sr[0], hr.shape), mindspore.float32)
        _ = [m.update(sr, hr) for m in metrics.values()]
        result = {k: m.eval(sync=False) for k, m in metrics.items()}
        result["time"] = time.time() - step_begin
        step_begin = time.time()
        print(f"[{i+1:>{setw}}/{total_step:>{setw}}] rank = {rank_id} result = {result}", flush=True)
    result = {k: m.eval(sync=True) for k, m in metrics.items()}
    result["time"] = time.time() - begin
    print(f"evaluation result = {result}", flush=True)
    return result


def run_eval():
    """
    run eval
    """
    print(config, flush=True)
    cfg = config
    cfg.lr_type = "bicubic_AUG_self_ensemble"
    init_env(cfg)
    session, input_names = create_session(cfg.pre_trained, 'GPU')

    if cfg.dataset_name == "DIV2K":
        cfg.batch_size = 1
        cfg.patch_size = -1
        ds_val = init_dataset(cfg, "valid")
        metrics = {
            "psnr": PSNR(rgb_range=cfg.rgb_range, shave=6 + cfg.scale),
        }
        if config.save_sr:
            save_img_dir = os.path.join(cfg.output_path, "HrSr")
            os.makedirs(save_img_dir, exist_ok=True)
            metrics["num_sr"] = SaveSrHr(save_img_dir)
        do_eval(session, input_names, ds_val, metrics)
        print("eval success", flush=True)
    else:
        raise RuntimeError("Unsupported dataset.")


if __name__ == '__main__':
    run_eval()
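A quick toy check of the cropping semantics used above; the slice below simply restates the body of unpadding on dummy data:

```python
# Same crop as unpadding(): keep only the HR-sized top-left region of the padded SR output.
import numpy as np

sr_padded = np.ones((1, 3, 8, 8), dtype=np.float32)  # padded SR output (NCHW)
hr_shape = (1, 3, 6, 5)                              # HR target shape
h, w = hr_shape[2], hr_shape[3]
print(sr_padded[:, :, :h, :w].shape)                 # -> (1, 3, 6, 5)
```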
Python requirements for ONNX evaluation (new file)

onnxruntime-gpu
pillow
numpy
pyyaml
scripts/run_eval_onnx.sh (new file)

#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
echo "=============================================================================================================="
echo "Please run the script as: "
echo "bash run.sh DEVICE_ID CKPT_PATH"
echo "For example: bash scripts/run_eval_onnx.sh ./DIV2K_config.yaml 2 DIV2K path output_path pre_trained_model_path ONNX"
echo "It is better to use the absolute path."
echo "=============================================================================================================="
if [ $# != 6 ]
then
    echo "Usage: bash scripts/run_eval_onnx.sh [config_path] [scale] [data_path] [output_path] [pre_trained_model_path] [eval_type]"
    exit 1
fi
export args=${*:1}
python eval_onnx.py --config_path $1 --scale $2 --data_path $3 --output_path $4 --pre_trained $5 --eval_type $6 > eval_onnx.log 2>&1 &
src/dataset.py

@@ -241,7 +241,10 @@ def create_dataset_DIV2K(config, dataset_type="train", num_parallel_workers=10,
     """
     dataset_path = config["dataset_path"]
     lr_scale = config["scale"]
-    lr_type = config.get("lr_type", "bicubic")
+    if config["eval_type"] == "ONNX":
+        lr_type = config.get("lr_type", "bicubic_AUG_self_ensemble")
+    else:
+        lr_type = config.get("lr_type", "bicubic")
     batch_size = config.get("batch_size", 1)
     patch_size = config.get("patch_size", -1)
     epoch_size = config.get("epoch_size", None)

@@ -261,7 +264,10 @@ def create_dataset_DIV2K(config, dataset_type="train", num_parallel_workers=10,
     lrs_pattern = []
     for lr_scale in multi_lr_scale:
         dir_lr = os.path.join(dataset_path, f"DIV2K_{dataset_type}_LR_{lr_type}", f"X{lr_scale}")
-        lr_pattern = os.path.join(dir_lr, f"*x{lr_scale}.png")
+        if config["eval_type"] == "ONNX":
+            lr_pattern = os.path.join(dir_lr, f"*x{lr_scale}_0.png")
+        else:
+            lr_pattern = os.path.join(dir_lr, f"*x{lr_scale}.png")
         lrs_pattern.append(lr_pattern)
         column_names.append(f"lrx{lr_scale}")
     column_names.append("hr")  # ["lrx2","lrx3","lrx4",..., "hr"]
src/metric.py

@@ -199,13 +199,12 @@ class Quantizer(nn.Cell):
     """
     def __init__(self, _min=0.0, _max=255.0):
         super(Quantizer, self).__init__()
-        self.round = ops.Round()
         self._min = _min
         self._max = _max

     def construct(self, x):
         x = ops.clip_by_value(x, self._min, self._max)
-        x = self.round(x)
+        x = x.astype("Int32")
         return x

@@ -239,6 +238,7 @@ class _DistMetric(nn.Metric):
         if get_device_num is not None and get_device_num() > 1:
             self.all_reduce_sum = TensorSyncer(_type="sum")
         self.clear()
+        self.sum = None

     def _accumulate(self, value):
         if isinstance(value, (list, tuple)):

@@ -293,7 +293,7 @@ class PSNR(_DistMetric):
         diff = (sr - hr) / self.rgb_range
         valid = diff
         if self.shave is not None and self.shave != 0:
-            valid = valid[..., self.shave:(-self.shave), self.shave:(-self.shave)]
+            valid = valid[..., int(self.shave):int(-self.shave), int(self.shave):int(-self.shave)]
         mse_list = (valid ** 2).mean(axis=(1, 2, 3))
         mse_list = self._convert_data(mse_list).tolist()
         psnr_list = [float(1e32) if mse == 0 else(- 10.0 * math.log10(mse)) for mse in mse_list]
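For reference, a standalone numpy version of the PSNR computed in the hunk above; a sketch assuming NCHW batches in the 0-255 range and the same border shave (6 + scale, e.g. 8 for x2):

```python
# PSNR as in src/metric.py: normalise the difference by rgb_range, shave the border,
# then take -10 * log10(MSE) per image (1e32 stands in for a perfect match).
import math
import numpy as np

def psnr_per_image(sr, hr, rgb_range=255, shave=8):
    diff = (sr.astype(np.float64) - hr.astype(np.float64)) / rgb_range
    if shave:
        diff = diff[..., shave:-shave, shave:-shave]
    mse_list = (diff ** 2).mean(axis=(1, 2, 3))
    return [float(1e32) if mse == 0 else -10.0 * math.log10(mse) for mse in mse_list]

sr = np.random.randint(0, 256, (1, 3, 64, 64))
hr = np.random.randint(0, 256, (1, 3, 64, 64))
print(psnr_per_image(sr, hr))
```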
src/utils.py

@@ -65,6 +65,7 @@ def init_dataset(cfg, dataset_type="train"):
         "lr_type": cfg.lr_type,
         "batch_size": cfg.batch_size,
         "patch_size": cfg.patch_size,
+        "eval_type": cfg.eval_type,
     }
     if cfg.dataset_name == "DIV2K":
         dataset = create_dataset_DIV2K(config=ds_cfg,