diff --git a/research/cv/OCRNet/README_CN.md b/research/cv/OCRNet/README_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..68dbe69a4c257182a31155602869266b36a5e5c3
--- /dev/null
+++ b/research/cv/OCRNet/README_CN.md
@@ -0,0 +1,449 @@
+# 目录
+
+<!-- TOC -->
+
+- [目录](#目录)
+- [OCRNet描述](#ocrnet描述)
+    - [概述](#概述)
+    - [论文](#论文)
+- [模型架构](#模型架构)
+- [数据集](#数据集)
+- [环境要求](#环境要求)
+- [快速入门](#快速入门)
+- [脚本说明](#脚本说明)
+    - [脚本及样例代码](#脚本及样例代码)
+    - [脚本参数](#脚本参数)
+    - [训练过程](#训练过程)
+        - [用法](#用法)
+            - [Ascend处理器环境运行](#ascend处理器环境运行)
+            - [训练时推理](#训练时推理)
+        - [结果](#结果)
+    - [评估过程](#评估过程)
+        - [用法](#用法-1)
+            - [Ascend处理器环境运行](#ascend处理器环境运行-1)
+        - [结果](#结果-1)
+    - [推理过程](#推理过程)
+        - [导出MindIR](#导出mindir)
+        - [在Ascend310执行推理](#在ascend310执行推理)
+        - [结果](#结果-2)
+- [模型描述](#模型描述)
+    - [性能](#性能)
+        - [评估性能](#评估性能)
+            - [Cityscapes上OCRNet的性能](#cityscapes上ocrnet的性能)
+- [随机情况说明](#随机情况说明)
+- [ModelZoo主页](#modelzoo主页)
+
+<!-- /TOC -->
+
+# OCRNet描述
+
+## 概述
+
+OCRNet是由微软亚研院和中科院计算所提出的语义分割网络。OCRNet使用了一种新的物体上下文信息——在构建上下文信息时显式地增强了来自于同一类物体的像素的贡献,并在2019年7月和2020年1月的 Cityscapes leaderboard提交结果中都取得了语义分割任务第一名的成绩。相关工作“Object-Contextual Representations for Semantic Segmentation”已经被 ECCV 2020 收录。
+
+## 论文
+
+[Object-Contextual Representations for Semantic Segmentation](https://arxiv.org/pdf/1909.11065)
+
+# 模型架构
+
+OCRNet的总体架构如下:
+
+![OCRNet](figures/OCRNet.png)
+
+# 数据集
+
+1. 数据集[Cityscapes](https://www.cityscapes-dataset.com/)
+
+Cityscapes数据集包含5000幅高质量像素级别精细注释的城市街道场景图像。图像按2975/500/1525的分割方式分为三组,分别用于训练、验证和测试。数据集中共包含30类实体,其中19类用于验证。
+
+2. 数据集下载后的结构模式
+
+```bash
+$SEG_ROOT/data
+├─ cityscapes
+│  ├─ leftImg8bit
+│  │  ├─ train
+│  │  │  └─ [city_folders]
+│  │  └─ val
+│  │     └─ [city_folders]
+│  ├─ gtFine
+│  │  ├─ train
+│  │  │  └─ [city_folders]
+│  │  └─ val
+│  │     └─ [city_folders]
+│  ├─ train.lst
+│  └─ val.lst
+```
+
+# 环境要求
+
+- 硬件(Ascend)
+    - 准备Ascend处理器搭建硬件环境
+- 框架
+    - [MindSpore](https://www.mindspore.cn/install/en)
+- 如需查看详情,请参见如下资源:
+    - [MindSpore教程](https://www.mindspore.cn/tutorials/zh-CN/master/index.html)
+    - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html)
+
+# 快速入门
+
+通过官方网站安装MindSpore后,您可以按照如下步骤进行训练和评估:
+
+- Ascend处理器环境运行
+
+```bash
+# 分布式训练
+bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [EVAL_CALLBACK]
+
+# 分布式训练,从指定周期开始恢复训练
+bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [BEGIN_EPOCH] [EVAL_CALLBACK]
+
+# 单机训练
+bash scripts/run_standalone_train.sh [DEVICE_ID] [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [EVAL_CALLBACK]
+
+# 单机训练,从指定周期开始恢复训练
+bash scripts/run_standalone_train.sh [DEVICE_ID] [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [BEGIN_EPOCH] [EVAL_CALLBACK]
+
+# 运行评估
+bash scripts/run_eval.sh [DEVICE_ID] [DATASET_PATH] [CHECKPOINT_PATH]
+```
+
+如果要在ModelArts上进行模型的训练,可以参考ModelArts的[官方指导文档](https://support.huaweicloud.com/modelarts/)开始进行模型的训练和推理,具体操作如下:
+
+```text
+# 训练模型
+1. 创建作业
+2. 选择数据集存储位置
+3. 选择输出存储位置
+4. 在模型参数列表位置按如下形式添加参数:
+    data_url            [自动填充]
+    train_url           [自动填充]
+    checkpoint_url      [CHECKPOINT_PATH_OBS]
+    modelarts           True
+    device_target       Ascend
+    run_distribute      [True/False]
+    eval_callback       [True/False]
+    # 其他可选参数具体详情请参考train.py脚本
+5. 选择相应数量的处理器
+6. 开始运行
+
+# 评估模型
+1. 创建作业
+2. 选择数据集存储位置
+3. 选择输出存储位置
+4. 在模型参数列表位置按如下形式添加参数:
+    data_url            [自动填充]
+    train_url           [自动填充]
+    checkpoint_url      [CHECKPOINT_PATH_OBS]
+    modelarts           True
+    device_target       Ascend
+5. 选择单个处理器
+6. 开始运行
+```
+
+# 脚本说明
+
+## 脚本及样例代码
+
+```bash
+├─ OCRNet
+│  ├─ ascend310_infer                    # 310推理相关脚本
+│  │  ├─ inc
+│  │  │  └─ utils.h
+│  │  └─ src
+│  │     ├─ build.sh
+│  │     ├─ CMakeLists.txt
+│  │     ├─ main.cc
+│  │     └─ utils.cc
+│  ├─ scripts
+│  │  ├─ ascend310_inference.sh          # 启动Ascend310推理(单卡)
+│  │  ├─ run_standalone_train.sh         # 启动Ascend单机训练(单卡)
+│  │  ├─ run_distribute_train.sh         # 启动Ascend分布式训练(4卡)
+│  │  └─ run_eval.sh                     # 启动Ascend单机评估(单卡)
+│  ├─ data
+│  │  ├─ cityscapes
+│  │  │  ├─ leftImg8bit
+│  │  │  │  └─ [original_images]         # 数据集图像文件
+│  │  │  ├─ gtFine
+│  │  │  │  └─ [label_images]            # 数据集标签文件
+│  │  │  ├─ train.lst                    # 训练集存储路径列表
+│  │  │  └─ val.lst                      # 验证集存储路径列表
+│  ├─ src
+│  │  ├─ model_utils
+│  │  │  └─ moxing_adapter.py            # ModelArts设备配置
+│  │  ├─ config.py                       # 参数配置
+│  │  ├─ basedataset.py                  # 数据集生成器基类
+│  │  ├─ cityscapes.py                   # Cityscapes数据集生成器
+│  │  ├─ loss.py                         # 损失函数
+│  │  ├─ callback.py                     # 训练时推理回调函数
+│  │  ├─ seg_hrnet_ocr.py                # OCRNet网络结构
+│  │  └─ utils.py                        # 参数初始化函数
+│  ├─ train_out
+│  ├─ export.py                          # 310推理,导出mindir
+│  ├─ preprocess.py                      # 310推理,数据预处理
+│  ├─ postprocess.py                     # 310推理,计算mIoU
+│  ├─ train.py                           # 训练模型
+│  └─ eval.py                            # 评估模型
+```
+
+## 脚本参数
+
+在配置文件中可以同时配置训练参数和评估参数。
+
+```python
+hrnetv2_w48_configuration = {
+    "data_url": None,                     # 数据集OBS存储路径
+    "data_path": None,                    # 数据集本地机器存储路径
+    "train_url": None,                    # 训练输出OBS存储路径
+    "train_path": None,                   # 训练输出本地机器存储路径
+    "checkpoint_url": None,               # checkpoint文件OBS存储路径
+    "checkpoint_path": None,              # checkpoint文件本地机器存储路径
+    "run_distribute": False,              # 是否为分布式运行
+    "device_target": "Ascend",            # 运行平台
+    "workers": 8,
+    "modelarts": False,                   # 是否在ModelArts上运行
+    "lr": 0.0013,                         # 基础学习率
+    "lr_power": 4e-10,                    # 学习率调整因子
+    "save_checkpoint_epochs": 20,         # 存储checkpoint的频率
+    "keep_checkpoint_max": 20,            # 保存checkpoint的个数
+    "total_epoch": 1000,                  # 总训练周期
+    "begin_epoch": 0,                     # 开始周期
+    "end_epoch": 1000,                    # 结束周期
+    "batchsize": 4,                       # 输入张量批次大小
+    "eval_callback": False,               # 是否使用训练时推理
+    "eval_interval": 50,                  # 训练时推理的频率
+    "train": {
+        "train_list": "/train.lst",       # 训练集文件存储路径列表
+        "image_size": [512, 1024],        # 训练输入图像大小
+        "base_size": 2048,                # 训练图像的基础大小
+        "multi_scale": True,              # 是否随机放缩图像
+        "flip": True,                     # 是否翻转图像
+        "downsample_rate": 1,             # 下采样率
+        "scale_factor": 16,               # 放缩因子
+        "shuffle": True,                  # 是否混洗
+        "param_initializer": "TruncatedNormal",  # 参数初始化方法
+        "opt_momentum": 0.9,              # 优化器动量
+        "wd": 0.0005,                     # 权重衰减
+        "num_samples": 0                  # 采样数
+    },
+    "dataset": {
+        "name": "Cityscapes",             # 数据集名称
+        "num_classes": 19,                # 类别数量
+        "ignore_label": 255,              # 不被考虑的类别标签值
+        "mean": [0.485, 0.456, 0.406],    # 均值
+        "std": [0.229, 0.224, 0.225],     # 标准差
+    },
+    "eval": {
+        "eval_list": "/val.lst",          # 验证集文件存储路径列表
+        "image_size": [1024, 2048],       # 评估输入图像大小
+        "base_size": 2048,                # 评估图像基础大小
+        "batch_size": 1,                  # 评估输入批次大小
+        "num_samples": 0,                 # 采样数
+        "flip": False,                    # 是否翻转图像
+        "multi_scale": False,             # 是否使用多尺寸特征图
+        "scale_list": [1]                 # 放大尺寸列表
+    },
+    "model": {                            # 模型相关参数
+        "name": "seg_hrnet_w48",          # 模型名称
+        "extra": {
+            "FINAL_CONV_KERNEL": 1,
+            "STAGE1": {                   # stage1参数
+                "NUM_MODULES": 1,         # High-resolution module数量
+                "NUM_BRANCHES": 1,        # 分支数量
+                "BLOCK": "BOTTLENECK",    # 残差块类型
+                "NUM_BLOCKS": [4],        # 各分支残差块数量
+                "NUM_CHANNELS": [64],     # 各分支特征图通道数
+                "FUSE_METHOD": "SUM"      # 分支融合方式
+            },
+            "STAGE2": {                   # stage2参数
+                "NUM_MODULES": 1,
+                "NUM_BRANCHES": 2,
+                "BLOCK": "BASIC",
+                "NUM_BLOCKS": [4, 4],
+                "NUM_CHANNELS": [48, 96],
+                "FUSE_METHOD": "SUM"
+            },
+            "STAGE3": {                   # stage3参数
+                "NUM_MODULES": 4,
+                "NUM_BRANCHES": 3,
+                "BLOCK": "BASIC",
+                "NUM_BLOCKS": [4, 4, 4],
+                "NUM_CHANNELS": [48, 96, 192],
+                "FUSE_METHOD": "SUM"
+            },
+            "STAGE4": {                   # stage4参数
+                "NUM_MODULES": 3,
+                "NUM_BRANCHES": 4,
+                "BLOCK": "BASIC",
+                "NUM_BLOCKS": [4, 4, 4, 4],
"NUM_CHANNELS": [48, 96, 192, 384], + "FUSE_METHOD": "SUM" + } + }, + "ocr": { # ocr module参数 + "mid_channels": 512, + "key_channels": 256, + "key_channels": 256, + "dropout": 0.05, + "scale": 1 + } + }, + "loss": { + "loss_scale": 10, # 损失等级 + "use_weights": True, + "balance_weights": [0.4, 1] + }, +} +``` + +## 训练过程 + +### 用法 + +#### Ascend处理器环境运行 + +```bash +# 分布式训练 +bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [EVAL_CALLBACK](optional) + +# 分布式训练,从指定周期开始恢复训练 +bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [BEGIN_EPOCH] [EVAL_CALLBACK](optional) + +# 单机训练 +bash scripts/run_standalone_train.sh [DEVICE_ID] [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [EVAL_CALLBACK](optional) + +# 单机训练,从指定周期开始恢复训练 +bash scripts/run_standalone_train.sh [DEVICE_ID] [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [BEGIN_EPOCH] [EVAL_CALLBACK](optional) +``` + +分布式训练需要提前创建JSON格式的HCCL配置文件。 + +具体操作,参见[hccn_tools](https://gitee.com/mindspore/mindspore/tree/master/model_zoo/utils/hccl_tools)中的说明。 + +训练结果保存在示例路径中,文件夹名称以“train”或“train_parallel”开头。您可在此路径下的日志中找到检查点文件以及结果,如下所示。 + +运行单卡用例时如果想更换运行卡号,可以通过设置环境变量 `export DEVICE_ID=x`。 + +#### 训练时推理 + +如果需要训练时推理,在执行shell脚本时为`EVAL_CALLBACK`参数传入`True`即可,其默认值为`False`。 + +### 结果 + +使用Cityscapes数据集训练OCRNet + +```text +# 分布式训练结果(4p) +epoch: 1 step: 248, loss is 1.1374861 +epoch time: 339815.397 ms, per step time: 1370.223 ms +epoch: 2 step: 248, loss is 0.40133527 +epoch time: 69468.934 ms, per step time: 280.117 ms +epoch: 3 step: 248, loss is 0.20046248 +epoch time: 69358.028 ms, per step time: 279.669 ms +epoch: 4 step: 248, loss is 0.37442797 +epoch time: 69333.672 ms, per step time: 279.571 ms +epoch: 5 step: 248, loss is 0.34999597 +epoch time: 69352.299 ms, per step time: 279.646 ms +... +``` + +## 评估过程 + +### 用法 + +#### Ascend处理器环境运行 + +```bash +# 运行评估 +bash scripts/run_eval.sh [DEVICE_ID] [DATASET_PATH] [CHECKPOINT_PATH] +``` + +### 结果 + +评估结果保存在示例路径中,文件夹名为“eval”。你可在此路径下的日志文件中找到如下结果: + +```text +Total number of images: 500 +=========== Validation Result =========== +===> mIoU: 0.7961077635109521 +===> IoU array: + [0.98296033 0.85900498 0.92949463 0.61348649 0.65449864 0.62388795 + 0.70787673 0.79361175 0.92397478 0.64360418 0.94558114 0.81655936 + 0.63244356 0.95029043 0.83258733 0.90902162 0.85732374 0.67388184 + 0.77595802] +========================================= +``` + +## 推理过程 + +### 导出MindIR + +```bash +python export.py --device_id [DEVICE_ID] --checkpoint_file [CKPT_PATH] --file_name [FILE_NAME] --file_format MINDIR --device_target Ascend +``` + +### 在Ascend310执行推理 + +在执行推理之前,必须先通过`export.py`脚本到本mindir文件。以下展示了使用mindir模型执行推理的示例。目前只支持Cityscapes数据集batchsize为1的推理。 + +```bash +bash scripts/ascend310_inference.sh [MINDIR_PATH] [DATA_PATH] [DEVICE_ID] +``` + +- `MINDIR_PATH` mindir文件的存储路径 +- `DATA_PATH` Cityscapes原始数据集的存储路径 +- `DEVICE_ID` 卡号 + +脚本内部分为三步: + +1. `preprocess.py`对原始数据集进行预处理,并将处理后的数据集以二进制的形式存储在`./preprocess_Result/`路径下; +2. `ascend310_infer/src/main.cc`执行推理过程,并将预测结果以二进制的形式存储在`./result_Files/`路径下,推理日志可在`infer.log`中查看; +3. 
`postprocess.py`利用预测结果与相应标签计算mIoU,计算结果可在`acc.log`中查看。 + +### 结果 + +```text +Total number of images: 500 +=========== 310 Inference Result =========== +miou: 0.7880364289865892 +iou array: + [0.98327649 0.86189605 0.92990512 0.53712174 0.63041064 0.68390911 + 0.71874631 0.80141863 0.92871439 0.63142162 0.94527287 0.83139662 + 0.6455081 0.95468034 0.81087329 0.87612221 0.74120989 0.67898836 + 0.78182036] +============================================ +``` + +# 模型描述 + +## 性能 + +### 评估性能 + +#### Cityscapes上OCRNet的性能 + +|参数|Ascend 910| +|------------------------------|------------------------------| +|模型版本|OCRNet| +|资源|Ascend 910;CPU 2.60GHz,192核;内存 755G;系统 Euler2.8| +|上传日期|2021-12-12| +|MindSpore版本|1.2| +|数据集|Cityscapes| +|训练参数|epoch=1000, steps per epoch=248, batch_size = 3| +|优化器|SGD| +|损失函数|Softmax交叉熵| +|输出|mIoU| +|损失|0.06756218| +|速度|279毫秒/步(4卡)| +|总时长|19.4小时| + +# 随机情况说明 + +`train.py`中使用了随机种子。 + +# ModelZoo主页 + + 请浏览官网[主页](https://gitee.com/mindspore/models)。 diff --git a/research/cv/OCRNet/ascend310_infer/inc/utils.h b/research/cv/OCRNet/ascend310_infer/inc/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..ad69339ecaa451e69d111d0c0aee543c8e8f9152 --- /dev/null +++ b/research/cv/OCRNet/ascend310_infer/inc/utils.h @@ -0,0 +1,33 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_INFERENCE_UTILS_H_ +#define MINDSPORE_INFERENCE_UTILS_H_ + +#include <sys/stat.h> +#include <dirent.h> +#include <vector> +#include <string> +#include <memory> +#include "include/api/types.h" + +DIR *open_dir(std::string_view dirName); +std::string real_path(std::string_view path); +mindspore::MSTensor read_file_to_tensor(const std::string &file); +int write_result(const std::string& imageFile, const std::vector<mindspore::MSTensor> &outputs); +std::vector<std::string> get_all_files(std::string dir_name); + +#endif diff --git a/research/cv/OCRNet/ascend310_infer/src/CMakeLists.txt b/research/cv/OCRNet/ascend310_infer/src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..14e676821a4936c03e98b9299b3b5f5e4496a8ea --- /dev/null +++ b/research/cv/OCRNet/ascend310_infer/src/CMakeLists.txt @@ -0,0 +1,14 @@ +cmake_minimum_required(VERSION 3.14.1) +project(MindSporeCxxTestcase[CXX]) +add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0) +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -std=c++17 -Werror -Wall -fPIE -Wl,--allow-shlib-undefined") +set(PROJECT_SRC_ROOT ${CMAKE_CURRENT_LIST_DIR}/) +option(MINDSPORE_PATH "mindspore install path" "") +include_directories(${MINDSPORE_PATH}) +include_directories(${MINDSPORE_PATH}/include) +include_directories(${PROJECT_SRC_ROOT}/../) +find_library(MS_LIB libmindspore.so ${MINDSPORE_PATH}/lib) +file(GLOB_RECURSE MD_LIB ${MINDSPORE_PATH}/_c_dataengine*) +find_package(gflags REQUIRED) +add_executable(main main.cc utils.cc) +target_link_libraries(main ${MS_LIB} ${MD_LIB} gflags) diff --git a/research/cv/OCRNet/ascend310_infer/src/build.sh b/research/cv/OCRNet/ascend310_infer/src/build.sh new file mode 100644 index 0000000000000000000000000000000000000000..abcb999930ca5d62345b204d7fcfe4e097e8f0bb --- /dev/null +++ b/research/cv/OCRNet/ascend310_infer/src/build.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +cmake . -DMINDSPORE_PATH="`pip show mindspore-ascend | grep Location | awk '{print $2"/mindspore"}' | xargs realpath`" +make diff --git a/research/cv/OCRNet/ascend310_infer/src/main.cc b/research/cv/OCRNet/ascend310_infer/src/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..fad45ae8d71b42942687ac2d8c30d704a0c3455b --- /dev/null +++ b/research/cv/OCRNet/ascend310_infer/src/main.cc @@ -0,0 +1,124 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <sys/time.h> +#include <gflags/gflags.h> +#include <dirent.h> +#include <iostream> +#include <string> +#include <algorithm> +#include <iosfwd> +#include <vector> +#include <fstream> +#include <sstream> + +#include "include/api/model.h" +#include "include/api/context.h" +#include "include/api/types.h" +#include "include/api/serialization.h" +#include "include/dataset/vision_ascend.h" +#include "include/dataset/execute.h" +#include "include/dataset/constants.h" +#include "include/dataset/transforms.h" +#include "include/dataset/vision.h" +#include "inc/utils.h" + +using mindspore::dataset::vision::Decode; +using mindspore::dataset::vision::Resize; +using mindspore::dataset::vision::CenterCrop; +using mindspore::dataset::vision::Normalize; +using mindspore::dataset::vision::HWC2CHW; +using mindspore::dataset::TensorTransform; +using mindspore::Context; +using mindspore::Serialization; +using mindspore::Model; +using mindspore::Status; +using mindspore::ModelType; +using mindspore::GraphCell; +using mindspore::kSuccess; +using mindspore::MSTensor; +using mindspore::dataset::Execute; + +DEFINE_string(gmindir_path, "./ocrnet.mindir", "mindir path"); +DEFINE_string(gdataset_path, ".", "dataset path"); +DEFINE_int32(gdevice_id, 0, "device id"); + +int main(int argc, char **argv) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + if (real_path(FLAGS_gmindir_path).empty()) { + std::cout << "Invalid mindir." << std::endl; + return 1; + } + auto context = std::make_shared<Context>(); + auto ascend310 = std::make_shared<mindspore::Ascend310DeviceInfo>(); + ascend310->SetDeviceID(FLAGS_gdevice_id); + context->MutableDeviceInfo().push_back(ascend310); + mindspore::Graph graph; + Serialization::Load(FLAGS_gmindir_path, ModelType::kMindIR, &graph); + Model model; + Status ret = model.Build(GraphCell(graph), context); + if (ret != kSuccess) { + std::cout << "ERROR: Build failed." << std::endl; + return 1; + } + auto all_files = get_all_files(FLAGS_gdataset_path); + std::cout << typeid(all_files).name() << std::endl; + if (all_files.empty()) { + std::cout << "ERROR: no input data." << std::endl; + return 1; + } + std::vector<MSTensor> modelInputs = model.GetInputs(); + std::map<double, double> costTime_map; + size_t size = all_files.size(); + for (size_t i = 0; i < size; ++i) { + struct timeval start = {0}; + struct timeval end = {0}; + double startTimeMs = 0; + double endTimeMs = 0; + std::vector<MSTensor> inputs; + std::vector<MSTensor> outputs; + std::cout << "==> Image: " << all_files[i] << std::endl; + MSTensor image = read_file_to_tensor(all_files[i]); + inputs.emplace_back(modelInputs[0].Name(), modelInputs[0].DataType(), modelInputs[0].Shape(), + image.Data().get(), image.DataSize()); + gettimeofday(&start, nullptr); + ret = model.Predict(inputs, &outputs); + gettimeofday(&end, nullptr); + if (ret != kSuccess) { + std::cout << "Predict " << all_files[i] << " failed." 
<< std::endl; + return 1; + } + startTimeMs = (1.0 * start.tv_sec * 1000000 + start.tv_usec) / 1000; + endTimeMs = (1.0 * end.tv_sec * 1000000 + end.tv_usec) / 1000; + costTime_map.insert(std::pair<double, double>(startTimeMs, endTimeMs)); + write_result(all_files[i], outputs); + } + double average = 0.0; + int inferCount = 0; + for (auto iter = costTime_map.begin(); iter != costTime_map.end(); iter++) { + average += iter->second - iter->first; + inferCount++; + } + average = average / inferCount; + std::stringstream timeCost; + timeCost << "NN inference cost average time: " << average << " ms of infer_count " << inferCount << std::endl; + std::cout << "NN inference cost average time: " << average << "ms of infer_count " << inferCount << std::endl; + std::string fileName = "./time_Result" + std::string("/test_perform_static.txt"); + std::ofstream fileStream(fileName.c_str(), std::ios::trunc); + fileStream << timeCost.str(); + fileStream.close(); + costTime_map.clear(); + return 0; +} diff --git a/research/cv/OCRNet/ascend310_infer/src/utils.cc b/research/cv/OCRNet/ascend310_infer/src/utils.cc new file mode 100644 index 0000000000000000000000000000000000000000..e461ceeae24fc09dbed5153e4bbe84f8816d8b53 --- /dev/null +++ b/research/cv/OCRNet/ascend310_infer/src/utils.cc @@ -0,0 +1,142 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <fstream> +#include <algorithm> +#include <iostream> +#include "inc/utils.h" + +using mindspore::MSTensor; +using mindspore::DataType; + +std::vector<std::string> get_all_files(std::string dirName) { + struct dirent *filename; + DIR *dir = open_dir(dirName); + if (dir == nullptr) { + return {}; + } + std::vector<std::string> gdirs; + std::vector<std::string> gfiles; + while ((filename = readdir(dir)) != nullptr) { + std::string dName = std::string(filename->d_name); + if (dName == "." || dName == "..") { + continue; + } else if (filename->d_type == DT_DIR) { + gdirs.emplace_back(std::string(dirName) + "/" + filename->d_name); + } else if (filename->d_type == DT_REG) { + gfiles.emplace_back(std::string(dirName) + "/" + filename->d_name); + } else { + continue; + } + } + + for (auto d : gdirs) { + dir = open_dir(d); + while ((filename = readdir(dir)) != nullptr) { + std::string dName = std::string(filename->d_name); + if (dName == "." || dName == ".." 
|| filename->d_type != DT_REG) { + continue; + } + gfiles.emplace_back(std::string(d) + "/" + filename->d_name); + } + } + std::sort(gfiles.begin(), gfiles.end()); + for (auto &f : gfiles) { + std::cout << "image file: " << f << std::endl; + } + return gfiles; +} + +int write_result(const std::string& imageFile, const std::vector<MSTensor> &outputs) { + std::string homePath = "./result_Files"; + for (size_t i = 0; i < outputs.size(); ++i) { + size_t outputSize = outputs[i].DataSize(); + std::shared_ptr<const void> netOutput = outputs[i].Data(); + int pos = imageFile.rfind('/'); + std::string fileName(imageFile, pos + 1); + fileName.replace(fileName.find('.'), fileName.size() - fileName.find('.'), '_' + std::to_string(i) + ".bin"); + std::string outFileName = homePath + "/" + fileName; + FILE *outputFile = fopen(outFileName.c_str(), "wb"); + fwrite(netOutput.get(), outputSize, sizeof(char), outputFile); + fclose(outputFile); + outputFile = nullptr; + } + return 0; +} + +mindspore::MSTensor read_file_to_tensor(const std::string &file) { + if (file.empty()) { + std::cout << "Pointer file is nullptr" << std::endl; + return mindspore::MSTensor(); + } + + std::ifstream ifs(file); + if (!ifs.good()) { + std::cout << "File: " << file << " is not exist" << std::endl; + return mindspore::MSTensor(); + } + + if (!ifs.is_open()) { + std::cout << "File: " << file << "open failed" << std::endl; + return mindspore::MSTensor(); + } + + ifs.seekg(0, std::ios::end); + size_t size = ifs.tellg(); + mindspore::MSTensor buffer(file, mindspore::DataType::kNumberTypeUInt8, + {static_cast<int64_t>(size)}, nullptr, size); + + ifs.seekg(0, std::ios::beg); + ifs.read(reinterpret_cast<char *>(buffer.MutableData()), size); + ifs.close(); + + return buffer; +} + +DIR *open_dir(std::string_view dirName) { + if (dirName.empty()) { + std::cout << " dirName is null ! " << std::endl; + return nullptr; + } + std::string realPath = real_path(dirName); + struct stat s; + lstat(realPath.c_str(), &s); + if (!S_ISDIR(s.st_mode)) { + std::cout << "dirName is not a valid directory !" << std::endl; + return nullptr; + } + DIR *dir = opendir(realPath.c_str()); + if (dir == nullptr) { + std::cout << "Can not open dir " << dirName << std::endl; + return nullptr; + } + std::cout << "Successfully opened the dir " << dirName << std::endl; + return dir; +} + +std::string real_path(std::string_view path) { + char realPathMem[PATH_MAX] = {0}; + char *realPathRet = nullptr; + realPathRet = realpath(path.data(), realPathMem); + if (realPathRet == nullptr) { + std::cout << "File: " << path << " is not exist."; + return ""; + } + + std::string realPath(realPathMem); + std::cout << path << " realpath is: " << realPath << std::endl; + return realPath; +} diff --git a/research/cv/OCRNet/eval.py b/research/cv/OCRNet/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..0f4624919d8417d3d710397d0500e345ec7e3661 --- /dev/null +++ b/research/cv/OCRNet/eval.py @@ -0,0 +1,156 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""OCRNet inference.""" +import ast +import argparse +import numpy as np + +import mindspore.ops as P +import mindspore.dataset.engine as de +from mindspore import context, DatasetHelper +from mindspore.train.serialization import load_param_into_net, load_checkpoint + +from src.seg_hrnet_ocr import get_seg_model +from src.cityscapes import Cityscapes +from src.config import config_hrnetv2_w48 as config +from src.config import organize_configuration +from src.model_utils.moxing_adapter import moxing_wrapper + + +def parse_args(): + """ + Get arguments from command-line. + """ + parser = argparse.ArgumentParser(description='OCRNet Semantic Segmentation Inference.') + parser.add_argument("--data_url", type=str, default=None, + help="Storage path of dataset.") + parser.add_argument("--train_url", type=str, default=None, + help="Storage path of evaluation results in OBS. It's useless here.") + parser.add_argument("--data_path", type=str, default=None, + help="Storage path of dataset in OBS.") + parser.add_argument("--output_path", type=str, default=None, + help="Storage path of evaluation results on machine. It's useless here.") + parser.add_argument("--modelarts", type=ast.literal_eval, default=False, + help="Run online or offline.") + parser.add_argument("--checkpoint_url", type=str, + help="Storage path of checkpoint file in OBS.") + parser.add_argument("--checkpoint_path", type=str, + help="Storage path of checkpoint file on machine.") + return parser.parse_args() + + +def get_confusion_matrix(label, pred, shape, num_class, ignore=-1): + """ + Calcute the confusion matrix by given label and pred. + """ + output = pred.asnumpy().transpose(0, 2, 3, 1) # NCHW -> NHWC + seg_pred = np.asarray(np.argmax(output, axis=3), dtype=np.uint8) + seg_gt = np.asarray(label.asnumpy()[:, :shape[-2], :shape[-1]], dtype=np.int32) + + ignore_index = seg_gt != ignore + seg_gt = seg_gt[ignore_index] + seg_pred = seg_pred[ignore_index] + + index = (seg_gt * num_class + seg_pred).astype('int32') + label_count = np.bincount(index) + confusion_matrix = np.zeros((num_class, num_class)) + + for i_label in range(num_class): + for i_pred in range(num_class): + cur_index = i_label * num_class + i_pred + if cur_index < len(label_count): + confusion_matrix[i_label, i_pred] = label_count[cur_index] + return confusion_matrix + + +def testval(dataset, helper, model, num_classes=19, ignore_label=255, scales=None, flip=False): + """ + Inference function. 
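+
+    Iterates over the batches provided by the DatasetHelper, runs
+    dataset.multi_scale_inference on each image, resizes the prediction to the
+    label resolution when needed, accumulates a confusion matrix over the whole
+    validation set, and returns (mean_IoU, IoU_array).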
+ """ + confusion_matrix = np.zeros((num_classes, num_classes)) + count = 0 + for batch in helper: + print("=====> Image: ", count) + image, label = batch # NCHW, NHW + shape = label.shape + pred = dataset.multi_scale_inference(model, image, + scales=scales, + flip=flip) + + if pred.shape[-2] != shape[-2] or pred.shape[-1] != shape[-1]: + pred = P.ResizeBilinear((shape[-2], shape[-1]))(pred) # Tensor + + confusion_matrix += get_confusion_matrix(label, pred, shape, num_classes, ignore_label) + count += 1 + print("Total number of images: ", count) + + pos = confusion_matrix.sum(1) + res = confusion_matrix.sum(0) + tp = np.diag(confusion_matrix) + # pixel_acc = tp.sum() / pos.sum() + # mean_acc = (tp / np.maximum(1.0, pos)).mean() + IoU_array = (tp / np.maximum(1.0, pos + res - tp)) + mean_IoU = IoU_array.mean() + + return mean_IoU, IoU_array + + +@moxing_wrapper(config) +def main(): + """Inference process.""" + # Set context + context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target) + # Initialize network + net = get_seg_model(config) + param_dict = load_checkpoint(ckpt_file_name=config.checkpoint_path) + load_param_into_net(net, param_dict) + net.set_train(False) + # Prepare dataset + ori_dataset = Cityscapes(config.data_path, + num_samples=None, + num_classes=config.dataset.num_classes, + multi_scale=False, + flip=False, + ignore_label=config.dataset.ignore_label, + base_size=config.eval.base_size, + crop_size=config.eval.image_size, + downsample_rate=1, + scale_factor=16, + mean=config.dataset.mean, + std=config.dataset.std, + is_train=False) + dataset = de.GeneratorDataset(ori_dataset, column_names=["image", "label"], + shuffle=False, + num_parallel_workers=config.workers) + dataset = dataset.batch(1, drop_remainder=True) + helper = DatasetHelper(dataset, dataset_sink_mode=False) + + # Calculate results + mean_IoU, IoU_array = testval(ori_dataset, helper, net, + num_classes=config.dataset.num_classes, + ignore_label=config.dataset.ignore_label, + scales=config.eval.scale_list, flip=config.eval.flip) + # Show results + print("=========== Validation Result ===========") + print("===> mIoU:", mean_IoU) + print("===> IoU array: \n", IoU_array) + print("=========================================") + + +if __name__ == '__main__': + args = parse_args() + organize_configuration(cfg=config, args=args) + main() diff --git a/research/cv/OCRNet/export.py b/research/cv/OCRNet/export.py new file mode 100644 index 0000000000000000000000000000000000000000..70de2e1ab86e8eb5a3809a4f380520cca6aa63de --- /dev/null +++ b/research/cv/OCRNet/export.py @@ -0,0 +1,48 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Export checkpoint into mindir or air for 310 inference.""" +import argparse +import numpy as np + +from mindspore import Tensor, context, load_checkpoint, load_param_into_net, export + +from src.seg_hrnet_ocr import get_seg_model +from src.config import config_hrnetv2_w48 as config + + +def main(): + parser = argparse.ArgumentParser("OCRNet Semantic Segmentation exporting.") + parser.add_argument("--device_id", type=int, default=0, help="Device ID. ") + parser.add_argument("--checkpoint_file", type=str, help="Checkpoint file path. ") + parser.add_argument("--file_name", type=str, help="Output file name. ") + parser.add_argument("--file_format", type=str, default="MINDIR", + choices=["AIR", "MINDIR"], help="Output file format. ") + + args = parser.parse_args() + + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args.device_id) + + net = get_seg_model(config) + params_dict = load_checkpoint(args.checkpoint_file) + load_param_into_net(net, params_dict) + net.set_train(False) + height, width = config.eval.image_size[0], config.eval.image_size[1] + input_data = Tensor(np.zeros([1, 3, height, width], dtype=np.float32)) + export(net, input_data, file_name=args.file_name, file_format=args.file_format) + + +if __name__ == "__main__": + main() diff --git a/research/cv/OCRNet/figures/OCRNet.png b/research/cv/OCRNet/figures/OCRNet.png new file mode 100644 index 0000000000000000000000000000000000000000..0883bef0035fbbdeb0f94e63e6507aca1bb689f0 Binary files /dev/null and b/research/cv/OCRNet/figures/OCRNet.png differ diff --git a/research/cv/OCRNet/postprocess.py b/research/cv/OCRNet/postprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..077a384c5850e7ccdb6366462b74bf5a00281d65 --- /dev/null +++ b/research/cv/OCRNet/postprocess.py @@ -0,0 +1,90 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Post-process for 310 inference: calculate miou.""" +import os +import argparse +import cv2 +import numpy as np + + +def parse_args(): + """Post process parameters from command line.""" + parser = argparse.ArgumentParser(description="OCRNet Semantic Segmentation 310 Inference.") + parser.add_argument("--result_path", type=str, help="Storage path of pred bin.") + parser.add_argument("--label_path", type=str, help="Storage path of label bin.") + args, _ = parser.parse_known_args() + return args + + +def get_confusion_matrix(label, output, shape, num_class, ignore_label=255): + """Calcute the confusion matrix by given label and pred.""" + # output = output.transpose(0, 2, 3, 1) + seg_pred = np.asarray(np.argmax(output, axis=3), dtype=np.uint8) + seg_gt = np.asarray(label[:, :shape[-2], :shape[-1]], dtype=np.int32) + + ignore_index = seg_gt != ignore_label + seg_gt = seg_gt[ignore_index] + seg_pred = seg_pred[ignore_index] + + index = (seg_gt * num_class + seg_pred).astype(np.int32) + label_count = np.bincount(index) + confusion_matrix = np.zeros((num_class, num_class)) + + for i_label in range(num_class): + for i_pred in range(num_class): + cur_index = i_label * num_class + i_pred + if cur_index < len(label_count): + confusion_matrix[i_label, i_pred] = label_count[cur_index] + return confusion_matrix + + +def main(args): + """Main function for miou calculation.""" + result_list = os.listdir(args.label_path) + num_classes = 19 + confusion_matrix = np.zeros((num_classes, num_classes)) + ignore_label = 255 + count = 0 + for result in result_list: + prefix = result.rstrip(".bin") + pred = np.fromfile(os.path.join(args.result_path, prefix + "_1.bin"), + dtype=np.float32).reshape(19, 256, 512) + label = np.fromfile(os.path.join(args.label_path, prefix + ".bin"), + dtype=np.int32).reshape(1, 1024, 2048) + shape = label.shape + output = pred.transpose(1, 2, 0) + output = cv2.resize(output, (shape[-1], shape[-2]), interpolation=cv2.INTER_LINEAR) + output = np.exp(output) + output = np.expand_dims(output, axis=0) + confusion_matrix += get_confusion_matrix(label, output, shape, num_classes, ignore_label) + count += 1 + print("Total number of images: ", count, flush=True) + + pos = confusion_matrix.sum(1) + res = confusion_matrix.sum(0) + tp = np.diag(confusion_matrix) + iou_array = (tp / np.maximum(1.0, pos + res - tp)) + mean_iou = iou_array.mean() + + # Show results + print("=========== 310 Inference Result ===========", flush=True) + print("miou:", mean_iou, flush=True) + print("iou array: \n", iou_array, flush=True) + print("============================================", flush=True) + + +if __name__ == "__main__": + args_opt = parse_args() + main(args=args_opt) diff --git a/research/cv/OCRNet/preprocess.py b/research/cv/OCRNet/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..5e0f5f82ad94bbf53827a79685e3a5055e7f8b70 --- /dev/null +++ b/research/cv/OCRNet/preprocess.py @@ -0,0 +1,65 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""Preprocess for 310 inference: transform cityscapes to bin.""" +import os +import argparse + +from src.cityscapes import Cityscapes +from src.config import config_hrnetv2_w48 as config + + +def parse_args(): + """Get arguments from command-line.""" + parser = argparse.ArgumentParser(description="Cityscapes preprocess for OCRNet.") + parser.add_argument("--data_path", type=str, help="Storage path of dataset.") + parser.add_argument("--train_path", type=str, help="Storage path of bin files.") + args = parser.parse_args() + + return args + + +def export_cityscapes_to_bin(args): + """Convert data format from png to bin.""" + image_path = os.path.join(args.train_path, "image") + label_path = os.path.join(args.train_path, "label") + os.makedirs(image_path) + os.makedirs(label_path) + dataset = Cityscapes(args.data_path, + num_samples=None, + num_classes=config.dataset.num_classes, + multi_scale=False, + flip=False, + ignore_label=config.dataset.ignore_label, + base_size=config.eval.base_size, + crop_size=config.eval.image_size, + downsample_rate=1, + scale_factor=16, + mean=config.dataset.mean, + std=config.dataset.std, + is_train=False) + for i, data in enumerate(dataset): + image = data[0] + label = data[1] + file_name = "cityscapes_val_" + str(i) + ".bin" + image_file_path = os.path.join(image_path, file_name) + label_file_path = os.path.join(label_path, file_name) + image.tofile(image_file_path) + label.tofile(label_file_path) + print("Export bin files finished!") + + +if __name__ == "__main__": + args_opt = parse_args() + export_cityscapes_to_bin(args=args_opt) diff --git a/research/cv/OCRNet/scripts/ascend310_inference.sh b/research/cv/OCRNet/scripts/ascend310_inference.sh new file mode 100644 index 0000000000000000000000000000000000000000..b87aff70ecf166584da87882e85d3d6683c276b5 --- /dev/null +++ b/research/cv/OCRNet/scripts/ascend310_inference.sh @@ -0,0 +1,116 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +if [[ $# != 3 ]]; then + echo "Usage:" + echo "bash scripts/ascend310_inference.sh [MINDIR_PATH] [DATA_PATH] [DEVICE_ID]" + exit 1 +fi + +get_real_path() { + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +model=$(get_real_path $1) +data_path=$(get_real_path $2) +device_id=$3 + +echo "mindir name: "$model +echo "dataset path: "$data_path +echo "device id: "$device_id + +export ASCEND_HOME=/usr/local/Ascend/ +if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then + export PATH=$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/atc/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/ascend-toolkit/latest/atc/lib64:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH + export TBE_IMPL_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe + export PYTHONPATH=${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp +else + export PATH=$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH + export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH + export PYTHONPATH=$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=$ASCEND_HOME/opp +fi + +function compile_app() +{ + cd ./ascend310_infer/src/ || exit + if [ -f "Makefile" ]; then + make clean + fi + bash build.sh &> build.log +} + +function preprocess_data() +{ + if [ -d preprocess_Result ]; then + rm -rf ./preprocess_Result + fi + mkdir preprocess_Result + python3.7 ./preprocess.py --data_path=$data_path --train_path=./preprocess_Result &> preprocess.log +} + +function infer() +{ + cd - || exit + if [ -d result_Files ]; then + rm -rf ./result_Files + fi + if [ -d time_Result ]; then + rm -rf ./time_Result + fi + mkdir result_Files + mkdir time_Result + ./ascend310_infer/src/main --gmindir_path=$model --gdataset_path=./preprocess_Result/image --gdevice_id=$device_id &> infer.log +} + +function cal_acc() +{ + python3.7 ./postprocess.py --result_path=./result_Files --label_path=./preprocess_Result/label &> acc.log + if [ $? -ne 0 ]; then + echo "Calculate accuracy failed." + exit 1 + fi +} + +preprocess_data +if [ $? -ne 0 ]; then + echo "Dataset preprocessing failed." + exit 1 +fi + +compile_app +if [ $? -ne 0 ]; then + echo "Compile app code failed." + exit 1 +fi + +infer +if [ $? -ne 0 ]; then + echo "Execute inference failed." + exit 1 +fi + +cal_acc +if [ $? -ne 0 ]; then + echo "Calculate mIoU failed." + exit 1 +fi diff --git a/research/cv/OCRNet/scripts/run_distribute_train.sh b/research/cv/OCRNet/scripts/run_distribute_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..67fe3fc318a532a20f3ebf6500ef23d7c059ae96 --- /dev/null +++ b/research/cv/OCRNet/scripts/run_distribute_train.sh @@ -0,0 +1,88 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 5 ] && [ $# != 6 ] +then + echo "Using: bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [EVAL_CALLBACK]" + echo "or" + echo "Using: bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [BEGIN_EPOCH] [EVAL_CALLBACK]" + exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) # rank_table_file +PATH2=$(get_real_path $2) # dataset_path +PATH3=$(get_real_path $3) # train_output_path +PATH4=$(get_real_path $4) # pretrained or resume ckpt_path + +if [ ! -f $PATH1 ] +then + echo "error: RANK_TABLE_FILE=$PATH1 is not a file." + exit 1 +fi + +if [ ! -d $PATH2 ] +then + echo "error: DATASET_PATH=$PATH2 is not a directory." + exit 1 +fi + +if [ ! -d $PATH3 ] +then + echo "error: TRAIN_OUTPUT_PATH=$PATH3 is not a directory." +fi + +if [ ! -f $PATH4 ] +then + echo "error: CHECKPOINT_PATH=$PATH4 is not a file." + exit 1 +fi + +export DEVICE_NUM=4 +export RANK_SIZE=4 +export RANK_TABLE_FILE=$PATH1 + +export SERVER_ID=0 +rank_start=$((DEVICE_NUM * SERVER_ID)) + + +for((i=0; i<${DEVICE_NUM}; i++)) +do + export DEVICE_ID=${i} + export RANK_ID=$((rank_start + i)) + rm -rf ./train_parallel$i + mkdir ./train_parallel$i + cp ./train.py ./train_parallel$i + cp -r ./src ./train_parallel$i + cd ./train_parallel$i || exit + echo "Start training for rank $RANK_ID, device $DEVICE_ID." + env > env.log + if [ $# == 5 ] + then + python3 train.py --data_path $PATH2 --output_path $PATH3 --checkpoint_path $PATH4 --modelarts False --run_distribute True --device_target Ascend --lr 0.0012 --lr_power 6e-10 --begin_epoch 0 --end_epoch 1000 --eval_callback $5 --eval_interval 50 &> log & + elif [ $# == 6 ] + then + python3 train.py --data_path $PATH2 --output_path $PATH3 --checkpoint_path $PATH4 --modelarts False --run_distribute True --device_target Ascend --lr 0.0012 --lr_power 6e-10 --begin_epoch $5 --end_epoch 1000 --eval_callback $6 --eval_interval 50 &> log & + fi + cd .. +done diff --git a/research/cv/OCRNet/scripts/run_eval.sh b/research/cv/OCRNet/scripts/run_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..2a0678a96c6c2717fc67f40c08fe60d5c991f11c --- /dev/null +++ b/research/cv/OCRNet/scripts/run_eval.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +if [ $# != 3 ] +then + echo "Using: bash scripts/run_eval.sh [DEVICE_ID] [DATASET_PATH] [CHECKPOINT_PATH]" + exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $2) +PATH2=$(get_real_path $3) + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a dictionary." + exit 1 +fi + +if [ ! -f $PATH2 ] +then + echo "error: CHECKPOINT_PATH=$PATH2 is not a file." + exit 1 +fi + +export DEVICE_NUM=1 +export DEVICE_ID=$1 +export RANK_SIZE=$DEVICE_NUM +export RANK_ID=0 + +if [ -d "eval" ]; +then + rm -rf ./eval +fi +mkdir ./eval +cp ./eval.py ./eval +cp -r ./src ./eval +cd ./eval || exit +env > env.log +echo "start evaluation for device $DEVICE_ID" +python3 eval.py --data_path $PATH1 --device_target Ascend --modelarts False --checkpoint_path $PATH2 &> log & +cd .. diff --git a/research/cv/OCRNet/scripts/run_standalone_train.sh b/research/cv/OCRNet/scripts/run_standalone_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..d1da4ea92384f5803cc31ce56f89b703973c060b --- /dev/null +++ b/research/cv/OCRNet/scripts/run_standalone_train.sh @@ -0,0 +1,78 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 5 ] && [ $# != 6 ] +then + echo "Using: bash scripts/run_standalone_train.sh [DEVICE_ID] [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [EVAL_CALLBACK]" + echo "or" + echo "Using: bash scripts/run_standalone_train.sh [DEVICE_ID] [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [BEGIN_EPOCH] [EVAL_CALLBACK]" + exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $2) # dataset_path +PATH2=$(get_real_path $3) # train_output_path +PATH3=$(get_real_path $4) # pretrained or resume ckpt_path + +if [ ! -d $PATH1 ] +then + echo "error: DATASET_PATH=$PATH1 is not a directory." + exit 1 +fi + +if [ ! -d $PATH2 ] +then + echo "error: TRAIN_OUTPUT_PATH=$PATH2 is not a directory." +fi + +if [ ! -f $PATH3 ] +then + echo "error: CHECKPOINT_PATH=$PATH3 is not a file." + exit 1 +fi + +export DEVICE_NUM=1 +export DEVICE_ID=$1 +export RANK_ID=0 +export RANK_SIZE=1 + +if [ -d "./train" ] +then + rm -rf ./train + echo "Remove dir ./train." +fi +mkdir ./train +echo "Create a dir ./train." 
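+# Copy the training entry script and the src package into ./train so the run
+# uses an isolated working directory and keeps its logs out of the source tree.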
+cp ./train.py ./train +cp -r ./src ./train +cd ./train || exit +echo "Start training for device $DEVICE_ID" +env > env.log +if [ $# == 5 ] +then + python3 train.py --data_path $PATH1 --output_path $PATH2 --checkpoint_path $PATH3 --modelarts False --run_distribute False --device_target Ascend --lr 0.0017 --lr_power 6e-10 --begin_epoch 0 --eval_callback $5 &> log & +else + python3 train.py --data_path $PATH1 --output_path $PATH2 --checkpoint_path $PATH3 --modelarts False --run_distribute False --device_target Ascend --lr 0.0017 --lr_power 6e-10 --begin_epoch $5 --eval_callback $6 &> log & +fi +cd .. + diff --git a/research/cv/OCRNet/src/basedataset.py b/research/cv/OCRNet/src/basedataset.py new file mode 100644 index 0000000000000000000000000000000000000000..dd63f0c8b30728dbc5deba9dd844796e045caefa --- /dev/null +++ b/research/cv/OCRNet/src/basedataset.py @@ -0,0 +1,210 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Base dataset generator definition.""" +import random +import cv2 +import numpy as np + +import mindspore.ops as P +from mindspore import Tensor +from mindspore.common import dtype + + +class BaseDataset: + """Base dataset generator class.""" + def __init__(self, + ignore_label=-1, + base_size=2048, + crop_size=(512, 1024), + downsample_rate=1, + scale_factor=16, + mean=None, + std=None): + + self.base_size = base_size + self.crop_size = crop_size + self.ignore_label = ignore_label + + self.mean = mean + self.std = std + self.scale_factor = scale_factor + self.downsample_rate = 1. 
/ downsample_rate + + self.files = [] + + def __len__(self): + return len(self.files) + + def input_transform(self, image): + """Transform data format of images.""" + image = image.astype(np.float32)[:, :, ::-1] # BGR2RGB + image = image / 255.0 + image -= self.mean + image /= self.std + return image + + def label_transform(self, label): + """Transform data format of labels.""" + return np.array(label).astype('int32') + + def pad_image(self, image, h, w, shape, padvalue): + """Pad an image.""" + pad_image = image.copy() + pad_h = max(shape[0] - h, 0) + pad_w = max(shape[1] - w, 0) + if pad_h > 0 or pad_w > 0: + pad_image = cv2.copyMakeBorder(image, 0, pad_h, 0, + pad_w, cv2.BORDER_CONSTANT, + value=padvalue) + + return pad_image + + def rand_crop(self, image, label): + """Crop a feature at a random location.""" + h, w, _ = image.shape + image = self.pad_image(image, h, w, self.crop_size, (0.0, 0.0, 0.0)) + label = self.pad_image(label, h, w, self.crop_size, (self.ignore_label,)) + + new_h, new_w = label.shape + x = random.randint(0, new_w - self.crop_size[1]) + y = random.randint(0, new_h - self.crop_size[0]) + image = image[y:y + self.crop_size[0], x:x + self.crop_size[1]] + label = label[y:y + self.crop_size[0], x:x + self.crop_size[1]] + + return image, label + + def multi_scale_aug(self, image, label=None, rand_scale=1, rand_crop=True): + """Augment feature into different scales.""" + long_size = np.int(self.base_size * rand_scale + 0.5) + h, w, _ = image.shape + if h > w: + new_h = long_size + new_w = np.int(w * long_size / h + 0.5) + else: + new_w = long_size + new_h = np.int(h * long_size / w + 0.5) + + image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_LINEAR) + # image = cv2.resize(image, (new_w, new_h), interpolation=cv2.INTER_NEAREST) + if label is not None: + label = cv2.resize(label, (new_w, new_h), interpolation=cv2.INTER_NEAREST) + else: + return image + + if rand_crop: + image, label = self.rand_crop(image, label) + + return image, label + + def gen_sample(self, image, label, multi_scale=False, is_flip=False): + """Data preprocessing.""" + if multi_scale: + rand_scale = 0.5 + random.randint(0, self.scale_factor) / 10.0 + image, label = self.multi_scale_aug(image, label, rand_scale=rand_scale) + + image = self.input_transform(image) # HWC + label = self.label_transform(label) # HW + + image = image.transpose((2, 0, 1)) # CHW + + if is_flip: + flip = np.random.choice(2) * 2 - 1 + image = image[:, :, ::flip] + label = label[:, ::flip] + + if self.downsample_rate != 1: + label = cv2.resize(label, None, + fx=self.downsample_rate, + fy=self.downsample_rate, + interpolation=cv2.INTER_NEAREST) + # image CHW, label HW + return image, label + + def inference(self, model, image, flip=False): + """Inference using one feature.""" + shape = image.shape + pred = model(image) + pred = pred[-1] # image NCHW + pred = P.ResizeBilinear((shape[-2], shape[-1]))(pred) + if flip: + flip_img = image.asnumpy()[:, :, :, ::-1] + flip_output = model(Tensor(flip_img.copy())) + flip_output = P.ResizeBilinear((shape[-2], shape[-1]))(flip_output) + flip_pred = flip_output.asnumpy() + flip_pred = Tensor(flip_pred[:, :, :, ::-1]) + pred = P.Add()(pred, flip_pred) + pred = Tensor(pred.asnumpy() * 0.5) + return P.Exp()(pred) + + def multi_scale_inference(self, model, image, scales=None, flip=False): + """Inference using multi-scale features.""" + batch, _, ori_height, ori_width = image.shape + assert batch == 1, "only supporting batchsize 1." 
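+        # For each scale in `scales`: rescale the image, then either run a single
+        # forward pass (when the rescaled image fits within crop_size) or slide a
+        # crop window with strides of 2/3 the crop size, averaging overlapping
+        # window predictions with a count map. Every scale's prediction is resized
+        # back to the original resolution and accumulated into final_pred.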
+ image = image.asnumpy()[0].transpose((1, 2, 0)).copy() + stride_h = np.int(self.crop_size[0] * 2.0 / 3.0) + stride_w = np.int(self.crop_size[1] * 2.0 / 3.0) + + final_pred = Tensor(np.zeros([1, self.num_classes, ori_height, ori_width]), dtype=dtype.float32) + padvalue = -1.0 * np.array(self.mean) / np.array(self.std) + for scale in scales: + new_img = self.multi_scale_aug(image=image, rand_scale=scale, rand_crop=False) + height, width = new_img.shape[:-1] + + if max(height, width) <= np.min(self.crop_size): + new_img = self.pad_image(new_img, height, width, + self.crop_size, padvalue) + new_img = new_img.transpose((2, 0, 1)) + new_img = np.expand_dims(new_img, axis=0) + new_img = Tensor(new_img) + preds = self.inference(model, new_img, flip) + preds = preds[:, :, 0:height, 0:width] + else: + if height < self.crop_size[0] or width < self.crop_size[1]: + new_img = self.pad_image(new_img, height, width, + self.crop_size, padvalue) + new_h, new_w = new_img.shape[:-1] + rows = np.int(np.ceil(1.0 * (new_h - + self.crop_size[0]) / stride_h)) + 1 + cols = np.int(np.ceil(1.0 * (new_w - + self.crop_size[1]) / stride_w)) + 1 + preds = Tensor(np.zeros([1, self.num_classes, new_h, new_w]), dtype=dtype.float32) + count = Tensor(np.zeros([1, 1, new_h, new_w]), dtype=dtype.float32) + + for r in range(rows): + for c in range(cols): + h0 = r * stride_h + w0 = c * stride_w + h1 = min(h0 + self.crop_size[0], new_h) + w1 = min(w0 + self.crop_size[1], new_w) + crop_img = new_img[h0:h1, w0:w1, :] + if h1 == new_h or w1 == new_w: + crop_img = self.pad_image(crop_img, + h1 - h0, + w1 - w0, + self.crop_size, + padvalue) + crop_img = crop_img.transpose((2, 0, 1)) + crop_img = np.expand_dims(crop_img, axis=0) + crop_img = Tensor(crop_img) + pred = self.inference(model, crop_img, flip) + preds[:, :, h0:h1, w0:w1] += pred[:, :, 0:h1 - h0, 0:w1 - w0] + count[:, :, h0:h1, w0:w1] += 1 + preds = preds / count + preds = preds[:, :, :height, :width] + preds = P.ResizeBilinear((ori_height, ori_width))(preds) + final_pred += preds + final_pred = P.Add()(final_pred, preds) + return final_pred diff --git a/research/cv/OCRNet/src/callback.py b/research/cv/OCRNet/src/callback.py new file mode 100644 index 0000000000000000000000000000000000000000..49ff2bb2063592e07422d1069b4b465f831b2fed --- /dev/null +++ b/research/cv/OCRNet/src/callback.py @@ -0,0 +1,100 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Callback for inference while training.""" +import os +import mindspore.ops as P +from mindspore.train.callback import Callback +from mindspore import save_checkpoint +import numpy as np + + +def get_confusion_matrix(label, pred, shape, num_classes, ignore_label): + """Calcute the confusion matrix by given label and pred.""" + output = pred.transpose(0, 2, 3, 1) # NHWC + seg_pred = np.asarray(np.argmax(output, axis=3), dtype=np.uint8) # NHW + seg_gt = np.asarray(label[:, :shape[-2], :shape[-1]], dtype=np.int32) # NHW + + ignore_index = seg_gt != ignore_label # NHW + seg_gt = seg_gt[ignore_index] # NHW + seg_pred = seg_pred[ignore_index] # NHW + + index = (seg_gt * num_classes + seg_pred).astype(np.int32) + label_count = np.bincount(index) + confusion_matrix = np.zeros((num_classes, num_classes)) + + for i_label in range(num_classes): + for i_pred in range(num_classes): + cur_index = i_label * num_classes + i_pred + if cur_index < len(label_count): + confusion_matrix[i_label, i_pred] = label_count[cur_index] + return confusion_matrix + + +def evaluate_model(net, data_helper, num_classes, ignore_label): + """Inference function.""" + net.set_train(False) + confusion_matrix = np.zeros((num_classes, num_classes)) + for item in data_helper: + image = item[0] + label = item[1].asnumpy() + shape = label.shape + pred = net(image) + pred = pred[-1] + pred = P.ResizeBilinear((shape[-2], shape[-1]))(pred).asnumpy() + confusion_matrix += get_confusion_matrix(label, pred, shape, num_classes, ignore_label) + pos = confusion_matrix.sum(1) + res = confusion_matrix.sum(0) + tp = np.diag(confusion_matrix) + IoU_array = (tp / np.maximum(1.0, pos + res - tp)) + mean_IoU = IoU_array.mean() + return IoU_array, mean_IoU + + +class EvalCallback(Callback): + """Callback for inference while training.""" + def __init__(self, network, eval_data, num_classes, ignore_label, train_url, eval_interval=1): + self.network = network + self.eval_data = eval_data + self.best_iouarray = None + self.best_miou = 0 + self.best_epoch = 0 + self.num_classes = num_classes + self.ignore_label = ignore_label + self.eval_interval = eval_interval + self.train_url = train_url + + def epoch_end(self, run_context): + """Executions after each epoch.""" + cb_param = run_context.original_args() + cur_epoch = cb_param.cur_epoch_num + device_id = int(os.getenv("DEVICE_ID")) + if cur_epoch % self.eval_interval == 0: + iou_array, miou = evaluate_model(self.network, self.eval_data, self.num_classes, self.ignore_label) + if miou > self.best_miou: + self.best_miou = miou + self.best_iouarray = iou_array + self.best_epoch = cur_epoch + save_checkpoint(self.network, self.train_url + "/best_card%d.ckpt" % device_id) + + log_text1 = 'EPOCH: %d, mIoU: %.4f\n' % (cur_epoch, miou) + log_text2 = 'BEST EPOCH: %s, BEST mIoU: %0.4f\n' % (self.best_epoch, self.best_miou) + log_text3 = 'DEVICE_ID: %d\n' % device_id + print("==================================================\n", + log_text3, + log_text1, + log_text2, + "==================================================") + self.network.set_train(True) diff --git a/research/cv/OCRNet/src/cityscapes.py b/research/cv/OCRNet/src/cityscapes.py new file mode 100644 index 0000000000000000000000000000000000000000..cd243439c299aaca4fe8cceaac64838ef7ab9228 --- /dev/null +++ b/research/cv/OCRNet/src/cityscapes.py @@ -0,0 +1,167 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Dataset Cityscapes generator.""" +import cv2 +import numpy as np + +import mindspore.ops as P +from mindspore import Tensor +from mindspore.common import dtype + +from src.basedataset import BaseDataset + + +class Cityscapes(BaseDataset): + """Dataset Cityscapes generator.""" + def __init__(self, + root, + num_samples=None, + num_classes=19, + multi_scale=False, + flip=False, + ignore_label=-1, + base_size=2048, + crop_size=(512, 1024), + downsample_rate=1, + scale_factor=16, + mean=None, + std=None, + is_train=True): + + super(Cityscapes, self).__init__(ignore_label, base_size, + crop_size, downsample_rate, scale_factor, mean, std) + + self._index = 0 + self.root = root + if is_train: + self.list_path = root + "/train.lst" + else: + self.list_path = root + "/val.lst" + self.num_classes = num_classes + self.multi_scale = multi_scale + self.flip = flip + list_file = open(self.list_path) + img_list = [line.strip().split() for line in list_file] + list_file.close() + self.img_list = [(self.root+"/"+vector[0], self.root+"/"+vector[1]) for vector in img_list] + self._number = len(self.img_list) + + if num_samples: + self.files = self.files[:num_samples] + + self.label_mapping = {-1: ignore_label, 0: ignore_label, + 1: ignore_label, 2: ignore_label, + 3: ignore_label, 4: ignore_label, + 5: ignore_label, 6: ignore_label, + 7: 0, 8: 1, 9: ignore_label, + 10: ignore_label, 11: 2, 12: 3, + 13: 4, 14: ignore_label, 15: ignore_label, + 16: ignore_label, 17: 5, 18: ignore_label, + 19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, + 25: 12, 26: 13, 27: 14, 28: 15, + 29: ignore_label, 30: ignore_label, + 31: 16, 32: 17, 33: 18} + self.class_weights = Tensor([0.8373, 0.918, 0.866, 1.0345, + 1.0166, 0.9969, 0.9754, 1.0489, + 0.8786, 1.0023, 0.9539, 0.9843, + 1.1116, 0.9037, 1.0865, 1.0955, + 1.0865, 1.1529, 1.0507], dtype=dtype.float32) + + def __len__(self): + return self._number + + def __getitem__(self, index): + if index < self._number: + image_path = self.img_list[index][0] + label_path = self.img_list[index][1] + image = cv2.imread(image_path, cv2.IMREAD_COLOR) + label = cv2.imread(label_path, cv2.IMREAD_GRAYSCALE) + label = self.convert_label(label) + image, label = self.gen_sample(image, label, self.multi_scale, self.flip) + else: + raise StopIteration + return image.copy(), label.copy() + + def show(self): + """Show the total number of val data.""" + print("Total number of data vectors: ", self._number) + for line in self.img_list: + print(line) + + def convert_label(self, label, inverse=False): + """Convert classification ids in labels.""" + temp = label.copy() + if inverse: + for v, k in self.label_mapping.items(): + label[temp == k] = v + else: + for k, v in self.label_mapping.items(): + label[temp == k] = v + return label + + def multi_scale_inference(self, model, image, scales=None, flip=False): + """Inference using multi-scale features from dataset Cityscapes.""" + batch, _, ori_height, ori_width = image.shape + assert batch 
== 1, "only supporting batchsize 1." + image = image.asnumpy()[0].transpose((1, 2, 0)).copy() + stride_h = np.int(self.crop_size[0] * 1.0) + stride_w = np.int(self.crop_size[1] * 1.0) + + final_pred = Tensor(np.zeros([1, self.num_classes, ori_height, ori_width]), dtype=dtype.float32) + for scale in scales: + new_img = self.multi_scale_aug(image=image, + rand_scale=scale, + rand_crop=False) + height, width = new_img.shape[:-1] + + if scale <= 1.0: + new_img = new_img.transpose((2, 0, 1)) + new_img = np.expand_dims(new_img, axis=0) + new_img = Tensor(new_img) + preds = self.inference(model, new_img, flip) + preds = preds.asnumpy() + preds = preds[:, :, 0:height, 0:width] + else: + new_h, new_w = new_img.shape[:-1] + rows = np.int(np.ceil(1.0 * (new_h - + self.crop_size[0]) / stride_h)) + 1 + cols = np.int(np.ceil(1.0 * (new_w - + self.crop_size[1]) / stride_w)) + 1 + preds = np.zeros([1, self.num_classes, new_h, new_w]).astype(np.float32) + + count = np.zeros([1, 1, new_h, new_w]).astype(np.float32) + + for r in range(rows): + for c in range(cols): + h0 = r * stride_h + w0 = c * stride_w + h1 = min(h0 + self.crop_size[0], new_h) + w1 = min(w0 + self.crop_size[1], new_w) + h0 = max(int(h1 - self.crop_size[0]), 0) + w0 = max(int(w1 - self.crop_size[1]), 0) + crop_img = new_img[h0:h1, w0:w1, :] + crop_img = crop_img.transpose((2, 0, 1)) + crop_img = np.expand_dims(crop_img, axis=0) + crop_img = Tensor(crop_img) + pred = self.inference(model, crop_img, flip) + preds[:, :, h0:h1, w0:w1] += pred.asnumpy()[:, :, 0:h1 - h0, 0:w1 - w0] + count[:, :, h0:h1, w0:w1] += 1 + preds = preds / count + preds = preds[:, :, :height, :width] + preds = Tensor(preds) + preds = P.ResizeBilinear((ori_height, ori_width))(preds) + final_pred = P.Add()(final_pred, preds) + return final_pred diff --git a/research/cv/OCRNet/src/config.py b/research/cv/OCRNet/src/config.py new file mode 100644 index 0000000000000000000000000000000000000000..f1753fd7c96f4e9eb3d5b54abfea1e02a410c160 --- /dev/null +++ b/research/cv/OCRNet/src/config.py @@ -0,0 +1,143 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Configuration.""" +import json +from easydict import EasyDict as ed + + +config_hrnetv2_w48 = ed({ + "data_url": None, + "data_path": None, + "train_url": None, + "output_path": None, + "checkpoint_url": None, + "checkpoint_path": None, + "eval_data_url": None, + "eval_data_path": None, + "run_distribute": False, + "device_target": "Ascend", + "workers": 8, + "modelarts": False, + "lr": 0.0013, + "lr_power": 4e-10, + "save_checkpoint_epochs": 20, + "keep_checkpoint_max": 20, + "total_epoch": 1000, + "begin_epoch": 0, + "end_epoch": 1000, + "batchsize": 4, + "eval_callback": False, + "eval_interval": 50, + "train": ed({ + "train_list": "/train.lst", + "image_size": [512, 1024], + "base_size": 2048, + "multi_scale": True, + "flip": True, + "downsample_rate": 1, + "scale_factor": 16, + "shuffle": True, + "param_initializer": "TruncatedNormal", + "opt_momentum": 0.9, + "wd": 0.0005, + "num_samples": 0 + }), + "dataset": ed({ + "name": "Cityscapes", + "num_classes": 19, + "ignore_label": 255, + "mean": [0.485, 0.456, 0.406], + "std": [0.229, 0.224, 0.225], + + }), + "eval": ed({ + "eval_list": "/val.lst", + "image_size": [1024, 2048], + "base_size": 2048, + "batch_size": 1, + "num_samples": 0, + "flip": False, + "multi_scale": False, + "scale_list": [1] + }), + "model": ed({ + "name": "seg_hrnet_w48", + "num_outputs": 2, + "align_corners": True, + "extra": { + "FINAL_CONV_KERNEL": 1, + "STAGE1": { + "NUM_MODULES": 1, + "NUM_BRANCHES": 1, + "BLOCK": "BOTTLENECK", + "NUM_BLOCKS": [4], + "NUM_CHANNELS": [64], + "FUSE_METHOD": "SUM" + }, + "STAGE2": { + "NUM_MODULES": 1, + "NUM_BRANCHES": 2, + "BLOCK": "BASIC", + "NUM_BLOCKS": [4, 4], + "NUM_CHANNELS": [48, 96], + "FUSE_METHOD": "SUM" + }, + "STAGE3": { + "NUM_MODULES": 4, + "NUM_BRANCHES": 3, + "BLOCK": "BASIC", + "NUM_BLOCKS": [4, 4, 4], + "NUM_CHANNELS": [48, 96, 192], + "FUSE_METHOD": "SUM" + }, + "STAGE4": { + "NUM_MODULES": 3, + "NUM_BRANCHES": 4, + "BLOCK": "BASIC", + "NUM_BLOCKS": [4, 4, 4, 4], + "NUM_CHANNELS": [48, 96, 192, 384], + "FUSE_METHOD": "SUM" + }, + }, + "ocr": { + "mid_channels": 512, + "key_channels": 256, + "dropout": 0.05, + "scale": 1 + } + }), + "loss": ed({ + "loss_scale": 10, + "use_weights": True, + "balance_weights": [0.4, 1] + }), +}) + + +def show_config(cfg): + """Show configuration.""" + split_line_up = "==================================================\n" + split_line_bt = "\n==================================================" + print(split_line_up, + json.dumps(cfg, ensure_ascii=False, indent=2), + split_line_bt) + + +def organize_configuration(cfg, args): + """Add parameters from command-line into configuration.""" + args_dict = vars(args) + for item in args_dict.items(): + cfg[item[0]] = item[1] diff --git a/research/cv/OCRNet/src/loss.py b/research/cv/OCRNet/src/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..99af216d15412e2e3dc5e3fe70d8c026fa33a410 --- /dev/null +++ b/research/cv/OCRNet/src/loss.py @@ -0,0 +1,152 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Loss functions.""" +import mindspore.nn as nn +import mindspore.ops.operations as P +import mindspore.ops as F +from mindspore.common.tensor import Tensor +from mindspore import dtype as mstype +from mindspore.nn.loss.loss import _Loss + +from src.config import config_hrnetv2_w48 as config + +weights_list = [0.8373, 0.918, 0.866, 1.0345, + 1.0166, 0.9969, 0.9754, 1.0489, + 0.8786, 1.0023, 0.9539, 0.9843, + 1.1116, 0.9037, 1.0865, 1.0955, + 1.0865, 1.1529, 1.0507] + + +class CrossEntropyWithLogits(_Loss): + """ + Cross-entropy loss function for semantic segmentation, + and different classes have the same weight. + """ + + def __init__(self, num_classes=19, ignore_label=255, image_size=None): + super(CrossEntropyWithLogits, self).__init__() + self.resize = F.ResizeBilinear(image_size) + self.one_hot = P.OneHot(axis=-1) + self.on_value = Tensor(1.0, mstype.float32) + self.off_value = Tensor(0.0, mstype.float32) + self.cast = P.Cast() + self.ce = nn.SoftmaxCrossEntropyWithLogits() + self.not_equal = P.NotEqual() + self.num_classes = num_classes + self.ignore_label = ignore_label + self.mul = P.Mul() + self.argmax = P.Argmax(output_type=mstype.int32) + self.sum = P.ReduceSum(False) + self.div = P.RealDiv() + self.transpose = P.Transpose() + self.reshape = P.Reshape() + + def construct(self, logits, labels): + """Loss construction.""" + logits = self.resize(logits) + labels_int = self.cast(labels, mstype.int32) + labels_int = self.reshape(labels_int, (-1,)) + logits_ = self.transpose(logits, (0, 2, 3, 1)) + logits_ = self.reshape(logits_, (-1, self.num_classes)) + weights = self.not_equal(labels_int, self.ignore_label) + weights = self.cast(weights, mstype.float32) + one_hot_labels = self.one_hot(labels_int, self.num_classes, self.on_value, self.off_value) + loss = self.ce(logits_, one_hot_labels) + loss = self.mul(weights, loss) + loss = self.div(self.sum(loss), self.sum(weights)) + + return loss + + +class CrossEntropyWithLogitsAndWeights(_Loss): + """ + Cross-entropy loss function for semantic segmentation, + and different classes have different weights. 
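+    Per-pixel weights are looked up from the module-level weights_list
+    (Cityscapes class-balancing weights) according to the label value;
+    pixels marked with ignore_label keep zero weight and are excluded.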
+ """ + + def __init__(self, num_classes=19, ignore_label=255, image_size=None): + super(CrossEntropyWithLogitsAndWeights, self).__init__() + self.one_hot = P.OneHot(axis=-1) + self.on_value = Tensor(1.0, mstype.float32) + self.off_value = Tensor(0.0, mstype.float32) + self.cast = P.Cast() + self.ce = nn.SoftmaxCrossEntropyWithLogits() + self.zeros = F.Zeros() + self.fill = F.Fill() + self.equal = F.Equal() + self.select = F.Select() + self.num_classes = num_classes + self.ignore_label = ignore_label + self.mul = P.Mul() + self.argmax = P.Argmax(output_type=mstype.int32) + self.sum = P.ReduceSum(False) + self.div = P.RealDiv() + self.transpose = P.Transpose() + self.reshape = P.Reshape() + + def construct(self, logits, labels): + """Loss construction.""" + labels_int = self.cast(labels, mstype.int32) + labels_int = self.reshape(labels_int, (-1,)) + logits_ = self.transpose(logits, (0, 2, 3, 1)) + logits_ = self.reshape(logits_, (-1, self.num_classes)) + labels_float = self.cast(labels_int, mstype.float32) + weights = self.zeros(labels_float.shape, mstype.float32) + for i in range(self.num_classes): + fill_weight = self.fill(mstype.float32, labels_float.shape, weights_list[i]) + equal_ = self.equal(labels_float, i) + weights = self.select(equal_, fill_weight, weights) + one_hot_labels = self.one_hot(labels_int, self.num_classes, self.on_value, self.off_value) + loss = self.ce(logits_, one_hot_labels) + loss = self.mul(weights, loss) + loss = self.div(self.sum(loss), self.sum(weights)) + + return loss + + +class CrossEntropy(nn.Cell): + """Loss for OCRNet Specifically""" + + def __init__(self, num_classes=19, ignore_label=-1): + super(CrossEntropy, self).__init__() + self.ignore_label = ignore_label + self.criterion = CrossEntropyWithLogitsAndWeights(num_classes=num_classes, ignore_label=ignore_label) + + self.sum = P.ReduceSum() + self.weights = config.loss.balance_weights + self.num_outputs = config.model.num_outputs + self.align_corners = config.model.align_corners + self.resize_bilinear = nn.ResizeBilinear() + self.concat = P.Concat() + + def _forward(self, score, target): + ph, pw = score.shape[2], score.shape[3] + h, w = target.shape[1], target.shape[2] + if ph != h or pw != w: + score = self.resize_bilinear(score, size=(h, w), align_corners=self.align_corners) + loss = self.criterion(score, target) + return loss + + def construct(self, score, target): + if self.num_outputs == 1: + score = [score] + res = [] + for w, x in zip(self.weights, score): + res.append(w * self._forward(x, target)) + result = 0 + for ele in res: + result += ele + return result diff --git a/research/cv/OCRNet/src/model_utils/moxing_adapter.py b/research/cv/OCRNet/src/model_utils/moxing_adapter.py new file mode 100644 index 0000000000000000000000000000000000000000..08a6797de103a5949f2ed7a62f19a8a4803cbe02 --- /dev/null +++ b/research/cv/OCRNet/src/model_utils/moxing_adapter.py @@ -0,0 +1,125 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""Moxing adapter for ModelArts""" +import os +import functools +from mindspore import context +from src.config import show_config + + +_global_sync_count = 0 + + +def get_device_id(): + device_id = os.getenv('DEVICE_ID', '0') + return int(device_id) + + +def get_device_num(): + device_num = os.getenv('RANK_SIZE', '1') + return int(device_num) + + +def get_rank_id(): + global_rank_id = os.getenv('RANK_ID', '0') + return int(global_rank_id) + + +def get_job_id(): + job_id = os.getenv('JOB_ID') + job_id = job_id if job_id != "" else "default" + return job_id + + +def sync_data(from_path, to_path): + """ + Download data from remote obs to local directory if the first url is remote url and the second one is local path + Upload data from local directory to remote obs in contrast. + """ + import moxing as mox + import time + global _global_sync_count + sync_lock = "/tmp/copy_sync.lock" + str(_global_sync_count) + _global_sync_count += 1 + + # Each server contains 8 devices as most. + if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock): + print("from path: ", from_path) + print("to path: ", to_path) + mox.file.copy_parallel(from_path, to_path) + print("===finish data synchronization===") + try: + os.mknod(sync_lock) + except IOError: + pass + print("===save flag===") + + while True: + if os.path.exists(sync_lock): + break + time.sleep(1) + + print("Finish sync data from {} to {}.".format(from_path, to_path)) + + +def moxing_wrapper(config, pre_process=None, post_process=None): + """ + Moxing wrapper to download dataset and upload outputs. + """ + def wrapper(run_func): + @functools.wraps(run_func) + def wrapped_func(*args, **kwargs): + # Download data from data_url + if config.modelarts: + if config.data_url: + config.data_path = "/cache/train_data_path" + sync_data(config.data_url, config.data_path) + print("Dataset downloaded: ", os.listdir(config.data_path)) + if config.checkpoint_url: + config.checkpoint_path = "/cache/load_checkpoint.ckpt" + sync_data(config.checkpoint_url, config.checkpoint_path) + print("Preload downloaded: ", config.checkpoint_path) + if config.train_url: + config.output_path = "/cache/output_path" + sync_data(config.train_url, config.output_path) + print("Workspace downloaded: ", os.listdir(config.output_path)) + if config.eval_data_url: + config.eval_data_path = "/cache/eval_data_path" + sync_data(config.eval_data_url, config.eval_data_path) + print("Workspace downloaded: ", os.listdir(config.eval_data_path)) + + context.set_context(save_graphs_path=os.path.join(config.output_path, str(get_rank_id()))) + config.device_num = get_device_num() + config.device_id = get_device_id() + if not os.path.exists(config.output_path): + os.makedirs(config.output_path) + + if pre_process: + pre_process() + + show_config(config) + run_func(*args, **kwargs) + + # Upload data to train_url + if config.modelarts: + if post_process: + post_process() + + if config.train_url: + print("Start to copy output directory") + sync_data(config.output_path, config.train_url) + return wrapped_func + return wrapper diff --git a/research/cv/OCRNet/src/seg_hrnet_ocr.py b/research/cv/OCRNet/src/seg_hrnet_ocr.py new file mode 100644 index 0000000000000000000000000000000000000000..4d541d2425c7cc49aa732807960f6c5ffe32e77d --- /dev/null +++ b/research/cv/OCRNet/src/seg_hrnet_ocr.py @@ -0,0 +1,754 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 
2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""OCRNet definition.""" +import logging +import numpy as np +import mindspore +import mindspore.nn as nn +import mindspore.ops as ops +from mindspore.ops import operations as P +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.common import initializer +from mindspore.nn import SyncBatchNorm, BatchNorm2d + +from src.utils import get_conv_bias + + +batchNorm2d = BatchNorm2d +BN_MOMENTUM = 0.9 +logger = logging.getLogger(__name__) + + +class Dropout2d(nn.Cell): + """ + Dropout2d layer for the input. + """ + + def __init__(self, keep_prob=0.5): + super(Dropout2d, self).__init__() + if keep_prob <= 0 or keep_prob > 1: + raise ValueError("dropout probability should be a number in range (0, 1], but got {}".format(keep_prob)) + self.dropout = nn.Dropout(keep_prob) + + def construct(self, x): + if not self.training: + return x + ones = ops.Ones()(x.shape[:2], x.dtype) + mask = self.dropout(ones) + out = mask.view(mask.shape + (1, 1)) * x + return out + + +class ModuleHelper: + + @staticmethod + def BNReLU(num_features, **kwargs): + return nn.SequentialCell([ + batchNorm2d(num_features=num_features, **kwargs), + nn.ReLU(), + ]) + + +def conv3x3(in_planes, out_planes, stride=1): + """3x3 convolution with padding""" + + return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride, pad_mode='pad', + padding=1, has_bias=False) + + +class NoneCell(nn.Cell): + def __init__(self): + super(NoneCell, self).__init__() + self.name = "NoneCell" + + def construct(self, x): + return x + + +class SpatialGather_Module(nn.Cell): + """ + Aggregate the context features according to the initial + predicted probability distribution. + Employ the soft-weighted method to aggregate the context. 
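+    Output: object region features of shape (N, C, K, 1), where K is the
+    number of classes in the coarse prediction.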
+ """ + + def __init__(self, cls_num=0, scale=1): + super(SpatialGather_Module, self).__init__() + self.cls_num = cls_num + self.scale = scale + self.softmax = P.Softmax(axis=2) + + def construct(self, feats, probs): + batch_size, c = probs.shape[0], probs.shape[1] + probs = probs.view(batch_size, c, -1) + feats = feats.view(batch_size, feats.shape[1], -1) + feats = P.Transpose()(feats, (0, 2, 1)) # batch x hw x c + probs = self.softmax(self.scale * probs) + ocr_context = ops.matmul(probs, feats) + ocr_context = P.Transpose()(ocr_context, (0, 2, 1)) + ocr_context = P.ExpandDims()(ocr_context, 3) + return ocr_context + + +class _ObjectAttentionBlock(nn.Cell): + ''' + The basic implementation for object context block + Input: + N X C X H X W + Parameters: + in_channels : the dimension of the input feature map + key_channels : the dimension after the key/query transform + scale : choose the scale to downsample the input feature maps (save memory cost) + bn_type : specify the bn type + Return: + N X C X H X W + ''' + + def __init__(self, + in_channels, + key_channels, + scale=1, + bn_type=None): + super(_ObjectAttentionBlock, self).__init__() + self.scale = scale + self.in_channels = in_channels + self.key_channels = key_channels + self.pool = nn.MaxPool2d(kernel_size=(scale, scale)) + self.f_pixel = nn.SequentialCell([ + nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, pad_mode='pad', padding=0, has_bias=False), + ModuleHelper.BNReLU(self.key_channels), + nn.Conv2d(in_channels=self.key_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, pad_mode='pad', padding=0, has_bias=False), + ModuleHelper.BNReLU(self.key_channels), + ]) + self.f_object = nn.SequentialCell([ + nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, pad_mode='pad', padding=0, has_bias=False), + ModuleHelper.BNReLU(self.key_channels), + nn.Conv2d(in_channels=self.key_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, pad_mode='pad', padding=0, has_bias=False), + ModuleHelper.BNReLU(self.key_channels), + ]) + self.f_down = nn.SequentialCell([ + nn.Conv2d(in_channels=self.in_channels, out_channels=self.key_channels, + kernel_size=1, stride=1, pad_mode='pad', padding=0, has_bias=False), + ModuleHelper.BNReLU(self.key_channels), + ]) + self.f_up = nn.SequentialCell([ + nn.Conv2d(in_channels=self.key_channels, out_channels=self.in_channels, + kernel_size=1, stride=1, pad_mode='pad', padding=0, has_bias=False), + ModuleHelper.BNReLU(self.in_channels), + ]) + self.transpose = P.Transpose() + self.matMul = ops.matmul + self.softmax = P.Softmax(axis=-1) + + def construct(self, x, proxy): + """construct method""" + batch_size, h, w = x.shape[0], x.shape[2], x.shape[3] + if self.scale > 1: + x = self.pool(x) + + query = self.f_pixel(x).view(batch_size, self.key_channels, -1) + query = self.transpose(query, (0, 2, 1)) + key = self.f_object(proxy).view(batch_size, self.key_channels, -1) + value = self.f_down(proxy).view(batch_size, self.key_channels, -1) + value = self.transpose(value, (0, 2, 1)) + sim_map = self.matMul(query, key) + sim_map = (self.key_channels ** -.5) * sim_map + sim_map = self.softmax(sim_map) + + # add bg context ... 
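+        # sim_map: (N, H*W, K) pixel-to-region attention weights,
+        # value:   (N, K, key_channels) object region representations;
+        # their product yields the per-pixel object-contextual features.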
+ context = self.matMul(sim_map, value) + context = self.transpose(context, (0, 2, 1)) + context = context.view(batch_size, self.key_channels, *x.shape[2:]) + context = self.f_up(context) + if self.scale > 1: + context = P.ResizeBilinear(size=(h, w), align_corners=True)(context) + + return context + + +class ObjectAttentionBlock2D(_ObjectAttentionBlock): + """Subclass of the class _ObjectAttentionBlock""" + def __init__(self, + in_channels, + key_channels, + scale=1, + bn_type=None): + super(ObjectAttentionBlock2D, self).__init__(in_channels, + key_channels, + scale, + bn_type=bn_type) + + +class SpatialOCR_Module(nn.Cell): + """ + Implementation of the OCR module: + We aggregate the global object representation to update the representation for each pixel. + """ + + def __init__(self, + in_channels, + key_channels, + out_channels, + scale=1, + dropout=0.1, + bn_type=None): + super(SpatialOCR_Module, self).__init__() + self.object_context_block = ObjectAttentionBlock2D(in_channels, + key_channels, + scale, + bn_type) + _in_channels = 2 * in_channels + + self.conv_bn_dropout = nn.SequentialCell([ + nn.Conv2d(_in_channels, out_channels, kernel_size=1, pad_mode='pad', padding=0, has_bias=False), + ModuleHelper.BNReLU(out_channels), + Dropout2d(keep_prob=1 - dropout), + ]) + self.concat = P.Concat(axis=1) + + def construct(self, feats, proxy_feats): + context = self.object_context_block(feats, proxy_feats) + output = self.conv_bn_dropout(self.concat((context, feats))) + + return output + + +class BasicBlock(nn.Cell): + """BasicBlock definition.""" + expansion = 1 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(BasicBlock, self).__init__() + self.conv1 = conv3x3(inplanes, planes, stride) + self.bn1 = batchNorm2d(planes, momentum=BN_MOMENTUM) + self.relu1 = nn.ReLU() + self.relu2 = nn.ReLU() + self.conv2 = conv3x3(planes, planes) + self.bn2 = batchNorm2d(planes, momentum=BN_MOMENTUM) + self.downsample = downsample + self.stride = stride + self.add = ops.Add() + + def construct(self, x): + """BasicBlock construction.""" + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu1(out) + + out = self.conv2(out) + out = self.bn2(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out = self.add(out, residual) + out = self.relu2(out) + + return out + + +class Bottleneck(nn.Cell): + """Bottleneck definition.""" + expansion = 4 + + def __init__(self, inplanes, planes, stride=1, downsample=None): + super(Bottleneck, self).__init__() + self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, has_bias=False) + self.bn1 = batchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride, pad_mode='pad', + padding=1, has_bias=False) + self.bn2 = batchNorm2d(planes, momentum=BN_MOMENTUM) + self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, + has_bias=False) + self.bn3 = batchNorm2d(planes * self.expansion, + momentum=BN_MOMENTUM) + self.relu1 = nn.ReLU() + self.relu2 = nn.ReLU() + self.relu3 = nn.ReLU() + self.downsample = downsample + self.stride = stride + self.add = ops.Add() + + def construct(self, x): + """Bottleneck construction.""" + residual = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu1(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu2(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + residual = self.downsample(x) + + out = self.add(out, residual) + out = self.relu3(out) + + 
return out + + +class HighResolutionModule(nn.Cell): + """HRModule definition.""" + + def __init__(self, num_branches, blocks, num_blocks, num_inchannels, + num_channels, fuse_method, multi_scale_output=True): + super(HighResolutionModule, self).__init__() + self._check_branches( + num_branches, blocks, num_blocks, num_inchannels, num_channels) + + self.num_inchannels = num_inchannels + self.fuse_method = fuse_method + self.num_branches = num_branches + + self.multi_scale_output = multi_scale_output + + self.branches = self._make_branches( + num_branches, blocks, num_blocks, num_channels) + self.fuse_layers = self._make_fuse_layers() + self.relu = nn.ReLU() + self.add = ops.Add() + self.resize_bilinear = nn.ResizeBilinear() + + def _check_branches(self, num_branches, blocks, num_blocks, num_inchannels, num_channels): + """Check branches.""" + if num_branches != len(num_blocks): + error_msg = 'NUM_BRANCHES({}) <> NUM_BLOCKS({})'.format( + num_branches, len(num_blocks)) + logger.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_channels): + error_msg = 'NUM_BRANCHES({}) <> NUM_CHANNELS({})'.format( + num_branches, len(num_channels)) + logger.error(error_msg) + raise ValueError(error_msg) + + if num_branches != len(num_inchannels): + error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format( + num_branches, len(num_inchannels)) + logger.error(error_msg) + raise ValueError(error_msg) + + def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1): + """Make one branch for parallel layer.""" + downsample = None + if stride != 1 or \ + self.num_inchannels[branch_index] != num_channels[branch_index] * block.expansion: + downsample = nn.SequentialCell([ + nn.Conv2d(self.num_inchannels[branch_index], + num_channels[branch_index] * block.expansion, + kernel_size=1, stride=stride, has_bias=False), + batchNorm2d(num_channels[branch_index] * block.expansion, + momentum=BN_MOMENTUM) + ]) + + layers = [] + layers.append(block(self.num_inchannels[branch_index], + num_channels[branch_index], stride, downsample)) + self.num_inchannels[branch_index] = num_channels[branch_index] * block.expansion + i = 1 + while i < num_blocks[branch_index]: + layers.append(block(self.num_inchannels[branch_index], + num_channels[branch_index])) + i += 1 + + return nn.SequentialCell(layers) + + def _make_branches(self, num_branches, block, num_blocks, num_channels): + branches = [] + + for i in range(num_branches): + branches.append( + self._make_one_branch(i, block, num_blocks, num_channels)) + + return nn.CellList(branches) + + def _make_fuse_layers(self): + """Make fusion layer.""" + if self.num_branches == 1: + return None + + num_branches = self.num_branches + num_inchannels = self.num_inchannels + fuse_layers = [] + for i in range(num_branches if self.multi_scale_output else 1): + fuse_layer = [] + for j in range(num_branches): + if j > i: + fuse_layer.append(nn.SequentialCell([ + nn.Conv2d(num_inchannels[j], + num_inchannels[i], + 1, + 1, + padding=0, + has_bias=False), + batchNorm2d(num_inchannels[i], momentum=BN_MOMENTUM)])) + elif j == i: + fuse_layer.append(NoneCell()) + else: + conv3x3s = [] + for k in range(i - j): + if k == i - j - 1: + num_outchannels_conv3x3 = num_inchannels[i] + conv3x3s.append(nn.SequentialCell([ + nn.Conv2d(num_inchannels[j], + num_outchannels_conv3x3, + 3, 2, pad_mode='pad', padding=1, has_bias=False), + batchNorm2d(num_outchannels_conv3x3, + momentum=BN_MOMENTUM)])) + else: + num_outchannels_conv3x3 = num_inchannels[j] + 
conv3x3s.append(nn.SequentialCell([ + nn.Conv2d(num_inchannels[j], + num_outchannels_conv3x3, + 3, 2, pad_mode='pad', padding=1, has_bias=False), + batchNorm2d(num_outchannels_conv3x3, + momentum=BN_MOMENTUM), + nn.ReLU()])) + fuse_layer.append(nn.SequentialCell(conv3x3s)) + fuse_layers.append(nn.CellList(fuse_layer)) + + return nn.CellList(fuse_layers) + + def get_num_inchannels(self): + return self.num_inchannels + + def construct(self, x): + """HRModule construction.""" + if self.num_branches == 1: + return [self.branches[0](x[0])] + + for i in range(self.num_branches): + x[i] = self.branches[i](x[i]) + + x_fuse = [] + for i in range(len(self.fuse_layers)): + y = x[0] if i == 0 else self.fuse_layers[i][0](x[0]) + for j in range(1, self.num_branches): + if i == j: + y = self.add(y, x[j]) + elif j > i: + width_output = x[i].shape[-1] + height_output = x[i].shape[-2] + t = self.fuse_layers[i][j](x[j]) + t = ops.ResizeNearestNeighbor((height_output, width_output))(t) + y = self.add(y, self.resize_bilinear(t, size=(height_output, width_output))) + else: + y = self.add(y, self.fuse_layers[i][j](x[j])) + x_fuse.append(self.relu(y)) + + return x_fuse + + +blocks_dict = { + 'BASIC': BasicBlock, + 'BOTTLENECK': Bottleneck +} + + +class HighResolutionNet(nn.Cell): + """OCRNet definition.""" + + def __init__(self, config, **kwargs): + extra = config.model.extra + super(HighResolutionNet, self).__init__() + + # stem net + self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, pad_mode='pad', padding=1, + has_bias=False) + self.bn1 = batchNorm2d(64, momentum=BN_MOMENTUM) + self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, pad_mode='pad', padding=1, + has_bias=False) + self.bn2 = batchNorm2d(64, momentum=BN_MOMENTUM) + self.relu1 = nn.ReLU() + self.relu2 = nn.ReLU() + + self.stage1_cfg = extra['STAGE1'] + num_channels = self.stage1_cfg['NUM_CHANNELS'][0] + block = blocks_dict[self.stage1_cfg['BLOCK']] + num_blocks = self.stage1_cfg['NUM_BLOCKS'][0] + self.layer1 = self._make_layer(block, 64, num_channels, num_blocks) + stage1_out_channel = block.expansion * num_channels + + self.stage2_cfg = extra['STAGE2'] + num_channels = self.stage2_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage2_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition1, self.flag1 = self._make_transition_layer( + [stage1_out_channel], num_channels) + self.stage2, pre_stage_channels = self._make_stage( + self.stage2_cfg, num_channels) + + self.stage3_cfg = extra['STAGE3'] + num_channels = self.stage3_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage3_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition2, self.flag2 = self._make_transition_layer( + pre_stage_channels, num_channels) + self.stage3, pre_stage_channels = self._make_stage( + self.stage3_cfg, num_channels) + + self.stage4_cfg = extra['STAGE4'] + num_channels = self.stage4_cfg['NUM_CHANNELS'] + block = blocks_dict[self.stage4_cfg['BLOCK']] + num_channels = [ + num_channels[i] * block.expansion for i in range(len(num_channels))] + self.transition3, self.flag3 = self._make_transition_layer( + pre_stage_channels, num_channels) + self.stage4, pre_stage_channels = self._make_stage( + self.stage4_cfg, num_channels, multi_scale_output=True) + + last_inp_channels = np.int(np.sum(pre_stage_channels)) + ocr_mid_channels = config.model.ocr.mid_channels + ocr_key_channels = config.model.ocr.key_channels + + self.resize_bilinear = 
nn.ResizeBilinear() + + self.conv3x3_ocr = nn.SequentialCell([ + nn.Conv2d(last_inp_channels, ocr_mid_channels, + kernel_size=3, stride=1, pad_mode='pad', padding=1), + batchNorm2d(ocr_mid_channels), + nn.ReLU(), + ]) + self.ocr_gather_head = SpatialGather_Module(config.dataset.num_classes) + + self.ocr_distri_head = SpatialOCR_Module(in_channels=ocr_mid_channels, + key_channels=ocr_key_channels, + out_channels=ocr_mid_channels, + scale=1, + dropout=0.05, + ) + self.cls_head = nn.Conv2d( + ocr_mid_channels, config.dataset.num_classes, kernel_size=1, stride=1, + padding=0, pad_mode='pad', has_bias=True) + + self.aux_head = nn.SequentialCell([ + nn.Conv2d(last_inp_channels, last_inp_channels, + kernel_size=1, stride=1, pad_mode='pad', padding=0), + batchNorm2d(last_inp_channels), + nn.ReLU(), + nn.Conv2d(last_inp_channels, config.dataset.num_classes, + kernel_size=1, stride=1, pad_mode='pad', padding=0, has_bias=True), + ]) + + def _make_transition_layer(self, num_channels_pre_layer, num_channels_cur_layer): + """Make a transition layer between different stages.""" + num_branches_cur = len(num_channels_cur_layer) + num_branches_pre = len(num_channels_pre_layer) + + transition_layers = [] + flag = [] + for i in range(num_branches_cur): + if i < num_branches_pre: + if num_channels_cur_layer[i] != num_channels_pre_layer[i]: + transition_layers.append(nn.SequentialCell([ + nn.Conv2d(num_channels_pre_layer[i], + num_channels_cur_layer[i], + 3, + 1, + pad_mode='pad', + padding=1, + has_bias=False), + batchNorm2d( + num_channels_cur_layer[i], momentum=BN_MOMENTUM), + nn.ReLU()])) + flag.append("ops") + else: + transition_layers.append(NoneCell()) + flag.append(None) + else: + conv3x3s = [] + for j in range(i + 1 - num_branches_pre): + inchannels = num_channels_pre_layer[-1] + outchannels = num_channels_cur_layer[i] \ + if j == i - num_branches_pre else inchannels + conv3x3s.append(nn.SequentialCell([ + nn.Conv2d(inchannels, outchannels, 3, 2, pad_mode='pad', padding=1, has_bias=False), + batchNorm2d(outchannels, momentum=BN_MOMENTUM), + nn.ReLU()])) + transition_layers.append(nn.SequentialCell(conv3x3s)) + flag.append("ops") + + return nn.CellList(transition_layers), flag + + def _make_layer(self, block, inplanes, planes, blocks, stride=1): + """Make the first stage.""" + downsample = None + if stride != 1 or inplanes != planes * block.expansion: + downsample = nn.SequentialCell([ + nn.Conv2d(inplanes, planes * block.expansion, + kernel_size=1, stride=stride, has_bias=False), + batchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM), + ]) + + layers = [] + layers.append(block(inplanes, planes, stride, downsample)) + inplanes = planes * block.expansion + i = 1 + while i < blocks: + layers.append(block(inplanes, planes)) + i += 1 + + return nn.SequentialCell(layers) + + def _make_stage(self, layer_config, num_inchannels, multi_scale_output=True): + """Make a stage.""" + num_modules = layer_config['NUM_MODULES'] + num_branches = layer_config['NUM_BRANCHES'] + num_blocks = layer_config['NUM_BLOCKS'] + num_channels = layer_config['NUM_CHANNELS'] + block = blocks_dict[layer_config['BLOCK']] + fuse_method = layer_config['FUSE_METHOD'] + + modules = [] + for i in range(num_modules): + # multi_scale_output is only used last module + if not multi_scale_output and i == num_modules - 1: + reset_multi_scale_output = False + else: + reset_multi_scale_output = True + modules.append( + HighResolutionModule(num_branches, + block, + num_blocks, + num_inchannels, + num_channels, + fuse_method, + 
reset_multi_scale_output) + ) + num_inchannels = modules[-1].get_num_inchannels() + self.concat = ops.Concat(axis=1) + + return nn.SequentialCell(modules), num_inchannels + + def construct(self, x): + """OCRNet construction.""" + x = self.conv1(x) + x = self.bn1(x) + x = self.relu1(x) + x = self.conv2(x) + x = self.bn2(x) + x = self.relu2(x) + x = self.layer1(x) + + x_list = [] + for i in range(self.stage2_cfg['NUM_BRANCHES']): + # if self.transition1[i] is not None: + if self.flag1[i] is not None: + x_list.append(self.transition1[i](x)) + else: + x_list.append(x) + y_list = self.stage2(x_list) + + x_list = [] + for i in range(self.stage3_cfg['NUM_BRANCHES']): + # if self.transition2[i] is not None: + if self.flag2[i] is not None: + if i < self.stage2_cfg['NUM_BRANCHES']: + x_list.append(self.transition2[i](y_list[i])) + else: + x_list.append(self.transition2[i](y_list[-1])) + else: + x_list.append(y_list[i]) + y_list = self.stage3(x_list) + + x_list = [] + for i in range(self.stage4_cfg['NUM_BRANCHES']): + # if self.transition3[i] is not None: + if self.flag3[i] is not None: + if i < self.stage3_cfg['NUM_BRANCHES']: + x_list.append(self.transition3[i](y_list[i])) + else: + x_list.append(self.transition3[i](y_list[-1])) + else: + x_list.append(y_list[i]) + x = self.stage4(x_list) + + # Upsampling + out1, out2, out3, out4 = x + h, w = ops.Shape()(out1)[2:] + x1 = ops.Cast()(out1, mindspore.dtype.float32) + x2 = self.resize_bilinear(out2, size=(h, w)) + x3 = self.resize_bilinear(out3, size=(h, w)) + x4 = self.resize_bilinear(out4, size=(h, w)) + + feats = self.concat((x1, x2, x3, x4)) + + # OCR Module + out_aux_seg = [] + out_aux = self.aux_head(feats) + feats = self.conv3x3_ocr(feats) + context = self.ocr_gather_head(feats, out_aux) + feats = self.ocr_distri_head(feats, context) + out = self.cls_head(feats) + out_aux_seg.append(out_aux) + out_aux_seg.append(out) + + return out_aux_seg + + +def get_seg_model(cfg, **kwargs): + """Create OCRNet object, and initialize it by initializer or checkpoint.""" + global batchNorm2d + if cfg.run_distribute: + batchNorm2d = SyncBatchNorm + model = HighResolutionNet(cfg, **kwargs) + for name, cell in model.cells_and_names(): + if any(part in name for part in {'cls', 'aux', 'ocr'}): + if isinstance(cell, nn.Conv2d): + cell.weight.set_data( + initializer.initializer(initializer.HeNormal(), cell.weight.shape, cell.weight.dtype)) + elif isinstance(cell, (BatchNorm2d, SyncBatchNorm)): + cell.gamma.set_data(initializer.initializer(1, + cell.gamma.shape, + cell.gamma.dtype)) + cell.beta.set_data(initializer.initializer(0, + cell.beta.shape, + cell.beta.dtype)) + continue + if isinstance(cell, nn.Conv2d): + cell.weight.set_data(initializer.initializer(initializer.TruncatedNormal(sigma=0.001), + cell.weight.shape, + cell.weight.dtype)) + if cell.has_bias: + cell.bias.set_data(get_conv_bias(cell)) + elif isinstance(cell, (BatchNorm2d, SyncBatchNorm)): + cell.gamma.set_data(initializer.initializer(1, + cell.gamma.shape, + cell.gamma.dtype)) + cell.beta.set_data(initializer.initializer(0, + cell.beta.shape, + cell.beta.dtype)) + + if cfg.checkpoint_path: + pretrained_dict = load_checkpoint(cfg.checkpoint_path) + load_param_into_net(model, pretrained_dict) + + return model diff --git a/research/cv/OCRNet/src/utils.py b/research/cv/OCRNet/src/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..6c5f5fc4a2f2fe64093d2ca481fc46b151629d7a --- /dev/null +++ b/research/cv/OCRNet/src/utils.py @@ -0,0 +1,54 @@ +# Copyright 2021 Huawei Technologies Co., 
Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +"""Initialize bias of convolution cell.""" +import math +from mindspore.common import initializer + + +def calculate_fan_in_and_fan_out(shape): + """ + calculate fan_in and fan_out + + Args: + shape (tuple): input shape. + + Returns: + Tuple, a tuple with two elements, the first element is `n_in` and the second element is `n_out`. + """ + dimensions = len(shape) + if dimensions < 2: + raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions") + if dimensions == 2: # Linear + fan_in = shape[1] + fan_out = shape[0] + else: + num_input_fmaps = shape[1] + num_output_fmaps = shape[0] + receptive_field_size = 1 + if dimensions > 2: + receptive_field_size = shape[2] * shape[3] + fan_in = num_input_fmaps * receptive_field_size + fan_out = num_output_fmaps * receptive_field_size + return fan_in, fan_out + + +def get_conv_bias(cell): + weight = initializer.initializer(initializer.HeUniform(negative_slope=math.sqrt(5)), + cell.weight.shape, cell.weight.dtype).to_tensor() + fan_in, _ = calculate_fan_in_and_fan_out(weight.shape) + bound = 1 / math.sqrt(fan_in) + return initializer.initializer(initializer.Uniform(scale=bound), + cell.bias.shape, cell.bias.dtype) diff --git a/research/cv/OCRNet/train.py b/research/cv/OCRNet/train.py new file mode 100644 index 0000000000000000000000000000000000000000..3bd2b021a13b02121c371700e01e91906e4aa148 --- /dev/null +++ b/research/cv/OCRNet/train.py @@ -0,0 +1,198 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +"""OCRNet training.""" +import os +import ast +import argparse +import numpy as np + +import mindspore.dataset as de +from mindspore.common import set_seed +from mindspore.context import ParallelMode +from mindspore.communication.management import init +from mindspore import context, Model +from mindspore.nn import SGD +from mindspore.train.loss_scale_manager import FixedLossScaleManager +from mindspore.train.callback import LossMonitor, TimeMonitor, ModelCheckpoint, CheckpointConfig + +from src.config import config_hrnetv2_w48 as config +from src.config import organize_configuration +from src.cityscapes import Cityscapes +from src.seg_hrnet_ocr import get_seg_model +from src.loss import CrossEntropy +from src.callback import EvalCallback +from src.model_utils.moxing_adapter import moxing_wrapper + + +set_seed(1) +de.config.set_seed(1) + + +def eval_callback(network, cfg): + """Create an object for inference while training.""" + dataset = Cityscapes(cfg.data_path, + num_samples=None, + num_classes=cfg.dataset.num_classes, + multi_scale=False, + flip=False, + ignore_label=cfg.dataset.ignore_label, + base_size=cfg.eval.base_size, + crop_size=cfg.eval.image_size, + downsample_rate=1, + scale_factor=16, + mean=cfg.dataset.mean, + std=cfg.dataset.std, + is_train=False) + data_vl = de.GeneratorDataset(dataset, column_names=["image", "label"], + shuffle=False, + num_parallel_workers=cfg.workers) + data_vl = data_vl.batch(1, drop_remainder=True) + eval_cb = EvalCallback(network, data_vl, cfg.dataset.num_classes, + cfg.dataset.ignore_label, cfg.output_path, eval_interval=cfg.eval_interval) + return eval_cb + + +def get_exp_lr(base_lr, xs, power=4e-10): + """Get learning rates for each step.""" + ys = [] + for x in xs: + ys.append(base_lr / np.exp(power*x**2)) + return ys + + +def parse_args(): + """Get arguments from command-line.""" + parser = argparse.ArgumentParser(description="Mindspore OCRNet Training Configurations.") + parser.add_argument("--data_url", type=str, default=None, help="Storage path of dataset in OBS.") + parser.add_argument("--train_url", type=str, default=None, help="Storage path of training results in OBS.") + parser.add_argument("--data_path", type=str, default=None, help="Storage path of dataset on machine.") + parser.add_argument("--output_path", type=str, default=None, help="Storage path of training results on machine.") + parser.add_argument("--checkpoint_url", type=str, default=None, + help="Storage path of checkpoint for pretraining or resuming in OBS.") + parser.add_argument("--checkpoint_path", type=str, default=None, + help="Storage path of checkpoint for pretraining or resuming on machine.") + parser.add_argument("--modelarts", type=ast.literal_eval, default=False, + help="Run on ModelArts or offline machines.") + parser.add_argument("--run_distribute", type=ast.literal_eval, default=False, + help="Use one card or multiple cards training.") + parser.add_argument("--lr", type=float, default=0.0012, + help="Base learning rate.") + parser.add_argument("--lr_power", type=float, default=4e-10, + help="Learning rate adjustment power.") + parser.add_argument("--begin_epoch", type=int, default=0, + help="If it's a training resuming task, give it a beginning epoch.") + parser.add_argument("--end_epoch", type=int, default=1000, + help="If you want to stop the task early, give it an ending epoch.") + parser.add_argument("--batchsize", type=int, default=3, + help="batch size.") + 
parser.add_argument("--eval_callback", type=ast.literal_eval, default=False, + help="To use inference while training or not.") + parser.add_argument("--eval_interval", type=int, default=50, + help="Epoch interval of evaluating result during training.") + return parser.parse_args() + + +@moxing_wrapper(config) +def main(): + """Training process.""" + context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target) + if config.run_distribute: + init() + device_id = int(os.getenv("DEVICE_ID")) + device_num = int(os.getenv("RANK_SIZE")) + parallel_mode = ParallelMode.DATA_PARALLEL + context.set_auto_parallel_context(parallel_mode=parallel_mode, + gradients_mean=True, + device_num=device_num) + else: + device_id = 0 + device_num = 1 + + # Create dataset + # prepare dataset for train + crop_size = (config.train.image_size[0], config.train.image_size[1]) + data_tr = Cityscapes(config.data_path, + num_samples=None, + num_classes=config.dataset.num_classes, + multi_scale=config.train.multi_scale, + flip=config.train.flip, + ignore_label=config.dataset.ignore_label, + base_size=config.train.base_size, + crop_size=crop_size, + downsample_rate=config.train.downsample_rate, + scale_factor=config.train.scale_factor, + mean=config.dataset.mean, + std=config.dataset.std, + is_train=True) + # dataset.show() + if device_num == 1: + dataset = de.GeneratorDataset(data_tr, column_names=["image", "label"], + num_parallel_workers=config.workers, + shuffle=config.train.shuffle) + else: + dataset = de.GeneratorDataset(data_tr, column_names=["image", "label"], + num_parallel_workers=config.workers, + shuffle=config.train.shuffle, + num_shards=device_num, shard_id=device_id) + dataset = dataset.batch(config.batchsize, drop_remainder=True) + + # Create network + net = get_seg_model(config) + net.set_train(True) + + # Create loss + loss = CrossEntropy(num_classes=config.dataset.num_classes, ignore_label=255) + loss_scale_manager = FixedLossScaleManager(config.loss.loss_scale, False) + # Learning rate adjustment. + steps_per_epoch = dataset.get_dataset_size() + total_steps = config.total_epoch * steps_per_epoch + begin_step = config.begin_epoch * steps_per_epoch + end_step = config.end_epoch * steps_per_epoch + xs = np.linspace(0, total_steps, total_steps) + lr = get_exp_lr(config.lr, xs, power=config.lr_power) + lr = lr[begin_step: end_step] + # Optimizer + opt = SGD(filter(lambda x: x.requires_grad, net.get_parameters()), + lr, + config.train.opt_momentum, + config.train.wd) + + # Create model + model = Model(net, loss_fn=loss, optimizer=opt, loss_scale_manager=loss_scale_manager, amp_level="O3", + keep_batchnorm_fp32=False) + # Callbacks + time_cb = TimeMonitor(data_size=steps_per_epoch) + loss_cb = LossMonitor() + # Save-checkpoint callback + ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch * config.save_checkpoint_epochs, + keep_checkpoint_max=config.keep_checkpoint_max) + ckpt_cb = ModelCheckpoint(prefix='{}'.format("seg_OCRNet-SGD"), + directory=config.output_path+"/card" + str(device_id), + config=ckpt_config) + cb = [time_cb, loss_cb, ckpt_cb] + # Self-defined callbacks + if config.eval_callback: + eval_cb = eval_callback(net, config) + cb.append(eval_cb) + + train_epoch = config.end_epoch - config.begin_epoch + model.train(train_epoch, dataset, callbacks=cb, dataset_sink_mode=True) + + +if __name__ == '__main__': + args = parse_args() + organize_configuration(cfg=config, args=args) + main()