diff --git a/research/cv/efficientnet-b1/README_CN.md b/research/cv/efficientnet-b1/README_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..da509b80cdf097448c415eb54898fafbe7d4a429 --- /dev/null +++ b/research/cv/efficientnet-b1/README_CN.md @@ -0,0 +1,244 @@ +# 目录 + +- [目录](#目录) +- [EfficientNet-B1描述](#EfficientNet-B1描述) +- [模型架构](#模型架构) +- [数据集](#数据集) +- [环境要求](#环境要求) +- [脚本说明](#脚本说明) + - [脚本和示例代码](#脚本和示例代码) + - [脚本参数](#脚本参数) + - [训练过程](#训练过程) + - [启动](#启动) + - [结果](#结果) + - [评估过程](#评估过程) + - [启动](#启动-1) + - [结果](#结果-1) + - [推理过程](#推理过程) + - [导出MINDIR](#导出MINDIR) + - [在Ascend310执行推理](#在Ascend310执行推理) + - [结果](#结果-2) +- [模型说明](#模型说明) + - [训练性能](#训练性能) +- [随机情况的描述](#随机情况的描述) +- [ModelZoo 主页](#modelzoo-主页) + +<!-- /TOC --> + +# EfficientNet-B1描述 + +EfficientNet是一种卷积神经网络架构和缩放方法,它使用复合系数统一缩放深度/宽度/分辨率的所有维度。与任意缩放这些因素的常规做法不同,EfficientNet缩放方法使用一组固定的缩放系数来均匀缩放网络宽度,深度和分辨率。(2019年) + +[论文](https://arxiv.org/abs/1905.11946):Mingxing Tan, Quoc V. Le. EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks. 2019. 
+ +# 模型架构 + +EfficientNet总体网络架构如下: + +[链接](https://arxiv.org/abs/1905.11946) + +# 数据集 + +使用的数据集:[imagenet](http://www.image-net.org/) + +- 数据集大小: 146G, 1330k 1000类彩色图像 + - 训练: 140G, 1280k张图片 + - 测试: 6G, 50k张图片 +- 数据格式:RGB + - 注:数据在src/dataset.py中处理。 + +# 环境要求 + +- 硬件(Ascend) + - 使用Ascend来搭建硬件环境。 +- 框架 + - [MindSpore](https://www.mindspore.cn/install) +- 如需查看详情,请参见如下资源: + - [MindSpore 教程](https://www.mindspore.cn/tutorials/zh-CN/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html) + +# 脚本说明 + +## 脚本和示例代码 + +```text +├─ EfficientNet-B1 +│ ├─ README_CN.md # EfficientNet-B1相关描述 +│ ├─ ascend310_infer # 310推理脚本 +│ │ ├─ inc +│ │ │ └─ utils.h +│ │ └─ src +│ │ ├─ build.sh +│ │ ├─ CMakeLists.txt +│ │ ├─ main.cc +│ │ └─ utils.cc +│ ├─ scripts +│ │ ├─ run_infer_310.sh # 用于310推理的shell脚本 +│ │ ├─ run_standalone_train.sh # 用于单卡训练的shell脚本 +│ │ ├─ run_distribute_train.sh # 用于八卡训练的shell脚本 +│ │ └─ run_eval.sh # 用于评估的shell脚本 +│ ├─ src +│ │ ├─ model_utils # modelarts训练适应脚本 +│ │ │ └─ moxing_adapter.py +│ │ ├─ models # EfficientNet架构 +│ │ │ ├─ effnet.py +│ │ │ └─ layers.py +│ │ ├─ callback.py # 参数配置 +│ │ ├─ config.py # 配置参数 +│ │ ├─ dataset.py # 创建数据集 +│ │ ├─ loss.py # 损失函数 +│ │ └─ utils.py # 工具函数脚本 +│ ├─ create_imagenet2012_label.py # 创建ImageNet2012标签 +│ ├─ eval.py # 评估脚本 +│ ├─ export.py # 模型格式转换脚本 +│ ├─ postprocess.py # 310推理后处理脚本 +│ └─ train.py # 训练脚本 +``` + +## 脚本参数 + +模型训练和评估过程中使用的参数可以在config.py中设置: + +```text +'train_url': None, # 训练输出路径(桶) +'train_path': None, # 训练输出路径 +'data_url': None, # 训练数据集路径(桶) +'data_path': None, # 训练数据集路径 +'checkpoint_url': None, # checkpoint路径(桶) +'checkpoint_path': None, # checkpoint路径 +'eval_data_url': None, # 推理数据集路径(桶) +'eval_data_path': None, # 推理数据集路径 +'eval_interval': 10, # 训练时推理的时间间隔 +'modelarts': False, # 是否使用modelarts +'run_distribute': False, # 是否多卡训练 +'device_target': 'Ascend', # 训练平台 +'begin_epoch': 0, # 开始训练周期 +'end_epoch': 350, # 结束训练周期 +'total_epoch': 350, # 总训练周期数 +'dataset': 'imagenet', # 
数据集名称 +'num_classes': 1000, # 数据集类别数 +'batchsize': 128, # 输入批次大小 +'input_size': 240, # 输入尺寸大小 +'lr_scheme': 'linear', # 学习率衰减方案 +'lr': 0.15, # 最大学习率 +'lr_init': 0.0001, # 初始学习率 +'lr_end': 5e-5, # 最终学习率 +'warmup_epochs': 2, # 热身周期数 +'use_label_smooth': True, # 是否使用label smooth +'label_smooth_factor': 0.1, # 标签平滑因子 +'conv_init': 'TruncatedNormal', # 卷积层初始化方式 +'dense_init': 'RandomNormal', # 全连接层初始化方式 +'optimizer': 'rmsprop', # 优化器 +'loss_scale': 1024, # loss scale +'opt_momentum': 0.9, # 动量参数 +'wd': 1e-5, # 权重衰减率 +'eps': 0.001, # epsilon +'save_ckpt': True, # 是否保存ckpt文件 +'save_checkpoint_epochs': 1, # 每迭代相应次数保存一个ckpt文件 +'keep_checkpoint_max': 10, # 保存ckpt文件的最大数量 +``` + +## 训练过程 + +### 启动 + +您可以使用python或shell脚本进行训练。 + +```bash +# 训练示例 + python: + Ascend单卡训练示例: + python train.py --data_path [DATA_DIR] --train_path [TRAIN_OUTPUT_PATH] --model efficientnet-b1 --run_distribute False + + shell: + Ascend单卡训练示例: bash scripts/run_standalone_train.sh [DATASET_PATH] [TRAIN_OUTPUT_PATH] + Ascend八卡并行训练: + bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] [DATASET_PATH] +``` + +### 结果 + +多卡训练ckpt文件将存储在 `./checkpoint` 路径下,而单卡训练存放于指定目录。训练日志将被记录到 `log` 中。训练日志部分示例如下: + +```text +epoch: [ 1/350], epoch time: 2709470.652, steps: 625, per step time: 4335.153, avg loss: 5.401, lr:[0.050] +epoch: [ 2/350], epoch time: 236883.599, steps: 625, per step time: 379.014, avg loss: 4.142, lr:[0.100] +epoch: [ 3/350], epoch time: 236615.708, steps: 625, per step time: 378.585, avg loss: 3.724, lr:[0.100] +epoch: [ 4/350], epoch time: 236606.486, steps: 625, per step time: 378.570, avg loss: 3.133, lr:[0.099] +epoch: [ 5/350], epoch time: 236639.009, steps: 625, per step time: 378.622, avg loss: 3.225, lr:[0.099] +``` + +## 评估过程 + +### 启动 + +您可以使用python或shell脚本进行评估。 + +```bash +# 评估示例 + python: + python eval.py --data_path [DATA_DIR] --checkpoint_path [PATH_CHECKPOINT] + + shell: + bash scripts/run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] +``` + +> 训练过程中可以生成ckpt文件。 + +### 结果 + +可以在 `eval_log` 
查看评估结果。 + +```bash +{'Loss': 1.8175019884720827, 'Top_1_Acc': 0.7914495192307693, 'Top_5_Acc': 0.9445458333333333} +``` + +# 推理过程 + +## 导出MINDIR + +```bash +python export.py --checkpoint_path [CHECKPOINT_FILE_PATH] --file_name [OUTPUT_FILE_NAME] --width 240 --height 240 --file_format MINDIR +``` + +## 在Ascend310执行推理 + +在执行推理前,mindir文件必须通过 `export.py` 脚本导出。以下展示了使用mindir模型执行推理的示例。 + +```bash +# Ascend310 inference +bash scripts/run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [DEVICE_ID] +``` + +## 结果 + +推理结果保存在脚本执行的当前路径,你可以在acc.log中看到精度计算结果。 + +# 模型说明 + +## 训练性能 + +| 参数 | Ascend | +| -------------------------- | ------------------------------------- | +| 模型名称 | EfficientNet | +| 模型版本 | B1 | +| 运行环境 | HUAWEI CLOUD Modelarts | +| 上传时间 | 2021-12-06 | +| 数据集 | imagenet | +| 训练参数 | src/config.py | +| 优化器 | RMSProp | +| 损失函数 | CrossEntropySmooth | +| 最终损失 | 1.82 | +| 精确度 (8p) | Top1[79.1%], Top5[94.4%] | +| 训练总时间 (8p) | 25.1h | +| 评估总时间 | 84s | +| 参数量 (M) | 30M | +| 脚本 | [链接](https://gitee.com/mindspore/models/tree/master/research/cv/efficientnet-b1) | + +# 随机情况的描述 + +我们在 `train.py` 脚本中设置了随机种子。 + +# ModelZoo + +请核对官方 [主页](https://gitee.com/mindspore/models) 。 diff --git a/research/cv/efficientnet-b1/ascend310_infer/inc/utils.h b/research/cv/efficientnet-b1/ascend310_infer/inc/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..b0841a615b650ac9dc71b14ff00cc7d1f5e6f6da --- /dev/null +++ b/research/cv/efficientnet-b1/ascend310_infer/inc/utils.h @@ -0,0 +1,35 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
// Returns the sorted paths ("<dirName>/<entry>") of the regular files located directly
// in dirName. Returns an empty vector when the directory cannot be opened.
std::vector<std::string> GetAllFiles(std::string_view dirName);
// Resolves dirName with RealPath(), verifies that it is a directory and opens it.
// Returns nullptr on any failure. The handle is not closed by this function, so the
// caller is responsible for calling closedir() on it.
DIR *OpenDir(std::string_view dirName);
// Returns the canonical absolute form of path (via realpath()), or an empty string
// when the path cannot be resolved.
std::string RealPath(std::string_view path);
// Reads the whole file into a one-dimensional uint8 tensor named after the file.
// Returns a default-constructed MSTensor when the path is empty or cannot be read.
mindspore::MSTensor ReadFileToTensor(const std::string &file);
// Dumps the raw bytes of every tensor in outputs to
// "./result_Files/<image base name without extension>_<index>.bin". Returns 0 on success.
int WriteResult(const std::string& imageFile, const std::vector<mindspore::MSTensor> &outputs);
// std::string overload: returns the sorted paths of all entries of dir_name whose name
// is longer than three characters (no file-type filtering is applied).
std::vector<std::string> GetAllFiles(std::string dir_name);
// For every sub-directory of dir_name (in sorted order) returns the file list produced
// by GetAllFiles(std::string). Returns an empty vector when dir_name cannot be opened.
std::vector<std::vector<std::string>> GetAllInputData(std::string dir_name);
# Configure and build the Ascend 310 inference binary in the current directory.
# MINDSPORE_PATH is derived from the "Location" reported by `pip show mindspore-ascend`
# (with "/mindspore" appended and the result canonicalised by realpath) and is consumed
# by CMakeLists.txt to locate the MindSpore headers and libraries.
cmake . -DMINDSPORE_PATH="`pip show mindspore-ascend | grep Location | awk '{print $2"/mindspore"}' | xargs realpath`"
# Build with the Makefile generated by the cmake invocation above.
make
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <sys/time.h> +#include <gflags/gflags.h> +#include <dirent.h> +#include <iostream> +#include <string> +#include <algorithm> +#include <iosfwd> +#include <vector> +#include <fstream> +#include <sstream> + +#include "include/api/model.h" +#include "include/api/context.h" +#include "include/api/types.h" +#include "include/api/serialization.h" +#include "include/dataset/vision_ascend.h" +#include "include/dataset/execute.h" +#include "include/dataset/transforms.h" +#include "include/dataset/vision.h" +#include "inc/utils.h" + +using mindspore::dataset::vision::Decode; +using mindspore::dataset::vision::Resize; +using mindspore::dataset::vision::CenterCrop; +using mindspore::dataset::vision::Normalize; +using mindspore::dataset::vision::HWC2CHW; +using mindspore::dataset::TensorTransform; +using mindspore::Context; +using mindspore::Serialization; +using mindspore::Model; +using mindspore::Status; +using mindspore::ModelType; +using mindspore::GraphCell; +using mindspore::kSuccess; +using mindspore::MSTensor; +using mindspore::dataset::Execute; + +DEFINE_string(mindir_path, "", "mindir path"); +DEFINE_string(dataset_path, ".", "dataset path"); +DEFINE_int32(device_id, 0, "device id"); + +int main(int argc, char **argv) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + if (RealPath(FLAGS_mindir_path).empty()) { + std::cout << "Invalid mindir" << std::endl; + return 1; + } + + auto context = std::make_shared<Context>(); + auto ascend310 = std::make_shared<mindspore::Ascend310DeviceInfo>(); + ascend310->SetDeviceID(FLAGS_device_id); + 
context->MutableDeviceInfo().push_back(ascend310); + mindspore::Graph graph; + Serialization::Load(FLAGS_mindir_path, ModelType::kMindIR, &graph); + Model model; + Status ret = model.Build(GraphCell(graph), context); + if (ret != kSuccess) { + std::cout << "ERROR: Build failed." << std::endl; + return 1; + } + + auto all_files = GetAllInputData(FLAGS_dataset_path); + if (all_files.empty()) { + std::cout << "ERROR: no input data." << std::endl; + return 1; + } + std::map<double, double> costTime_map; + size_t size = all_files.size(); + // Define transform + std::vector<int32_t> crop_paras = {240}; + std::vector<int32_t> resize_paras = {256}; + std::vector<float> mean = {0.485 * 255, 0.456 * 255, 0.406 * 255}; + std::vector<float> std = {0.229 * 255, 0.224 * 255, 0.225 * 255}; + + std::shared_ptr<TensorTransform> decode(new Decode()); + std::shared_ptr<TensorTransform> resize(new Resize(resize_paras)); + std::shared_ptr<TensorTransform> centercrop(new CenterCrop(crop_paras)); + std::shared_ptr<TensorTransform> normalize(new Normalize(mean, std)); + std::shared_ptr<TensorTransform> hwc2chw(new HWC2CHW()); + + std::vector<std::shared_ptr<TensorTransform>> trans_list = {decode, resize, centercrop, normalize, hwc2chw}; + mindspore::dataset::Execute SingleOp(trans_list); + + for (size_t i = 0; i < size; ++i) { + for (size_t j = 0; j < all_files[i].size(); ++j) { + struct timeval start = {0}; + struct timeval end = {0}; + double startTimeMs; + double endTimeMs; + std::vector<MSTensor> inputs; + std::vector<MSTensor> outputs; + std::cout << "Start predict input files:" << all_files[i][j] <<std::endl; + auto imgDvpp = std::make_shared<MSTensor>(); + SingleOp(ReadFileToTensor(all_files[i][j]), imgDvpp.get()); + + inputs.emplace_back(imgDvpp->Name(), imgDvpp->DataType(), imgDvpp->Shape(), + imgDvpp->Data().get(), imgDvpp->DataSize()); + + std::cout << "size of input" <<std::endl; + for (auto shape : imgDvpp->Shape()) + std::cout << shape <<std::endl; + std::cout << 
imgDvpp->DataSize() <<std::endl; + + gettimeofday(&start, nullptr); + ret = model.Predict(inputs, &outputs); + gettimeofday(&end, nullptr); + if (ret != kSuccess) { + std::cout << "Predict " << all_files[i][j] << " failed." << std::endl; + return 1; + } + startTimeMs = (1.0 * start.tv_sec * 1000000 + start.tv_usec) / 1000; + endTimeMs = (1.0 * end.tv_sec * 1000000 + end.tv_usec) / 1000; + costTime_map.insert(std::pair<double, double>(startTimeMs, endTimeMs)); + WriteResult(all_files[i][j], outputs); + } + } + + double average = 0.0; + int inferCount = 0; + + for (auto iter = costTime_map.begin(); iter != costTime_map.end(); iter++) { + double diff = 0.0; + diff = iter->second - iter->first; + average += diff; + inferCount++; + } + average = average / inferCount; + std::stringstream timeCost; + timeCost << "NN inference cost average time: "<< average << " ms of infer_count " << inferCount << std::endl; + std::cout << "NN inference cost average time: "<< average << "ms of infer_count " << inferCount << std::endl; + std::string fileName = "./time_Result" + std::string("/test_perform_static.txt"); + std::ofstream fileStream(fileName.c_str(), std::ios::trunc); + fileStream << timeCost.str(); + fileStream.close(); + costTime_map.clear(); + return 0; +} diff --git a/research/cv/efficientnet-b1/ascend310_infer/src/utils.cc b/research/cv/efficientnet-b1/ascend310_infer/src/utils.cc new file mode 100644 index 0000000000000000000000000000000000000000..461d43650bab038fb5e2e8b7644b9a8064f5131b --- /dev/null +++ b/research/cv/efficientnet-b1/ascend310_infer/src/utils.cc @@ -0,0 +1,183 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <fstream> +#include <algorithm> +#include <iostream> +#include "inc/utils.h" + +using mindspore::MSTensor; +using mindspore::DataType; + + +std::vector<std::vector<std::string>> GetAllInputData(std::string dir_name) { + std::vector<std::vector<std::string>> ret; + + DIR *dir = OpenDir(dir_name); + if (dir == nullptr) { + return {}; + } + struct dirent *filename; + /* read all the files in the dir ~ */ + std::vector<std::string> sub_dirs; + while ((filename = readdir(dir)) != nullptr) { + std::string d_name = std::string(filename->d_name); + // get rid of "." and ".." + if (d_name == "." || d_name == ".." || d_name.empty()) { + continue; + } + std::string dir_path = RealPath(std::string(dir_name) + "/" + filename->d_name); + struct stat s; + lstat(dir_path.c_str(), &s); + if (!S_ISDIR(s.st_mode)) { + continue; + } + + sub_dirs.emplace_back(dir_path); + } + std::sort(sub_dirs.begin(), sub_dirs.end()); + + (void)std::transform(sub_dirs.begin(), sub_dirs.end(), std::back_inserter(ret), + [](const std::string &d) { return GetAllFiles(d); }); + + return ret; +} + +std::vector<std::string> GetAllFiles(std::string dir_name) { + struct dirent *filename; + DIR *dir = OpenDir(dir_name); + if (dir == nullptr) { + return {}; + } + + std::vector<std::string> res; + while ((filename = readdir(dir)) != nullptr) { + std::string d_name = std::string(filename->d_name); + if (d_name == "." || d_name == ".." 
|| d_name.size() <= 3) { + continue; + } + res.emplace_back(std::string(dir_name) + "/" + filename->d_name); + } + std::sort(res.begin(), res.end()); + + return res; +} + +std::vector<std::string> GetAllFiles(std::string_view dirName) { + struct dirent *filename; + DIR *dir = OpenDir(dirName); + if (dir == nullptr) { + return {}; + } + std::vector<std::string> res; + while ((filename = readdir(dir)) != nullptr) { + std::string dName = std::string(filename->d_name); + if (dName == "." || dName == ".." || filename->d_type != DT_REG) { + continue; + } + res.emplace_back(std::string(dirName) + "/" + filename->d_name); + } + std::sort(res.begin(), res.end()); + for (auto &f : res) { + std::cout << "image file: " << f << std::endl; + } + return res; +} + +int WriteResult(const std::string& imageFile, const std::vector<MSTensor> &outputs) { + std::string homePath = "./result_Files"; + for (size_t i = 0; i < outputs.size(); ++i) { + size_t outputSize; + std::shared_ptr<const void> netOutput; + netOutput = outputs[i].Data(); + outputSize = outputs[i].DataSize(); + int pos = imageFile.rfind('/'); + std::string fileName(imageFile, pos + 1); + fileName.replace(fileName.find('.'), fileName.size() - fileName.find('.'), '_' + std::to_string(i) + ".bin"); + std::string outFileName = homePath + "/" + fileName; + FILE *outputFile = fopen(outFileName.c_str(), "wb"); + fwrite(netOutput.get(), outputSize, sizeof(char), outputFile); + fclose(outputFile); + outputFile = nullptr; + } + return 0; +} + +mindspore::MSTensor ReadFileToTensor(const std::string &file) { + if (file.empty()) { + std::cout << "Pointer file is nullptr" << std::endl; + return mindspore::MSTensor(); + } + + std::ifstream ifs(file); + if (!ifs.good()) { + std::cout << "File: " << file << " is not exist" << std::endl; + return mindspore::MSTensor(); + } + + if (!ifs.is_open()) { + std::cout << "File: " << file << "open failed" << std::endl; + return mindspore::MSTensor(); + } + + ifs.seekg(0, std::ios::end); + size_t 
size = ifs.tellg(); + mindspore::MSTensor buffer(file, mindspore::DataType::kNumberTypeUInt8, {static_cast<int64_t>(size)}, nullptr, size); + + ifs.seekg(0, std::ios::beg); + ifs.read(reinterpret_cast<char *>(buffer.MutableData()), size); + ifs.close(); + + return buffer; +} + +DIR *OpenDir(std::string_view dirName) { + if (dirName.empty()) { + std::cout << " dirName is null ! " << std::endl; + return nullptr; + } + std::string realPath = RealPath(dirName); + struct stat s; + lstat(realPath.c_str(), &s); + if (!S_ISDIR(s.st_mode)) { + std::cout << "dirName is not a valid directory !" << std::endl; + return nullptr; + } + DIR *dir; + dir = opendir(realPath.c_str()); + if (dir == nullptr) { + std::cout << "Can not open dir " << dirName << std::endl; + return nullptr; + } + std::cout << "Successfully opened the dir " << dirName << std::endl; + return dir; +} + +std::string RealPath(std::string_view path) { + char realPathMem[PATH_MAX] = {0}; + char *realPathRet = nullptr; + realPathRet = realpath(path.data(), realPathMem); + if (realPathRet == nullptr) { + std::cout << "File: " << path << " is not exist."; + return ""; + } + + std::string realPath(realPathMem); + std::cout << path << " realpath is: " << realPath << std::endl; + return realPath; +} + + diff --git a/research/cv/efficientnet-b1/create_imagenet2012_label.py b/research/cv/efficientnet-b1/create_imagenet2012_label.py new file mode 100644 index 0000000000000000000000000000000000000000..88c4571f2d30a7c60d39725b5b3a37e97fc896bb --- /dev/null +++ b/research/cv/efficientnet-b1/create_imagenet2012_label.py @@ -0,0 +1,47 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
def create_label(file_path):
    """Create ``imagenet_label.json`` mapping image file names to class indices.

    Every sub-directory of ``file_path`` is treated as one class. The class index is
    the position of the sub-directory name in sorted order, and every file inside a
    class directory is mapped to that index. Entries of ``file_path`` that are not
    directories are skipped, so that stray files in the dataset root neither raise
    ``NotADirectoryError`` nor shift the class indices.

    The mapping is written to ``imagenet_label.json`` in the current working
    directory.

    Args:
        file_path: Root directory of the ImageNet2012 split (one sub-directory per class).
    """
    print("[WARNING] Create imagenet label. Currently only use for Imagenet2012!")
    class_dirs = sorted(
        entry for entry in os.listdir(file_path)
        if os.path.isdir(os.path.join(file_path, entry))
    )

    total = 0
    img_label = {}
    for index, class_dir in enumerate(class_dirs):
        files = os.listdir(os.path.join(file_path, class_dir))
        img_label.update(dict.fromkeys(files, index))
        total += len(files)

    with open("imagenet_label.json", "w") as label:
        json.dump(img_label, label)

    print("[INFO] Completed! Total {} data.".format(total))
def parse_args():
    """Parse the command-line options of the evaluation script.

    The ``*_url`` options refer to OBS (bucket) locations, the matching ``*_path``
    options to locations on the local machine.

    Returns:
        argparse.Namespace with the attributes ``data_url``, ``data_path``,
        ``train_url``, ``train_path``, ``checkpoint_url``, ``checkpoint_path``,
        ``model``, ``modelarts`` and ``device_target``.
    """
    parser = argparse.ArgumentParser("Evaluate efficientnet.")
    parser.add_argument("--data_url", type=str, default=None,
                        help="Storage path of dataset in OBS.")
    parser.add_argument("--data_path", type=str, default=None,
                        help="Storage path of dataset in offline machine.")
    parser.add_argument("--train_url", type=str, default=None,
                        help="Storage path of outputs in OBS.")
    parser.add_argument("--train_path", type=str, default=None,
                        help="Storage path of outputs in offline machine.")
    parser.add_argument("--checkpoint_url", type=str, default=None,
                        help="Storage path of checkpoint in OBS.")
    # The help text used to repeat the OBS description of --checkpoint_url.
    parser.add_argument("--checkpoint_path", type=str, default=None,
                        help="Storage path of checkpoint in offline machine.")
    parser.add_argument("--model", type=str, default="efficientnet-b1",
                        help="Specify the model to be evaluated.")
    # ast.literal_eval turns the strings "True"/"False" into real booleans.
    parser.add_argument("--modelarts", type=ast.literal_eval, default=False,
                        help="Run on ModelArts or offline machines.")
    parser.add_argument("--device_target", type=str, default="Ascend", choices=["Ascend", "CPU", "GPU"],
                        help="Training platform.")
    args_opt = parser.parse_args()

    return args_opt
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""export efficientnet IR.""" +import argparse +import numpy as np +from mindspore import context, Tensor, load_checkpoint, load_param_into_net, export + +from src.models.effnet import EfficientNet +from src.config import efficientnet_b1_config_ascend as config + + +parser = argparse.ArgumentParser(description="export efficientnet IR.") +parser.add_argument("--checkpoint_path", type=str, required=True, help="Checkpoint file path") +parser.add_argument("--file_name", type=str, default="efficientnet-b1", help="output file name.") +parser.add_argument("--width", type=int, default=240, help="input width") +parser.add_argument("--height", type=int, default=240, help="input height") +parser.add_argument("--file_format", type=str, choices=["AIR", "ONNX", "MINDIR"], default="MINDIR", help="file format") +args_opt = parser.parse_args() + +if __name__ == "__main__": + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + + net = EfficientNet(width_coeff=config.width_coeff, depth_coeff=config.depth_coeff, + dropout_rate=config.dropout_rate, drop_connect_rate=config.drop_connect_rate, + num_classes=config.num_classes) + + param_dict = load_checkpoint(args_opt.checkpoint_path) + load_param_into_net(net, param_dict) + input_shp = [1, 3, args_opt.height, args_opt.width] + input_array = Tensor(np.random.uniform(-1.0, 1.0, size=input_shp).astype(np.float32)) + export(net, input_array, 
def get_result(result_path, label_path):
    """Print the top-1 and top-5 accuracy of the 310 inference results.

    For every entry of ``result_path`` the image id is the part of the entry name
    before ``"_0."``. The file ``<image id>_0.bin`` is read as float32 scores shaped
    ``(batch_size, 1000)`` and compared with the label stored under
    ``<image id>.JPEG`` in the JSON file ``label_path``.

    Args:
        result_path: Directory holding the ``*_0.bin`` result files.
        label_path: JSON file mapping image file names to class indices.
    """
    result_files = os.listdir(result_path)
    with open(label_path, "r") as label_file:
        labels = json.load(label_file)

    top1, top5 = 0, 0
    total_data = len(result_files)
    for entry in result_files:
        img_ids_name = entry.split('_0.')[0]
        label_key = img_ids_name + ".JPEG"
        raw_scores = np.fromfile(os.path.join(result_path, img_ids_name + "_0.bin"), dtype=np.float32)
        scores = raw_scores.reshape(batch_size, 1000)
        for row in scores:
            # Class indices ordered from highest to lowest score.
            ranking = np.argsort(-row, axis=-1)
            if labels[label_key] == ranking[0]:
                top1 += 1
            if labels[label_key] in ranking[:5]:
                top5 += 1
    print(f"Total data: {total_data}, top1 accuracy: {top1/total_data}, top5 accuracy: {top5/total_data}.")
# Evaluate an EfficientNet-B1 checkpoint on a dataset directory.
# The evaluation runs in the background and writes its output to ./eval_log.
# Usage: bash scripts/run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH]

if [ $# != 2 ]
then
    echo "Using: bash scripts/run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH]"
    exit 1
fi

# Echo $1 unchanged when it is absolute, otherwise resolve it against $PWD.
get_real_path(){
    if [ "${1:0:1}" == "/" ]; then
        echo "$1"
    else
        echo "$(realpath -m "$PWD/$1")"
    fi
}

DATA_PATH=$(get_real_path "$1")           # dataset_path
CHECKPOINT_PATH=$(get_real_path "$2")     # checkpoint_path

if [ ! -d "$DATA_PATH" ]
then
    echo "error: DATA_PATH=$DATA_PATH is not a directory."
    exit 1
fi

if [ ! -f "$CHECKPOINT_PATH" ]
then
    # Report the variable that was actually tested; the previous message
    # printed the never-assigned TRAIN_PATH and called the file a directory.
    echo "error: CHECKPOINT_PATH=$CHECKPOINT_PATH is not a file."
    exit 1
fi

python ./eval.py \
    --checkpoint_path="$CHECKPOINT_PATH" \
    --data_path="$DATA_PATH" \
    --model efficientnet-b1 \
    --modelarts False \
    --device_target Ascend > eval_log 2>&1 &
# Build the Ascend 310 inference application, run it on a dataset and compute accuracy.
# Usage: bash scripts/run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [DEVICE_ID]

# DEVICE_ID is documented as optional, so accept two or three arguments
# (the previous check rejected everything except exactly three).
if [[ $# -lt 2 || $# -gt 3 ]]; then
    echo "Usage: bash scripts/run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [DEVICE_ID]
    DEVICE_ID is optional, it can be set by environment variable device_id, otherwise the value is zero"
exit 1
fi

# Echo $1 unchanged when it is absolute, otherwise resolve it against $PWD.
get_real_path(){
    if [ "${1:0:1}" == "/" ]; then
        echo "$1"
    else
        echo "$(realpath -m "$PWD/$1")"
    fi
}

model=$(get_real_path "$1")
data_path=$(get_real_path "$2")
# Precedence: third argument, then the device_id environment variable, then 0.
device_id=${3:-${device_id:-0}}

echo "mindir name: "$model
echo "dataset path: "$data_path
echo "device id: "$device_id

export ASCEND_HOME=/usr/local/Ascend/
if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then
    export PATH=$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/atc/bin:$PATH
    export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/ascend-toolkit/latest/atc/lib64:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
    export TBE_IMPL_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe
    export PYTHONPATH=${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH
    export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp
else
    export PATH=$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH
    export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
    export PYTHONPATH=$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH
    export ASCEND_OPP_PATH=$ASCEND_HOME/opp
fi

# Build the C++ inference binary; the build output goes to build.log in the source dir.
function compile_app()
{
    cd ./ascend310_infer/src/ || exit
    if [ -f "Makefile" ]; then
        make clean
    fi
    sh build.sh &> build.log
}

# Return to the previous directory and run inference into fresh result directories.
function infer()
{
    cd - || exit
    if [ -d result_Files ]; then
        rm -rf ./result_Files
    fi
    if [ -d time_Result ]; then
        rm -rf ./time_Result
    fi
    mkdir result_Files
    mkdir time_Result
    ./ascend310_infer/src/main --mindir_path="$model" --dataset_path="$data_path" --device_id="$device_id" &> infer.log
}

# Create the label file and compute accuracy into acc.log.
# Both steps run in the foreground so that the caller's `$?` check sees a
# real exit status; a backgrounded job always reports success.
function cal_acc()
{
    python ./create_imagenet2012_label.py --img_path="$data_path" || return 1
    python ./postprocess.py --result_path=./result_Files --label_path=./imagenet_label.json &> acc.log
}

compile_app
if [ $? -ne 0 ]; then
    echo "compile app code failed"
    exit 1
fi

infer
if [ $? -ne 0 ]; then
    echo " execute inference failed"
    exit 1
fi

cal_acc
if [ $? -ne 0 ]; then
    echo "calculate accuracy failed"
    exit 1
fi
# Launch single-device training in the background; output goes to ./log.
# Accepted argument layouts:
#   2: [DATASET_PATH] [TRAIN_OUTPUT_PATH]
#   3: ... [EVAL_DATASET_PATH]
#   4: ... [CHECKPOINT_PATH] [BEGIN_EPOCH]
#   5: ... [CHECKPOINT_PATH] [BEGIN_EPOCH] [EVAL_DATASET_PATH]

if [ $# -lt 2 ] || [ $# -gt 5 ]
then
    echo "Using: bash scripts/run_standalone_train.sh [DATASET_PATH] [TRAIN_OUTPUT_PATH]"
    echo "or"
    echo "Using: bash scripts/run_standalone_train.sh [DATASET_PATH] [TRAIN_OUTPUT_PATH] [EVAL_DATASET_PATH]"
    echo "or"
    echo "Using: bash scripts/run_standalone_train.sh [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [BEGIN_EPOCH]"
    echo "or"
    echo "Using: bash scripts/run_standalone_train.sh [DATASET_PATH] [TRAIN_OUTPUT_PATH] [CHECKPOINT_PATH] [BEGIN_EPOCH] [EVAL_DATASET_PATH]"
    exit 1
fi

# Echo $1 unchanged when it is absolute, otherwise resolve it against $PWD.
get_real_path(){
    if [ "${1:0:1}" == "/" ]; then
        echo "$1"
    else
        echo "$(realpath -m "$PWD/$1")"
    fi
}

DATA_PATH=$(get_real_path "$1")     # dataset_path
TRAIN_PATH=$(get_real_path "$2")    # train_output_path

if [ ! -d "$DATA_PATH" ]
then
    echo "error: DATA_PATH=$DATA_PATH is not a directory."
    exit 1
fi

# Always start from an empty output directory. The path is quoted so that a
# value containing spaces or glob characters cannot make `rm -rf` hit other paths.
if [ -d "$TRAIN_PATH" ]
then
    rm -rf "$TRAIN_PATH"
fi
mkdir "$TRAIN_PATH"

# Start from empty values so nothing leaks in from the caller's environment.
EVAL_DATASET_PATH=""
CKPT_PATH=""
BEGIN_EPOCH=""

case $# in
    3)
        EVAL_DATASET_PATH=$(get_real_path "$3")     # eval_dataset_path
        ;;
    4)
        CKPT_PATH=$(get_real_path "$3")             # checkpoint_path
        BEGIN_EPOCH=$4                              # begin epoch
        ;;
    5)
        CKPT_PATH=$(get_real_path "$3")             # checkpoint_path
        BEGIN_EPOCH=$4                              # begin epoch
        EVAL_DATASET_PATH=$(get_real_path "$5")     # eval_dataset_path
        ;;
esac

if [ -n "$EVAL_DATASET_PATH" ] && [ ! -d "$EVAL_DATASET_PATH" ]
then
    echo "error: EVAL_DATASET_PATH=$EVAL_DATASET_PATH is not a directory."
    exit 1
fi

if [ -n "$CKPT_PATH" ] && [ ! -f "$CKPT_PATH" ]
then
    echo "error: CKPT_PATH=$CKPT_PATH is not a file."
    exit 1
fi

# Build the train.py argument list once instead of repeating the command per layout.
TRAIN_ARGS=(--data_path="$DATA_PATH" --train_path="$TRAIN_PATH")
if [ -n "$EVAL_DATASET_PATH" ]
then
    TRAIN_ARGS+=(--eval_data_path="$EVAL_DATASET_PATH")
fi
if [ -n "$CKPT_PATH" ]
then
    TRAIN_ARGS+=(--checkpoint_path="$CKPT_PATH" --begin_epoch="$BEGIN_EPOCH")
fi

python train.py "${TRAIN_ARGS[@]}" &> log &
class EvalCallBack(Callback):
    """
    Evaluate the model every ``eval_interval`` epochs and keep the best checkpoint.

    Args:
        model: model to be evaluated
        eval_dataset: eval dataset
        eval_interval (int): epoch interval for evaluation
        save_path (str): directory where ``best-<device id>.ckpt`` is written
            whenever the top-5 accuracy improves. Nothing is saved if None.

    Returns:
        None
    """
    def __init__(self, model, eval_dataset, eval_interval, save_path=None):
        super(EvalCallBack, self).__init__()
        self.model = model
        self.eval_dataset = eval_dataset
        self.eval_interval = eval_interval
        self.save_path = save_path
        self.best = 0

    def epoch_end(self, run_context):
        """Evaluate on interval epochs, save on a new best top-5, and print the metrics."""
        cb_param = run_context.original_args()
        cur_epoch = cb_param.cur_epoch_num
        network = cb_param.train_network
        if cur_epoch % self.eval_interval == 0:
            # Default to device 0: int(None) raises TypeError when DEVICE_ID is unset.
            device_id = int(os.getenv("DEVICE_ID", "0"))
            metrics = self.model.eval(self.eval_dataset, dataset_sink_mode=False)
            if metrics['Top5-Acc'] > self.best:
                self.best = metrics['Top5-Acc']
                if self.save_path:
                    file_path = os.path.join(self.save_path, f"best-{device_id}.ckpt")
                    save_checkpoint(network, file_path)
            print("=== epoch: {:3d}, device id: {:2d}, best top5: {:1.4f}, top1-acc: {:1.4f}, top5-acc: {:1.4f}".format(
                cur_epoch, device_id, self.best, metrics['Top1-Acc'], metrics['Top5-Acc']), flush=True)


class TimeLossMonitor(Callback):
    """
    Monitor loss and time.

    Args:
        lr_init (numpy array): per-step learning rates, indexed by global step.
            May be None, in which case the printed learning rate is ``nan``.

    Returns:
        None

    Examples:
        >>> TimeLossMonitor(lr_init=Tensor([0.05]*100).asnumpy())
    """

    def __init__(self, lr_init=None):
        super(TimeLossMonitor, self).__init__()
        self.lr_init = lr_init
        # len(None) would raise TypeError for the documented default.
        self.lr_init_len = 0 if lr_init is None else len(lr_init)
        # Initialise state here so epoch_end/step_end never see missing attributes.
        self.losses = []
        self.epoch_time = time.time()
        self.step_time = time.time()

    def epoch_begin(self, run_context):
        """Reset the loss buffer and start the epoch timer."""
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        """Print epoch time, per-step time, mean loss and current learning rate."""
        cb_params = run_context.original_args()

        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        if self.lr_init_len:
            # Clamp so a step count beyond the schedule reports the last rate
            # instead of raising IndexError.
            cur_lr = self.lr_init[min(cb_params.cur_step_num, self.lr_init_len) - 1]
        else:
            cur_lr = float("nan")
        print("epoch: [{:3d}/{:3d}], epoch time: {:5.3f}, steps: {:5d}, "
              "per step time: {:5.3f}, avg loss: {:5.3f}, lr:[{:5.3f}]".format(
                  cb_params.cur_epoch_num, cb_params.epoch_num, epoch_mseconds, cb_params.batch_num,
                  per_step_mseconds, np.mean(self.losses), cur_lr), flush=True)

    def step_begin(self, run_context):
        """Record the step start time."""
        self.step_time = time.time()

    def step_end(self, run_context):
        """Append the mean loss of the finished step to the epoch buffer."""
        cb_params = run_context.original_args()
        step_loss = cb_params.net_outputs

        # The network may return (loss, ...) rather than a bare loss tensor.
        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
def show_config(cfg):
    """Print the configuration as indented JSON framed by two separator lines."""
    header = "==================================================\n"
    footer = "\n=================================================="
    body = json.dumps(cfg, ensure_ascii=False, indent=2)
    print(header, body, footer, flush=True)


def organize_configuration(cfg, args):
    """Copy every parsed command-line argument into ``cfg``, overwriting existing keys."""
    for name, value in vars(args).items():
        cfg[name] = value
def create_imagenet(dataset_path, do_train, repeat_num=1, input_size=224,
                    batch_size=32, target="Ascend", distribute=False,
                    enable_cache=False, cache_session_id=None):
    """
    Create a train or eval imagenet2012 dataset for EfficientNet.

    Args:
        dataset_path(string): the path of dataset.
        do_train(bool): whether dataset is used for train or eval.
        repeat_num(int): the repeat times of dataset. Default: 1
        input_size(int or list): the model input size. Default: 224
        batch_size(int): the batch size of dataset. Default: 32
        target(str): the device target. Default: Ascend
        distribute(bool): data for distribute or not. Only consulted when
            target is not Ascend. Default: False
        enable_cache(bool): whether tensor caching service is used for eval. Default: False
        cache_session_id(int): If enable_cache, cache session_id need to be provided. Default: None

    Returns:
        dataset

    Raises:
        ValueError: if the cache is enabled for eval without a cache_session_id.
    """
    if target == "Ascend":
        device_num, rank_id = _get_rank_info()
    else:
        if distribute:
            init()
            rank_id = get_rank()
            device_num = get_group_size()
        else:
            rank_id = 0
            device_num = 1

    ds.config.set_prefetch_size(64)
    if device_num == 1:
        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True)
    else:
        # Each rank reads only its own shard of the dataset.
        data_set = ds.ImageFolderDataset(dataset_path, num_parallel_workers=12, shuffle=True,
                                         num_shards=device_num, shard_id=rank_id)

    # Normalisation constants are scaled by 255 because they are applied to raw pixel values.
    mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
    std = [0.229 * 255, 0.224 * 255, 0.225 * 255]

    # define map operations
    if do_train:
        trans = [
            C.RandomCropDecodeResize(input_size, scale=(0.08, 1.0), ratio=(0.75, 1.333)),
            C.RandomHorizontalFlip(prob=0.5),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]
    else:
        trans = [
            C.Decode(),
            C.Resize(256),
            C.CenterCrop(input_size),
            C.Normalize(mean=mean, std=std),
            C.HWC2CHW()
        ]

    type_cast_op = C2.TypeCast(mstype.int32)

    data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=12)
    # only enable cache for eval
    if do_train:
        enable_cache = False
    if enable_cache:
        if not cache_session_id:
            raise ValueError("A cache session_id must be provided to use cache.")
        eval_cache = ds.DatasetCache(session_id=int(cache_session_id), size=0)
        data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12,
                                cache=eval_cache)
    else:
        data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=12)

    # apply batch operations
    data_set = data_set.batch(batch_size, drop_remainder=True)

    # apply dataset repeat operation
    data_set = data_set.repeat(repeat_num)

    return data_set


def _get_rank_info(device_target="Ascend"):
    """
    Get rank size and rank id.

    Args:
        device_target(str): kept for backward compatibility; the result does
            not depend on it. Default: Ascend

    Returns:
        Tuple ``(rank_size, rank_id)``: the communication group size and this
        process's rank when the RANK_SIZE environment variable is greater
        than 1, otherwise ``(1, 0)``.
    """
    rank_size = int(os.environ.get("RANK_SIZE", 1))
    # The former per-target branches were identical, so one check is enough.
    if rank_size > 1:
        return get_group_size(), get_rank()
    return 1, 0
class LabelSmoothingCrossEntropy(_Loss):
    """Softmax cross entropy against label-smoothed one-hot targets, averaged over axis 0."""
    def __init__(self, smooth_factor=0.1, num_classes=1000):
        super().__init__()
        # Value given to the true class and to each of the other classes.
        positive = 1.0 - smooth_factor
        negative = 1.0 * smooth_factor / (num_classes - 1)
        self.onehot = P.OneHot()
        self.on_value = Tensor(positive, mstype.float32)
        self.off_value = Tensor(negative, mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)

    def construct(self, logits, label):
        """Return the mean loss of ``logits`` against the smoothed ``label``."""
        depth = F.shape(logits)[1]
        smoothed_target = self.onehot(label, depth, self.on_value, self.off_value)
        per_sample = self.ce(logits, smoothed_target)
        return self.mean(per_sample, 0)


class CrossEntropySmooth(_Loss):
    """Softmax cross entropy with optional label smoothing and configurable reduction."""
    def __init__(self, sparse=True, reduction='mean', smooth_factor=0., num_classes=1000):
        super().__init__()
        positive = 1.0 - smooth_factor
        negative = 1.0 * smooth_factor / (num_classes - 1)
        self.onehot = P.OneHot()
        self.sparse = sparse
        self.on_value = Tensor(positive, mstype.float32)
        self.off_value = Tensor(negative, mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits(reduction=reduction)

    def construct(self, logit, label):
        """Return the loss; sparse labels are first expanded to smoothed one-hot targets."""
        if self.sparse:
            depth = F.shape(logit)[1]
            label = self.onehot(label, depth, self.on_value, self.off_value)
        return self.ce(logit, label)
# Number of sync_data calls made by this process; gives each call its own lock file.
_global_sync_count = 0


def get_device_id():
    """Return the DEVICE_ID environment variable as an int (0 when unset)."""
    device_id = os.getenv('DEVICE_ID', '0')
    return int(device_id)


def get_device_num():
    """Return the RANK_SIZE environment variable as an int (1 when unset)."""
    device_num = os.getenv('RANK_SIZE', '1')
    return int(device_num)


def get_rank_id():
    """Return the RANK_ID environment variable as an int (0 when unset)."""
    global_rank_id = os.getenv('RANK_ID', '0')
    return int(global_rank_id)


def get_job_id():
    """Return the JOB_ID environment variable, or "default" when it is unset or empty."""
    # os.getenv returns None for an unset variable, which a `!= ""` comparison
    # lets through unchanged; `or` covers both None and the empty string.
    return os.getenv('JOB_ID') or "default"


def sync_data(from_path, to_path):
    """
    Download data from remote obs to local directory if the first url is remote url and the second one is local path
    Upload data from local directory to remote obs in contrast.

    One process per server performs the copy and then creates a lock file;
    every process waits until that lock file exists.
    """
    import moxing as mox
    global _global_sync_count
    sync_lock = "/tmp/copy_sync.lock" + str(_global_sync_count)
    _global_sync_count += 1

    # Each server contains 8 devices as most.
    if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
        print("from path: ", from_path, flush=True)
        print("to path: ", to_path, flush=True)
        mox.file.copy_parallel(from_path, to_path)
        print("===finish data synchronization===", flush=True)
        try:
            os.mknod(sync_lock)
        except IOError:
            # Best effort: the wait loop below only needs the file to exist.
            pass
        print("===save flag===", flush=True)

    # Block until the copying process has published the lock file.
    while True:
        if os.path.exists(sync_lock):
            break
        time.sleep(1)

    print("Finish sync data from {} to {}.".format(from_path, to_path), flush=True)


def moxing_wrapper(config, pre_process=None, post_process=None):
    """
    Moxing wrapper to download dataset and upload outputs.

    Args:
        config: configuration object; reads ``modelarts`` and the ``*_url``
            fields and overwrites the matching ``*_path`` fields.
        pre_process (callable): called after the downloads, before the wrapped function.
        post_process (callable): called after the wrapped function, before the upload.

    Returns:
        A decorator. The decorated function returns whatever the wrapped function returns.
    """
    def wrapper(run_func):
        """Moxing wrapper."""
        @functools.wraps(run_func)
        def wrapped_func(*args, **kwargs):
            """Moxing wrapper function."""
            # Download data from data_url
            if config.modelarts:
                if config.data_url:
                    config.data_path = "/cache/train_data_path"
                    sync_data(config.data_url, config.data_path)
                    print("Dataset downloaded: ", os.listdir(config.data_path), flush=True)
                if config.checkpoint_url:
                    config.checkpoint_path = "/cache/" + config.checkpoint_url.split("/")[-1]
                    sync_data(config.checkpoint_url, config.checkpoint_path)
                    print("Preload downloaded: ", config.checkpoint_path, flush=True)
                if config.train_url:
                    config.train_path = "/cache/train_path"
                    sync_data(config.train_url, config.train_path)
                    print("Workspace downloaded: ", os.listdir(config.train_path), flush=True)
                if config.eval_data_url:
                    config.eval_data_path = "/cache/eval_data_path"
                    sync_data(config.eval_data_url, config.eval_data_path)
                    # This log line used to say "Workspace downloaded" (copy-paste slip).
                    print("Eval dataset downloaded: ", os.listdir(config.eval_data_path), flush=True)

                # NOTE(review): the diff this was recovered from lost its indentation; this
                # setup is placed inside the modelarts branch because train_path may be None
                # for local runs - confirm against the original file.
                context.set_context(save_graphs_path=os.path.join(config.train_path, str(get_rank_id())))
                config.device_num = get_device_num()
                config.device_id = get_device_id()
                if not os.path.exists(config.train_path):
                    os.makedirs(config.train_path)

                if pre_process:
                    pre_process()

            show_config(config)
            # Keep the result so the decorator no longer swallows the return value.
            result = run_func(*args, **kwargs)

            # Upload data to train_url
            if config.modelarts:
                if post_process:
                    post_process()

                if config.train_url:
                    print("Start to copy output directory", flush=True)
                    sync_data(config.train_path, config.train_url)
            return result
        return wrapped_func
    return wrapper
class MBConv(nn.Cell):
    """Mobile inverted bottleneck: 1x1 expand -> depthwise conv -> SE -> 1x1 project.

    Args:
        in_ (int): input channels.
        out_ (int): output channels.
        expand (int): expansion factor; the middle width is ``in_ * expand``.
        kernel_size (int): depthwise convolution kernel size.
        stride (int): depthwise convolution stride.
        skip (bool): allow the residual connection; it is only used when
            ``stride == 1`` and ``in_ == out_``.
        se_ratio (float): the SE squeeze width is ``int(in_ * se_ratio)``.
        dc_ratio (float): ratio handed to DropConnect. Default: 0.2
    """
    def __init__(self, in_, out_, expand,
                 kernel_size, stride, skip,
                 se_ratio, dc_ratio=0.2):
        super().__init__()
        mid_ = in_ * expand
        self.expand = expand
        # Built unconditionally, but construct() bypasses it when expand == 1.
        self.expand_conv = conv_bn_act(in_, mid_, kernel_size=1, bias=False)

        # groups == channels makes this a depthwise convolution.
        self.depth_wise_conv = conv_bn_act(mid_, mid_,
                                           kernel_size=kernel_size, stride=stride,
                                           groups=mid_, bias=False)

        self.se = SEModule(mid_, int(in_ * se_ratio))

        # Linear projection: convolution + batch norm, no activation.
        self.project_conv = nn.SequentialCell([
            Convolution(mid_, out_, kernel_size=1, stride=1, bias=False),
            nn.BatchNorm2d(num_features=out_, eps=0.001, momentum=0.99)
        ])
        self.skip = skip and (stride == 1) and (in_ == out_)

        # DropConnect
        # NOTE(review): this cell is created here but construct() never calls it.
        self.dropconnect = DropConnect(dc_ratio)

    def construct(self, inputs):
        """Apply the block; adds ``inputs`` back when the skip connection is active."""
        if self.expand != 1:
            expand = self.expand_conv(inputs)
        else:
            expand = inputs
        x = self.depth_wise_conv(expand)
        x = self.se(x)
        x = self.project_conv(x)
        if self.skip:
            x = x + inputs
        return x


class MBBlock(nn.Cell):
    """A stage of ``num_repeat`` MBConv layers.

    The first layer maps ``in_`` to ``out_`` channels with the given stride;
    the remaining layers keep ``out_`` channels and use stride 1.
    """
    def __init__(self, in_, out_, expand, kernel, stride, num_repeat, skip, se_ratio, drop_connect_ratio=0.2):
        super().__init__()
        layers = [MBConv(in_, out_, expand, kernel, stride, skip, se_ratio, drop_connect_ratio)]
        for _ in range(1, num_repeat):
            layers.append(MBConv(out_, out_, expand, kernel, 1, skip, se_ratio, drop_connect_ratio))
        self.layers = nn.SequentialCell([*layers])

    def construct(self, x):
        """Run the layers in sequence."""
        return self.layers(x)


class EfficientNet(nn.Cell):
    """EfficientNet model: stem, seven MBBlock stages and a classification head.

    Args:
        width_coeff (float): channel multiplier; a falsy value disables channel scaling.
        depth_coeff (float): multiplier for the per-stage repeat counts.
        depth_div (int): scaled channel counts are rounded to a multiple of this. Default: 8
        min_depth (int): lower bound for scaled channel counts; defaults to ``depth_div``.
        dropout_rate (float): dropout rate of the head. Default: 0.2
        drop_connect_rate (float): ratio forwarded to every MBBlock. Default: 0.2
        num_classes (int): size of the final dense layer. Default: 1000
    """
    def __init__(self, width_coeff, depth_coeff,
                 depth_div=8, min_depth=None,
                 dropout_rate=0.2, drop_connect_rate=0.2,
                 num_classes=1000):
        super().__init__()
        min_depth = min_depth or depth_div
        # Presumably nn.Dropout takes a keep probability in the MindSpore version
        # targeted here - TODO confirm before changing versions.
        dropout_rate = 1 - dropout_rate

        def renew_ch(x):
            # Scale a channel count by width_coeff and round to a multiple of depth_div,
            # bumping up one step if rounding lost more than 10%.
            if not width_coeff:
                return x

            x *= width_coeff
            new_x = max(min_depth, int(x + depth_div / 2) // depth_div * depth_div)
            if new_x < 0.9 * x:
                new_x += depth_div
            return int(new_x)

        def renew_repeat(x):
            # Scale a repeat count by depth_coeff, rounding up.
            return int(math.ceil(x * depth_coeff))

        self.stem = conv_bn_act(3, renew_ch(32), kernel_size=3, stride=2, bias=False)

        self.blocks = nn.SequentialCell([
            # columns: input channel, output channel, expand, kernel, stride, repeat, skip, se_ratio, drop_connect
            MBBlock(renew_ch(32), renew_ch(16), 1, 3, 1, renew_repeat(1), True, 0.25, drop_connect_rate),
            MBBlock(renew_ch(16), renew_ch(24), 6, 3, 2, renew_repeat(2), True, 0.25, drop_connect_rate),
            MBBlock(renew_ch(24), renew_ch(40), 6, 5, 2, renew_repeat(2), True, 0.25, drop_connect_rate),
            MBBlock(renew_ch(40), renew_ch(80), 6, 3, 2, renew_repeat(3), True, 0.25, drop_connect_rate),
            MBBlock(renew_ch(80), renew_ch(112), 6, 5, 1, renew_repeat(3), True, 0.25, drop_connect_rate),
            MBBlock(renew_ch(112), renew_ch(192), 6, 5, 2, renew_repeat(4), True, 0.25, drop_connect_rate),
            MBBlock(renew_ch(192), renew_ch(320), 6, 3, 1, renew_repeat(1), True, 0.25, drop_connect_rate)
        ])

        self.head = nn.SequentialCell([
            # The conv/bn/activation cells are unpacked so the head is one flat sequence.
            *conv_bn_act(renew_ch(320), renew_ch(1280), kernel_size=1, bias=False),
            AdaptiveAvgPool2d(),
            nn.Dropout(dropout_rate),
            Flatten(),
            nn.Dense(renew_ch(1280), num_classes)
        ])

    def construct(self, inputs):
        """Return the head output for ``inputs`` after the stem and the blocks."""
        stem = self.stem(inputs)
        x = self.blocks(stem)
        x = self.head(x)
        return x
class Convolution(nn.Cell):
    """2-D convolution with an optional, separately stored bias.

    The wrapped ``nn.Conv2d`` is always created with ``has_bias=False``; when
    ``bias`` is True a ``(1, out_, 1, 1)`` parameter is added to its output.
    """
    def __init__(self, in_, out_, kernel_size, stride=1, pad_mode="same", padding=0, groups=1, bias=False):
        super(Convolution, self).__init__()
        self.out = out_
        self.is_bias = bias
        # Created even when bias is False; kept so the parameter set stays unchanged.
        self.bias = Parameter(Tensor(np.zeros((1, self.out, 1, 1)), mindspore.float32))
        self.conv = nn.Conv2d(in_, out_, kernel_size, stride,
                              pad_mode=pad_mode, padding=padding, group=groups, has_bias=False)

    def construct(self, x):
        """Convolve ``x`` and add the bias when enabled."""
        if self.is_bias:
            x = self.conv(x) + self.bias
        else:
            x = self.conv(x)
        return x


def conv_bn_act(in_, out_, kernel_size,
                stride=1, groups=1, bias=True,
                eps=1e-3, momentum=0.01):
    """Return a SequentialCell of Convolution, BatchNorm2d and Swish.

    ``momentum`` is passed to BatchNorm2d as ``1.0 - momentum``.
    """
    return nn.SequentialCell([
        Convolution(in_, out_, kernel_size, stride, groups=groups, bias=bias),
        nn.BatchNorm2d(num_features=out_, eps=eps, momentum=1.0 - momentum),
        Swish()
    ])


class Swish(nn.Cell):
    """Swish activation: ``x * sigmoid(x)``."""
    def construct(self, x):
        sigmoid = P.Sigmoid()
        x = x * sigmoid(x)
        return x


class Flatten(nn.Cell):
    """Reshape the input to ``(first dimension, -1)``."""
    def construct(self, x):
        shape = P.Shape()
        reshape = P.Reshape()
        x = reshape(x, (shape(x)[0], -1))
        return x


class SEModule(nn.Cell):
    """Squeeze-and-excitation: scales the input by a sigmoid gate computed from its spatial mean.

    Args:
        in_ (int): channels of the input (and of the gate).
        squeeze_ch (int): channels of the bottleneck between the two 1x1 convolutions.
    """
    def __init__(self, in_, squeeze_ch):
        super().__init__()
        self.se = nn.SequentialCell([
            AdaptiveAvgPool2d(),
            Convolution(in_, squeeze_ch, kernel_size=1, stride=1, pad_mode='pad', padding=0, bias=True),
            Swish(),
            Convolution(squeeze_ch, in_, kernel_size=1, stride=1, pad_mode='pad', padding=0, bias=True),
        ])

    def construct(self, x):
        """Return ``x`` multiplied by the sigmoid of the squeeze/excite branch."""
        sigmoid = P.Sigmoid()
        x = x * sigmoid(self.se(x))
        return x


class AdaptiveAvgPool2d(nn.Cell):
    """Mean over axes 2 and 3, keeping the reduced dimensions."""
    def __init__(self):
        super(AdaptiveAvgPool2d, self).__init__()
        self.mean = P.ReduceMean(True)

    def construct(self, x):
        x = self.mean(x, (2, 3))
        return x

class DropConnect(nn.Cell):
    """DropConnect: during training, zero whole samples and rescale the rest.

    Args:
        ratio (float): drop probability; ``1.0 - ratio`` is stored as the keep probability.
    """
    def __init__(self, ratio):
        super().__init__()
        self.ratio = 1.0 - ratio
        self.shape = P.Shape()
        self.uniform = P.UniformReal(seed=2)
        self.floor = P.Floor()

    def construct(self, x):
        """Return ``x`` unchanged in eval mode, otherwise the masked and rescaled tensor."""
        if not self.training:
            return x

        # One mask value per sample, broadcast over the remaining three axes.
        # The previous code took `.shape` of the float keep probability, which cannot work.
        shape = (self.shape(x)[0], 1, 1, 1)
        # keep_prob + U[0, 1) floors to 1 with probability keep_prob and to 0 otherwise.
        # The former standard-normal draw discarded keep_prob and floored to arbitrary integers.
        random_tensor = self.ratio + self.uniform(shape)
        x = x / self.ratio * self.floor(random_tensor)
        return x
# ============================================================================
"""tool functions."""
import math
import numpy as np

import mindspore.nn as nn
from mindspore import Tensor
from mindspore.common import initializer


# Functions for params initialization.
def calculate_fan_in_and_fan_out(shape):
    """
    calculate fan_in and fan_out

    The receptive field size is the product of every dimension after the
    first two, so 2-D (dense) shapes get a field size of 1 and 4-D (conv)
    shapes get ``shape[2] * shape[3]``. Other ranks no longer index past the
    end of ``shape``.

    Args:
        shape (tuple): input shape, laid out as ``(out, in, *kernel_dims)``.

    Returns:
        Tuple, a tuple with two elements, the first element is `n_in` and the second element is `n_out`.

    Raises:
        ValueError: If ``shape`` has fewer than 2 dimensions.
    """
    dimensions = len(shape)
    if dimensions < 2:
        raise ValueError("Fan in and fan out can not be computed for tensor with fewer than 2 dimensions")

    receptive_field_size = 1
    for extent in shape[2:]:
        receptive_field_size *= extent

    fan_in = shape[1] * receptive_field_size
    fan_out = shape[0] * receptive_field_size
    return fan_in, fan_out


def get_conv_bias(cell):
    """Bias initializer for conv.

    Args:
        cell: A cell exposing ``weight`` and ``bias`` parameters.

    Returns:
        A ``Uniform(scale=1 / sqrt(fan_in))`` initializer with the shape and
        dtype of ``cell.bias``, where ``fan_in`` comes from ``cell.weight.shape``.
    """
    # Only the weight's shape is needed, so no temporary weight tensor is built.
    fan_in, _ = calculate_fan_in_and_fan_out(cell.weight.shape)
    bound = 1 / math.sqrt(fan_in)
    return initializer.initializer(initializer.Uniform(scale=bound),
                                   cell.bias.shape, cell.bias.dtype)


def params_initializer(config, net):
    """Model parameter initializer.

    Walks every cell of ``net`` and re-initialises Conv2d, BatchNorm2d and
    Dense parameters in place.

    Args:
        config: Object providing ``conv_init`` ("XavierUniform" or
            "TruncatedNormal") and ``dense_init`` ("TruncatedNormal" or
            "RandomNormal"). Any other value leaves the corresponding weights
            untouched.
        net (nn.Cell): Network whose parameters are initialised.
    """
    for _, cell in net.cells_and_names():
        if isinstance(cell, nn.Conv2d):
            if config.conv_init == "XavierUniform":
                cell.weight.set_data(initializer.initializer(initializer.XavierUniform(),
                                                             cell.weight.shape,
                                                             cell.weight.dtype))
            elif config.conv_init == "TruncatedNormal":
                cell.weight.set_data(initializer.initializer(initializer.TruncatedNormal(sigma=0.21),
                                                             cell.weight.shape,
                                                             cell.weight.dtype))
            if cell.has_bias:
                cell.bias.set_data(get_conv_bias(cell))

        if isinstance(cell, nn.BatchNorm2d):
            # gamma is set to ones and beta to zeros.
            cell.gamma.set_data(initializer.initializer(1,
                                                        cell.gamma.shape,
                                                        cell.gamma.dtype))
            cell.beta.set_data(initializer.initializer(0,
                                                       cell.beta.shape,
                                                       cell.beta.dtype))

        if isinstance(cell, nn.Dense):
            if config.dense_init == "TruncatedNormal":
                cell.weight.set_data(initializer.initializer(initializer.TruncatedNormal(sigma=0.21),
                                                             cell.weight.shape,
                                                             cell.weight.dtype))
            elif config.dense_init == "RandomNormal":
                # Drawn directly in the (out, in) layout of the weight.
                weight = np.random.normal(loc=0, scale=0.01,
                                          size=(cell.out_channels, cell.in_channels))
                cell.weight.set_data(Tensor(weight, dtype=cell.weight.dtype))


# Functions for learning rate generation.
def get_step_lr(base_lr, total_epoch, spe, milestone=None, step=None, gamma=0.1):
    """Get learning rates decay by steps.

    Args:
        base_lr (float): Learning rate of the first interval.
        total_epoch (int): Total number of epochs.
        spe (int): Steps per epoch; milestones are multiplied by it.
        milestone (list): Epochs at which an interval ends. When empty or
            None, milestones are generated every ``step`` epochs and
            ``total_epoch`` is appended.
        step (int): Epoch distance between generated milestones; only used
            when ``milestone`` is not given.
        gamma (float): Factor applied to the rate for each later interval.

    Returns:
        The result of ``nn.piecewise_constant_lr`` for the computed
        milestones and rates.

    Raises:
        TypeError: If ``milestone`` is given but is not a list, or if
            ``step`` is needed but is not an int.
        ValueError: If ``step`` is needed but is not positive (the milestone
            generation would never terminate).
    """
    if milestone:
        if not isinstance(milestone, list):
            raise TypeError("milestone must be a list, got {}".format(type(milestone).__name__))
    else:
        if not isinstance(step, int):
            raise TypeError("step must be an int when milestone is not given, got {}"
                            .format(type(step).__name__))
        if step <= 0:
            raise ValueError("step must be positive, got {}".format(step))
        milestone = []
        k = step
        while k < total_epoch:
            milestone.append(k)
            k += step
        milestone.append(total_epoch)

    mss = [stone * spe for stone in milestone]
    lrs = [base_lr * (gamma ** i) for i in range(len(mss))]
    learning_rate = nn.piecewise_constant_lr(mss, lrs)
    return learning_rate


def get_linear_lr(base_lr, total_epoch, spe, lr_init, lr_end, warmup_epoch=0):
    """Get learning rates decay in linear.

    Args:
        base_lr (float): Rate reached at the end of warm-up.
        total_epoch (int): Total number of epochs.
        spe (int): Steps per epoch.
        lr_init (float): Rate at step 0 when warm-up is used.
        lr_end (float): Rate the linear decay heads towards.
        warmup_epoch (int): Number of warm-up epochs. Default: 0.

    Returns:
        list, one learning rate per step (``spe * total_epoch`` entries).
    """
    total_steps = spe * total_epoch
    warmup_steps = spe * warmup_epoch
    # Only evaluated for steps past warm-up, so it is non-zero whenever used.
    decay_steps = total_steps - warmup_steps

    lr_each_step = []
    for i in range(total_steps):
        if i < warmup_steps:
            lr = lr_init + (base_lr - lr_init) * i / warmup_steps
        else:
            lr = base_lr - (base_lr - lr_end) * (i - warmup_steps) / decay_steps
        lr_each_step.append(lr)
    return lr_each_step
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""train efficientnet."""
import os
import ast
import argparse

import mindspore.nn as nn
from mindspore import context
from mindspore.train.model import Model, ParallelMode
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.communication.management import init
from mindspore.train.loss_scale_manager import FixedLossScaleManager
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig
from mindspore.common import dtype as mstype
from mindspore.common import set_seed

from src.callback import TimeLossMonitor, EvalCallBack
from src.utils import get_linear_lr, params_initializer
from src.models.effnet import EfficientNet
from src.dataset import create_imagenet
from src.loss import CrossEntropySmooth
from src.config import efficientnet_b1_config_ascend as config
from src.config import organize_configuration
from src.model_utils.moxing_adapter import moxing_wrapper


set_seed(1)


def parse_args():
    """Get parameters from command line.

    Returns:
        argparse.Namespace holding the path, model, platform and training
        options defined below.
    """
    parser = argparse.ArgumentParser(description="image classification training")
    add = parser.add_argument

    # Path parameter: every option is a string defaulting to None.
    path_options = (
        ("--data_url", "Dataset path"),
        ("--data_path", "Dataset path"),
        ("--train_url", "Train output path"),
        ("--train_path", "Train output path"),
        ("--checkpoint_url", "resume training with existed checkpoint"),
        ("--checkpoint_path", "resume training with existed checkpoint"),
        ("--eval_data_url", "Eval dataset path"),
        ("--eval_data_path", "Eval dataset path"),
    )
    for flag, text in path_options:
        add(flag, type=str, default=None, help=text)
    add("--eval_interval", type=int, default=10, help="Evaluate frequency.")

    # Model parameter
    add("--model", type=str, default="efficientnet-b1")

    # Platform parameter
    add("--modelarts", type=ast.literal_eval, default=False, help="Run mode")
    add("--run_distribute", type=ast.literal_eval, default=False, help="Run distribute")
    add("--device_target", type=str, default="Ascend", choices=("Ascend", "GPU"), help="run platform")

    # Train parameter
    add("--begin_epoch", type=int, default=0, help="Begin epoch")
    add("--end_epoch", type=int, default=350, help="End epoch")
    add("--total_epoch", type=int, default=350, help="total epochs")
    add("--batchsize", type=int, default=128)
    add("--optimizer", type=str, default="rmsprop")
    add("--lr", type=float, default=0.15, help="base lr")
    add("--lr_scheme", type=str, default="linear")
    add("--lr_end", type=float, default=5e-5)

    return parser.parse_args()


def _configure_parallel():
    """Set up data-parallel training when requested and return the device id.

    Returns 0 for single-card runs; otherwise initialises communication and
    reads the id from ``DEVICE_ID`` and the card count from ``RANK_SIZE``.
    """
    if not config.run_distribute:
        return 0

    init()
    device_id = int(os.getenv("DEVICE_ID"))
    device_num = int(os.getenv("RANK_SIZE"))
    context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                      gradients_mean=True,
                                      device_num=device_num)
    return device_id


def _build_callbacks(model, lr, step_size, device_id, eval_data):
    """Assemble the monitor, optional checkpoint and optional eval callbacks."""
    callbacks = [TimeLossMonitor(lr_init=lr)]

    # Save-checkpoint callback
    if config.save_ckpt:
        ckpt_config = CheckpointConfig(save_checkpoint_steps=step_size*config.save_checkpoint_epochs,
                                       keep_checkpoint_max=config.keep_checkpoint_max)
        callbacks.append(ModelCheckpoint(prefix=f"{config.model}-{config.dataset}",
                                         directory=os.path.join(config.train_path, f"card{device_id}"),
                                         config=ckpt_config))

    if config.eval_data_path:
        callbacks.append(EvalCallBack(model, eval_data, config.eval_interval,
                                      save_path=config.train_path))
    return callbacks


@moxing_wrapper(config)
def main():
    """Build the datasets, network, loss, optimizer and callbacks, then train.

    All settings are read from the module-level ``config`` object.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
    device_id = _configure_parallel()
    print("Generating {}...".format(config.model), flush=True)

    # Training data, plus evaluation data when a path was supplied.
    dataset = create_imagenet(dataset_path=config.data_path, do_train=True, repeat_num=1,
                              input_size=config.input_size, batch_size=config.batchsize,
                              target=config.device_target, distribute=config.run_distribute)
    step_size = dataset.get_dataset_size()
    eval_data = None
    if config.eval_data_path:
        eval_data = create_imagenet(dataset_path=config.eval_data_path, do_train=False, repeat_num=1,
                                    input_size=config.input_size, batch_size=config.batchsize,
                                    target=config.device_target, distribute=False)

    # Network: load weights from a checkpoint if given, else initialise them.
    net = EfficientNet(width_coeff=config.width_coeff, depth_coeff=config.depth_coeff,
                       dropout_rate=config.dropout_rate, drop_connect_rate=config.drop_connect_rate,
                       num_classes=config.num_classes)
    if config.checkpoint_path:
        load_param_into_net(net, load_checkpoint(config.checkpoint_path))
    else:
        params_initializer(config, net)
    net.to_float(mstype.float16)
    net.set_train(True)

    # define loss
    if not config.use_label_smooth:
        config.label_smooth_factor = 0.0
    loss = CrossEntropySmooth(smooth_factor=config.label_smooth_factor, num_classes=config.num_classes)

    # get learning rate: full schedule, then the slice for the epochs to run
    loss_scale = FixedLossScaleManager(config.loss_scale, drop_overflow_update=False)
    schedule = get_linear_lr(config.lr, config.total_epoch, step_size,
                             config.lr_init, config.lr_end,
                             warmup_epoch=config.warmup_epochs)
    first_step = config.begin_epoch * step_size
    last_step = config.end_epoch * step_size
    lr = schedule[first_step:last_step]

    # define optimization
    optimizer = nn.RMSProp(net.trainable_params(), learning_rate=lr, decay=0.9,
                           weight_decay=config.wd, momentum=config.opt_momentum,
                           epsilon=config.eps, loss_scale=config.loss_scale)

    # define model
    metrics = {
        "Top1-Acc": nn.Top1CategoricalAccuracy(),
        "Top5-Acc": nn.Top5CategoricalAccuracy()
    }
    model = Model(net, loss_fn=loss, optimizer=optimizer, loss_scale_manager=loss_scale,
                  metrics=metrics, amp_level="O3")

    # define callbacks
    cb = _build_callbacks(model, lr, step_size, device_id, eval_data)

    # begin training
    model.train(config.end_epoch - config.begin_epoch, dataset, callbacks=cb)


if __name__ == "__main__":
    args = parse_args()
    organize_configuration(config, args=args)
    main()