Skip to content
Snippets Groups Projects
Commit 48aed5f3 authored by i-robot's avatar i-robot Committed by Gitee
Browse files

!1137 ntsnet 310 pr

Merge pull request !1137 from hit_zyh/master
parents 73c06673 835983a0
No related branches found
No related tags found
No related merge requests found
Showing
with 785 additions and 139 deletions
......@@ -36,7 +36,7 @@ Dataset used: [Caltech-UCSD Birds-200-2011](<http://www.vision.caltech.edu/visip
Please download the datasets [CUB_200_2011.tgz] and unzip it, then put all training images into a directory named "train", put all testing images into a directory named "test".
The directory structure is as follows:
The directory structure is as follows. You need to split the dataset yourself according to "train_test_split.txt" in the original dataset:
```path
├─resnet50.ckpt
......@@ -64,10 +64,13 @@ The directory structure is as follows:
└─ntsnet
├─README.md # README
├─scripts # shell script
├─run_standalone_train.sh # training in standalone mode(1pcs)
├─run_distribute_train.sh # training in parallel mode(8 pcs)
└─run_eval.sh # evaluation
├─run_standalone_train_ascend.sh # training in standalone mode(1pcs)
├─run_distribute_train_ascend.sh # training in parallel mode(8 pcs)
└─run_eval_ascend.sh # evaluation
├─src
├─config_gpu.py # network configuration
├─dataset_gpu.py # dataset utils
├─lr_generator_gpu.py # learning rate generator
├─config.py # network configuration
├─dataset.py # dataset utils
├─lr_generator.py # learning rate generator
......@@ -76,7 +79,9 @@ The directory structure is as follows:
├─mindspore_hub_conf.py # mindspore hub interface
├─export.py # script to export MINDIR model
├─eval.py # evaluation scripts
└─train.py # training scripts
├─train.py # training scripts
├─eval_gpu.py # evaluation scripts
└─train_gpu.py # training scripts
```
## [Script Parameters](#contents)
......@@ -85,10 +90,10 @@ The directory structure is as follows:
```shell
# distributed training
Usage: bash run_train.sh [RANK_TABLE_FILE] [DATA_URL] [TRAIN_URL]
Usage: bash run_train_ascend.sh [RANK_TABLE_FILE] [DATA_URL] [TRAIN_URL]
# standalone training
Usage: bash run_standalone_train.sh [DATA_URL] [TRAIN_URL]
Usage: bash run_standalone_train_ascend.sh [DATA_URL] [TRAIN_URL]
```
### [Parameters Configuration](#contents)
......@@ -133,7 +138,7 @@ Usage: bash run_standalone_train.sh [DATA_URL] [TRAIN_URL]
### [Training](#content)
- Run `run_standalone_train_ascend.sh` for non-distributed training of NTS-Net model in Ascend.
- Run `run_standalone_train_ascend.sh` for non-distributed training of NTS-Net model.
```bash
# standalone training in ascend
......@@ -208,11 +213,35 @@ accuracy: 0.876
## Model Export
### [Export MindIR](#contents)
When exporting a MindIR file on Ascend 910, note that the CropAndResize operator differs between Ascend 310 and Ascend 910: 310 requires an input shape of (N,C,H,W), while 910 requires an input shape of (N,H,W,C). To export successfully, you need to disable the CropAndResize validator check in the 910 MindSpore environment.
```shell
python export.py --ckpt_file [CKPT_PATH] --device_target [DEVICE_TARGET] --file_format [EXPORT_FORMAT]
python export.py --ckpt_file [CKPT_PATH] --train_url [TRAIN_URL]
```
`EXPORT_FORMAT` should be "MINDIR"
- `ckpt_file` Checkpoint file name.
- `train_url` Directory that contains the checkpoint file.
## Inference Process
### Infer on Ascend310
Before performing inference, the mindir file must be exported by `export.py` script. We only provide an example of inference using MINDIR model.
```shell
# Ascend310 inference
bash run_infer_310.sh [MINDIR_PATH] [DATASET_PATH] [DEVICE_ID]
```
- `MINDIR_PATH` The absolute path of ntsnet.mindir.
- `DATASET_PATH` The CUB_200_2011 dataset test directory.
- `DEVICE_ID` is optional, default value is 0.
### result
The inference result is saved in the current path; you can find results like the following in the acc.log file.
# Model Description
......
cmake_minimum_required(VERSION 3.14.1)
project(Ascend310Infer)

# Global flags are kept in CMAKE_CXX_FLAGS (rather than target_compile_options)
# so that -Wl,--allow-shlib-undefined still reaches the link step as before.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -std=c++17 -Werror -Wall -fPIE -Wl,--allow-shlib-undefined")
set(PROJECT_SRC_ROOT ${CMAKE_CURRENT_LIST_DIR}/)

# MINDSPORE_PATH is a directory, not a boolean, so it is a PATH cache entry
# instead of an option(). A value given with -DMINDSPORE_PATH=... is preserved.
set(MINDSPORE_PATH "" CACHE PATH "mindspore install path")
if(NOT MINDSPORE_PATH)
  message(FATAL_ERROR
    "MINDSPORE_PATH is not set; configure with -DMINDSPORE_PATH=<mindspore install path>")
endif()

# Locate the MindSpore runtime library and every _c_dataengine* file shipped
# under the MindSpore install directory.
find_library(MS_LIB libmindspore.so "${MINDSPORE_PATH}/lib")
if(NOT MS_LIB)
  message(FATAL_ERROR "libmindspore.so was not found under ${MINDSPORE_PATH}/lib")
endif()
file(GLOB_RECURSE MD_LIB "${MINDSPORE_PATH}/_c_dataengine*")

add_executable(main src/main.cc src/utils.cc)

# Selects the pre-C++11 libstdc++ ABI.
# NOTE(review): presumably needed to match the prebuilt MindSpore libraries - confirm.
target_compile_definitions(main PRIVATE _GLIBCXX_USE_CXX11_ABI=0)

target_include_directories(main PRIVATE
  "${MINDSPORE_PATH}"
  "${MINDSPORE_PATH}/include"
  "${PROJECT_SRC_ROOT}"
)

find_package(gflags REQUIRED)
target_link_libraries(main PRIVATE ${MS_LIB} ${MD_LIB} gflags)
\ No newline at end of file
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# Configure and build the inference application inside ./out.
[ -d out ] || mkdir out
cd out || exit

# The MindSpore install directory is derived from the "Location" field that
# `pip show mindspore-ascend` reports, with "/mindspore" appended.
mindspore_path="$(pip show mindspore-ascend | grep Location | awk '{print $2"/mindspore"}' | xargs realpath)"

cmake .. -DMINDSPORE_PATH="${mindspore_path}"
make
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <sys/time.h>
#include <gflags/gflags.h>
#include <dirent.h>
#include <iostream>
#include <string>
#include <algorithm>
#include <iosfwd>
#include <vector>
#include <fstream>
#include "../inc/utils.h"
#include "include/dataset/execute.h"
#include "include/dataset/transforms.h"
#include "include/dataset/vision.h"
#include "include/dataset/vision_ascend.h"
#include "include/api/types.h"
#include "include/api/model.h"
#include "include/api/serialization.h"
#include "include/api/context.h"
using mindspore::Serialization;
using mindspore::Model;
using mindspore::Context;
using mindspore::Status;
using mindspore::ModelType;
using mindspore::Graph;
using mindspore::GraphCell;
using mindspore::kSuccess;
using mindspore::MSTensor;
using mindspore::DataType;
using mindspore::dataset::Execute;
using mindspore::dataset::TensorTransform;
using mindspore::dataset::vision::Decode;
using mindspore::dataset::vision::Resize;
using mindspore::dataset::vision::Normalize;
using mindspore::dataset::vision::HWC2CHW;
DEFINE_string(mindir_path, "", "model path");
DEFINE_string(dataset_path, ".", "dataset path");
DEFINE_int32(device_id, 0, "device id");
int main(int argc, char **argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
if (RealPath(FLAGS_mindir_path).empty()) {
std::cout << "Invalid model" << std::endl;
return 1;
}
auto context = std::make_shared<Context>();
auto ascend310_info = std::make_shared<mindspore::Ascend310DeviceInfo>();
ascend310_info->SetDeviceID(FLAGS_device_id);
ascend310_info->SetPrecisionMode("allow_fp32_to_fp16");
context->MutableDeviceInfo().push_back(ascend310_info);
Graph graph;
Status ret = Serialization::Load(FLAGS_mindir_path, ModelType::kMindIR, &graph);
if (ret != kSuccess) {
std::cout << "Load model failed." << std::endl;
return 1;
}
Model model;
ret = model.Build(GraphCell(graph), context);
if (ret != kSuccess) {
std::cout << "ERROR: Build failed." << std::endl;
return 1;
}
std::vector<MSTensor> modelInputs = model.GetInputs();
auto all_files = GetAllFiles(FLAGS_dataset_path);
if (all_files.empty()) {
std::cout << "ERROR: no input data." << std::endl;
return 1;
}
auto decode = Decode();
auto resize = Resize({448, 448});
auto normalize = Normalize({0, 0, 0}, {255, 255, 255});
auto hwc2chw = HWC2CHW();
mindspore::dataset::Execute transform({decode, resize, normalize, hwc2chw});
std::map<double, double> costTime_map;
size_t size = all_files.size();
for (size_t i = 0; i < size; ++i) {
struct timeval start;
struct timeval end;
double startTime_ms;
double endTime_ms;
std::vector<MSTensor> inputs;
std::vector<MSTensor> outputs;
std::cout << "Start predict input files:" << all_files[i] << std::endl;
mindspore::MSTensor image = ReadFileToTensor(all_files[i]);
transform(image, &image);
inputs.emplace_back(modelInputs[0].Name(), modelInputs[0].DataType(), modelInputs[0].Shape(),
image.Data().get(), image.DataSize());
gettimeofday(&start, NULL);
model.Predict(inputs, &outputs);
gettimeofday(&end, NULL);
startTime_ms = (1.0 * start.tv_sec * 1000000 + start.tv_usec) / 1000;
endTime_ms = (1.0 * end.tv_sec * 1000000 + end.tv_usec) / 1000;
costTime_map.insert(std::pair<double, double>(startTime_ms, endTime_ms));
WriteResult(all_files[i], outputs);
}
double average = 0.0;
int infer_cnt = 0;
char tmpCh[256] = {0};
for (auto iter = costTime_map.begin(); iter != costTime_map.end(); iter++) {
double diff = 0.0;
diff = iter->second - iter->first;
average += diff;
infer_cnt++;
}
average = average/infer_cnt;
snprintf(tmpCh, sizeof(tmpCh), "NN inference cost average time: %4.3f ms of infer_count %d\n", average, infer_cnt);
std::cout << "NN inference cost average time: "<< average << "ms of infer_count " << infer_cnt << std::endl;
std::string file_name = "./time_Result" + std::string("/test_perform_static.txt");
std::ofstream file_stream(file_name.c_str(), std::ios::trunc);
file_stream << tmpCh;
file_stream.close();
costTime_map.clear();
return 0;
}
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "inc/utils.h"
#include <fstream>
#include <algorithm>
#include <iostream>
using mindspore::MSTensor;
using mindspore::DataType;
/**
 * Recursively collects every regular file below dirName.
 *
 * Entries "." and ".." are skipped, sub-directories (d_type == DT_DIR) are
 * descended into, and only entries with d_type == DT_REG are returned. Each
 * returned path is dirName + "/" + entry name; the result is sorted.
 *
 * Returns an empty vector when the directory cannot be opened.
 */
std::vector<std::string> GetAllFiles(std::string_view dirName) {
  DIR *dir = OpenDir(dirName);
  if (dir == nullptr) {
    return {};
  }

  std::vector<std::string> res;
  struct dirent *entry = nullptr;
  while ((entry = readdir(dir)) != nullptr) {
    const std::string dName(entry->d_name);
    if (dName == "." || dName == "..") {
      continue;
    }
    const std::string fullPath = std::string(dirName) + "/" + dName;
    if (entry->d_type == DT_DIR) {
      std::vector<std::string> subFiles = GetAllFiles(fullPath);
      res.insert(res.end(), subFiles.begin(), subFiles.end());
    } else if (entry->d_type == DT_REG) {
      res.emplace_back(fullPath);
    }
  }
  // The original never released the handle, leaking one DIR per visited directory.
  closedir(dir);

  std::sort(res.begin(), res.end());
  return res;
}
/**
 * Dumps every output tensor of one inference to ./result_Files.
 *
 * The file name is the base name of imageFile truncated at its first '.',
 * followed by "_<output index>.bin". A name without any '.' is used as is
 * (previously this case threw std::out_of_range).
 *
 * Returns 0 on success and 1 if a file cannot be opened or fully written.
 */
int WriteResult(const std::string& imageFile, const std::vector<MSTensor> &outputs) {
  const std::string homePath = "./result_Files";

  // Strip the directory part; a path without '/' is already a base name.
  const size_t slashPos = imageFile.rfind('/');
  std::string baseName = (slashPos == std::string::npos) ? imageFile : imageFile.substr(slashPos + 1);
  // NOTE(review): truncation at the FIRST dot is kept from the original so
  // output names stay unchanged - confirm no image name contains extra dots.
  const size_t dotPos = baseName.find('.');
  if (dotPos != std::string::npos) {
    baseName.erase(dotPos);
  }

  for (size_t i = 0; i < outputs.size(); ++i) {
    std::shared_ptr<const void> netOutput = outputs[i].Data();
    const size_t outputSize = outputs[i].DataSize();
    const std::string outFileName = homePath + "/" + baseName + "_" + std::to_string(i) + ".bin";

    FILE *outputFile = fopen(outFileName.c_str(), "wb");
    if (outputFile == nullptr) {
      std::cout << "Can not open output file " << outFileName << std::endl;
      return 1;
    }
    // Element size is one byte and the count is the byte length, so the
    // return value tells how many bytes were actually written.
    const size_t written = fwrite(netOutput.get(), sizeof(char), outputSize, outputFile);
    fclose(outputFile);
    outputFile = nullptr;
    if (written != outputSize) {
      std::cout << "Write output file " << outFileName << " failed" << std::endl;
      return 1;
    }
  }
  return 0;
}
/**
 * Reads the raw bytes of a file into a one-dimensional uint8 MSTensor whose
 * name is the file path and whose single dimension is the file size.
 *
 * Returns a default-constructed MSTensor when the path is empty, the file
 * cannot be opened, or its size cannot be determined.
 */
mindspore::MSTensor ReadFileToTensor(const std::string &file) {
  if (file.empty()) {
    std::cout << "Pointer file is nullptr" << std::endl;
    return mindspore::MSTensor();
  }

  // Binary mode: the content is encoded image data, not text.
  std::ifstream ifs(file, std::ios::in | std::ios::binary);
  if (!ifs.good()) {
    std::cout << "File: " << file << " is not exist" << std::endl;
    return mindspore::MSTensor();
  }

  if (!ifs.is_open()) {
    std::cout << "File: " << file << "open failed" << std::endl;
    return mindspore::MSTensor();
  }

  ifs.seekg(0, std::ios::end);
  // tellg() yields -1 on failure; casting that to size_t would request a huge buffer.
  const std::streamoff endPos = ifs.tellg();
  if (endPos < 0) {
    std::cout << "File: " << file << " get size failed" << std::endl;
    return mindspore::MSTensor();
  }
  size_t size = static_cast<size_t>(endPos);

  mindspore::MSTensor buffer(file, mindspore::DataType::kNumberTypeUInt8, {static_cast<int64_t>(size)}, nullptr, size);

  ifs.seekg(0, std::ios::beg);
  ifs.read(reinterpret_cast<char *>(buffer.MutableData()), size);
  ifs.close();

  return buffer;
}
/**
 * Opens dirName for reading with opendir().
 *
 * The name is first resolved through RealPath(); the resolved path must exist
 * and be a directory. Returns nullptr (after printing a message) when the
 * name is empty, cannot be resolved, is not a directory, or cannot be opened.
 * The caller owns the returned handle and must release it with closedir().
 */
DIR *OpenDir(std::string_view dirName) {
  if (dirName.empty()) {
    std::cout << " dirName is null ! " << std::endl;
    return nullptr;
  }

  std::string realPath = RealPath(dirName);
  struct stat s;
  // Check both the resolution and the lstat() result: previously a failed
  // lstat() left `s` uninitialised and st_mode was read anyway.
  if (realPath.empty() || lstat(realPath.c_str(), &s) != 0 || !S_ISDIR(s.st_mode)) {
    std::cout << "dirName is not a valid directory !" << std::endl;
    return nullptr;
  }

  DIR *dir = opendir(realPath.c_str());
  if (dir == nullptr) {
    std::cout << "Can not open dir " << dirName << std::endl;
    return nullptr;
  }
  std::cout << "Successfully opened the dir " << dirName << std::endl;
  return dir;
}
/**
 * Resolves path to a canonical absolute path with realpath(3).
 *
 * Returns the resolved path, or an empty string (after printing a message)
 * when realpath() fails, e.g. because the path does not exist.
 */
std::string RealPath(std::string_view path) {
  char realPathMem[PATH_MAX] = {0};
  // A string_view is not guaranteed to be NUL-terminated, so copy it into a
  // std::string before handing it to the C API.
  const std::string pathStr(path);
  char *realPathRet = realpath(pathStr.c_str(), realPathMem);
  if (realPathRet == nullptr) {
    // Terminate the line so following messages do not run into this one.
    std::cout << "File: " << path << " is not exist." << std::endl;
    return "";
  }

  std::string realPath(realPathMem);
  std::cout << path << " realpath is: " << realPath << std::endl;
  return realPath;
}
......@@ -16,21 +16,17 @@
import argparse
import ast
import os
import mindspore.common.dtype as mstype
from mindspore import context, set_seed, Tensor, load_checkpoint, load_param_into_net, ops
import mindspore.common.dtype as mstype
from src.config import config
from src.dataset import create_dataset_test
from src.network import NTS_NET
parser = argparse.ArgumentParser(description='ntsnet eval running')
parser.add_argument("--run_modelart", type=ast.literal_eval, default=False, help="Run on modelArt, default is false.")
parser.add_argument('--data_url', default="./data", help='Directory contains CUB_200_2011 dataset.')
parser.add_argument('--train_url', default="./ckpt_0", help='Directory contains checkpoint file and eval.log')
parser.add_argument('--data_url', default=None, help='Directory contains CUB_200_2011 dataset.')
parser.add_argument('--train_url', default=None, help='Directory contains checkpoint file and eval.log')
parser.add_argument('--ckpt_filename', default=None, help='checkpoint file name')
parser.add_argument("--device_target", type=str, default="Ascend", help="Device Target, default Ascend",
choices=["Ascend", "GPU"])
parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
args = parser.parse_args()
run_modelart = args.run_modelart
......@@ -43,20 +39,9 @@ batch_size = config.batch_size
resnet50Path = ""
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, save_graphs=False)
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False)
context.set_context(device_id=device_id)
if args.ckpt_filename is None:
files = os.listdir(args.train_url)
time = 0.
for file in files:
if file.endswith('.ckpt'):
time_temp = os.path.getatime(os.path.join(args.train_url, file))
if time_temp > time:
time = time_temp
args.ckpt_filename = file
print(f"Get ckpt file {args.ckpt_filename}")
if run_modelart:
import moxing as mox
......@@ -101,8 +86,6 @@ if __name__ == '__main__':
total_num = total_num + float(image_data.shape[0])
print2file("ckpt file name: ", args.ckpt_filename)
print2file("accuracy: ", round(success_num / total_num, 3))
print("accuracy: ", round(success_num / total_num, 3))
print("ckpt file name: ", args.ckpt_filename)
if run_modelart:
mox.file.copy_parallel(src_url=os.path.join(local_output_url, "eval.log"),
dst_url=os.path.join(args.train_url, "eval.log"))
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""ntsnet eval."""
import argparse
import ast
import os
import mindspore.common.dtype as mstype
from mindspore import context, set_seed, Tensor, load_checkpoint, load_param_into_net, ops
# The GPU configuration module is src/config_gpu.py (same naming as src/dataset_gpu.py).
from src.config_gpu import config
from src.dataset_gpu import create_dataset_test
from src.network import NTS_NET

# Command-line interface of the GPU evaluation script.
parser = argparse.ArgumentParser(description='ntsnet eval running')
parser.add_argument("--run_modelart", type=ast.literal_eval, default=False, help="Run on modelArt, default is false.")
parser.add_argument('--data_url', default="./data", help='Directory contains CUB_200_2011 dataset.')
parser.add_argument('--train_url', default="./ckpt_0", help='Directory contains checkpoint file and eval.log')
parser.add_argument('--ckpt_filename', default=None, help='checkpoint file name')
parser.add_argument("--device_target", type=str, default="Ascend", help="Device Target, default Ascend",
                    choices=["Ascend", "GPU"])
parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
args = parser.parse_args()
run_modelart = args.run_modelart

# On ModelArts the device id comes from the DEVICE_ID environment variable,
# otherwise from --device_id.
if not run_modelart:
    device_id = args.device_id
else:
    device_id = int(os.getenv('DEVICE_ID'))

batch_size = config.batch_size
resnet50Path = ""

context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, save_graphs=False)
context.set_context(device_id=device_id)

if args.ckpt_filename is None:
    # No checkpoint was named: pick the most recently written *.ckpt in train_url.
    # Modification time is used because access time changes whenever a file is
    # read and may not be maintained at all on some mounts.
    ckpt_candidates = [name for name in os.listdir(args.train_url) if name.endswith('.ckpt')]
    if not ckpt_candidates:
        raise FileNotFoundError(f"No .ckpt file found in {args.train_url}")
    args.ckpt_filename = max(ckpt_candidates,
                             key=lambda name: os.path.getmtime(os.path.join(args.train_url, name)))
    print(f"Get ckpt file {args.ckpt_filename}")

if run_modelart:
    # Stage the dataset, checkpoint and eval.log from remote storage into local cache directories.
    import moxing as mox

    local_input_url = '/cache/data' + str(device_id)
    local_output_url = '/cache/ckpt' + str(device_id)
    mox.file.copy_parallel(src_url=args.data_url, dst_url=local_input_url)
    mox.file.copy_parallel(src_url=os.path.join(args.train_url, args.ckpt_filename),
                           dst_url=os.path.join(local_output_url, args.ckpt_filename))
    mox.file.copy_parallel(src_url=os.path.join(args.train_url, "eval.log"),
                           dst_url=os.path.join(local_output_url, "eval.log"))
else:
    local_input_url = args.data_url
    local_output_url = args.train_url
def print2file(obj1, obj2):
    """Append one line "<obj1> <obj2> \\r\\n" to eval.log inside local_output_url."""
    log_path = os.path.join(local_output_url, 'eval.log')
    line = "{} {} \r\n".format(str(obj1), str(obj2))
    with open(log_path, 'a') as log_file:
        log_file.write(line)
if __name__ == '__main__':
    # Evaluate the checkpoint on CUB_200_2011/test and report top-1 accuracy.
    set_seed(1)
    test_dir = os.path.join(local_input_url, "CUB_200_2011/test")
    test_data_set = create_dataset_test(test_path=test_dir, batch_size=batch_size)
    test_data_loader = test_data_set.create_dict_iterator(output_numpy=True)

    ntsnet = NTS_NET(topK=6, resnet50Path=resnet50Path)
    ckpt_path = os.path.join(local_output_url, args.ckpt_filename)
    param_dict = load_checkpoint(ckpt_path)
    load_param_into_net(ntsnet, param_dict)
    ntsnet.set_train(False)

    success_num = 0.0
    total_num = 0.0
    for batch in test_data_loader:
        image_data = Tensor(batch['image'], mstype.float32)
        label = Tensor(batch["label"], mstype.int32)
        # Only the second network output is used for classification.
        _, scrutinizer_out, _, _ = ntsnet(image_data)
        result_label, _ = ops.ArgMaxWithValue(1)(scrutinizer_out)
        success_num += sum((result_label == label).asnumpy())
        total_num += float(image_data.shape[0])

    # The same values go to eval.log first and then to stdout.
    print2file("ckpt file name: ", args.ckpt_filename)
    print2file("accuracy: ", round(success_num / total_num, 3))
    print("accuracy: ", round(success_num / total_num, 3))
    print("ckpt file name: ", args.ckpt_filename)

    if run_modelart:
        # Copy the updated log back to the remote train_url.
        local_log = os.path.join(local_output_url, "eval.log")
        remote_log = os.path.join(args.train_url, "eval.log")
        mox.file.copy_parallel(src_url=local_log, dst_url=remote_log)
......@@ -18,19 +18,18 @@ import ast
import os
import numpy as np
import mindspore.common.dtype as mstype
from mindspore import Tensor, context, load_checkpoint, load_param_into_net, export
from mindspore import Tensor, context, load_checkpoint, load_param_into_net, export
import mindspore.common.dtype as mstype
from src.network import NTS_NET
parser = argparse.ArgumentParser(description='ntsnet export')
parser.add_argument("--run_modelart", type=ast.literal_eval, default=False, help="Run on modelArt, default is false.")
parser.add_argument("--device_id", type=int, default=0, help="Device id")
parser.add_argument("--batch_size", type=int, default=8, help="batch size")
parser.add_argument("--batch_size", type=int, default=1, help="batch size")
parser.add_argument("--ckpt_file", type=str, required=True, help="Checkpoint file name.")
parser.add_argument('--data_url', default=None, help='Directory contains CUB_200_2011 dataset.')
parser.add_argument('--train_url', default=None, help='Directory contains checkpoint file')
parser.add_argument('--data_url', default=None, help='Directory contains CUB_200_2011 dataset.')
parser.add_argument("--file_name", type=str, default="ntsnet", help="output file name.")
parser.add_argument("--file_format", type=str, default="MINDIR", help="file format")
parser.add_argument('--device_target', type=str, default="Ascend",
......@@ -38,8 +37,8 @@ parser.add_argument('--device_target', type=str, default="Ascend",
args = parser.parse_args()
context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
context.set_context(device_id=args.device_id)
if args.device_target == "Ascend":
context.set_context(device_id=args.device_id)
if args.run_modelart:
import moxing as mox
......@@ -49,7 +48,7 @@ if args.run_modelart:
dst_url=os.path.join(local_output_url, args.ckpt_file))
if __name__ == '__main__':
net = NTS_NET(topK=6)
net = NTS_NET(topK=6, flag910=False)
if args.run_modelart:
param_dict = load_checkpoint(os.path.join(local_output_url, args.ckpt_file))
else:
......
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""postprocess for 310 inference"""
import os
import json
import argparse
import numpy as np
# Command-line interface: directory with the *.bin inference outputs and the
# JSON file mapping image file names to label indices.
parser = argparse.ArgumentParser(description="postprocess")
parser.add_argument("--result_dir", type=str, required=True, help="result files path.")
parser.add_argument("--label_dir", type=str, required=True, help="image file path.")
args = parser.parse_args()

if __name__ == '__main__':
    # Every result file is reshaped to (batch_size, 200) and only row 0 is scored.
    batch_size = 1
    result_suffix = "_1.bin"
    rst_path = args.result_dir
    result_files = os.listdir(rst_path)
    with open(args.label_dir, "r") as label_file:
        labels = json.load(label_file)

    success_num = 0.0
    total_num = 0.0
    for result_name in result_files:
        # Only files whose name contains "_1.bin" are evaluated.
        if result_suffix not in result_name:
            continue
        # Label keys are image names: the part before "_1.bin" plus ".jpg".
        image_name = result_name.split(result_suffix)[0] + ".jpg"
        raw_scores = np.fromfile(os.path.join(rst_path, result_name), np.float32)
        scrutinizer_out = raw_scores.reshape(batch_size, 200)
        pred = np.argmax(scrutinizer_out, axis=1)[0]
        print("pred: ", pred)
        print("labels[label]: ", labels[image_name])
        total_num = total_num + 1
        if pred == labels[image_name]:
            success_num = success_num + 1

    # Accuracy stays 0.0 when no matching result file was found.
    acc = success_num / total_num if total_num else 0.0
    print("success_num: ", success_num)
    print("total_num: ", total_num)
    print("acc: ", acc)
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""preprocess"""
import os
import argparse
import json
def create_label(result_path, dir_path):
    """Write label.json mapping each image file name to its class index.

    Args:
        result_path: existing directory in which label.json is written.
        dir_path: directory whose entries are listed in sorted order; the
            position of an entry in that order is the label of every file
            found directly inside it.
    """
    print("Create label. ")
    class_dirs = sorted(os.listdir(dir_path))

    total = 0
    img_label = {}
    for class_index, class_dir in enumerate(class_dirs):
        print("dir_path: ", dir_path)
        print("file_dir: ", class_dir)
        image_names = os.listdir(os.path.join(dir_path, class_dir))
        img_label.update(dict.fromkeys(image_names, class_index))
        total += len(image_names)

    json_file = os.path.join(result_path, "label.json")
    with open(json_file, "w+") as label_file:
        json.dump(img_label, label_file)

    print("[INFO] Completed! Total {} data.".format(total))
# Command-line interface: dataset directory to scan and directory that receives label.json.
parser = argparse.ArgumentParser('preprocess')
parser.add_argument(
    '--data_path', type=str, default='', help='eval data dir')
parser.add_argument(
    '--result_path', type=str, default='./preprocess_Result/', help='result path')
args = parser.parse_args()

if __name__ == "__main__":
    create_label(result_path=args.result_path, dir_path=args.data_path)
......@@ -16,7 +16,7 @@
if [ $# != 3 ]
then
echo "Usage: bash run_distribute_train_ascend.sh [RANK_TABLE_FILE] [DATA_URL] [TRAIN_URL]"
echo "Usage: bash run_train_ascend.sh [RANK_TABLE_FILE] [DATA_URL] [TRAIN_URL]"
exit 1
fi
......@@ -54,7 +54,7 @@ exit 1
fi
ulimit -u unlimited
export HCCL_CONNECT_TIMEOUT=1200
export HCCL_CONNECT_TIMEOUT=600
export DEVICE_NUM=8
export RANK_SIZE=8
export RANK_TABLE_FILE=$PATH1
......@@ -81,7 +81,7 @@ do
cd ./train_parallel$i || exit
echo "start training for rank $RANK_ID, device $DEVICE_ID"
env > env.log
taskset -c $cmdopt python train.py --device_id=$i --device_target="Ascend" --run_distribute=True --device_num=$DEVICE_NUM \
taskset -c $cmdopt python train.py --device_id=$i --run_distribute=True --device_num=$DEVICE_NUM \
--data_url=$PATH2 --train_url=$PATH3 &> log &
cd ..
done
......@@ -47,7 +47,7 @@ export CUDA_VISIBLE_DEVICES="$2"
if [ $1 -gt 1 ]; then
mpirun -n $1 --allow-run-as-root --output-filename log_output --merge-stderr-to-stdout \
python3 ${BASEPATH}/../train.py --device_target="GPU" --run_distribute True --data_url=$DATA_DIR --train_url=$TRAIN_URL >train_gpu.log 2>&1 &
python3 ${BASEPATH}/../train_gpu.py --device_target="GPU" --run_distribute True --data_url=$DATA_DIR --train_url=$TRAIN_URL >train_gpu.log 2>&1 &
else
python3 ${BASEPATH}/../train.py --device_target="GPU" --data_url=$DATA_DIR --train_url=$TRAIN_URL >train_gpu.log 2>&1 &
python3 ${BASEPATH}/../train_gpu.py --device_target="GPU" --data_url=$DATA_DIR --train_url=$TRAIN_URL >train_gpu.log 2>&1 &
fi
......@@ -14,18 +14,12 @@
# limitations under the License.
# ============================================================================
if [ $# -lt 3 ] || [ $# -gt 4 ]
if [ $# != 3 ]
then
echo "Usage: sh run_eval_ascend.sh [DATA_URL] [TRAIN_URL] [CKPT_FILENAME] [DEVICE_ID(optional)]"
echo "Usage: sh run_eval_ascend.sh [DATA_URL] [TRAIN_URL] [CKPT_FILENAME]"
exit 1
fi
export DEVICE_ID=0
if [ $# = 4 ] ; then
export DEVICE_ID=$4
fi;
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
......@@ -36,7 +30,7 @@ get_real_path(){
PATH1=$(get_real_path $1)
PATH2=$(get_real_path $2)
PATH3=$3
PATH3=$(get_real_path $3)
if [ ! -d $PATH1 ]
then
......@@ -50,9 +44,15 @@ then
exit 1
fi
if [ ! -f $PATH3 ]
then
echo "error: CKPT_FILENAME=$PATH3 is not a file"
exit 1
fi
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=0
export RANK_SIZE=$DEVICE_NUM
export RANK_ID=0
......@@ -68,5 +68,5 @@ cd ./eval || exit
env > env.log
echo "start evaluation for device $DEVICE_ID"
python eval.py --device_id=$DEVICE_ID --data_url=$PATH1 --train_url=$PATH2 \
--ckpt_filename=$PATH3 --device_target="Ascend" &> log &
--ckpt_filename=$PATH3 &> log &
cd ..
......@@ -67,6 +67,6 @@ cp -r ../src ./eval
cd ./eval || exit
env > env.log
echo "start evaluation for device $DEVICE_ID"
python eval.py --device_id=$DEVICE_ID --data_url=$PATH1 --train_url=$PATH2 \
python eval_gpu.py --device_id=$DEVICE_ID --data_url=$PATH1 --train_url=$PATH2 \
--ckpt_filename=$PATH3 --device_target="GPU" &> log &
cd ..
#!/bin/bash
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# Two or three positional arguments are accepted: MINDIR_PATH, DATASET_PATH
# and an optional DEVICE_ID. The help text states the real default; the
# script does not read DEVICE_ID from any environment variable.
if [[ $# -lt 2 || $# -gt 3 ]]; then
    echo "Usage: bash run_infer_310.sh [MINDIR_PATH] [DATASET_PATH] [DEVICE_ID]
    DEVICE_ID is optional, default value is 0"
exit 1
fi
# Print an absolute form of the path given as $1.
# Absolute paths are echoed unchanged; relative paths are resolved against
# $PWD with `realpath -m` (the path does not have to exist). Expansions are
# quoted so paths containing spaces stay a single argument.
get_real_path(){
    if [ "${1:0:1}" == "/" ]; then
        echo "$1"
    else
        realpath -m "$PWD/$1"
    fi
}
# Resolve the positional arguments to absolute paths.
model=$(get_real_path $1)
dataset_path=$(get_real_path $2)
# Device 0 is used unless a third argument is given.
device_id=0
if [ $# == 3 ]; then
device_id=$3
fi
echo "mindir name: "$model
echo "dataset path: "$dataset_path
echo "device id: "$device_id
# Export the Ascend tool and library paths. Two layouts are handled: one with
# an ascend-toolkit directory under ASCEND_HOME and one without it.
export ASCEND_HOME=/usr/local/Ascend/
if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then
export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/atc/bin:$PATH
export LD_LIBRARY_PATH=$ASCEND_HOME/fwkacllib/lib64:/usr/local/lib:$ASCEND_HOME/ascend-toolkit/latest/atc/lib64:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
export TBE_IMPL_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe
export PYTHONPATH=$ASCEND_HOME/fwkacllib/python/site-packages:${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH
export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp
else
export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH
export LD_LIBRARY_PATH=$ASCEND_HOME/fwkacllib/lib64:/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
export PYTHONPATH=$ASCEND_HOME/fwkacllib/python/site-packages:$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH
export ASCEND_OPP_PATH=$ASCEND_HOME/opp
fi
# Recreate ./preprocess_Result and let preprocess.py write label.json into it.
# The function's status is the status of the python command.
function preprocess_data()
{
    [ -d preprocess_Result ] && rm -rf ./preprocess_Result
    mkdir preprocess_Result
    python3.7 ../preprocess.py --data_path=$dataset_path --result_path=./preprocess_Result/
}
# Build the inference executable by running build.sh inside ../ascend310_infer;
# all build output goes to build.log in that directory.
function compile_app()
{
# The working directory is left in ../ascend310_infer; infer() returns with `cd -`.
cd ../ascend310_infer/ || exit
bash build.sh &> build.log
}
# Run the compiled executable on the dataset. Output directories result_Files
# and time_Result are recreated empty; all program output goes to infer.log.
function infer()
{
    # Go back to the directory that was current before compile_app changed it.
    cd - || exit
    local out_dir
    for out_dir in result_Files time_Result; do
        [ -d "$out_dir" ] && rm -rf "./$out_dir"
    done
    mkdir result_Files
    mkdir time_Result
    ../ascend310_infer/out/main --mindir_path=$model --dataset_path=$dataset_path --device_id=$device_id &> infer.log
}
# Run ../postprocess.py over ./result_Files using the labels in
# ./preprocess_Result/label.json; stdout and stderr both go to acc.log.
# The exit status is that of the python3.7 invocation.
cal_acc() {
    python3.7 ../postprocess.py \
        --result_dir=./result_Files \
        --label_dir=./preprocess_Result/label.json > acc.log 2>&1
}
# Pipeline driver: run each stage in order and stop with exit code 1 (after
# printing a stage-specific message) as soon as one of them fails.
if ! preprocess_data; then
    echo "preprocess dataset failed"
    exit 1
fi

if ! compile_app; then
    echo "compile app code failed"
    exit 1
fi

if ! infer; then
    echo " execute inference failed"
    exit 1
fi

if ! cal_acc; then
    echo "calculate accuracy failed"
    exit 1
fi
......@@ -16,16 +16,10 @@
if [ $# != 2 ]
then
echo "Usage: bash run_standalone_train_ascend.sh [DATA_URL] [TRAIN_URL] [DEVICE_ID(optional)]"
echo "Usage: bash run_standalone_train_ascend.sh [DATA_URL] [TRAIN_URL]"
exit 1
fi
export DEVICE_ID=0
if [ $# = 3 ] ; then
export DEVICE_ID=$3
fi;
get_real_path(){
if [ "${1:0:1}" == "/" ]; then
echo "$1"
......@@ -52,6 +46,7 @@ fi
ulimit -u unlimited
export DEVICE_NUM=1
export DEVICE_ID=0
export RANK_ID=0
export RANK_SIZE=1
......@@ -66,5 +61,5 @@ cp -r ../src ./train
cd ./train || exit
echo "start training for device $DEVICE_ID"
env > env.log
python train.py --device_id=$DEVICE_ID --data_url=$PATH1 --train_url=$PATH2 --device_target="Ascend"&> log &
python train.py --device_id=$DEVICE_ID --data_url=$PATH1 --train_url=$PATH2 &> log &
cd ..
......@@ -61,5 +61,5 @@ cp -r ../src ./train$3
cd ./train$3 || exit
echo "start training for device $DEVICE_ID"
env > env.log
python train.py --device_id=$DEVICE_ID --data_url=$PATH1 --train_url=$PATH2 --device_target="GPU"&> log &
python train_gpu.py --device_id=$DEVICE_ID --data_url=$PATH1 --train_url=$PATH2 --device_target="GPU"&> log &
cd ..
......@@ -17,48 +17,21 @@ network config setting, will be used in train.py and eval.py
"""
from easydict import EasyDict as ed
config_ascend = ed({
config = ed({
"save_checkpoint": True,
"save_checkpoint_epochs": 2,
"keep_checkpoint_max": 10,
"save_checkpoint_epochs": 112,
"keep_checkpoint_max": 10000,
"learning_rate": 0.001,
"m_for_scrutinizer": 4,
"topK": 6,
"input_size": (448, 448),
"crop_pct_size": (600, 600),
"weight_decay": 1e-4,
"momentum": 0.9,
"num_epochs": 200,
"num_epochs": 112,
"num_classes": 200,
"num_train_images": 5994,
"num_test_images": 5794,
"batch_size": 8,
"prefix": "ntsnet",
"lossLogName": "loss.log",
"lr_scheduler": "cosine",
"lr_step": [200, 200],
"optimizer": "momentum"
})
config_gpu = ed({
"save_checkpoint": True,
"save_checkpoint_epochs": 2,
"keep_checkpoint_max": 10,
"learning_rate": 0.001,
"m_for_scrutinizer": 4,
"topK": 6,
"input_size": (448, 448),
"crop_pct_size": (600, 600),
"weight_decay": 1e-4,
"momentum": 0.9,
"num_epochs": 200,
"num_classes": 200,
"num_train_images": 5994,
"num_test_images": 5794,
"batch_size": 16,
"prefix": "ntsnet",
"lossLogName": "loss.log",
"lr_scheduler": "cosine",
"lr_step": [60, 100],
"optimizer": "momentum"
"lossLogName": "loss.log"
})
# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
network config setting, will be used in train_gpu.py and eval_gpu.py
"""
from easydict import EasyDict as ed
# Settings bundle named config_ascend, wrapped in EasyDict so entries can be
# read as attributes. Compared with config_gpu below, only batch_size and
# lr_step differ.
config_ascend = ed(dict(
    # checkpoint saving
    save_checkpoint=True,
    save_checkpoint_epochs=2,
    keep_checkpoint_max=10,
    # model / input geometry
    learning_rate=0.001,
    m_for_scrutinizer=4,
    topK=6,
    input_size=(448, 448),
    crop_pct_size=(600, 600),
    # optimisation
    weight_decay=1e-4,
    momentum=0.9,
    num_epochs=200,
    # dataset bookkeeping
    num_classes=200,
    num_train_images=5994,
    num_test_images=5794,
    batch_size=8,
    # output naming
    prefix="ntsnet",
    lossLogName="loss.log",
    # learning-rate schedule and optimizer selection
    lr_scheduler="cosine",
    lr_step=[200, 200],
    optimizer="momentum",
))
# Settings bundle named config_gpu, wrapped in EasyDict so entries can be read
# as attributes. Compared with config_ascend above, only batch_size and
# lr_step differ.
config_gpu = ed(dict(
    # checkpoint saving
    save_checkpoint=True,
    save_checkpoint_epochs=2,
    keep_checkpoint_max=10,
    # model / input geometry
    learning_rate=0.001,
    m_for_scrutinizer=4,
    topK=6,
    input_size=(448, 448),
    crop_pct_size=(600, 600),
    # optimisation
    weight_decay=1e-4,
    momentum=0.9,
    num_epochs=200,
    # dataset bookkeeping
    num_classes=200,
    num_train_images=5994,
    num_test_images=5794,
    batch_size=16,
    # output naming
    prefix="ntsnet",
    lossLogName="loss.log",
    # learning-rate schedule and optimizer selection
    lr_scheduler="cosine",
    lr_step=[60, 100],
    optimizer="momentum",
))
......@@ -14,69 +14,41 @@
# ============================================================================
"""ntsnet dataset"""
import os
import mindspore.dataset as de
import mindspore.dataset as ds
import mindspore.dataset.vision.c_transforms as vision
from mindspore.dataset.vision import Inter
from src.config import config
mean = [0.485 * 255, 0.456 * 255, 0.406 * 255]
std = [0.229 * 255, 0.224 * 255, 0.225 * 255]
def create_dataset_train(train_path, batch_size):
"""create train dataset"""
device_num, rank_id = _get_rank_info()
if device_num == 1:
train_data_set = de.ImageFolderDataset(train_path, num_parallel_workers=8, shuffle=True)
else:
train_data_set = de.ImageFolderDataset(train_path, num_parallel_workers=8,
shuffle=True, num_shards=device_num, shard_id=rank_id)
train_data_set = ds.ImageFolderDataset(train_path, shuffle=True)
# define map operations
transform_img = [
vision.Decode(),
vision.Resize(config.crop_pct_size, Inter.BILINEAR),
vision.RandomCrop(config.input_size),
vision.Resize([448, 448], Inter.LINEAR),
vision.RandomHorizontalFlip(),
vision.Normalize(mean=mean, std=std),
vision.HWC2CHW()
]
train_data_set = train_data_set.map(input_columns="image", num_parallel_workers=8, operations=transform_img)
train_data_set = train_data_set.batch(batch_size, drop_remainder=True)
train_data_set = train_data_set.map(input_columns="image", num_parallel_workers=8, operations=transform_img,
output_columns="image")
train_data_set = train_data_set.map(input_columns="image", num_parallel_workers=8,
operations=lambda x: (x / 255).astype("float32"))
train_data_set = train_data_set.batch(batch_size)
return train_data_set
def create_dataset_test(test_path, batch_size):
"""create test dataset"""
test_data_set = de.ImageFolderDataset(test_path, shuffle=False)
test_data_set = ds.ImageFolderDataset(test_path, shuffle=False)
# define map operations
transform_img = [
vision.Decode(),
vision.Resize(config.crop_pct_size, Inter.BILINEAR),
vision.CenterCrop(config.input_size),
vision.Normalize(mean=mean, std=std),
vision.Resize([448, 448], Inter.LINEAR),
vision.HWC2CHW()
]
test_data_set = test_data_set.map(input_columns="image", num_parallel_workers=8, operations=transform_img)
test_data_set = test_data_set.batch(batch_size, drop_remainder=True)
test_data_set = test_data_set.map(input_columns="image", num_parallel_workers=8, operations=transform_img,
output_columns="image")
test_data_set = test_data_set.map(input_columns="image", num_parallel_workers=8,
operations=lambda x: (x / 255).astype("float32"))
test_data_set = test_data_set.batch(batch_size)
return test_data_set
def _get_rank_info():
"""
get rank size and rank id
"""
rank_size = int(os.environ.get("RANK_SIZE", 1))
if rank_size > 1:
from mindspore.communication.management import get_rank, get_group_size
rank_size = get_group_size()
rank_id = get_rank()
else:
rank_size = rank_id = None
return rank_size, rank_id
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment