diff --git a/official/recommend/tbnet/README.md b/official/recommend/tbnet/README.md
index 09f6e6a688bd1bdc2de37c06d3b860eb531db152..6b4fa684e96b0cdfaea9bb12c10091eb5fbc439e 100644
--- a/official/recommend/tbnet/README.md
+++ b/official/recommend/tbnet/README.md
@@ -9,6 +9,10 @@
 - [Script Description](#script-description)
     - [Script and Sample Code](#script-and-sample-code)
     - [Script Parameters](#script-parameters)
+    - [Inference Process](#inference-process)
+        - [Export MindIR](#export-mindir)
+        - [Infer on Ascend310](#infer-on-ascend310)
+        - [Result](#result)
 - [Model Description](#model-description)
     - [Performance](#performance)
         - [Training Performance](#training-performance)
@@ -134,6 +138,8 @@ python infer.py \
 .
 └─tbnet
   ├─README.md
+  ├─scripts
+  │ └─run_infer_310.sh            # Ascend310 inference script
   ├─data
     ├─steam
       ├─config.json               # data and training parameter configuration
@@ -149,6 +155,9 @@ python infer.py \
     ├─metrics.py                  # model metrics
     ├─steam.py                    # 'steam' dataset text explainer
     └─tbnet.py                    # TB-Net model
+  ├─export.py                     # export MindIR script
+  ├─preprocess.py                 # inference data preprocess script
+  ├─postprocess.py                # inference result calculation script
   ├─eval.py                       # evaluation
   ├─infer.py                      # inference and explanation
   └─train.py                      # training
@@ -193,6 +202,40 @@ python infer.py \
   --run_mode        run code by GRAPH mode or PYNATIVE mode
 ```
 
+## [Inference Process](#contents)
+
+### [Export MindIR](#contents)
+
+```shell
+python export.py --checkpoint_id [ID] --device_target [DEVICE] --file_name [FILE_NAME] --file_format [FILE_FORMAT]
+```
+
+The `checkpoint_id` parameter is required.
+`ID` should be the epoch number of the checkpoint to export, i.e. `checkpoints/tbnet_epoch[ID].ckpt` must exist.
+`DEVICE` should be 'Ascend', the only target currently supported by `export.py`.
+`FILE_FORMAT` should be "MINDIR".
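+
+For a quick sanity check that the exported file loads and runs, a sketch like the following may be used (illustrative only, not part of the provided scripts; it assumes the default output name `tbnet.mindir` and a backend that can execute MindIR graphs):
+
+```python
+# Hypothetical smoke test for the exported graph; input shapes mirror export.py.
+import numpy as np
+import mindspore as ms
+import mindspore.nn as nn
+
+graph = ms.load('tbnet.mindir')  # parse the exported MindIR
+net = nn.GraphCell(graph)        # wrap the graph as a callable cell
+
+item = ms.Tensor(np.ones((1,), np.int64))    # candidate item id
+rl1 = ms.Tensor(np.ones((1, 39), np.int64))  # relation1 ids
+ety = ms.Tensor(np.ones((1, 39), np.int64))  # entity ids
+rl2 = ms.Tensor(np.ones((1, 39), np.int64))  # relation2 ids
+his = ms.Tensor(np.ones((1, 39), np.int64))  # historical item ids
+rate = ms.Tensor(np.ones((1,), np.float32))  # rating placeholder
+
+outputs = net(item, rl1, ety, rl2, his, rate)
+print(outputs)
+```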
+
+### [Infer on Ascend310](#contents)
+
+Before performing inference, the MindIR file must be exported by the `export.py` script. We only provide an example of inference using the MINDIR model.
+
+```shell
+# Ascend310 inference
+bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [DEVICE_ID]
+```
+
+- `MINDIR_PATH` specifies the path of the "MINDIR" model.
+- `DATA_PATH` specifies the path of test.csv.
+- `DEVICE_ID` is optional; the default value is 0.
+
+### [Result](#contents)
+
+Inference results are saved in the current path. You can find the AUC result in the acc.log file:
+
+```bash
+auc: 0.8251359368836292
+```
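+
+To inspect the raw per-sample binaries that this AUC is computed from, something like the following sketch can be used (illustrative; the file names follow the `tbnet_*_bs1_[idx]` convention used by `preprocess.py` and the inference executable, and paths are relative to the `scripts` directory where the shell script runs):
+
+```python
+# Hypothetical inspection of one inference result produced by run_infer_310.sh.
+import numpy as np
+
+# second model output (index 1) for sample 0, written by the inference binary
+pred = np.fromfile('./result_Files/tbnet_item_bs1_0_1.bin', dtype=np.float32)
+# ground-truth rating for sample 0, written by preprocess.py
+label = np.fromfile('./preprocess_Result/05_rate/tbnet_rate_bs1_0.bin', dtype=np.float32)
+print(pred, label)
+```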
+
 # [Model Description](#contents)
 
 ## [Performance](#contents)
diff --git a/official/recommend/tbnet/README_CN.md b/official/recommend/tbnet/README_CN.md
index 621b80787c633be920a68aa3bbdb22902bee4864..6919c040a6b84f42bc847e585d89b14dffab1d67 100644
--- a/official/recommend/tbnet/README_CN.md
+++ b/official/recommend/tbnet/README_CN.md
@@ -11,6 +11,10 @@
 - [脚本说明](#脚本说明)
     - [脚本和样例代码](#脚本和样例代码)
     - [脚本参数](#脚本参数)
+    - [推理过程](#推理过程)
+        - [导出MindIR](#导出mindir)
+        - [在Ascend310执行推理](#在ascend310执行推理)
+        - [结果](#结果)
 - [模型描述](#模型描述)
     - [性能](#性能)
         - [训练性能](#训练性能)
@@ -136,6 +140,8 @@ python infer.py \
 .
 └─tbnet
   ├─README.md
+  ├─scripts
+  │ └─run_infer_310.sh            # 用于Ascend310推理的脚本
   ├─data
     ├─steam
       ├─config.json               # 数据和训练参数配置
@@ -151,6 +157,9 @@ python infer.py \
     ├─metrics.py                  # 模型度量
     ├─steam.py                    # 'steam'数据集文本解析
     └─tbnet.py                    # TB-Net网络
+  ├─export.py                     # 导出MINDIR脚本
+  ├─preprocess.py                 # 推理数据预处理脚本
+  ├─postprocess.py                # 推理结果计算脚本
   ├─eval.py                       # 评估网络
   ├─infer.py                      # 推理和解释
   └─train.py                      # 训练网络
@@ -195,6 +204,39 @@ python infer.py \
   --run_mode        run code by GRAPH mode or PYNATIVE mode
 ```
 
+## 推理过程
+
+### 导出MindIR
+
+```shell
+python export.py --checkpoint_id [ID] --device_target [DEVICE] --file_name [FILE_NAME] --file_format [FILE_FORMAT]
+```
+
+参数checkpoint_id为必填项,`ID`须为待导出checkpoint的epoch编号(即存在`checkpoints/tbnet_epoch[ID].ckpt`)。
+`DEVICE` 须设置为'Ascend'(`export.py`当前仅支持Ascend)。
+`FILE_FORMAT` 须设置为"MINDIR"。
+
+### 在Ascend310执行推理
+
+在执行推理前,MindIR文件必须通过`export.py`脚本导出。以下展示了使用MINDIR模型执行推理的示例。
+
+```shell
+# Ascend310 inference
+bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [DEVICE_ID]
+```
+
+- `MINDIR_PATH` mindir文件路径
+- `DATA_PATH` 推理数据集test.csv路径
+- `DEVICE_ID` 可选,默认值为0。
+
+### 结果
+
+推理结果保存在脚本执行的当前路径,你可以在acc.log中看到以下精度计算结果。
+
+```bash
+auc: 0.8251359368836292
+```
+
 # [模型描述](#目录)
 
 ## [性能](#目录)
diff --git a/official/recommend/tbnet/ascend310_infer/CMakeLists.txt b/official/recommend/tbnet/ascend310_infer/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..ee3c85447340e0449ff2b70ed24f60a17e07b2b6
--- /dev/null
+++ b/official/recommend/tbnet/ascend310_infer/CMakeLists.txt
@@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 3.14.1)
+project(Ascend310Infer)
+add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -std=c++17 -Werror -Wall -fPIE -Wl,--allow-shlib-undefined")
+set(PROJECT_SRC_ROOT ${CMAKE_CURRENT_LIST_DIR}/)
+option(MINDSPORE_PATH "mindspore install path" "")
+include_directories(${MINDSPORE_PATH})
+include_directories(${MINDSPORE_PATH}/include)
+include_directories(${PROJECT_SRC_ROOT})
+find_library(MS_LIB libmindspore.so ${MINDSPORE_PATH}/lib)
+file(GLOB_RECURSE MD_LIB ${MINDSPORE_PATH}/_c_dataengine*)
+
+add_executable(main src/main.cc src/utils.cc)
+target_link_libraries(main ${MS_LIB} ${MD_LIB} gflags)
diff --git a/official/recommend/tbnet/ascend310_infer/build.sh b/official/recommend/tbnet/ascend310_infer/build.sh
new file mode 100644
index 0000000000000000000000000000000000000000..285514e19f2a1878a7bf8f0eed3c99fbc73868c4
--- /dev/null
+++ b/official/recommend/tbnet/ascend310_infer/build.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+if [ -d out ]; then
+    rm -rf out
+fi
+
+mkdir out
+cd out || exit
+
+if [ -f "Makefile" ]; then
+    make clean
+fi
+
+cmake .. \
+    -DMINDSPORE_PATH="`pip3.7 show mindspore-ascend | grep Location | awk '{print $2"/mindspore"}' | xargs realpath`"
+make
diff --git a/official/recommend/tbnet/ascend310_infer/inc/utils.h b/official/recommend/tbnet/ascend310_infer/inc/utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..efebe03a8c1179f5a1f9d5f7ee07e0352a9937c6
--- /dev/null
+++ b/official/recommend/tbnet/ascend310_infer/inc/utils.h
@@ -0,0 +1,32 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef MINDSPORE_INFERENCE_UTILS_H_
+#define MINDSPORE_INFERENCE_UTILS_H_
+
+#include <sys/stat.h>
+#include <dirent.h>
+#include <vector>
+#include <string>
+#include <string_view>
+#include <memory>
+#include "include/api/types.h"
+
+std::vector<std::string> GetAllFiles(std::string_view dirName);
+DIR *OpenDir(std::string_view dirName);
+std::string RealPath(std::string_view path);
+mindspore::MSTensor ReadFileToTensor(const std::string &file);
+int WriteResult(const std::string& imageFile, const std::vector<mindspore::MSTensor> &outputs);
+#endif
diff --git a/official/recommend/tbnet/ascend310_infer/src/main.cc b/official/recommend/tbnet/ascend310_infer/src/main.cc
new file mode 100644
index 0000000000000000000000000000000000000000..93d22c64e9101f8f7f5f9c9bf722ee68ccc5e025
--- /dev/null
+++ b/official/recommend/tbnet/ascend310_infer/src/main.cc
@@ -0,0 +1,156 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <sys/time.h>
+#include <gflags/gflags.h>
+#include <dirent.h>
+#include <iostream>
+#include <map>
+#include <string>
+#include <algorithm>
+#include <iosfwd>
+#include <vector>
+#include <fstream>
+#include <sstream>
+
+#include "include/api/model.h"
+#include "include/api/context.h"
+#include "include/api/types.h"
+#include "include/api/serialization.h"
+#include "include/dataset/execute.h"
+#include "include/dataset/vision.h"
+#include "inc/utils.h"
+
+using mindspore::Context;
+using mindspore::Serialization;
+using mindspore::Model;
+using mindspore::Status;
+using mindspore::MSTensor;
+using mindspore::dataset::Execute;
+using mindspore::ModelType;
+using mindspore::GraphCell;
+using mindspore::kSuccess;
+
+DEFINE_string(mindir_path, "", "mindir path");
+DEFINE_string(input0_path, ".", "input0 path");
+DEFINE_string(input1_path, ".", "input1 path");
+DEFINE_string(input2_path, ".", "input2 path");
+DEFINE_string(input3_path, ".", "input3 path");
+DEFINE_string(input4_path, ".", "input4 path");
+DEFINE_string(input5_path, ".", "input5 path");
+DEFINE_int32(device_id, 0, "device id");
+
+int main(int argc, char **argv) {
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  if (RealPath(FLAGS_mindir_path).empty()) {
+    std::cout << "Invalid mindir" << std::endl;
+    return 1;
+  }
+
+  auto context = std::make_shared<Context>();
+  auto ascend310 = std::make_shared<mindspore::Ascend310DeviceInfo>();
+  ascend310->SetDeviceID(FLAGS_device_id);
+  ascend310->SetPrecisionMode("allow_fp32_to_fp16");
+  ascend310->SetOpSelectImplMode("high_precision");
+  context->MutableDeviceInfo().push_back(ascend310);
+  mindspore::Graph graph;
+  Serialization::Load(FLAGS_mindir_path, ModelType::kMindIR, &graph);
+
+  Model model;
+  Status ret = model.Build(GraphCell(graph), context);
+  if (ret != kSuccess) {
+    std::cout << "ERROR: Build failed." << std::endl;
+    return 1;
+  }
+
+  std::vector<MSTensor> model_inputs = model.GetInputs();
+  if (model_inputs.empty()) {
+    std::cout << "Invalid model, inputs are empty." << std::endl;
+    return 1;
+  }
+
+  auto input0_files = GetAllFiles(FLAGS_input0_path);
+  auto input1_files = GetAllFiles(FLAGS_input1_path);
+  auto input2_files = GetAllFiles(FLAGS_input2_path);
+  auto input3_files = GetAllFiles(FLAGS_input3_path);
+  auto input4_files = GetAllFiles(FLAGS_input4_path);
+  auto input5_files = GetAllFiles(FLAGS_input5_path);
+
+  if (input0_files.empty() || input1_files.empty()) {
+    std::cout << "ERROR: input data empty." << std::endl;
+    return 1;
+  }
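+
+  // The six input directories hold the binaries written by preprocess.py:
+  // 00_item, 01_rl1, 02_ety, 03_rl2, 04_his and 05_rate (see run_infer_310.sh).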
+  std::map<double, double> costTime_map;
+  size_t size = input0_files.size();
+
+  for (size_t i = 0; i < size; ++i) {
+    struct timeval start = {0};
+    struct timeval end = {0};
+    double startTimeMs;
+    double endTimeMs;
+    std::vector<MSTensor> inputs;
+    std::vector<MSTensor> outputs;
+    std::cout << "Start predict input files: " << input0_files[i] << std::endl;
+
+    auto input0 = ReadFileToTensor(input0_files[i]);
+    auto input1 = ReadFileToTensor(input1_files[i]);
+    auto input2 = ReadFileToTensor(input2_files[i]);
+    auto input3 = ReadFileToTensor(input3_files[i]);
+    auto input4 = ReadFileToTensor(input4_files[i]);
+    auto input5 = ReadFileToTensor(input5_files[i]);
+    inputs.emplace_back(model_inputs[0].Name(), model_inputs[0].DataType(), model_inputs[0].Shape(),
+                        input0.Data().get(), input0.DataSize());
+    inputs.emplace_back(model_inputs[1].Name(), model_inputs[1].DataType(), model_inputs[1].Shape(),
+                        input1.Data().get(), input1.DataSize());
+    inputs.emplace_back(model_inputs[2].Name(), model_inputs[2].DataType(), model_inputs[2].Shape(),
+                        input2.Data().get(), input2.DataSize());
+    inputs.emplace_back(model_inputs[3].Name(), model_inputs[3].DataType(), model_inputs[3].Shape(),
+                        input3.Data().get(), input3.DataSize());
+    inputs.emplace_back(model_inputs[4].Name(), model_inputs[4].DataType(), model_inputs[4].Shape(),
+                        input4.Data().get(), input4.DataSize());
+    inputs.emplace_back(model_inputs[5].Name(), model_inputs[5].DataType(), model_inputs[5].Shape(),
+                        input5.Data().get(), input5.DataSize());
+    gettimeofday(&start, nullptr);
+    ret = model.Predict(inputs, &outputs);
+    gettimeofday(&end, nullptr);
+    if (ret != kSuccess) {
+      std::cout << "Predict " << input0_files[i] << " failed." << std::endl;
+      return 1;
+    }
+    startTimeMs = (1.0 * start.tv_sec * 1000000 + start.tv_usec) / 1000;
+    endTimeMs = (1.0 * end.tv_sec * 1000000 + end.tv_usec) / 1000;
+    costTime_map.insert(std::pair<double, double>(startTimeMs, endTimeMs));
+    WriteResult(input0_files[i], outputs);
+  }
+  double average = 0.0;
+  int inferCount = 0;
+
+  for (auto iter = costTime_map.begin(); iter != costTime_map.end(); iter++) {
+    double diff = 0.0;
+    diff = iter->second - iter->first;
+    average += diff;
+    inferCount++;
+  }
+  average = average / inferCount;
+  std::stringstream timeCost;
+  timeCost << "NN inference cost average time: " << average << " ms of infer_count " << inferCount << std::endl;
+  std::cout << "NN inference cost average time: " << average << " ms of infer_count " << inferCount << std::endl;
+  std::string fileName = "./time_Result" + std::string("/test_perform_static.txt");
+  std::ofstream fileStream(fileName.c_str(), std::ios::trunc);
+  fileStream << timeCost.str();
+  fileStream.close();
+  costTime_map.clear();
+  return 0;
+}
diff --git a/official/recommend/tbnet/ascend310_infer/src/utils.cc b/official/recommend/tbnet/ascend310_infer/src/utils.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c947e4d5f451b90bd4728aa3a92c4cfab174f5e6
--- /dev/null
+++ b/official/recommend/tbnet/ascend310_infer/src/utils.cc
@@ -0,0 +1,129 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fstream>
+#include <algorithm>
+#include <iostream>
+#include <climits>
+#include "inc/utils.h"
+
+using mindspore::MSTensor;
+using mindspore::DataType;
+
+std::vector<std::string> GetAllFiles(std::string_view dirName) {
+  struct dirent *filename;
+  DIR *dir = OpenDir(dirName);
+  if (dir == nullptr) {
+    return {};
+  }
+  std::vector<std::string> res;
+  while ((filename = readdir(dir)) != nullptr) {
+    std::string dName = std::string(filename->d_name);
+    if (dName == "." || dName == ".." || filename->d_type != DT_REG) {
+      continue;
+    }
+    res.emplace_back(std::string(dirName) + "/" + filename->d_name);
+  }
+  std::sort(res.begin(), res.end());
+  for (auto &f : res) {
+    std::cout << "input file: " << f << std::endl;
+  }
+  return res;
+}
+
+int WriteResult(const std::string& imageFile, const std::vector<MSTensor> &outputs) {
+  std::string homePath = "./result_Files";
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    size_t outputSize;
+    std::shared_ptr<const void> netOutput;
+    netOutput = outputs[i].Data();
+    outputSize = outputs[i].DataSize();
+    int pos = imageFile.rfind('/');
+    std::string fileName(imageFile, pos + 1);
+    fileName.replace(fileName.find('.'), fileName.size() - fileName.find('.'), '_' + std::to_string(i) + ".bin");
+    std::string outFileName = homePath + "/" + fileName;
+    FILE *outputFile = fopen(outFileName.c_str(), "wb");
+    if (outputFile == nullptr) {
+      std::cout << "Failed to open " << outFileName << " for writing" << std::endl;
+      return 1;
+    }
+    fwrite(netOutput.get(), outputSize, sizeof(char), outputFile);
+    fclose(outputFile);
+    outputFile = nullptr;
+  }
+  return 0;
+}
+
+mindspore::MSTensor ReadFileToTensor(const std::string &file) {
+  if (file.empty()) {
+    std::cout << "Pointer file is nullptr" << std::endl;
+    return mindspore::MSTensor();
+  }
+
+  std::ifstream ifs(file);
+  if (!ifs.good()) {
+    std::cout << "File: " << file << " does not exist" << std::endl;
+    return mindspore::MSTensor();
+  }
+
+  if (!ifs.is_open()) {
+    std::cout << "File: " << file << " open failed" << std::endl;
+    return mindspore::MSTensor();
+  }
+
+  ifs.seekg(0, std::ios::end);
+  size_t size = ifs.tellg();
+  mindspore::MSTensor buffer(file, mindspore::DataType::kNumberTypeUInt8, {static_cast<int64_t>(size)}, nullptr, size);
+
+  ifs.seekg(0, std::ios::beg);
+  ifs.read(reinterpret_cast<char *>(buffer.MutableData()), size);
+  ifs.close();
+
+  return buffer;
+}
+
+
+DIR *OpenDir(std::string_view dirName) {
+  if (dirName.empty()) {
+    std::cout << " dirName is null ! " << std::endl;
+    return nullptr;
+  }
+  std::string realPath = RealPath(dirName);
+  struct stat s;
+  lstat(realPath.c_str(), &s);
+  if (!S_ISDIR(s.st_mode)) {
+    std::cout << "dirName is not a valid directory!" << std::endl;
+    return nullptr;
+  }
+  DIR *dir;
+  dir = opendir(realPath.c_str());
+  if (dir == nullptr) {
+    std::cout << "Can not open dir " << dirName << std::endl;
+    return nullptr;
+  }
+  std::cout << "Successfully opened the dir " << dirName << std::endl;
+  return dir;
+}
+
+std::string RealPath(std::string_view path) {
+  char realPathMem[PATH_MAX] = {0};
+  char *realPathRet = nullptr;
+  realPathRet = realpath(path.data(), realPathMem);
+
+  if (realPathRet == nullptr) {
+    std::cout << "File: " << path << " does not exist.";
+    return "";
+  }
+
+  std::string realPath(realPathMem);
+  std::cout << path << " realpath is: " << realPath << std::endl;
+  return realPath;
+}
diff --git a/official/recommend/tbnet/export.py b/official/recommend/tbnet/export.py
new file mode 100644
index 0000000000000000000000000000000000000000..2a7bbd862e879c9b26123b3592b156ace58930b5
--- /dev/null
+++ b/official/recommend/tbnet/export.py
@@ -0,0 +1,118 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""TB-Net MindIR export."""
+
+import os
+import argparse
+import numpy as np
+
+from mindspore import context, load_checkpoint, load_param_into_net, Tensor, export
+
+from src import tbnet, config
+
+
+def get_args():
+    """Parse commandline arguments."""
+    parser = argparse.ArgumentParser(description='Export TB-Net model.')
+
+    parser.add_argument(
+        '--dataset',
+        type=str,
+        required=False,
+        default='steam',
+        help="'steam' dataset is supported currently"
+    )
+
+    parser.add_argument(
+        '--csv',
+        type=str,
+        required=False,
+        default='test.csv',
test.csv)" + ) + + parser.add_argument( + '--checkpoint_id', + type=int, + required=True, + help="use which checkpoint(.ckpt) file to eval" + ) + + parser.add_argument( + '--device_id', + type=int, + required=False, + default=0, + help="device id" + ) + + parser.add_argument( + '--device_target', + type=str, + required=False, + default='Ascend', + choices=['Ascend'], + help="run code on GPU" + ) + + parser.add_argument( + '--run_mode', + type=str, + required=False, + default='graph', + choices=['graph', 'pynative'], + help="run code by GRAPH mode or PYNATIVE mode" + ) + + return parser.parse_args() + + +def export_tbnet(): + """Data preprocess for inference.""" + args = get_args() + + home = os.path.dirname(os.path.realpath(__file__)) + config_path = os.path.join(home, 'data', args.dataset, 'config.json') + ckpt_path = os.path.join(home, 'checkpoints') + + context.set_context(device_id=args.device_id) + if args.run_mode == 'graph': + context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) + else: + context.set_context(mode=context.PYNATIVE_MODE, device_target=args.device_target) + + net_config = config.TBNetConfig(config_path) + + print(f"creating TBNet from checkpoint {args.checkpoint_id} for evaluation...") + network = tbnet.TBNet(net_config) + param_dict = load_checkpoint(os.path.join(ckpt_path, f'tbnet_epoch{args.checkpoint_id}.ckpt')) + load_param_into_net(network, param_dict) + + loss_net = tbnet.NetWithLossClass(network, net_config) + train_net = tbnet.TrainStepWrap(loss_net, net_config.lr) + train_net.set_train() + eval_net = tbnet.PredictWithSigmoid(network) + + item = Tensor(np.ones((1,)).astype(np.int)) + rl1 = Tensor(np.ones((1, 39)).astype(np.int)) + ety = Tensor(np.ones((1, 39)).astype(np.int)) + rl2 = Tensor(np.ones((1, 39)).astype(np.int)) + his = Tensor(np.ones((1, 39)).astype(np.int)) + rate = Tensor(np.ones((1,)).astype(np.float32)) + inputs = [item, rl1, ety, rl2, his, rate] + export(eval_net, *inputs, file_name='tbnet', file_format='MINDIR') + +if __name__ == '__main__': + export_tbnet() diff --git a/official/recommend/tbnet/postprocess.py b/official/recommend/tbnet/postprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..996743e8e8c2fc00ff42e9b6de7b1f39a7895342 --- /dev/null +++ b/official/recommend/tbnet/postprocess.py @@ -0,0 +1,88 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+
+if __name__ == '__main__':
+    export_tbnet()
diff --git a/official/recommend/tbnet/postprocess.py b/official/recommend/tbnet/postprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..996743e8e8c2fc00ff42e9b6de7b1f39a7895342
--- /dev/null
+++ b/official/recommend/tbnet/postprocess.py
@@ -0,0 +1,88 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""Postprocess TB-Net inference results."""
+import argparse
+import os
+import numpy as np
+
+parser = argparse.ArgumentParser(description='Postprocess of TB-Net Inference')
+parser.add_argument('--result_Path', type=str, default='./result_Files',
+                    help='inference result path')
+parser.add_argument('--label_Path', type=str, default='./preprocess_Result/05_rate',
+                    help='label file path')
+parser.add_argument('--batch_size', default=1, type=int, help='batch_size')
+args = parser.parse_args()
+
+
+def calculate_auc(labels_list, preds_list):
+    """
+    The AUC calculation function.
+
+    Inputs:
+        labels_list: list of true labels
+        preds_list: list of predicted probabilities
+    Outputs:
+        Float, mean AUC over the label columns
+    """
+    auc = []
+    n_bins = labels_list.shape[0] // 2
+    if labels_list.ndim == 1:
+        labels_list = labels_list.reshape(-1, 1)
+        preds_list = preds_list.reshape(-1, 1)
+    for i in range(labels_list.shape[1]):
+        labels = labels_list[:, i]
+        preds = preds_list[:, i]
+        positive_len = labels.sum()
+        negative_len = labels.shape[0] - positive_len
+        total_case = positive_len * negative_len
+        positive_histogram = np.zeros((n_bins))
+        negative_histogram = np.zeros((n_bins))
+        bin_width = 1.0 / n_bins
+
+        # histogram the predictions, split by true label
+        for j, _ in enumerate(labels):
+            nth_bin = int(preds[j] // bin_width)
+            if labels[j]:
+                positive_histogram[nth_bin] = positive_histogram[nth_bin] + 1
+            else:
+                negative_histogram[nth_bin] = negative_histogram[nth_bin] + 1
+
+        # count correctly ranked positive-negative pairs (ties count 0.5)
+        accumulated_negative = 0
+        satisfied_pair = 0
+        for k in range(n_bins):
+            satisfied_pair += (
+                positive_histogram[k] * accumulated_negative +
+                positive_histogram[k] * negative_histogram[k] * 0.5)
+            accumulated_negative += negative_histogram[k]
+        auc.append(satisfied_pair / total_case)
+
+    return np.mean(auc)
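+
+# Example (hypothetical values): for labels [1, 1, 0, 0] and predictions
+# [0.9, 0.8, 0.3, 0.1], every positive sample outranks every negative one,
+# so calculate_auc returns 1.0.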
+
+dirs = os.listdir(args.label_Path)
+print('---------- start cal acc ----------')
+gt_list = []
+pred_list = []
+for file in dirs:
+    label = np.fromfile(os.path.join(args.label_Path, file), dtype=np.float32)
+    gt_list.append(label)
+
+    file_name = file.split('.')[0]
+    idx = file_name.split('_')[-1]
+    predict_file_name = "tbnet_item_bs1_" + str(idx) + "_1.bin"
+    predict_file = os.path.join(args.result_Path, predict_file_name)
+    predict = np.fromfile(predict_file, dtype=np.float32)
+    pred_list.append(predict)
+res_pred = np.concatenate(pred_list, axis=0)
+res_true = np.concatenate(gt_list, axis=0)
+rst_auc = calculate_auc(res_true, res_pred)
+print('auc:', rst_auc)
diff --git a/official/recommend/tbnet/preprocess.py b/official/recommend/tbnet/preprocess.py
new file mode 100644
index 0000000000000000000000000000000000000000..94c24dfe3f25bfde79880b1f011c4cbb936c57d9
--- /dev/null
+++ b/official/recommend/tbnet/preprocess.py
@@ -0,0 +1,138 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""TB-Net inference data preprocessing."""
+
+import os
+import argparse
+import shutil
+import numpy as np
+
+from mindspore import context
+
+from src import config, dataset
+
+
+def get_args():
+    """Parse commandline arguments."""
+    parser = argparse.ArgumentParser(description='Preprocess TBNet inference data.')
+
+    parser.add_argument(
+        '--dataset',
+        type=str,
+        required=False,
+        default='steam',
+        help="'steam' dataset is supported currently"
+    )
+
+    parser.add_argument(
+        '--csv',
+        type=str,
+        required=False,
+        default='test.csv',
+        help="the csv datafile inside the dataset folder (e.g. test.csv)"
+    )
+
+    parser.add_argument(
+        '--device_id',
+        type=int,
+        required=False,
+        default=0,
+        help="device id"
+    )
+
+    parser.add_argument(
+        '--device_target',
+        type=str,
+        required=False,
+        default='Ascend',
+        choices=['Ascend'],
+        help="run code on Ascend"
+    )
+
+    parser.add_argument(
+        '--run_mode',
+        type=str,
+        required=False,
+        default='graph',
+        choices=['graph', 'pynative'],
+        help="run code by GRAPH mode or PYNATIVE mode"
+    )
+
+    return parser.parse_args()
+
+
+def preprocess_tbnet():
+    """Dump each dataset column to per-sample binary files for Ascend310 inference."""
+    args = get_args()
+
+    home = os.path.dirname(os.path.realpath(__file__))
+    config_path = os.path.join(home, 'data', args.dataset, 'config.json')
+    test_csv_path = os.path.join(home, 'data', args.dataset, args.csv)
+
+    context.set_context(device_id=args.device_id)
+    if args.run_mode == 'graph':
+        context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target)
+    else:
+        context.set_context(mode=context.PYNATIVE_MODE, device_target=args.device_target)
+
+    print(f"creating dataset from {test_csv_path}...")
+    net_config = config.TBNetConfig(config_path)
+    eval_ds = dataset.create(test_csv_path, net_config.per_item_num_paths, train=True).batch(1)
+    item_path = os.path.join('./preprocess_Result/', '00_item')
+    rl1_path = os.path.join('./preprocess_Result/', '01_rl1')
+    ety_path = os.path.join('./preprocess_Result/', '02_ety')
+    rl2_path = os.path.join('./preprocess_Result/', '03_rl2')
+    his_path = os.path.join('./preprocess_Result/', '04_his')
+    rate_path = os.path.join('./preprocess_Result/', '05_rate')
+    rst_path = [item_path, rl1_path, ety_path, rl2_path, his_path, rate_path]
+    if os.path.isdir('./preprocess_Result/'):
+        shutil.rmtree('./preprocess_Result/')
+    for paths in rst_path:
+        os.makedirs(paths)
+
+    idx = 0
+    for d in eval_ds.create_dict_iterator():
+        item_rst = d['item'].asnumpy().astype(np.int64)
+        rl1_rst = np.expand_dims(d['relation1'].asnumpy().astype(np.int64), axis=0)
+        ety_rst = np.expand_dims(d['entity'].asnumpy().astype(np.int64), axis=0)
+        rl2_rst = np.expand_dims(d['relation2'].asnumpy().astype(np.int64), axis=0)
+        his_rst = np.expand_dims(d['hist_item'].asnumpy().astype(np.int64), axis=0)
+        rate_rst = d['rating'].asnumpy().astype(np.float32)
+
+        item_name = 'tbnet_item_bs1_' + str(idx) + '.bin'
+        rl1_name = 'tbnet_rl1_bs1_' + str(idx) + '.bin'
+        ety_name = 'tbnet_ety_bs1_' + str(idx) + '.bin'
+        rl2_name = 'tbnet_rl2_bs1_' + str(idx) + '.bin'
+        his_name = 'tbnet_his_bs1_' + str(idx) + '.bin'
+        rate_name = 'tbnet_rate_bs1_' + str(idx) + '.bin'
+
+        item_real_path = os.path.join(item_path, item_name)
+        rl1_real_path = os.path.join(rl1_path, rl1_name)
+        ety_real_path = os.path.join(ety_path, ety_name)
+        rl2_real_path = os.path.join(rl2_path, rl2_name)
+        his_real_path = os.path.join(his_path, his_name)
+        rate_real_path = os.path.join(rate_path, rate_name)
+
+        item_rst.tofile(item_real_path)
+        rl1_rst.tofile(rl1_real_path)
+        ety_rst.tofile(ety_real_path)
+        rl2_rst.tofile(rl2_real_path)
+        his_rst.tofile(his_real_path)
+        rate_rst.tofile(rate_real_path)
+
+        idx += 1
+
+
+if __name__ == '__main__':
+    preprocess_tbnet()
diff --git a/official/recommend/tbnet/scripts/run_infer_310.sh b/official/recommend/tbnet/scripts/run_infer_310.sh
new file mode 100644
index 0000000000000000000000000000000000000000..c1ad959151d30db42a29377db9ca1dd893e436f9
--- /dev/null
+++ b/official/recommend/tbnet/scripts/run_infer_310.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [[ $# -lt 2 || $# -gt 3 ]]; then
+    echo "Usage: bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [DEVICE_ID]
+    DEVICE_ID is optional, default value is 0"
+exit 1
+fi
+
+get_real_path(){
+    if [ "${1:0:1}" == "/" ]; then
+        echo "$1"
+    else
+        echo "$(realpath -m $PWD/$1)"
+    fi
+}
+
+model=$(get_real_path $1)
+data_path=$(get_real_path $2)
+
+device_id=0
+
+if [ $# == 3 ]; then
+    device_id=$3
+fi
+
+echo $model
+echo $data_path
+echo $device_id
+
+export ASCEND_HOME=/usr/local/Ascend/
+if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then
+    export PATH=$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/atc/bin:$PATH
+    export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/ascend-toolkit/latest/atc/lib64:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
+    export TBE_IMPL_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe
+    export PYTHONPATH=${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH
+    export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp
+else
+    export PATH=$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH
+    export LD_LIBRARY_PATH=/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
+    export PYTHONPATH=$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH
+    export ASCEND_OPP_PATH=$ASCEND_HOME/opp
+fi
+
+function preprocess_data()
+{
+    if [ -d preprocess_Result ]; then
+        rm -rf ./preprocess_Result
+    fi
+    mkdir preprocess_Result
+    python ../preprocess.py --csv=$data_path
+}
+
+function compile_app()
+{
+    cd ../ascend310_infer || exit
+    if [ -f "Makefile" ]; then
+        make clean
+    fi
+    bash build.sh &> build.log
+
+    if [ $? -ne 0 ]; then
+        echo "compile app code failed"
+        exit 1
+    fi
+    cd - || exit
+}
+
+function infer()
+{
+    if [ -d result_Files ]; then
+        rm -rf ./result_Files
+    fi
+    if [ -d time_Result ]; then
+        rm -rf ./time_Result
+    fi
+    mkdir result_Files
+    mkdir time_Result
+    ../ascend310_infer/out/main --mindir_path=$model --input0_path=./preprocess_Result/00_item/ --input1_path=./preprocess_Result/01_rl1/ \
+                                --input2_path=./preprocess_Result/02_ety/ --input3_path=./preprocess_Result/03_rl2/ --input4_path=./preprocess_Result/04_his/ \
+                                --input5_path=./preprocess_Result/05_rate/ --device_id=$device_id &> infer.log
+
+    if [ $? -ne 0 ]; then
+        echo "execute inference failed"
+        exit 1
+    fi
+}
+
+function cal_acc()
+{
+    python ../postprocess.py --result_Path=./result_Files \
+                             --label_Path=./preprocess_Result/05_rate &> acc.log
+    if [ $? -ne 0 ]; then
+        echo "calculate accuracy failed"
+        exit 1
+    fi
+}
+
+preprocess_data
+compile_app
+infer
+cal_acc
\ No newline at end of file