diff --git a/official/nlp/dgu/Dockerfile b/official/nlp/dgu/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..a48571932465017fa78b87c57d378c7c267e589a --- /dev/null +++ b/official/nlp/dgu/Dockerfile @@ -0,0 +1,25 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +ARG FROM_IMAGE_NAME +FROM ${FROM_IMAGE_NAME} + +RUN ln -s /usr/local/python3.7.5/bin/python3.7 /usr/bin/python + +RUN apt-get update && \ + apt-get install libglib2.0-dev -y || \ + rm -rf /var/lib/dpkg/info && \ + mkdir /var/lib/dpkg/info && \ + apt-get install libglib2.0-dev -y && \ + pip install pytest-runner==5.3.0 diff --git a/official/nlp/dgu/docker_start.sh b/official/nlp/dgu/docker_start.sh new file mode 100644 index 0000000000000000000000000000000000000000..cad53236c29fdf4651c3d915bafe84910b5ff858 --- /dev/null +++ b/official/nlp/dgu/docker_start.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +docker_image=$1 +data_dir=$2 +model_dir=$3 + +docker run -it --ipc=host \ + --device=/dev/davinci0 \ + --device=/dev/davinci1 \ + --device=/dev/davinci2 \ + --device=/dev/davinci3 \ + --device=/dev/davinci4 \ + --device=/dev/davinci5 \ + --device=/dev/davinci6 \ + --device=/dev/davinci7 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ + -v ${model_dir}:${model_dir} \ + -v ${data_dir}:${data_dir} \ + -v /root/ascend/log:/root/ascend/log ${docker_image} \ + /bin/bash diff --git a/official/nlp/dgu/infer/convert/convert.sh b/official/nlp/dgu/infer/convert/convert.sh new file mode 100644 index 0000000000000000000000000000000000000000..9d2afbfe71305b727d0f84daba292a919da30b69 --- /dev/null +++ b/official/nlp/dgu/infer/convert/convert.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
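Note on `docker_start.sh` above: the script is purely positional (`image data_dir model_dir`) and expands into a single `docker run` call. The sketch below restates that call in Python, purely as an illustration under the assumption that the docker CLI is on `PATH`; the repository itself only ships the shell script.

```python
import subprocess

def docker_start(docker_image: str, data_dir: str, model_dir: str) -> None:
    """Rebuild the docker run command issued by docker_start.sh."""
    devices = [f"--device=/dev/davinci{i}" for i in range(8)]
    devices += ["--device=/dev/davinci_manager",
                "--device=/dev/devmm_svm", "--device=/dev/hisi_hdc"]
    mounts = []
    for path in ["/usr/local/Ascend/driver", "/usr/local/Ascend/add-ons/",
                 model_dir, data_dir, "/root/ascend/log"]:
        mounts += ["-v", f"{path}:{path}"]  # every mount maps a path onto itself
    subprocess.run(["docker", "run", "-it", "--ipc=host",
                    *devices, *mounts, docker_image, "/bin/bash"], check=True)
```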
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +air_path=$1 +om_path=$2 + +echo "Input AIR file path: ${air_path}" +echo "Output OM file path: ${om_path}" + +atc --framework=1 --model="${air_path}" \ + --output="${om_path}" \ + --soc_version=Ascend310 \ + --op_select_implmode="high_precision" \ No newline at end of file diff --git a/official/nlp/dgu/infer/data/config/dgu_atis_intent.pipeline b/official/nlp/dgu/infer/data/config/dgu_atis_intent.pipeline new file mode 100644 index 0000000000000000000000000000000000000000..2bf0cd5171b2de94c97f6408620f00aa9667003b --- /dev/null +++ b/official/nlp/dgu/infer/data/config/dgu_atis_intent.pipeline @@ -0,0 +1,46 @@ +{ + "im_dgu": { + "stream_config": { + "deviceId": "0" + }, + "appsrc0": { + "props": { + "blocksize": "409600" + }, + "factory": "appsrc", + "next": "mxpi_tensorinfer0:0" + }, + "appsrc1": { + "props": { + "blocksize": "409600" + }, + "factory": "appsrc", + "next": "mxpi_tensorinfer0:1" + }, + "appsrc2": { + "props": { + "blocksize": "409600" + }, + "factory": "appsrc", + "next": "mxpi_tensorinfer0:2" + }, + "mxpi_tensorinfer0": { + "props": { + "dataSource": "appsrc0,appsrc1,appsrc2", + "modelPath": "../data/model/atis_intent.om" + }, + "factory": "mxpi_tensorinfer", + "next": "mxpi_dataserialize0" + }, + "mxpi_dataserialize0": { + "props": { + "outputDataKeys": "mxpi_tensorinfer0" + }, + "factory": "mxpi_dataserialize", + "next": "appsink0" + }, + "appsink0": { + "factory": "appsink" + } + } +} \ No newline at end of file diff --git a/official/nlp/dgu/infer/docker_start_infer.sh b/official/nlp/dgu/infer/docker_start_infer.sh new file mode 100644 index 0000000000000000000000000000000000000000..f501a561c22748e5077f399e3102825bdb44ae78 --- /dev/null +++ b/official/nlp/dgu/infer/docker_start_infer.sh @@ -0,0 +1,42 @@ +#!/bin/bash + +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +docker_image=$1 +share_dir=$2 +echo "$1" +echo "$2" +if [ -z "${docker_image}" ]; then + echo "please input docker_image" + exit 1 +fi + +if [ ! 
-d "${share_dir}" ]; then + echo "please input share directory that contains dataset, models and codes" + exit 1 +fi + + +docker run -it \ + --device=/dev/davinci0 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm \ + --device=/dev/hisi_hdc \ + --privileged \ + -v //usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v ${share_dir}:${share_dir} \ + ${docker_image} \ + /bin/bash diff --git a/official/nlp/dgu/infer/mxbase/CMakeLists.txt b/official/nlp/dgu/infer/mxbase/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..df968e77f2d4d7da6ddef53830eb89f0eba60ba9 --- /dev/null +++ b/official/nlp/dgu/infer/mxbase/CMakeLists.txt @@ -0,0 +1,51 @@ +cmake_minimum_required(VERSION 3.10.0) +project(dgu) + +set(TARGET dgu) + +add_definitions(-DENABLE_DVPP_INTERFACE) +add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) +add_definitions(-Dgoogle=mindxsdk_private) +add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) +add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) + +# Check environment variable +if(NOT DEFINED ENV{ASCEND_HOME}) + message(FATAL_ERROR "please define environment variable:ASCEND_HOME") +endif() +if(NOT DEFINED ENV{ASCEND_VERSION}) + message(WARNING "please define environment variable:ASCEND_VERSION") +endif() +if(NOT DEFINED ENV{ARCH_PATTERN}) + message(WARNING "please define environment variable:ARCH_PATTERN") +endif() +set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) +set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) + +set(MXBASE_ROOT_DIR $ENV{MX_SDK_HOME}) +set(MXBASE_INC ${MXBASE_ROOT_DIR}/include) +set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/lib) +set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/lib/modelpostprocessors) +set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/include/MxBase/postprocess/include) +if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) + set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) +else() + set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource) +endif() + +include_directories(${ACL_INC_DIR}) +include_directories(${OPENSOURCE_DIR}/include) +include_directories(${OPENSOURCE_DIR}/include/opencv4) + +include_directories(${MXBASE_INC}) +include_directories(${MXBASE_POST_PROCESS_DIR}) + +link_directories(${ACL_LIB_DIR}) +link_directories(${OPENSOURCE_DIR}/lib) +link_directories(${MXBASE_LIB_DIR}) +link_directories(${MXBASE_POST_LIB_DIR}) + +add_executable(${TARGET} src/main.cpp src/DGU.cpp) +target_link_libraries(${TARGET} glog cpprest mxbase opencv_world stdc++fs) + +install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/) diff --git a/official/nlp/dgu/infer/mxbase/build.sh b/official/nlp/dgu/infer/mxbase/build.sh new file mode 100644 index 0000000000000000000000000000000000000000..3eace18ad0b6dcadd833a2b28eb430bfe5212d67 --- /dev/null +++ b/official/nlp/dgu/infer/mxbase/build.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +path_cur=$(dirname $0) + +function check_env() +{ + # set ASCEND_VERSION to ascend-toolkit/latest when it was not specified by user + if [ ! "${ASCEND_VERSION}" ]; then + export ASCEND_VERSION=ascend-toolkit/latest + echo "Set ASCEND_VERSION to the default value: ${ASCEND_VERSION}" + else + echo "ASCEND_VERSION is set to ${ASCEND_VERSION} by user" + fi + + if [ ! "${ARCH_PATTERN}" ]; then + # set ARCH_PATTERN to ./ when it was not specified by user + export ARCH_PATTERN=./ + echo "ARCH_PATTERN is set to the default value: ${ARCH_PATTERN}" + else + echo "ARCH_PATTERN is set to ${ARCH_PATTERN} by user" + fi +} + +function build_dgu() +{ + cd $path_cur + rm -rf build + mkdir -p build + cd build + cmake .. + make + ret=$? + if [ ${ret} -ne 0 ]; then + echo "Failed to build dgu." + exit ${ret} + fi + make install +} + +check_env +build_dgu \ No newline at end of file diff --git a/official/nlp/dgu/infer/mxbase/src/DGU.cpp b/official/nlp/dgu/infer/mxbase/src/DGU.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fb5c902e6c1f19be7995ddc642668a7dce996bce --- /dev/null +++ b/official/nlp/dgu/infer/mxbase/src/DGU.cpp @@ -0,0 +1,270 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
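`check_env` above supplies the same defaults (`ascend-toolkit/latest`, `./`) that `CMakeLists.txt` combines with `ASCEND_HOME` to locate `acllib`. For a quick sanity check outside CMake, the resolved paths can be reproduced in a few lines of Python, a sketch under the same environment-variable assumptions:

```python
import os

ascend_home = os.environ["ASCEND_HOME"]  # CMakeLists.txt aborts when this is unset
ascend_version = os.environ.get("ASCEND_VERSION", "ascend-toolkit/latest")
arch_pattern = os.environ.get("ARCH_PATTERN", "./")  # defaults mirror check_env

acl_inc_dir = os.path.join(ascend_home, ascend_version, arch_pattern, "acllib", "include")
acl_lib_dir = os.path.join(ascend_home, ascend_version, arch_pattern, "acllib", "lib64")
print(acl_inc_dir)
print(acl_lib_dir)
```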
+ */ + +#include "DGU.h" +#include <unistd.h> +#include <sys/stat.h> +#include <map> +#include <fstream> +#include "MxBase/DeviceManager/DeviceManager.h" +#include "MxBase/Log/Log.h" + +const uint32_t MAX_LENGTH = 128; + +APP_ERROR DGUBase::LoadLabels(const std::string &labelPath) { + std::ifstream infile; + // open label file + infile.open(labelPath, std::ios_base::in); + std::string s; + int id; + // check label file validity + if (infile.fail()) { + LogError << "Failed to open label file: " << labelPath << "."; + return APP_ERR_COMM_OPEN_FAIL; + } + labelVec_.clear(); + // construct label vector + while (!infile.eof()) { + infile >> s >> id; + labelVec_[id] = s; + } + infile.close(); + return APP_ERR_OK; +} + +APP_ERROR DGUBase::Init(const InitParam &initParam) { + deviceId_ = initParam.deviceId; + APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); + if (ret != APP_ERR_OK) { + LogError << "Init devices failed, ret=" << ret << "."; + return ret; + } + ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); + if (ret != APP_ERR_OK) { + LogError << "Set context failed, ret=" << ret << "."; + return ret; + } + model_ = std::make_shared<MxBase::ModelInferenceProcessor>(); + ret = model_->Init(initParam.modelPath, modelDesc_); + if (ret != APP_ERR_OK) { + LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; + return ret; + } + labelVec_.resize(initParam.classNum); + // load labels from file + ret = LoadLabels(initParam.labelPath); + if (ret != APP_ERR_OK) { + LogError << "Failed to load labels, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR DGUBase::DeInit() { + model_->DeInit(); + MxBase::DeviceManager::GetInstance()->DestroyDevices(); + return APP_ERR_OK; +} + +APP_ERROR DGUBase::ReadTensorFromFile(const std::string &file, uint32_t *data) { + if (data == NULL) { + LogError << "input data is invalid."; + return APP_ERR_COMM_INVALID_POINTER; + } + std::ifstream infile; + // open label file + infile.open(file, std::ios_base::in | std::ios_base::binary); + // check label file validity + if (infile.fail()) { + LogError << "Failed to open label file: " << file << "."; + return APP_ERR_COMM_OPEN_FAIL; + } + infile.read(reinterpret_cast<char*>(data), sizeof(uint32_t) * MAX_LENGTH); + infile.close(); + return APP_ERR_OK; +} + +APP_ERROR DGUBase::ReadInputTensor(const std::string &fileName, uint32_t index, + std::vector<MxBase::TensorBase> *inputs) { + uint32_t data[MAX_LENGTH] = {0}; + APP_ERROR ret = ReadTensorFromFile(fileName, data); + if (ret != APP_ERR_OK) { + LogError << "ReadTensorFromFile failed."; + return ret; + } + const uint32_t dataSize = modelDesc_.inputTensors[index].tensorSize; + MxBase::MemoryData memoryDataDst(dataSize, MxBase::MemoryData::MEMORY_DEVICE, deviceId_); + MxBase::MemoryData memoryDataSrc(reinterpret_cast<void*>(data), dataSize, MxBase::MemoryData::MEMORY_HOST_MALLOC); + ret = MxBase::MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Memory malloc and copy failed."; + return ret; + } + std::vector<uint32_t> shape = {1, MAX_LENGTH}; + inputs->push_back(MxBase::TensorBase(memoryDataDst, false, shape, MxBase::TENSOR_DTYPE_UINT32)); + return APP_ERR_OK; +} + +APP_ERROR DGUBase::Inference(const std::vector<MxBase::TensorBase> &inputs, + std::vector<MxBase::TensorBase> *outputs) { + auto dtypes = model_->GetOutputDataType(); + for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { + std::vector<uint32_t> shape = 
{}; + for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { + shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); + } + MxBase::TensorBase tensor(shape, dtypes[i], MxBase::MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); + APP_ERROR ret = MxBase::TensorBase::TensorBaseMalloc(tensor); + if (ret != APP_ERR_OK) { + LogError << "TensorBaseMalloc failed, ret=" << ret << "."; + return ret; + } + outputs->push_back(tensor); + } + + MxBase::DynamicInfo dynamicInfo = {}; + dynamicInfo.dynamicType = MxBase::DynamicType::STATIC_BATCH; + auto startTime = std::chrono::high_resolution_clock::now(); + APP_ERROR ret = model_->ModelInference(inputs, *outputs, dynamicInfo); + auto endTime = std::chrono::high_resolution_clock::now(); + double costMs = std::chrono::duration<double, std::milli>(endTime - startTime).count(); + g_inferCost.push_back(costMs); + if (ret != APP_ERR_OK) { + LogError << "ModelInference failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR DGUBase::PostProcess(std::vector<MxBase::TensorBase> *outputs, std::vector<uint32_t> *argmax) { + MxBase::TensorBase &tensor = outputs->at(0); + APP_ERROR ret = tensor.ToHost(); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Tensor deploy to host failed."; + return ret; + } + // check tensor is available + auto outputShape = tensor.GetShape(); + uint32_t length = outputShape[0]; + uint32_t classNum = outputShape[1]; + LogInfo << "output shape is: " << outputShape[1] << std::endl; + + void* data = tensor.GetBuffer(); + for (uint32_t i = 0; i < length; i++) { + std::vector<float> result = {}; + for (uint32_t j = 0; j < classNum; j++) { + float value = *(reinterpret_cast<float*>(data) + i * classNum + j); + result.push_back(value); + } + // argmax and get the class id + std::vector<float>::iterator maxElement = std::max_element(std::begin(result), std::end(result)); + uint32_t argmaxIndex = maxElement - std::begin(result); + argmax->push_back(argmaxIndex); + } + return APP_ERR_OK; +} + +APP_ERROR DGUBase::CountPredictResult(const std::string &labelFile, const std::vector<uint32_t> &argmax) { + uint32_t data[1] = {0}; + APP_ERROR ret = ReadTensorFromFile(labelFile, data); + if (ret != APP_ERR_OK) { + LogError << "ReadTensorFromFile failed."; + return ret; + } + if (data[0] == argmax[0]) { + g_total_acc += 1; + } + g_total += 1; + return APP_ERR_OK; +} + +APP_ERROR DGUBase::WriteResult(const std::string &fileName, const std::vector<uint32_t> &argmax) { + std::string resultPathName = "result"; + // create result directory when it does not exit + if (access(resultPathName.c_str(), 0) != 0) { + int ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR); + if (ret != 0) { + LogError << "Failed to create result directory: " << resultPathName << ", ret = " << ret; + return APP_ERR_COMM_OPEN_FAIL; + } + } + // create result file under result directory + resultPathName = resultPathName + "/result.txt"; + std::ofstream tfile(resultPathName, std::ofstream::app); + if (tfile.fail()) { + LogError << "Failed to open result file: " << resultPathName; + return APP_ERR_COMM_OPEN_FAIL; + } + // write inference result into file + LogInfo << "=============================================================="; + LogInfo << "infer result of " << fileName << " is: "; + tfile << "file name is: " << fileName << std::endl; + for (auto &item : argmax) { + LogInfo << labelVec_[item]; + tfile << labelVec_[item] << std::endl; + } + LogInfo << 
"=============================================================="; + tfile.close(); + return APP_ERR_OK; +} + +APP_ERROR DGUBase::Process(const std::string &inferPath, const std::string &fileName, bool eval) { + std::vector<MxBase::TensorBase> inputs = {}; + std::string inputIdsFile = inferPath + "00_data/" + fileName; + APP_ERROR ret = ReadInputTensor(inputIdsFile, INPUT_IDS, &inputs); + if (ret != APP_ERR_OK) { + LogError << "Read input ids failed, ret=" << ret << "."; + return ret; + } + std::string inputMaskFile = inferPath + "01_data/" + fileName; + ret = ReadInputTensor(inputMaskFile, INPUT_MASK, &inputs); + if (ret != APP_ERR_OK) { + LogError << "Read input mask file failed, ret=" << ret << "."; + return ret; + } + std::string tokenTypeIdFile = inferPath + "02_data/" + fileName; + ret = ReadInputTensor(tokenTypeIdFile, TOKEN_TYPE, &inputs); + if (ret != APP_ERR_OK) { + LogError << "Read token typeId file failed, ret=" << ret << "."; + return ret; + } + std::vector<MxBase::TensorBase> outputs = {}; + ret = Inference(inputs, &outputs); + if (ret != APP_ERR_OK) { + LogError << "Inference failed, ret=" << ret << "."; + return ret; + } + std::vector<uint32_t> argmax; + ret = PostProcess(&outputs, &argmax); + if (ret != APP_ERR_OK) { + LogError << "PostProcess failed, ret=" << ret << "."; + return ret; + } + ret = WriteResult(fileName, argmax); + if (ret != APP_ERR_OK) { + LogError << "save result failed, ret=" << ret << "."; + return ret; + } + if (eval) { + std::string labelFile = inferPath + "03_data/" + fileName; + ret = CountPredictResult(labelFile, argmax); + if (ret != APP_ERR_OK) { + LogError << "Calc Acc read label failed, ret=" << ret << "."; + return ret; + } + } + return APP_ERR_OK; +} diff --git a/official/nlp/dgu/infer/mxbase/src/DGU.h b/official/nlp/dgu/infer/mxbase/src/DGU.h new file mode 100644 index 0000000000000000000000000000000000000000..21dadcb17c864727501ecdcc1baf5b0c1dcc3e83 --- /dev/null +++ b/official/nlp/dgu/infer/mxbase/src/DGU.h @@ -0,0 +1,67 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MXBASE_BERTBASE_H +#define MXBASE_BERTBASE_H + +#include <memory> +#include <utility> +#include <vector> +#include <string> +#include <map> +#include <opencv2/opencv.hpp> +#include "MxBase/DvppWrapper/DvppWrapper.h" +#include "MxBase/ModelInfer/ModelInferenceProcessor.h" +#include "MxBase/Tensor/TensorContext/TensorContext.h" + +extern std::vector<double> g_inferCost; +extern uint32_t g_total; +extern uint32_t g_total_acc; + +struct InitParam { + uint32_t deviceId; + std::string labelPath; + std::string modelPath; + uint32_t classNum; +}; + +enum DataIndex { + INPUT_IDS = 0, + INPUT_MASK = 1, + TOKEN_TYPE = 2, +}; + +class DGUBase { + public: + APP_ERROR Init(const InitParam &initParam); + APP_ERROR DeInit(); + APP_ERROR Inference(const std::vector<MxBase::TensorBase> &inputs, std::vector<MxBase::TensorBase> *outputs); + APP_ERROR Process(const std::string &inferPath, const std::string &fileName, bool eval); + APP_ERROR PostProcess(std::vector<MxBase::TensorBase> *outputs, std::vector<uint32_t> *argmax); + protected: + APP_ERROR ReadTensorFromFile(const std::string &file, uint32_t *data); + APP_ERROR ReadInputTensor(const std::string &fileName, uint32_t index, std::vector<MxBase::TensorBase> *inputs); + APP_ERROR LoadLabels(const std::string &labelPath); + APP_ERROR ReadInputTensor(const std::string &fileName, const std::vector<uint32_t> &argmax); + APP_ERROR WriteResult(const std::string &fileName, const std::vector<uint32_t> &argmax); + APP_ERROR CountPredictResult(const std::string &labelFile, const std::vector<uint32_t> &argmax); + private: + std::shared_ptr<MxBase::ModelInferenceProcessor> model_; + MxBase::ModelDesc modelDesc_ = {}; + std::vector<std::string> labelVec_ = {}; + uint32_t deviceId_ = 0; +}; +#endif diff --git a/official/nlp/dgu/infer/mxbase/src/main.cpp b/official/nlp/dgu/infer/mxbase/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..372a24946dc41fb52c3e579cb4e3edab4028a593 --- /dev/null +++ b/official/nlp/dgu/infer/mxbase/src/main.cpp @@ -0,0 +1,122 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
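`InitParam.labelPath` above points at the `map_tag_*_id.txt` files that `LoadLabels` parses as whitespace-separated `label id` pairs (the Python SDK's `read_label_file` later in this diff reads the same files tab-separated). A minimal reader under that format assumption:

```python
def read_label_map(label_path: str) -> dict:
    """Parse 'label<ws>id' lines into {id: label}, mirroring DGUBase::LoadLabels."""
    label_map = {}
    with open(label_path, encoding="utf-8") as f:
        for line in f:
            if line.strip():
                label, index = line.split()  # tolerates tab or space separation
                label_map[int(index)] = label
    return label_map

# e.g. read_label_map("../data/config/map_tag_intent_id.txt")
```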
+ */ + +#include <unistd.h> +#include <dirent.h> +#include <iostream> +#include <fstream> +#include <vector> +#include "DGU.h" +#include "MxBase/Log/Log.h" + +std::vector<double> g_inferCost; +uint32_t g_total = 0; +uint32_t g_total_acc = 0; + +void InitDGUParam(InitParam* initParam, const std::string &taskName) { + initParam->deviceId = 0; + if (taskName == "atis_intent") { + initParam->labelPath = "../data/config/map_tag_intent_id.txt"; + initParam->modelPath = "../data/model/atis_intent.om"; + initParam->classNum = 26; + } else if (taskName == "mrda") { + initParam->labelPath = "../data/config/map_tag_mrda_id.txt"; + initParam->modelPath = "../data/model/mrda.om"; + initParam->classNum = 5; + } else if (taskName == "swda") { + initParam->labelPath = "../data/config/map_tag_swda_id.txt"; + initParam->modelPath = "../data/model/swda.om"; + initParam->classNum = 42; + } +} + +APP_ERROR ReadFilesFromPath(const std::string &path, std::vector<std::string> *files) { + DIR *dir = NULL; + struct dirent *ptr = NULL; + + if ((dir=opendir(path.c_str())) == NULL) { + LogError << "Open dir error: " << path; + return APP_ERR_COMM_OPEN_FAIL; + } + + while ((ptr=readdir(dir)) != NULL) { + // d_type == 8 is file + int file_d_type = 8; + if (ptr->d_type == file_d_type) { + files->push_back(ptr->d_name); + } + } + closedir(dir); + // sort ascending order + sort(files->begin(), files->end()); + return APP_ERR_OK; +} + +int main(int argc, char* argv[]) { + if (argc <= 1) { + LogWarn << "Please input image path, such as './dgu /input/data/atis_intent 0 atis_intent'."; + return APP_ERR_OK; + } + + InitParam initParam; + std::string taskName = argv[3]; + InitDGUParam(&initParam, taskName); + auto dguBase = std::make_shared<DGUBase>(); + APP_ERROR ret = dguBase->Init(initParam); + if (ret != APP_ERR_OK) { + LogError << "dgu init failed, ret=" << ret << "."; + return ret; + } + + std::string inferPath = argv[1]; + std::vector<std::string> files; + ret = ReadFilesFromPath(inferPath + "00_data", &files); + if (ret != APP_ERR_OK) { + LogError << "Read files from path failed, ret=" << ret << "."; + return ret; + } + // do eval and calc the f1 score + bool eval = atoi(argv[2]); + for (uint32_t i = 0; i < files.size(); i++) { + LogInfo << "read file name: " << files[i]; + ret = dguBase->Process(inferPath, files[i], eval); + if (ret != APP_ERR_OK) { + LogError << "dguBase process failed, ret=" << ret << "."; + dguBase->DeInit(); + return ret; + } + } + + if (eval) { + LogInfo << "=============================================================="; + if (g_total == 0) { + LogInfo << "Infer total is 0."; + } else { + float acc = (g_total_acc * 1.0) / (g_total * 1.0); + LogInfo << "Acc: " << acc; + } + LogInfo << "=============================================================="; + } + dguBase->DeInit(); + double costSum = 0; + for (uint32_t i = 0; i < g_inferCost.size(); i++) { + costSum += g_inferCost[i]; + } + double scale = 1000.0; + LogInfo << "Infer images sum " << g_inferCost.size() << ", cost total time: " << costSum << " ms."; + LogInfo << "The throughput: " << g_inferCost.size() * scale / costSum << " bin/sec."; + return APP_ERR_OK; +} diff --git a/official/nlp/dgu/infer/sdk/build.sh b/official/nlp/dgu/infer/sdk/build.sh new file mode 100644 index 0000000000000000000000000000000000000000..0b067fb2d570b7f773488322381a55efb1b896a4 --- /dev/null +++ b/official/nlp/dgu/infer/sdk/build.sh @@ -0,0 +1,68 @@ +#!/bin/bash + +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +set -e + +if [ $# -ne 2 ] +then + echo "==============================================================================================================" + echo "Please run the script as: " + echo "bash build.sh [TASK_TYPE] [MODE]" + echo "for example: bash build.sh atis_intent test" + echo "TASK_TYPE including [atis_intent, mrda, swda]" + echo "MODE including [test, infer]" + echo "==============================================================================================================" +exit 1 +fi + +TASK_TYPE=$1 +MODE=$2 + +case $TASK_TYPE in + "atis_intent") + LABEL_FILE="map_tag_intent_id.txt" + ;; + "mrda") + LABEL_FILE="map_tag_mrda_id.txt" + ;; + "swda") + LABEL_FILE="map_tag_swda_id.txt" + ;; + esac + +case $MODE in + "test") + DATA_DIR="input" + EVAL="true" + ;; + "infer") + DATA_DIR="infer" + EVAL="false" + ;; + esac + +# Simple log helper functions +info() { echo -e "\033[1;34m[INFO ][MxStream] $1\033[1;37m" ; } +warn() { echo >&2 -e "\033[1;31m[WARN ][MxStream] $1\033[1;37m" ; } + +export LD_LIBRARY_PATH=${MX_SDK_HOME}/lib:${MX_SDK_HOME}/opensource/lib:${MX_SDK_HOME}/opensource/lib64:/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64:${LD_LIBRARY_PATH} +export GST_PLUGIN_SCANNER=${MX_SDK_HOME}/opensource/libexec/gstreamer-1.0/gst-plugin-scanner +export GST_PLUGIN_PATH=${MX_SDK_HOME}/opensource/lib/gstreamer-1.0:${MX_SDK_HOME}/lib/plugins + +#to set PYTHONPATH, import the StreamManagerApi.py +export PYTHONPATH=$PYTHONPATH:${MX_SDK_HOME}/python + +python3.7 main.py --pipeline=../data/config/dgu_${TASK_TYPE}.pipeline --data_dir=../data/${DATA_DIR}/${TASK_TYPE} --label_file=../data/config/${LABEL_FILE} --output_file=./${TASK_TYPE}_output.txt --do_eval=${EVAL} --task_name=${TASK_TYPE} +exit 0 diff --git a/official/nlp/dgu/infer/sdk/main.py b/official/nlp/dgu/infer/sdk/main.py new file mode 100644 index 0000000000000000000000000000000000000000..36e159b678eed1ff3e4c00a7f4accc3ac6f8516c --- /dev/null +++ b/official/nlp/dgu/infer/sdk/main.py @@ -0,0 +1,248 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +""" +sample script of dgu infer using SDK run in docker +""" + +import argparse +import glob +import os +import time + +import MxpiDataType_pb2 as MxpiDataType +import numpy as np +from StreamManagerApi import StreamManagerApi, MxDataInput, InProtobufVector, \ + MxProtobufIn, StringVector + + +def softmax(z): + """ + softmax function + """ + assert len(z.shape) == 2 + s = np.max(z, axis=1) + s = s[:, np.newaxis] + e_x = np.exp(z - s) + div = np.sum(e_x, axis=1) + div = div[:, np.newaxis] + return e_x / div + + +class Accuracy(): + """ + calculate accuracy + """ + def __init__(self): + self.acc_num = 0 + self.total_num = 0 + def update(self, logits, labels): + labels = np.reshape(labels, -1) + self.acc_num += np.sum(labels == logits) + self.total_num += len(labels) + + +def parse_args(): + """set and check parameters.""" + parser = argparse.ArgumentParser(description="dgu process") + parser.add_argument("--pipeline", type=str, default="", help="SDK infer pipeline") + parser.add_argument("--data_dir", type=str, default="", + help="Dataset contain input_ids, input_mask, segment_ids, label_ids") + parser.add_argument("--label_file", type=str, default="", help="label ids to name") + parser.add_argument("--output_file", type=str, default="", help="save result to file") + parser.add_argument("--task_name", type=str, default="atis_intent", help="(atis_intent, mrda, swda)") + parser.add_argument("--do_eval", type=str, default="true", help="eval the accuracy of model") + args_opt = parser.parse_args() + return args_opt + + +def send_source_data(appsrc_id, filename, stream_name, stream_manager): + """ + Construct the input of the stream, + send inputs data to a specified stream based on streamName. + + Returns: + bool: send data success or not + """ + tensor = np.fromfile(filename, dtype=np.int32) + tensor = np.expand_dims(tensor, 0) + tensor_package_list = MxpiDataType.MxpiTensorPackageList() + tensor_package = tensor_package_list.tensorPackageVec.add() + array_bytes = tensor.tobytes() + data_input = MxDataInput() + data_input.data = array_bytes + tensor_vec = tensor_package.tensorVec.add() + tensor_vec.deviceId = 0 + tensor_vec.memType = 0 + for i in tensor.shape: + tensor_vec.tensorShape.append(i) + tensor_vec.dataStr = data_input.data + tensor_vec.tensorDataSize = len(array_bytes) + + key = "appsrc{}".format(appsrc_id).encode('utf-8') + protobuf_vec = InProtobufVector() + protobuf = MxProtobufIn() + protobuf.key = key + protobuf.type = b'MxTools.MxpiTensorPackageList' + protobuf.protobuf = tensor_package_list.SerializeToString() + protobuf_vec.push_back(protobuf) + + ret = stream_manager.SendProtobuf(stream_name, appsrc_id, protobuf_vec) + if ret < 0: + print("Failed to send data to stream.") + return False + return True + + +def send_appsrc_data(file_name, stream_name, stream_manager): + """ + send three stream to infer model, include input ids, input mask and token type_id. 
+ + Returns: + bool: send data success or not + """ + input_ids = os.path.realpath(os.path.join(args.data_dir, "00_data", file_name)) + if not send_source_data(0, input_ids, stream_name, stream_manager): + return False + input_mask = os.path.realpath(os.path.join(args.data_dir, "01_data", file_name)) + if not send_source_data(1, input_mask, stream_name, stream_manager): + return False + token_type_id = os.path.realpath(os.path.join(args.data_dir, "02_data", file_name)) + if not send_source_data(2, token_type_id, stream_name, stream_manager): + return False + return True + + +def read_label_file(label_file): + """ + Args: + label_file: + "aa 3" + Returns: + label dic + """ + label_map = {} + for line in open(label_file).readlines(): + label, index = line.strip('\n').split('\t') + label_map[index] = label + return label_map + + +def process_infer(logit_id): + """ + find label and position from the logit_id tensor. + + Args: + logit_id: shape is [num_labels], example: [0..0.1..0]. + Returns: + type of label: Q + """ + result_label = label_dic[str(logit_id[0])] + return result_label + + +def post_process(file_name, infer_result): + """ + process the result of infer tensor to Visualization results. + Args: + file_name: label file name. + infer_result: get logit from infer result + """ + # print the infer result + print("==============================================================") + result = MxpiDataType.MxpiTensorPackageList() + result.ParseFromString(infer_result[0].messageBuf) + logit_id = np.frombuffer(result.tensorPackageVec[0].tensorVec[0].dataStr, dtype='<f4') + print("output tensor is: ", logit_id.shape) + print("post_process:") + logit_id = np.argmax(logit_id, axis=-1) + logit_id = np.reshape(logit_id, -1) + + #output to file + result_label = process_infer(logit_id) + print(result_label) + with open(args.output_file, "a") as file: + file.write("{}: {}\n".format(file_name, str(result_label))) + return logit_id + + +def run(): + """ + read pipeline and do infer + """ + # init stream manager + stream_manager_api = StreamManagerApi() + ret = stream_manager_api.InitManager() + if ret != 0: + print("Failed to init Stream manager, ret=%s" % str(ret)) + return + + # create streams by pipeline config file + with open(os.path.realpath(args.pipeline), 'rb') as f: + pipeline_str = f.read() + ret = stream_manager_api.CreateMultipleStreams(pipeline_str) + if ret != 0: + print("Failed to create Stream, ret=%s" % str(ret)) + return + + stream_name = b'im_dgu' + infer_total_time = 0 + # input_ids file list + file_list = glob.glob(os.path.join(os.path.realpath(args.data_dir), "00_data", "*.bin")) + data_prefix_len = len(args.task_name) + 1 + file_num = len(file_list) + for i in range(file_num): + file_list[i] = file_list[i].split('/')[-1] + file_list = sorted(file_list, key=lambda name: int(name[data_prefix_len:-4])) + for file_name in file_list: + if not send_appsrc_data(file_name, stream_name, stream_manager_api): + return + # Obtain the inference result by specifying streamName and uniqueId. + key_vec = StringVector() + key_vec.push_back(b'mxpi_tensorinfer0') + start_time = time.time() + infer_result = stream_manager_api.GetProtobuf(stream_name, 0, key_vec) + infer_total_time += time.time() - start_time + if infer_result.size() == 0: + print("inferResult is null") + return + if infer_result[0].errorCode != 0: + print("GetProtobuf error. 
errorCode=%d" % (infer_result[0].errorCode)) + return + + logit_id = post_process(file_name, infer_result) + if args.do_eval.lower() == "true": + label_file = os.path.realpath(os.path.join(args.data_dir, "03_data", file_name)) + label_id = np.fromfile(label_file, np.int32) + callback.update(logit_id, label_id) + + if args.do_eval.lower() == "true": + print("==============================================================") + print("acc_num {} , total_num {}, accuracy {:.6f}".format(callback.acc_num, callback.total_num, + callback.acc_num / callback.total_num)) + print("==============================================================") + scale = 1000.0 + print("Infer items sum:", file_num, "infer_total_time:", infer_total_time * scale, "ms") + print("throughput:", file_num / infer_total_time, "bin/sec") + + # destroy streams + stream_manager_api.DestroyAllStreams() + + +if __name__ == '__main__': + args = parse_args() + callback = Accuracy() + label_dic = read_label_file(os.path.realpath(args.label_file)) + run() diff --git a/official/nlp/dgu/infer/util/data_processor_seq.py b/official/nlp/dgu/infer/util/data_processor_seq.py new file mode 100644 index 0000000000000000000000000000000000000000..89baa42ce0d0c7df134b589cab52560365d42f36 --- /dev/null +++ b/official/nlp/dgu/infer/util/data_processor_seq.py @@ -0,0 +1,130 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +""" +data processor file. 
+""" + +import os +import argparse +import numpy as np +import dataset +from dataset import Tuple, Pad, Stack +from tokenizer import FullTokenizer + +def get_all_path(output_path): + """ + Args: + output_path: save path of convert dataset + Returns: + the path of ids, mask, token, label + """ + ids_path = os.path.join(output_path, "00_data") + mask_path = os.path.join(output_path, "01_data") + token_path = os.path.join(output_path, "02_data") + label_path = os.path.join(output_path, "03_data") + for path in [ids_path, mask_path, token_path, label_path]: + os.makedirs(path, 0o755, exist_ok=True) + + return ids_path, mask_path, token_path, label_path + +TASK_CLASSES = { + 'atis_intent': dataset.ATIS_DID, + 'mrda': dataset.MRDA, + 'swda': dataset.SwDA +} + +def data_save_to_file(data_file_path=None, vocab_file_path='bert-base-uncased-vocab.txt', \ + output_path=None, task_name=None, mode="test", max_seq_length=128): + """data save to mindrecord file.""" + if not os.path.exists(output_path): + os.makedirs(output_path) + output_ids, output_mask, output_token, output_label = get_all_path(output_path) + dataset_class = TASK_CLASSES[task_name] + tokenizer = FullTokenizer(vocab_file=vocab_file_path, do_lower_case=True) + task_dataset = dataset_class(data_file_path, mode=mode) + applid_data = [] + print(task_name + " " + mode + " data process begin") + dataset_len = len(task_dataset) + batchify_fn = lambda samples, fn=Tuple( + Pad(axis=0, pad_val=0), # input + Pad(axis=0, pad_val=0), # mask + Pad(axis=0, pad_val=0), # segment + Stack(dtype='int64') # label + ): fn(samples) + for idx, example in enumerate(task_dataset): + if idx % 1000 == 0: + print("Reading example %d of %d" % (idx, dataset_len)) + data_example = dataset_class.convert_example(example=example, \ + tokenizer=tokenizer, max_seq_length=max_seq_length) + applid_data.append(data_example) + + applid_data = batchify_fn(applid_data) + input_ids, input_mask, segment_ids, label_ids = applid_data + + for idx in range(dataset_len): + if idx % 1000 == 0: + print("Processing example %d of %d" % (idx, dataset_len)) + file_name = task_name + "_" + str(idx) + ".bin" + ids_file_path = os.path.join(output_ids, file_name) + np.array(input_ids[idx], dtype=np.int32).tofile(ids_file_path) + mask_file_path = os.path.join(output_mask, file_name) + np.array(input_mask[idx], dtype=np.int32).tofile(mask_file_path) + + token_file_path = os.path.join(output_token, file_name) + np.array(segment_ids[idx], dtype=np.int32).tofile(token_file_path) + + label_file_path = os.path.join(output_label, file_name) + np.array(label_ids[idx], dtype=np.int32).tofile(label_file_path) + + print(task_name + " " + mode + " data process end, " + "total:" + str(dataset_len)) + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="dgu dataset process") + parser.add_argument( + "--task_name", + default=None, + type=str, + required=True, + help="The name of the task to train.") + parser.add_argument( + "--data_path", + default=None, + type=str, + help="The directory where the dataset will be load.") + parser.add_argument( + "--vocab_file", + default=None, + type=str, + help="The directory where the vocab will be load.") + parser.add_argument( + "--mode", + default="test", + type=str, + help="The mode will be do.[test, infer]") + parser.add_argument( + "--max_seq_len", + default=128, + type=int, + help="The maximum total input sequence length after tokenization for trainng. 
") + parser.add_argument( + "--output_path", + default=None, + type=str, + help="The directory where the mindrecord dataset file will be save.") + + args = parser.parse_args() + data_save_to_file(data_file_path=args.data_path, vocab_file_path=args.vocab_file, output_path=args.output_path, \ + task_name=args.task_name, mode=args.mode, max_seq_length=args.max_seq_len) diff --git a/official/nlp/dgu/infer/util/dataset.py b/official/nlp/dgu/infer/util/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..b4c101b32e59bb936c04bf4089d3b740331b58d0 --- /dev/null +++ b/official/nlp/dgu/infer/util/dataset.py @@ -0,0 +1,411 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +""" +dataset used in DGU. +""" + +import os +from typing import List +import numpy as np + +# The input data bigin with '[CLS]', using '[SEP]' split conversation content( +# Previous part, current part, following part, etc.). If there are multiple +# conversation in split part, using 'INNER_SEP' to further split. +INNER_SEP = '[unused0]' + +class Tuple(): + """ + apply the functions to the corresponding input fields. + """ + def __init__(self, fn, *args): + if isinstance(fn, (list, tuple)): + assert args, 'Input pattern not understood. The input of Tuple can be ' \ + 'Tuple(A, B, C) or Tuple([A, B, C]) or Tuple((A, B, C)). ' \ + 'Received fn=%s, args=%s' % (str(fn), str(args)) + self._fn = fn + else: + self._fn = (fn,) + args + for i, ele_fn in enumerate(self._fn): + assert callable( + ele_fn + ), 'Batchify functions must be callable! 
type(fn[%d]) = %s' % ( + i, str(type(ele_fn))) + + def __call__(self, data): + + assert len(data[0]) == len(self._fn),\ + 'The number of attributes in each data sample should contain' \ + ' {} elements'.format(len(self._fn)) + ret = [] + for i, ele_fn in enumerate(self._fn): + result = ele_fn([ele[i] for ele in data]) + if isinstance(result, (tuple, list)): + ret.extend(result) + else: + ret.append(result) + return tuple(ret) + + +class Pad(): + """ + pad the data with given value + """ + def __init__(self, + pad_val=0, + axis=0, + ret_length=None, + dtype=None, + pad_right=True): + self._pad_val = pad_val + self._axis = axis + self._ret_length = ret_length + self._dtype = dtype + self._pad_right = pad_right + + def __call__(self, data): + arrs = [np.asarray(ele) for ele in data] + original_length = [ele.shape[self._axis] for ele in arrs] + max_size = max(original_length) + ret_shape = list(arrs[0].shape) + ret_shape[self._axis] = max_size + ret_shape = (len(arrs),) + tuple(ret_shape) + ret = np.full( + shape=ret_shape, + fill_value=self._pad_val, + dtype=arrs[0].dtype if self._dtype is None else self._dtype) + for i, arr in enumerate(arrs): + if arr.shape[self._axis] == max_size: + ret[i] = arr + else: + slices = [slice(None) for _ in range(arr.ndim)] + if self._pad_right: + slices[self._axis] = slice(0, arr.shape[self._axis]) + else: + slices[self._axis] = slice(max_size - arr.shape[self._axis], + max_size) + + if slices[self._axis].start != slices[self._axis].stop: + slices = [slice(i, i + 1)] + slices + ret[tuple(slices)] = arr + if self._ret_length: + return ret, np.asarray( + original_length, + dtype="int32") if self._ret_length else np.asarray( + original_length, self._ret_length) + return ret + + +class Stack(): + """ + Stack the input data + """ + + def __init__(self, axis=0, dtype=None): + self._axis = axis + self._dtype = dtype + + def __call__(self, data): + data = np.stack( + data, + axis=self._axis).astype(self._dtype) if self._dtype else np.stack( + data, axis=self._axis) + return data + + +class Dataset(): + """ Dataset base class """ + def __init__(self): + pass + + def __getitem__(self, idx): + raise NotImplementedError("'{}' not implement in class " \ + "{}".format('__getitem__', self.__class__.__name__)) + + def __len__(self): + raise NotImplementedError("'{}' not implement in class " \ + "{}".format('__len__', self.__class__.__name__)) + + +def get_label_map(label_list): + """ Create label maps """ + label_map = {} + for (i, l) in enumerate(label_list): + label_map[l] = i + return label_map + + +class ATIS_DID(Dataset): + """ + The dataset ATIS_ID is using in task Dialogue Intent Detection. + The source dataset is ATIS(Airline Travel Information System). 
See detail at + https://www.kaggle.com/siddhadev/ms-cntk-atis + """ + LABEL_MAP = get_label_map([str(i) for i in range(26)]) + + def __init__(self, data_dir, mode='test'): + super(ATIS_DID, self).__init__() + self._data_dir = data_dir + self._mode = mode + self.read_data() + + def read_data(self): + """read data from file""" + if self._mode == 'train': + data_path = os.path.join(self._data_dir, 'train.txt') + elif self._mode == 'dev': + data_path = os.path.join(self._data_dir, 'dev.txt') + elif self._mode == 'test': + data_path = os.path.join(self._data_dir, 'test.txt') + elif self._mode == 'infer': + data_path = os.path.join(self._data_dir, 'infer.txt') + self.data = [] + with open(data_path, 'r', encoding='utf8') as fin: + for line in fin: + if not line: + continue + arr = line.rstrip('\n').split('\t') + if len(arr) != 2: + print('Data format error: %s' % '\t'.join(arr)) + print( + 'Data row should contains two parts: label\tconversation_content.' + ) + continue + label = arr[0] + text = arr[1] + self.data.append([label, text]) + + @classmethod + def get_label(cls, label): + return cls.LABEL_MAP[label] + + @classmethod + def num_classes(cls): + return len(cls.LABEL_MAP) + + @classmethod + def convert_example(cls, example, tokenizer, max_seq_length=512): + """ Convert a glue example into necessary features. """ + label, text = example + tokens = tokenizer.tokenize(text) + if len(tokens) > max_seq_length - 2: + tokens = tokens[len(tokens) - max_seq_length + 2:] + tokens_, segment_ids = [], [] + tokens_.append("[CLS]") + for token in tokens: + tokens_.append(token) + tokens_.append("[SEP]") + tokens = tokens_ + segment_ids = [0] * len(tokens) + input_ids = tokenizer.convert_tokens_to_ids(tokens) + label = np.array([cls.get_label(label)], dtype='int64') + input_mask = [1] * len(input_ids) + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + return input_ids, input_mask, segment_ids, label + + def __getitem__(self, index): + return self.data[index] + + def __len__(self): + return len(self.data) + + +def read_da_data(data_dir, mode): + """read data from file""" + def _concat_dialogues(examples): + """concat multi turns dialogues""" + new_examples = [] + example_len = len(examples) + for i in range(example_len): + label, caller, text = examples[i] + cur_txt = "%s : %s" % (caller, text) + pre_txt = [ + "%s : %s" % (item[1], item[2]) + for item in examples[max(0, i - 5):i] + ] + suf_txt = [ + "%s : %s" % (item[1], item[2]) + for item in examples[i + 1:min(len(examples), i + 3)] + ] + sample = [label, pre_txt, cur_txt, suf_txt] + new_examples.append(sample) + return new_examples + + if mode == 'train': + data_path = os.path.join(data_dir, 'train.txt') + elif mode == 'dev': + data_path = os.path.join(data_dir, 'dev.txt') + elif mode == 'test': + data_path = os.path.join(data_dir, 'test.txt') + elif mode == 'infer': + data_path = os.path.join(data_dir, 'infer.txt') + data = [] + with open(data_path, 'r', encoding='utf8') as fin: + pre_idx = -1 + examples = [] + for line in fin: + if not line: + continue + arr = line.rstrip('\n').split('\t') + if len(arr) != 4: + print('Data format error: %s' % '\t'.join(arr)) + print( + 'Data row should contains four parts: id\tlabel\tcaller\tconversation_content.' 
+ ) + continue + idx, label, caller, text = arr + if idx != pre_idx: + if idx != 0: + examples = _concat_dialogues(examples) + data.extend(examples) + examples = [] + pre_idx = idx + examples.append((label, caller, text)) + if examples: + examples = _concat_dialogues(examples) + data.extend(examples) + return data + + +def truncate_and_concat(pre_txt: List[str], + cur_txt: str, + suf_txt: List[str], + tokenizer, + max_seq_length, + max_len_of_cur_text): + """concat data""" + cur_tokens = tokenizer.tokenize(cur_txt) + cur_tokens = cur_tokens[:min(max_len_of_cur_text, len(cur_tokens))] + pre_tokens = [] + for text in pre_txt: + pre_tokens.extend(tokenizer.tokenize(text)) + pre_tokens.append(INNER_SEP) + pre_tokens = pre_tokens[:-1] + suf_tokens = [] + for text in suf_txt: + suf_tokens.extend(tokenizer.tokenize(text)) + suf_tokens.append(INNER_SEP) + suf_tokens = suf_tokens[:-1] + if len(cur_tokens) + len(pre_tokens) + len(suf_tokens) > max_seq_length - 4: + left_num = max_seq_length - 4 - len(cur_tokens) + if len(pre_tokens) > len(suf_tokens): + suf_num = int(left_num / 2) + suf_tokens = suf_tokens[:suf_num] + pre_num = left_num - len(suf_tokens) + pre_tokens = pre_tokens[max(0, len(pre_tokens) - pre_num):] + else: + pre_num = int(left_num / 2) + pre_tokens = pre_tokens[max(0, len(pre_tokens) - pre_num):] + suf_num = left_num - len(pre_tokens) + suf_tokens = suf_tokens[:suf_num] + tokens, segment_ids = [], [] + tokens.append("[CLS]") + for token in pre_tokens: + tokens.append(token) + tokens.append("[SEP]") + segment_ids.extend([0] * len(tokens)) + for token in cur_tokens: + tokens.append(token) + tokens.append("[SEP]") + segment_ids.extend([1] * (len(cur_tokens) + 1)) + if suf_tokens: + for token in suf_tokens: + tokens.append(token) + tokens.append("[SEP]") + segment_ids.extend([0] * (len(suf_tokens) + 1)) + input_ids = tokenizer.convert_tokens_to_ids(tokens) + input_mask = [1] * len(input_ids) + while len(input_ids) < max_seq_length: + input_ids.append(0) + input_mask.append(0) + segment_ids.append(0) + return input_ids, input_mask, segment_ids + + +class MRDA(Dataset): + """ + The dataset MRDA is using in task Dialogue Act. + The source dataset is MRDA(Meeting Recorder Dialogue Act). See detail at + https://www.aclweb.org/anthology/W04-2319.pdf + """ + MAX_LEN_OF_CUR_TEXT = 50 + LABEL_MAP = get_label_map([str(i) for i in range(5)]) + + def __init__(self, data_dir, mode='test'): + super(MRDA, self).__init__() + self.data = read_da_data(data_dir, mode) + + @classmethod + def get_label(cls, label): + return cls.LABEL_MAP[label] + + @classmethod + def num_classes(cls): + return len(cls.LABEL_MAP) + + @classmethod + def convert_example(cls, example, tokenizer, max_seq_length=512): + """ Convert a glue example into necessary features. """ + label, pre_txt, cur_txt, suf_txt = example + label = np.array([cls.get_label(label)], dtype='int64') + input_ids, input_mask, segment_ids = truncate_and_concat(pre_txt, cur_txt, suf_txt, \ + tokenizer, max_seq_length, cls.MAX_LEN_OF_CUR_TEXT) + return input_ids, input_mask, segment_ids, label + + def __getitem__(self, index): + return self.data[index] + + def __len__(self): + return len(self.data) + + +class SwDA(Dataset): + """ + The dataset SwDA is using in task Dialogue Act. + The source dataset is SwDA(Switchboard Dialog Act). 
See detail at + http://compprag.christopherpotts.net/swda.html + """ + MAX_LEN_OF_CUR_TEXT = 50 + LABEL_MAP = get_label_map([str(i) for i in range(42)]) + + def __init__(self, data_dir, mode='test'): + super(SwDA, self).__init__() + self.data = read_da_data(data_dir, mode) + + @classmethod + def get_label(cls, label): + return cls.LABEL_MAP[label] + + @classmethod + def num_classes(cls): + return len(cls.LABEL_MAP) + + @classmethod + def convert_example(cls, example, tokenizer, max_seq_length=512): + """ Convert a glue example into necessary features. """ + label, pre_txt, cur_txt, suf_txt = example + label = np.array([cls.get_label(label)], dtype='int64') + input_ids, input_mask, segment_ids = truncate_and_concat(pre_txt, cur_txt, suf_txt, \ + tokenizer, max_seq_length, cls.MAX_LEN_OF_CUR_TEXT) + return input_ids, input_mask, segment_ids, label + + def __getitem__(self, index): + return self.data[index] + + def __len__(self): + return len(self.data) diff --git a/official/nlp/dgu/infer/util/run_dataconvert_dgu.sh b/official/nlp/dgu/infer/util/run_dataconvert_dgu.sh new file mode 100644 index 0000000000000000000000000000000000000000..d26ff0656a9610d23bccaeeb33459e4e05037f1b --- /dev/null +++ b/official/nlp/dgu/infer/util/run_dataconvert_dgu.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
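The `Tuple`/`Pad`/`Stack` combinators defined in `dataset.py` above are what `data_processor_seq.py` uses to batch variable-length examples: each `Pad` pads one field of every sample to the batch maximum, and `Stack` stacks the scalar labels. A self-contained usage sketch (assuming `dataset.py` is importable; the token ids are made up):

```python
import numpy as np
from dataset import Tuple, Pad, Stack

batchify_fn = Tuple(
    Pad(axis=0, pad_val=0),  # input_ids
    Pad(axis=0, pad_val=0),  # input_mask
    Pad(axis=0, pad_val=0),  # segment_ids
    Stack(dtype='int64'),    # label
)

samples = [
    ([101, 7592, 102], [1, 1, 1], [0, 0, 0], np.array([3], dtype='int64')),
    ([101, 102], [1, 1], [0, 0], np.array([7], dtype='int64')),
]
input_ids, input_mask, segment_ids, labels = batchify_fn(samples)
print(input_ids.shape, labels.shape)  # (2, 3) (2, 1): the short row is zero-padded
```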
+ +set -e + +if [ $# -ne 2 ] +then + echo "==============================================================================================================" + echo "Please run the script as: " + echo "bash run_dataconvert_dgu.sh [TASK_TYPE] [MODE]" + echo "for example: bash run_dataconvert_dgu.sh atis_intent test" + echo "TASK_TYPE including [atis_intent, mrda, swda]" + echo "MODE including [test, infer]" + echo "==============================================================================================================" +exit 1 +fi + +TASK_TYPE=$1 +MODE=$2 + +case $MODE in + "test") + OUTPUT_DIR="input" + ;; + "infer") + OUTPUT_DIR="infer" + ;; + esac +# Simple log helper functions +info() { echo -e "\033[1;34m[INFO ][MxStream] $1\033[1;37m" ; } +warn() { echo >&2 -e "\033[1;31m[WARN ][MxStream] $1\033[1;37m" ; } + +export LD_LIBRARY_PATH=${MX_SDK_HOME}/lib:${MX_SDK_HOME}/opensource/lib:${MX_SDK_HOME}/opensource/lib64:/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64:${LD_LIBRARY_PATH} +export GST_PLUGIN_SCANNER=${MX_SDK_HOME}/opensource/libexec/gstreamer-1.0/gst-plugin-scanner +export GST_PLUGIN_PATH=${MX_SDK_HOME}/opensource/lib/gstreamer-1.0:${MX_SDK_HOME}/lib/plugins + +#to set PYTHONPATH, import the StreamManagerApi.py +export PYTHONPATH=$PYTHONPATH:${MX_SDK_HOME}/python +cp ../../src/tokenizer.py ./ + +python3.7 data_processor_seq.py --task_name=${TASK_TYPE} --data_path=../data/rawdata/${TASK_TYPE} --vocab_file=../data/config/bert-base-uncased-vocab.txt --mode=${MODE} --max_seq_len=128 --output_path=../data/${OUTPUT_DIR}/${TASK_TYPE} +exit 0 diff --git a/official/nlp/dgu/modelart/start.py b/official/nlp/dgu/modelart/start.py new file mode 100644 index 0000000000000000000000000000000000000000..d62aa132b15b2613d09e4ffee5826284b0b56770 --- /dev/null +++ b/official/nlp/dgu/modelart/start.py @@ -0,0 +1,372 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +''' +Bert finetune and evaluation script. 
+
+'''
+
+import argparse
+import os
+import time
+import numpy as np
+import mindspore.common.dtype as mstype
+import mindspore.ops as P
+from mindspore import Tensor, context, export
+from mindspore import log as logger
+from mindspore.nn import Accuracy
+from mindspore.nn.optim import AdamWeightDecay
+from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell
+from mindspore.train.callback import (CheckpointConfig, ModelCheckpoint,
+                                      TimeMonitor)
+from mindspore.train.model import Model
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+
+import src.dataset as data
+import src.metric as metric
+from src.bert_for_finetune import BertCLS, BertFinetuneCell
+from src.finetune_eval_config import (bert_net_cfg, bert_net_udc_cfg,
+                                      optimizer_cfg)
+from src.utils import (CustomWarmUpLR, GetAllCkptPath, LossCallBack,
+                       create_classification_dataset, make_directory)
+
+CACHE_TRAINING_URL = "/cache/training/"
+
+if not os.path.isdir(CACHE_TRAINING_URL):
+    os.makedirs(CACHE_TRAINING_URL)
+
+def parse_args():
+    """Parse args."""
+    parser = argparse.ArgumentParser(__doc__)
+    parser.add_argument(
+        "--task_name",
+        default="atis_intent",
+        type=str,
+        required=True,
+        help="The name of the task to train.")
+    parser.add_argument(
+        "--device_target",
+        default="Ascend",
+        type=str,
+        help="The device to train on.")
+    parser.add_argument(
+        "--device_id",
+        default=0,
+        type=int,
+        help="The device id to use.")
+    parser.add_argument(
+        "--model_name_or_path",
+        default='bert-BertCLS-111.ckpt',
+        type=str,
+        help="Path to the pre-trained bert model or its shortcut name.")
+    parser.add_argument(
+        "--local_model_name_or_path",
+        default='/cache/pretrainModel/bert-BertCLS-111.ckpt', type=str,
+        help="Local path to the pre-trained bert model or its shortcut name, for online work.")
+    parser.add_argument(
+        "--checkpoints_path", default=None, type=str,
+        help="The output directory where the checkpoints will be saved.")
+    parser.add_argument(
+        "--eval_ckpt_path", default=None, type=str,
+        help="The path of the checkpoint to be loaded.")
+    parser.add_argument(
+        "--max_seq_len", default=None, type=int,
+        help="The maximum total input sequence length after tokenization for training.\
+        Sequences longer than this will be truncated, sequences shorter will be padded.")
+    parser.add_argument(
+        "--eval_max_seq_len",
+        default=None, type=int,
+        help="The maximum total input sequence length after tokenization for evaluation.\
+        Sequences longer than this will be truncated, sequences shorter will be padded.")
+    parser.add_argument(
+        "--learning_rate", default=None, type=float, help="The initial learning rate for Adam.")
+    parser.add_argument(
+        "--epochs", default=None, type=int, help="Total number of training epochs to perform.")
+    parser.add_argument(
+        "--save_steps", default=None, type=int, help="Save a checkpoint every X update steps.")
+    parser.add_argument(
+        "--warmup_proportion", default=0.1, type=float, help="The proportion of warmup steps.")
+    parser.add_argument(
+        "--do_train", default="true", type=str, help="Whether to run training.")
+    parser.add_argument(
+        "--do_eval", default="true", type=str, help="Whether to run evaluation.")
+    parser.add_argument(
+        "--train_data_shuffle", type=str, default="true", choices=["true", "false"],
+        help="Enable train data shuffle, default is true.")
+    parser.add_argument(
+        "--train_data_file_path", type=str, default="",
+        help="Train data path; it is better to use an absolute path.")
+    parser.add_argument(
+        "--train_batch_size", type=int, default=32, help="Train batch size, default is 32.")
32") + parser.add_argument( + "--eval_batch_size", type=int, default=None, + help="Eval batch size, default is None. if the eval_batch_size parameter is not passed in,\ + It will be assigned the same value as train_batch_size") + parser.add_argument( + "--eval_data_file_path", type=str, default="", help="Data path, it is better to use absolute path") + parser.add_argument( + "--eval_data_shuffle", type=str, default="false", choices=["true", "false"], + help="Enable eval data shuffle, default is false") + parser.add_argument( + "--is_modelarts_work", type=str, default="false", help="Whether modelarts online work.") + parser.add_argument( + "--train_url", type=str, default="", + help="save_model path, it is better to use absolute path, for modelarts online work.") + parser.add_argument( + "--data_url", type=str, default="", help="data path, for modelarts online work") + args = parser.parse_args() + return args + +def set_default_args(args): + """set default args.""" + args.task_name = args.task_name.lower() + if args.task_name == 'udc': + args.save_steps = 1000 + if not args.epochs: + args.epochs = 2 + if not args.max_seq_len: + args.max_seq_len = 224 + if not args.eval_batch_size: + args.eval_batch_size = 100 + elif args.task_name == 'atis_intent': + args.save_steps = 100 + if not args.epochs: + args.epochs = 20 + elif args.task_name == 'mrda': + args.save_steps = 500 + if not args.epochs: + args.epochs = 7 + elif args.task_name == 'swda': + args.save_steps = 500 + if not args.epochs: + args.epochs = 3 + else: + raise ValueError('Not support task: %s.' % args.task_name) + + if not args.checkpoints_path: + args.checkpoints_path = './checkpoints/' + args.task_name + if not args.learning_rate: + args.learning_rate = 2e-5 + if not args.max_seq_len: + args.max_seq_len = 128 + if not args.eval_max_seq_len: + args.eval_max_seq_len = args.max_seq_len + if not args.eval_batch_size: + args.eval_batch_size = args.train_batch_size + +def do_train(dataset=None, network=None, load_checkpoint_path="base-BertCLS-111.ckpt", + save_checkpoint_path="", epoch_num=1): + """ do train """ + if load_checkpoint_path == "": + raise ValueError("Pretrain model missed, finetune task must load pretrain model!") + print("load pretrain model: ", load_checkpoint_path) + steps_per_epoch = args_opt.save_steps + num_examples = dataset.get_dataset_size() * args_opt.train_batch_size + max_train_steps = epoch_num * dataset.get_dataset_size() + warmup_steps = int(max_train_steps * args_opt.warmup_proportion) + print("Num train examples: %d" % num_examples) + print("Max train steps: %d" % max_train_steps) + print("Num warmup steps: %d" % warmup_steps) + #warmup and optimizer + lr_schedule = CustomWarmUpLR(learning_rate=args_opt.learning_rate, \ + warmup_steps=warmup_steps, max_train_steps=max_train_steps) + params = network.trainable_params() + decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) + other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params)) + group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, + {'params': other_params, 'weight_decay': 0.0}] + optimizer = AdamWeightDecay(group_params, lr_schedule, eps=optimizer_cfg.AdamWeightDecay.eps) + update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2**32, scale_factor=2, scale_window=1000) + #ckpt config + ckpt_config = CheckpointConfig(save_checkpoint_steps=steps_per_epoch, keep_checkpoint_max=5) + ckpoint_cb = ModelCheckpoint(prefix=args_opt.task_name, + 
+                                 directory=None if save_checkpoint_path == "" else save_checkpoint_path,
+                                 config=ckpt_config)
+    # load the pretrained checkpoint into the network
+    param_dict = load_checkpoint(load_checkpoint_path)
+    load_param_into_net(network, param_dict)
+
+    netwithgrads = BertFinetuneCell(network, optimizer=optimizer, scale_update_cell=update_cell)
+    model = Model(netwithgrads)
+    callbacks = [TimeMonitor(dataset.get_dataset_size()), LossCallBack(dataset.get_dataset_size()), ckpoint_cb]
+    model.train(epoch_num, dataset, callbacks=callbacks)
+
+def eval_result_print(eval_metric, result):
+    if args_opt.task_name.lower() in ['atis_intent', 'mrda', 'swda']:
+        metric_name = "Accuracy"
+    else:
+        metric_name = eval_metric.name()
+    print(metric_name, " :", result)
+    if args_opt.task_name.lower() == "udc":
+        print("R1@10: ", result[0])
+        print("R2@10: ", result[1])
+        print("R5@10: ", result[2])
+
+def do_eval(dataset=None, network=None, num_class=5, eval_metric=None, load_checkpoint_path=""):
+    """ do eval """
+    if load_checkpoint_path == "":
+        raise ValueError("Finetuned model is missing: the evaluation task must load a finetuned model!")
+    print("eval model: ", load_checkpoint_path)
+    print("loading... ")
+    net_for_pretraining = network(eval_net_cfg, False, num_class)
+    net_for_pretraining.set_train(False)
+    param_dict = load_checkpoint(load_checkpoint_path)
+    load_param_into_net(net_for_pretraining, param_dict)
+    model = Model(net_for_pretraining)
+
+    print("evaluating... ")
+    columns_list = ["input_ids", "input_mask", "segment_ids", "label_ids"]
+    eval_metric.clear()
+    evaluate_times = []
+    for data_item in dataset.create_dict_iterator(num_epochs=1):
+        input_data = []
+        for i in columns_list:
+            input_data.append(data_item[i])
+        input_ids, input_mask, token_type_id, label_ids = input_data
+        squeeze = P.Squeeze(-1)
+        label_ids = squeeze(label_ids)
+        time_begin = time.time()
+        logits = model.predict(input_ids, input_mask, token_type_id, label_ids)
+        time_end = time.time()
+        evaluate_times.append(time_end - time_begin)
+        eval_metric.update(logits, label_ids)
+    print("==============================================================")
+    print("(w/o first and last) elapsed time: {}, per step time : {}".format(
+        sum(evaluate_times[1:-1]), sum(evaluate_times[1:-1])/(len(evaluate_times) - 2)))
+    print("==============================================================")
+    result = eval_metric.eval()
+    eval_result_print(eval_metric, result)
+    return result
+
+
+def run_dgu(args_input):
+    """run_dgu main function."""
+    dataset_class, metric_class = TASK_CLASSES[args_input.task_name]
+    epoch_num = args_input.epochs
+    num_class = dataset_class.num_classes()
+
+    target = args_input.device_target
+    if target == "Ascend":
+        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_input.device_id)
+    elif target == "GPU":
+        context.set_context(mode=context.GRAPH_MODE, device_target="GPU", device_id=args_input.device_id)
+        if net_cfg.compute_type != mstype.float32:
+            logger.warning('GPU only supports fp32 temporarily, running with fp32.')
+            net_cfg.compute_type = mstype.float32
+    else:
+        raise Exception("Target error: only GPU and Ascend are supported.")
+
+    if args_input.do_train.lower() == "true":
+        netwithloss = BertCLS(net_cfg, True, num_labels=num_class, dropout_prob=0.1)
+        train_ds = create_classification_dataset(batch_size=args_input.train_batch_size, repeat_count=1, \
+            data_file_path=args_input.train_data_file_path, \
+            do_shuffle=(args_input.train_data_shuffle.lower() == "true"), drop_remainder=True)
+        do_train(train_ds, netwithloss,
+                 load_pretrain_checkpoint_path, save_finetune_checkpoint_path, epoch_num)
+
+    if args_input.do_eval.lower() == "true":
+        eval_ds = create_classification_dataset(batch_size=args_input.eval_batch_size, repeat_count=1, \
+            data_file_path=args_input.eval_data_file_path, \
+            do_shuffle=(args_input.eval_data_shuffle.lower() == "true"), drop_remainder=True)
+        if args_input.task_name in ['atis_intent', 'mrda', 'swda']:
+            eval_metric = metric_class("classification")
+        else:
+            eval_metric = metric_class()
+        # load the model from the given path and evaluate it
+        if args_input.eval_ckpt_path:
+            do_eval(eval_ds, BertCLS, num_class, eval_metric, args_input.eval_ckpt_path)
+        # otherwise evaluate all saved checkpoints
+        else:
+            ckpt_list = GetAllCkptPath(save_finetune_checkpoint_path)
+            print("saved models:", ckpt_list)
+            for filepath in ckpt_list:
+                eval_result = do_eval(eval_ds, BertCLS, num_class, eval_metric, filepath)
+                eval_file_dict[filepath] = str(eval_result)
+            print(eval_file_dict)
+        if args_input.is_modelarts_work == 'true':
+            for filename in eval_file_dict:
+                ckpt_result = eval_file_dict[filename].replace('[', '').replace(']', '').replace(', ', '_', 2)
+                save_file_name = args_input.train_url + ckpt_result + "_" + filename.split('/')[-1]
+                mox.file.copy_parallel(filename, save_file_name)
+                print("upload model " + filename + " to " + save_file_name)
+    # freeze the last saved checkpoint to AIR format
+    save_ckpt_list = GetAllCkptPath(save_finetune_checkpoint_path)
+    ckpt_model = save_ckpt_list[-1]
+    print("frozen:", ckpt_model)
+    frozen_to_air_args = {'ckpt_file': ckpt_model,
+                          'batch_size': 1,
+                          'file_name': CACHE_TRAINING_URL + args_input.task_name + '.air',
+                          'file_format': 'AIR'}
+    net = BertCLS(net_cfg, False, num_labels=num_class)
+    frozen_to_air(net, frozen_to_air_args)
+
+    mox.file.copy_parallel(CACHE_TRAINING_URL, args_input.train_url)
+
+def frozen_to_air(net, args):
+    load_checkpoint(args.get("ckpt_file"), net=net)
+    net.set_train(False)
+    batch_size = args.get("batch_size")
+    input_ids = Tensor(np.zeros([batch_size, net_cfg.seq_length]), mstype.int32)
+    input_mask = Tensor(np.zeros([batch_size, net_cfg.seq_length]), mstype.int32)
+    token_type_id = Tensor(np.zeros([batch_size, net_cfg.seq_length]), mstype.int32)
+
+    input_data = [input_ids, input_mask, token_type_id]
+    export(net.bert, *input_data, file_name=args.get("file_name"), file_format=args.get("file_format"))
+
+def print_args_input(args_input):
+    print('----------- Configuration Arguments -----------')
+    for arg, value in sorted(vars(args_input).items()):
+        print('%s: %s' % (arg, value))
+    print('------------------------------------------------')
+
+def set_bert_cfg():
+    """set bert cfg"""
+    global net_cfg
+    global eval_net_cfg
+    if args_opt.task_name == 'udc':
+        net_cfg = bert_net_udc_cfg
+        eval_net_cfg = bert_net_udc_cfg
+        print("use udc_bert_cfg")
+    else:
+        net_cfg = bert_net_cfg
+        eval_net_cfg = bert_net_cfg
+    return net_cfg, eval_net_cfg
+
+if __name__ == '__main__':
+    TASK_CLASSES = {
+        'udc': (data.UDCv1, metric.RecallAtK),
+        'atis_intent': (data.ATIS_DID, Accuracy),
+        'mrda': (data.MRDA, Accuracy),
+        'swda': (data.SwDA, Accuracy)
+    }
+    os.environ['GLOG_v'] = '3'
+    eval_file_dict = {}
+    args_opt = parse_args()
+    set_default_args(args_opt)
+    net_cfg, eval_net_cfg = set_bert_cfg()
+    load_pretrain_checkpoint_path = args_opt.model_name_or_path
+    save_finetune_checkpoint_path = args_opt.checkpoints_path + args_opt.task_name
+    save_finetune_checkpoint_path = make_directory(save_finetune_checkpoint_path)
+    if args_opt.is_modelarts_work == 'true':
+        import moxing as mox
+        local_load_pretrain_checkpoint_path = args_opt.local_model_name_or_path
+        local_data_path = '/cache/data/' + args_opt.task_name
+        mox.file.copy_parallel(args_opt.data_url + args_opt.task_name, local_data_path)
+        mox.file.copy_parallel('obs:/' + load_pretrain_checkpoint_path, local_load_pretrain_checkpoint_path)
+        load_pretrain_checkpoint_path = local_load_pretrain_checkpoint_path
+        if not args_opt.train_data_file_path:
+            args_opt.train_data_file_path = local_data_path + '/' + args_opt.task_name + '_train.mindrecord'
+        if not args_opt.eval_data_file_path:
+            args_opt.eval_data_file_path = local_data_path + '/' + args_opt.task_name + '_test.mindrecord'
+    print_args_input(args_opt)
+    run_dgu(args_opt)
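+
+# Example usage (a sketch: the paths below are hypothetical and must be adjusted to your environment):
+#   python3.7 start.py --task_name=atis_intent \
+#       --model_name_or_path=/path/to/bert-BertCLS-111.ckpt \
+#       --train_data_file_path=/path/to/atis_intent_train.mindrecord \
+#       --eval_data_file_path=/path/to/atis_intent_test.mindrecord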