diff --git a/research/nlp/ternarybert/infer/convert/convert.sh b/research/nlp/ternarybert/infer/convert/convert.sh new file mode 100644 index 0000000000000000000000000000000000000000..3d2c9ea30d64efe840948995b49e46e1f854f34f --- /dev/null +++ b/research/nlp/ternarybert/infer/convert/convert.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +air_path=$1 +om_path=$2 + +echo "Input AIR file path: ${air_path}" +echo "Output OM file path: ${om_path}" + +atc --framework=1 --model="${air_path}" \ + --output="${om_path}" \ + --soc_version=Ascend310 \ + --op_select_implmode="high_precision" \ No newline at end of file diff --git a/research/nlp/ternarybert/infer/data/config/ternarybert.pipeline b/research/nlp/ternarybert/infer/data/config/ternarybert.pipeline new file mode 100644 index 0000000000000000000000000000000000000000..89d270408fda1d812963e245895e1276d7f23ac7 --- /dev/null +++ b/research/nlp/ternarybert/infer/data/config/ternarybert.pipeline @@ -0,0 +1,46 @@ +{ + "im_ternarybert": { + "stream_config": { + "deviceId": "0" + }, + "appsrc0": { + "props": { + "blocksize": "409600" + }, + "factory": "appsrc", + "next": "mxpi_tensorinfer0:0" + }, + "appsrc1": { + "props": { + "blocksize": "409600" + }, + "factory": "appsrc", + "next": "mxpi_tensorinfer0:1" + }, + "appsrc2": { + "props": { + "blocksize": "409600" + }, + "factory": "appsrc", + "next": "mxpi_tensorinfer0:2" + }, + "mxpi_tensorinfer0": { + "props": { + "dataSource": "appsrc0,appsrc1,appsrc2", + "modelPath": "../data/model/ternarybert.om" + }, + "factory": "mxpi_tensorinfer", + "next": "mxpi_dataserialize0" + }, + "mxpi_dataserialize0": { + "props": { + "outputDataKeys": "mxpi_tensorinfer0" + }, + "factory": "mxpi_dataserialize", + "next": "appsink0" + }, + "appsink0": { + "factory": "appsink" + } + } +} diff --git a/research/nlp/ternarybert/infer/docker_start_infer.sh b/research/nlp/ternarybert/infer/docker_start_infer.sh new file mode 100644 index 0000000000000000000000000000000000000000..c76879755acc27849fd8ca75ab0fcd76daea1658 --- /dev/null +++ b/research/nlp/ternarybert/infer/docker_start_infer.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +docker_image=$1 +share_dir=$2 +data_dir=$3 +echo "$1" +echo "$2" +if [ -z "${docker_image}" ]; then + echo "please input docker_image" + exit 1 +fi + +if [ ! 
-d "${share_dir}" ]; then + echo "please input share directory that contains dataset, models and codes" + exit 1 +fi + + +docker run -it -u root \ + --device=/dev/davinci0 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm \ + --device=/dev/hisi_hdc \ + --privileged \ + -v //usr/local/bin/npu-smi:/usr/local/bin/npu-smi \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v ${data_dir}:${data_dir} \ + -v ${share_dir}:${share_dir} \ + ${docker_image} \ + /bin/bash diff --git a/research/nlp/ternarybert/infer/mxbase/CMakeLists.txt b/research/nlp/ternarybert/infer/mxbase/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..e792cce1eef7eb5bdc8de625b542f1f676b49ada --- /dev/null +++ b/research/nlp/ternarybert/infer/mxbase/CMakeLists.txt @@ -0,0 +1,51 @@ +cmake_minimum_required(VERSION 3.10.0) +project(ternarybert) + +set(TARGET main) + +add_definitions(-DENABLE_DVPP_INTERFACE) +add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) +add_definitions(-Dgoogle=mindxsdk_private) +add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) +add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) + +# Check environment variable +if(NOT DEFINED ENV{ASCEND_HOME}) + message(FATAL_ERROR "please define environment variable:ASCEND_HOME") +endif() +if(NOT DEFINED ENV{ASCEND_VERSION}) + message(WARNING "please define environment variable:ASCEND_VERSION") +endif() +if(NOT DEFINED ENV{ARCH_PATTERN}) + message(WARNING "please define environment variable:ARCH_PATTERN") +endif() +set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) +set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) + +set(MXBASE_ROOT_DIR $ENV{MX_SDK_HOME}) +set(MXBASE_INC ${MXBASE_ROOT_DIR}/include) +set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/lib) +set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/lib/modelpostprocessors) +set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/include/MxBase/postprocess/include) +if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) + set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) +else() + set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource) +endif() + +include_directories(${ACL_INC_DIR}) +include_directories(${OPENSOURCE_DIR}/include) +include_directories(${OPENSOURCE_DIR}/include/opencv4) + +include_directories(${MXBASE_INC}) +include_directories(${MXBASE_POST_PROCESS_DIR}) + +link_directories(${ACL_LIB_DIR}) +link_directories(${OPENSOURCE_DIR}/lib) +link_directories(${MXBASE_LIB_DIR}) +link_directories(${MXBASE_POST_LIB_DIR}) + +add_executable(${TARGET} src/main.cpp src/TernaryBERT.cpp) +target_link_libraries(${TARGET} glog cpprest mxbase opencv_world stdc++fs) + +install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/) diff --git a/research/nlp/ternarybert/infer/mxbase/build.sh b/research/nlp/ternarybert/infer/mxbase/build.sh new file mode 100644 index 0000000000000000000000000000000000000000..620d093dcb867e190d7bda0c0b1b76f6fcaaadf3 --- /dev/null +++ b/research/nlp/ternarybert/infer/mxbase/build.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +path_cur=$(dirname $0) + +function check_env() +{ + # set ASCEND_VERSION to ascend-toolkit/latest when it was not specified by user + if [ ! "${ASCEND_VERSION}" ]; then + export ASCEND_VERSION=ascend-toolkit/latest + echo "Set ASCEND_VERSION to the default value: ${ASCEND_VERSION}" + else + echo "ASCEND_VERSION is set to ${ASCEND_VERSION} by user" + fi + + if [ ! "${ARCH_PATTERN}" ]; then + # set ARCH_PATTERN to ./ when it was not specified by user + export ARCH_PATTERN=./ + echo "ARCH_PATTERN is set to the default value: ${ARCH_PATTERN}" + else + echo "ARCH_PATTERN is set to ${ARCH_PATTERN} by user" + fi +} + +function build_ternarybert() +{ + cd $path_cur + rm -rf build + mkdir -p build + cd build + cmake .. + make + ret=$? + if [ ${ret} -ne 0 ]; then + echo "Failed to build ternarybert." + exit ${ret} + fi + make install +} + +check_env +build_ternarybert \ No newline at end of file diff --git a/research/nlp/ternarybert/infer/mxbase/src/TernaryBERT.cpp b/research/nlp/ternarybert/infer/mxbase/src/TernaryBERT.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fa31af6cd6badfcf1cce35fb1dfeccc2212b34b2 --- /dev/null +++ b/research/nlp/ternarybert/infer/mxbase/src/TernaryBERT.cpp @@ -0,0 +1,195 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "TernaryBERT.h" +#include <unistd.h> +#include <sys/stat.h> +#include <map> +#include <fstream> +#include "MxBase/DeviceManager/DeviceManager.h" +#include "MxBase/Log/Log.h" + +const uint32_t MAX_LENGTH = 4096; + +APP_ERROR TernaryBERT::Init(const InitParam &initParam) { + deviceId_ = initParam.deviceId; + APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); + if (ret != APP_ERR_OK) { + LogError << "Init devices failed, ret=" << ret << "."; + return ret; + } + ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); + if (ret != APP_ERR_OK) { + LogError << "Set context failed, ret=" << ret << "."; + return ret; + } + model_ = std::make_shared<MxBase::ModelInferenceProcessor>(); + ret = model_->Init(initParam.modelPath, modelDesc_); + if (ret != APP_ERR_OK) { + LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR TernaryBERT::DeInit() { + model_->DeInit(); + MxBase::DeviceManager::GetInstance()->DestroyDevices(); + return APP_ERR_OK; +} + +APP_ERROR TernaryBERT::ReadTensorFromFile(const std::string &file, uint32_t *data, uint32_t size) { + // read file into data + std::ifstream infile; + infile.open(file, std::ios_base::in | std::ios_base::binary); + if (infile.fail()) { + LogError << "Failed to open label file: " << file << "."; + return APP_ERR_COMM_OPEN_FAIL; + } + infile.read(reinterpret_cast<char*>(data), sizeof(uint32_t) * size); + infile.close(); + return APP_ERR_OK; +} + +APP_ERROR TernaryBERT::ReadInputTensor(const std::string &fileName, \ + uint32_t index, \ + std::vector<MxBase::TensorBase> *inputs) { + // read file into inputs + uint32_t data[MAX_LENGTH] = {0}; + APP_ERROR ret = ReadTensorFromFile(fileName, data, MAX_LENGTH); + if (ret != APP_ERR_OK) { + LogError << "ReadTensorFromFile failed."; + return ret; + } + + const uint32_t dataSize = modelDesc_.inputTensors[index].tensorSize; + MxBase::MemoryData memoryDataDst(dataSize, MxBase::MemoryData::MEMORY_DEVICE, deviceId_); + MxBase::MemoryData memoryDataSrc(reinterpret_cast<void*>(data), dataSize, MxBase::MemoryData::MEMORY_HOST_MALLOC); + ret = MxBase::MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Memory malloc and copy failed."; + return ret; + } + + std::vector<uint32_t> shape = {32, 128}; + inputs->push_back(MxBase::TensorBase(memoryDataDst, false, shape, MxBase::TENSOR_DTYPE_UINT32)); + return APP_ERR_OK; +} + +APP_ERROR TernaryBERT::Inference(const std::vector<MxBase::TensorBase> &inputs, \ + std::vector<MxBase::TensorBase> *outputs) { + auto dtypes = model_->GetOutputDataType(); + for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { + std::vector<uint32_t> shape = {}; + for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { + shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); + } + MxBase::TensorBase tensor(shape, dtypes[i], MxBase::MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); + APP_ERROR ret = MxBase::TensorBase::TensorBaseMalloc(tensor); + if (ret != APP_ERR_OK) { + LogError << "TensorBaseMalloc failed, ret=" << ret << "."; + return ret; + } + outputs->push_back(tensor); + } + + // model infer + MxBase::DynamicInfo dynamicInfo = {}; + dynamicInfo.dynamicType = MxBase::DynamicType::STATIC_BATCH; + auto startTime = std::chrono::high_resolution_clock::now(); + APP_ERROR ret = model_->ModelInference(inputs, *outputs, dynamicInfo); + auto endTime = 
std::chrono::high_resolution_clock::now();
+    if (ret != APP_ERR_OK) {
+        LogError << "ModelInference failed, ret=" << ret << ".";
+        return ret;
+    }
+
+    double costMs = std::chrono::duration<double, std::milli>(endTime - startTime).count();
+    g_inferCost.push_back(costMs);
+    return APP_ERR_OK;
+}
+
+APP_ERROR TernaryBERT::WriteResult(std::vector<MxBase::TensorBase> *outputs, const std::string &fileName) {
+    // the 14th output tensor (index 13) holds the classification logits
+    MxBase::TensorBase &tensor = outputs->at(13);
+    APP_ERROR ret = tensor.ToHost();
+    if (ret != APP_ERR_OK) {
+        LogError << GetError(ret) << "Tensor deploy to host failed.";
+        return ret;
+    }
+
+    std::string resultPathName = "mxbase_result";
+    // create result directory when it does not exist
+    if (access(resultPathName.c_str(), 0) != 0) {
+        int mkdir_ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR);
+        if (mkdir_ret != 0) {
+            LogError << "Failed to create result directory: " << resultPathName << ", ret = " << mkdir_ret;
+            return APP_ERR_COMM_OPEN_FAIL;
+        }
+    }
+
+    auto outputShape = tensor.GetShape();
+    uint32_t length = outputShape[0];
+    uint32_t classNum = outputShape[1];
+    LogInfo << "output shape is: " << outputShape[0] << " " << outputShape[1] << std::endl;
+
+    void* data = tensor.GetBuffer();
+    std::string outFileName = "mxbase_result/" + fileName;
+    FILE *outputFile = fopen(outFileName.c_str(), "wb");
+    if (outputFile == nullptr) {
+        LogError << "Failed to open result file: " << outFileName << ".";
+        return APP_ERR_COMM_OPEN_FAIL;
+    }
+    fwrite(data, sizeof(float), length * classNum, outputFile);
+    fclose(outputFile);
+
+    return APP_ERR_OK;
+}
+
+APP_ERROR TernaryBERT::Process(const std::string &inferPath, const std::string &fileName) {
+    // read the three input files into tensors
+    std::vector<MxBase::TensorBase> inputs = {};
+    std::string inputIdsFile = inferPath + "/00_input_ids/" + fileName;
+    APP_ERROR ret = ReadInputTensor(inputIdsFile, INPUT_IDS, &inputs);
+    if (ret != APP_ERR_OK) {
+        LogError << "Read input_ids failed, ret=" << ret << ".";
+        return ret;
+    }
+
+    std::string tokenTypeIdFile = inferPath + "/01_token_type_id/" + fileName;
+    ret = ReadInputTensor(tokenTypeIdFile, TOKEN_TYPE_ID, &inputs);
+    if (ret != APP_ERR_OK) {
+        LogError << "Read token_type_id failed, ret=" << ret << ".";
+        return ret;
+    }
+
+    std::string inputMaskFile = inferPath + "/02_input_mask/" + fileName;
+    ret = ReadInputTensor(inputMaskFile, INPUT_MASK, &inputs);
+    if (ret != APP_ERR_OK) {
+        LogError << "Read input_mask failed, ret=" << ret << ".";
+        return ret;
+    }
+
+    // infer and put result into outputs
+    std::vector<MxBase::TensorBase> outputs = {};
+    ret = Inference(inputs, &outputs);
+    if (ret != APP_ERR_OK) {
+        LogError << "Inference failed, ret=" << ret << ".";
+        return ret;
+    }
+
+    // write result
+    ret = WriteResult(&outputs, fileName);
+    if (ret != APP_ERR_OK) {
+        LogError << "Save result failed, ret=" << ret << ".";
+        return ret;
+    }
+    return APP_ERR_OK;
+}
diff --git a/research/nlp/ternarybert/infer/mxbase/src/TernaryBERT.h b/research/nlp/ternarybert/infer/mxbase/src/TernaryBERT.h
new file mode 100644
index 0000000000000000000000000000000000000000..937af56fcb108c8419934f178176f4ba576d628f
--- /dev/null
+++ b/research/nlp/ternarybert/infer/mxbase/src/TernaryBERT.h
@@ -0,0 +1,62 @@
+/**
+ * Copyright 2022 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MXBASE_TERNARYBERT_H +#define MXBASE_TERNARYBERT_H + +#include <memory> +#include <utility> +#include <vector> +#include <string> +#include <map> +#include <opencv2/opencv.hpp> +#include "MxBase/DvppWrapper/DvppWrapper.h" +#include "MxBase/ModelInfer/ModelInferenceProcessor.h" +#include "MxBase/Tensor/TensorContext/TensorContext.h" + +extern std::vector<double> g_inferCost; + +struct InitParam { + uint32_t deviceId; + std::string modelPath; +}; + +enum DataIndex { + INPUT_IDS = 0, + TOKEN_TYPE_ID = 1, + INPUT_MASK = 2, +}; + +class TernaryBERT { + public: + APP_ERROR Init(const InitParam &initParam); + APP_ERROR DeInit(); + APP_ERROR Inference(const std::vector<MxBase::TensorBase> &inputs, std::vector<MxBase::TensorBase> *outputs); + APP_ERROR Process(const std::string &inferPath, const std::string &fileName); + + protected: + APP_ERROR ReadTensorFromFile(const std::string &file, uint32_t *data, uint32_t size); + APP_ERROR ReadInputTensor(const std::string &fileName, uint32_t index, std::vector<MxBase::TensorBase> *inputs); + APP_ERROR WriteResult(std::vector<MxBase::TensorBase> *outputs, const std::string &fileName); + + private: + std::shared_ptr<MxBase::DvppWrapper> dvppWrapper_; + std::shared_ptr<MxBase::ModelInferenceProcessor> model_; + MxBase::ModelDesc modelDesc_ = {}; + uint32_t deviceId_ = 0; +}; + +#endif diff --git a/research/nlp/ternarybert/infer/mxbase/src/main.cpp b/research/nlp/ternarybert/infer/mxbase/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9c9045543ec3c1abed784e79caa654f61e3e0bd1 --- /dev/null +++ b/research/nlp/ternarybert/infer/mxbase/src/main.cpp @@ -0,0 +1,99 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <unistd.h> +#include <dirent.h> +#include <iostream> +#include <fstream> +#include <vector> +#include "TernaryBERT.h" +#include "MxBase/Log/Log.h" + +std::vector<double> g_inferCost; + +void InitTernaryBERTParam(InitParam* initParam) { + initParam->deviceId = 0; + initParam->modelPath = "../data/model/ternarybert.om"; +} + +APP_ERROR ReadFilesFromPath(const std::string &path, std::vector<std::string> *files) { + DIR *dir = NULL; + struct dirent *ptr = NULL; + + if ((dir=opendir(path.c_str())) == NULL) { + LogError << "Open dir error: " << path; + return APP_ERR_COMM_OPEN_FAIL; + } + + while ((ptr=readdir(dir)) != NULL) { + // d_type == 8 is file + if (ptr->d_type == 8) { + files->push_back(ptr->d_name); + } + } + + closedir(dir); + // sort ascending order + sort(files->begin(), files->end()); + return APP_ERR_OK; +} + +int main(int argc, char* argv[]) { + if (argc <= 1) { + LogWarn << "Please input data path."; + return APP_ERR_OK; + } + + // init model + InitParam initParam; + InitTernaryBERTParam(&initParam); + auto ternaryBert = std::make_shared<TernaryBERT>(); + APP_ERROR ret = ternaryBert->Init(initParam); + if (ret != APP_ERR_OK) { + LogError << "TernaryBERT init failed, ret=" << ret << "."; + return ret; + } + + // get all file name + std::string inferPath = argv[1]; + std::vector<std::string> files; + ret = ReadFilesFromPath(inferPath + "/00_input_ids", &files); + if (ret != APP_ERR_OK) { + LogError << "Read files from path failed, ret=" << ret << "."; + return ret; + } + + // do process + for (uint32_t i = 0; i < files.size(); i++) { + LogInfo << "read file name: " << files[i]; + ret = ternaryBert->Process(inferPath, files[i]); + if (ret != APP_ERR_OK) { + LogError << "TernaryBERT process failed, ret=" << ret << "."; + ternaryBert->DeInit(); + return ret; + } + } + ternaryBert->DeInit(); + + double costSum = 0; + for (uint32_t i = 0; i < g_inferCost.size(); i++) { + costSum += g_inferCost[i]; + } + LogInfo << "Infer sum: " << g_inferCost.size() << ", cost total time: " << costSum << " ms."; + LogInfo << "The throughput: " << g_inferCost.size() * 1000 / costSum << " bin/sec."; + + return APP_ERR_OK; +} diff --git a/research/nlp/ternarybert/infer/sdk/main.py b/research/nlp/ternarybert/infer/sdk/main.py new file mode 100644 index 0000000000000000000000000000000000000000..cedd6f0bbb78eb6a4d0bb1ee87902a4422700ba4 --- /dev/null +++ b/research/nlp/ternarybert/infer/sdk/main.py @@ -0,0 +1,162 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/research/nlp/ternarybert/infer/sdk/main.py b/research/nlp/ternarybert/infer/sdk/main.py
new file mode 100644
index 0000000000000000000000000000000000000000..cedd6f0bbb78eb6a4d0bb1ee87902a4422700ba4
--- /dev/null
+++ b/research/nlp/ternarybert/infer/sdk/main.py
@@ -0,0 +1,162 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+"""
+Sample script of ternarybert inference using the MindX SDK, run in docker.
+"""
+
+import argparse
+import glob
+import os
+import time
+
+import MxpiDataType_pb2 as MxpiDataType
+import numpy as np
+from StreamManagerApi import StreamManagerApi, InProtobufVector, \
+    MxProtobufIn, StringVector
+
+parser = argparse.ArgumentParser(description="ternarybert inference")
+parser.add_argument("--pipeline_file", type=str, required=True, help="SDK infer pipeline")
+parser.add_argument("--data_dir", type=str, required=True, help="input data directory")
+parser.add_argument("--res_dir", type=str, required=True, help="results directory")
+parser.add_argument('--batch_size', type=int, default=32, help='batch size for inference')
+parser.add_argument('--seq_length', type=int, default=128, help='sequence length')
+args = parser.parse_args()
+
+def send_source_data(appsrc_id, file_name, stream_name, stream_manager):
+    """
+    Construct the input of the stream and
+    send the input data to the specified stream based on stream_name.
+
+    Returns:
+        bool: send data success or not
+    """
+    # transform file to tensor
+    tensors = np.fromfile(file_name, dtype=np.int32).reshape([args.batch_size, args.seq_length])
+    tensor_package_list = MxpiDataType.MxpiTensorPackageList()
+
+    for i in range(args.batch_size):
+        tensor = np.expand_dims(tensors[i, :], 0)
+        tensor_package = tensor_package_list.tensorPackageVec.add()
+        tensor_vec = tensor_package.tensorVec.add()
+        tensor_vec.deviceId = 0
+        tensor_vec.memType = 0
+        tensor_vec.tensorShape.extend(tensor.shape)
+        tensor_vec.tensorDataType = 3  # int32
+        array_bytes = tensor.tobytes()
+        tensor_vec.dataStr = array_bytes
+        tensor_vec.tensorDataSize = len(array_bytes)  # data size in bytes
+
+    key = "appsrc{}".format(appsrc_id).encode('utf-8')
+    protobuf_vec = InProtobufVector()
+    protobuf = MxProtobufIn()
+    protobuf.key = key
+    protobuf.type = b'MxTools.MxpiTensorPackageList'
+    protobuf.protobuf = tensor_package_list.SerializeToString()
+    protobuf_vec.push_back(protobuf)
+
+    ret = stream_manager.SendProtobuf(stream_name, appsrc_id, protobuf_vec)
+    if ret < 0:
+        print("Failed to send data to stream.")
+        return False
+    return True
+
+
+def send_appsrc_data(file_name, stream_name, stream_manager):
+    """
+    Send three streams to the infer model: input_ids, token_type_id and input_mask.
+
+    Returns:
+        bool: send data success or not
+    """
+    input_ids_path = os.path.realpath(os.path.join(args.data_dir, "00_input_ids", file_name))
+    if not send_source_data(0, input_ids_path, stream_name, stream_manager):
+        return False
+
+    token_type_id_path = os.path.realpath(os.path.join(args.data_dir, "01_token_type_id", file_name))
+    if not send_source_data(1, token_type_id_path, stream_name, stream_manager):
+        return False
+
+    input_mask_path = os.path.realpath(os.path.join(args.data_dir, "02_input_mask", file_name))
+    if not send_source_data(2, input_mask_path, stream_name, stream_manager):
+        return False
+    return True
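+
+# Note: the appsrc indices used above must match the "dataSource" order in
+# ternarybert.pipeline ("appsrc0,appsrc1,appsrc2" feeding mxpi_tensorinfer0),
+# and each input .bin file is expected to hold a batch_size x seq_length
+# int32 tensor.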
+
+def save_result(file_name, infer_result):
+    """
+    Save the result of the infer tensor.
+    Args:
+        file_name: label file name.
+        infer_result: inference result containing the output logits
+    """
+    result = MxpiDataType.MxpiTensorPackageList()
+    result.ParseFromString(infer_result[0].messageBuf)
+
+    res_file = os.path.realpath(os.path.join(args.res_dir, file_name))
+    with open(res_file, 'ab') as f:
+        for k in range(args.batch_size):
+            # tensorVec[13] holds the classification logits
+            f.write(result.tensorPackageVec[k].tensorVec[13].dataStr)
+
+
+def run():
+    """
+    Read the pipeline config and do inference.
+    """
+    # init stream manager
+    stream_manager_api = StreamManagerApi()
+    ret = stream_manager_api.InitManager()
+    if ret != 0:
+        print("Failed to init Stream manager, ret=%s" % str(ret))
+        exit(1)
+
+    # create streams by pipeline config file
+    with open(os.path.realpath(args.pipeline_file), 'rb') as f:
+        pipeline_str = f.read()
+    ret = stream_manager_api.CreateMultipleStreams(pipeline_str)
+    if ret != 0:
+        print("Failed to create Stream, ret=%s" % str(ret))
+        exit(1)
+
+    stream_name = b'im_ternarybert'
+    infer_total_time = 0
+    # get all files ending with '.bin'
+    file_list = glob.glob(os.path.join(os.path.realpath(os.path.join(args.data_dir, "00_input_ids")), "*.bin"))
+    for input_ids in file_list:
+        # send appsrc data
+        file_name = input_ids.split('/')[-1]
+        if not send_appsrc_data(file_name, stream_name, stream_manager_api):
+            return
+        # obtain the inference result
+        key_vec = StringVector()
+        key_vec.push_back(b'mxpi_tensorinfer0')
+        start_time = time.time()
+        infer_result = stream_manager_api.GetProtobuf(stream_name, 0, key_vec)
+        infer_total_time += time.time() - start_time
+        if infer_result.size() == 0:
+            print("inferResult is null")
+            return
+        if infer_result[0].errorCode != 0:
+            print("GetProtobuf error. errorCode=%d" % (infer_result[0].errorCode))
+            return
+        save_result(file_name, infer_result)
+
+    print("Infer files sum: {}, cost total time: {:.6f} sec.".format(len(file_list), infer_total_time))
+    # destroy streams
+    stream_manager_api.DestroyAllStreams()
+
+
+if __name__ == '__main__':
+    run()
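+
+# Example invocation (matches sdk/run.sh below):
+#     python3 main.py --pipeline_file ../data/config/ternarybert.pipeline \
+#         --data_dir ../data/input --res_dir sdk_result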
diff --git a/research/nlp/ternarybert/infer/sdk/run.sh b/research/nlp/ternarybert/infer/sdk/run.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d632eb8d13d059272c8ed31ca2fcd7b644972ea8
--- /dev/null
+++ b/research/nlp/ternarybert/infer/sdk/run.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+# Simple log helper functions
+info() { echo -e "\033[1;34m[INFO ][MxStream] $1\033[1;37m" ; }
+warn() { echo >&2 -e "\033[1;31m[WARN ][MxStream] $1\033[1;37m" ; }
+
+export LD_LIBRARY_PATH=${MX_SDK_HOME}/lib:${MX_SDK_HOME}/opensource/lib:${MX_SDK_HOME}/opensource/lib64:/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64:${LD_LIBRARY_PATH}
+export GST_PLUGIN_SCANNER=${MX_SDK_HOME}/opensource/libexec/gstreamer-1.0/gst-plugin-scanner
+export GST_PLUGIN_PATH=${MX_SDK_HOME}/opensource/lib/gstreamer-1.0:${MX_SDK_HOME}/lib/plugins
+
+# set PYTHONPATH so that StreamManagerApi.py can be imported
+export PYTHONPATH=$PYTHONPATH:${MX_SDK_HOME}/python
+
+if [ -d sdk_result ]; then
+    rm -rf sdk_result
+fi
+mkdir sdk_result
+
+python3 main.py --pipeline_file ../data/config/ternarybert.pipeline --data_dir ../data/input --res_dir sdk_result &> sdk_infer.log
+exit 0
\ No newline at end of file
diff --git a/research/nlp/ternarybert/infer/utils/cal_pearsonr.py b/research/nlp/ternarybert/infer/utils/cal_pearsonr.py
new file mode 100644
index 0000000000000000000000000000000000000000..eb2b22461cabeabff65ca4a74c1028db619157b9
--- /dev/null
+++ b/research/nlp/ternarybert/infer/utils/cal_pearsonr.py
@@ -0,0 +1,67 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ + +"""task metric""" + +import os +import argparse +import numpy as np + +parser = argparse.ArgumentParser(description="ternarybert task metric") +parser.add_argument("--label_path", type=str, required=True, help="label directory") +parser.add_argument("--result_path", type=str, required=True, help="result directory") +args = parser.parse_args() + +BATCH_SIZE = 32 +LABEL_NUM = 1 + +class Pearsonr: + """Pearsonr""" + def __init__(self): + self.logits_array = np.array([]) + self.labels_array = np.array([]) + self.name = 'Pearsonr' + + def update(self, logits, labels): + label = np.reshape(labels, -1) + logit = np.reshape(logits, -1) + self.labels_array = np.concatenate([self.labels_array, label]) + self.logits_array = np.concatenate([self.logits_array, logit]) + + def get_metrics(self): + if len(self.labels_array) < 2: + return 0.0 + x_mean = self.logits_array.mean() + y_mean = self.labels_array.mean() + xm = self.logits_array - x_mean + ym = self.labels_array - y_mean + norm_xm = np.linalg.norm(xm) + norm_ym = np.linalg.norm(ym) + return np.dot(xm / norm_xm, ym / norm_ym) * 100.0 + + +if __name__ == '__main__': + label_numpys = np.load(args.label_path) + callback = Pearsonr() + file_num = len(os.listdir(args.result_path)) + for i in range(file_num): + f_name = "tinybert_bs" + str(BATCH_SIZE) + "_" + str(i) + ".bin" + result_numpy = np.fromfile(os.path.join(args.result_path, f_name), np.float32) + print(f_name) + print(result_numpy) + label_numpy = label_numpys[i] + callback.update(result_numpy, label_numpy) + metrics = callback.get_metrics() + print('{}: {}'.format(callback.name, metrics)) diff --git a/research/nlp/ternarybert/modelarts/start.py b/research/nlp/ternarybert/modelarts/start.py new file mode 100644 index 0000000000000000000000000000000000000000..96fc4221a7de9681032167e144f182fb6132f00e --- /dev/null +++ b/research/nlp/ternarybert/modelarts/start.py @@ -0,0 +1,287 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+
+"""task distill script"""
+
+import datetime
+import os
+import argparse
+import ast
+
+import re
+import numpy as np
+
+from mindspore import context, Tensor
+from mindspore.train.model import Model
+from mindspore.nn.optim import AdamWeightDecay
+from mindspore import set_seed
+from mindspore.train.callback import TimeMonitor
+from mindspore.train.callback import LossMonitor
+import mindspore.communication.management as D
+from mindspore.context import ParallelMode
+from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell
+from mindspore.train.serialization import load_checkpoint, load_param_into_net, export
+
+from src.dataset import create_dataset
+from src.utils import StepCallBack, ModelSaveCkpt, EvalCallBack, BertLearningRate
+from src.config import train_cfg, eval_cfg, teacher_net_cfg, student_net_cfg, task_cfg, cfg_cfg
+from src.cell_wrapper import BertNetworkWithLoss, BertTrainOneStepWithLossScaleCell
+from src.tinybert_model import BertModelCLS
+
+
+WEIGHTS_NAME = cfg_cfg.WEIGHTS_NAME
+EVAL_DATA_NAME = cfg_cfg.EVAL_DATA_NAME
+TRAIN_DATA_NAME = cfg_cfg.TRAIN_DATA_NAME
+DEFAULT_NUM_LABELS = cfg_cfg.DEFAULT_NUM_LABELS
+DEFAULT_SEQ_LENGTH = cfg_cfg.DEFAULT_SEQ_LENGTH
+DEFAULT_BS = cfg_cfg.DEFAULT_BS
+
+def parse_args():
+    """
+    parse args
+    """
+    parser = argparse.ArgumentParser(description='ternarybert task distill')
+    parser.add_argument('--device_target', type=str, default='Ascend', choices=['Ascend', 'GPU'],
+                        help='Device where the code will be implemented. (Default: Ascend)')
+    parser.add_argument('--do_eval', type=ast.literal_eval, default=True,
+                        help='Do eval task during training or not. (Default: True)')
+    parser.add_argument('--epoch_size', type=int, default=5, help='Epoch size for train phase. (Default: 5)')
+    parser.add_argument('--device_id', type=int, default=0, help='Device id. (Default: 0)')
+    parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.")
+    parser.add_argument('--do_shuffle', type=ast.literal_eval, default=True,
+                        help='Enable shuffle for train dataset. (Default: True)')
+    parser.add_argument('--enable_data_sink', type=ast.literal_eval, default=True,
+                        help='Enable data sink. (Default: True)')
+    parser.add_argument('--save_ckpt_step', type=int, default=50,
+                        help='If do_eval is False, the checkpoint will be saved every save_ckpt_step. (Default: 50)')
+    parser.add_argument('--max_ckpt_num', type=int, default=50,
+                        help='The number of checkpoints will not be larger than max_ckpt_num. (Default: 50)')
+    parser.add_argument('--data_sink_steps', type=int, default=50, help='Sink steps for each epoch. (Default: 50)')
+    parser.add_argument('--teacher_model_dir', type=str, default='', help='The checkpoint directory of teacher model.')
+    parser.add_argument('--student_model_dir', type=str, default='', help='The checkpoint directory of student model.')
+    parser.add_argument('--data_dir', type=str, default='', help='Data directory.')
+    parser.add_argument('--output_dir', type=str, default='', help='The output checkpoint directory.')
+    parser.add_argument('--task_name', type=str, default='sts-b', choices=['sts-b', 'qnli', 'mnli'],
+                        help='The name of the task to train. (Default: sts-b)')
+    parser.add_argument('--dataset_type', type=str, default='tfrecord', choices=['tfrecord', 'mindrecord'],
+                        help='Dataset file format. 
(Default: tfrecord)') + parser.add_argument('--seed', type=int, default=1, help='The random seed') + parser.add_argument('--train_batch_size', type=int, default=16, help='Batch size for training') + parser.add_argument('--eval_batch_size', type=int, default=32, help='Eval Batch size in callback') + parser.add_argument("--distribute", type=str, default="false", choices=["true", "false"], + help="Run distribute, default is false.") + parser.add_argument("--file_name", type=str, default="ternarybert", help="The name of the output file.") + parser.add_argument("--file_format", type=str, default="MINDIR", choices=["AIR", "MINDIR"], + help="output model type") + # model art + parser.add_argument('--enable_modelarts', type=ast.literal_eval, default=False, + help='Do modelarts or not. (Default: False)') + parser.add_argument("--data_url", type=str, default="./dataset", help='real input file path') + parser.add_argument("--train_url", type=str, default="", help='real output file path include .ckpt and .air') # modelarts -> obs + parser.add_argument("--modelarts_data_dir", type=str, default="/cache/dataset", help='modelart input path') + parser.add_argument("--modelarts_result_dir", type=str, default="/cache/result", help='modelart output path.') + parser.add_argument("--result_dir", type=str, default="./output", help='output') + parser.add_argument("--modelarts_attrs", type=str, default="") + + return parser.parse_args() + +def obs_data2modelarts(args_opt): + """ + Copy train data from obs to modelarts by using moxing api. + """ + import moxing as mox + start = datetime.datetime.now() + print("===>>>Copy files from obs:{} to modelarts dir:{}".format(args_opt.data_url, args_opt.modelarts_data_dir)) + mox.file.copy_parallel(src_url=args_opt.data_url, dst_url=args_opt.modelarts_data_dir) + end = datetime.datetime.now() + print("===>>>Copy from obs to modelarts, time use:{}(s)".format((end - start).seconds)) + if not mox.file.exists(args_opt.result_dir): + mox.file.make_dirs(args_opt.result_dir) + +def modelarts_result2obs(args_opt): + """ + Copy result data from modelarts to obs. + """ + import moxing as mox + train_url = args_opt.train_url + if not mox.file.exists(train_url): + print(f"train_url[{train_url}] not exist!") + mox.file.make_dirs(train_url) + save_ckpt_dir = os.path.join(args_opt.result_dir, args_opt.task_name) + mox.file.copy_parallel(src_url=save_ckpt_dir, dst_url=os.path.join(train_url, args_opt.task_name)) + files = os.listdir(args_opt.result_dir) + print("===>>>current Files:", files) + print("===>>>Copy Event or Checkpoint from modelarts dir: ./ckpt to obs:{}".format(train_url)) + if args_opt.file_format == "MINDIR": + mox.file.copy(src_url='ternarybert.mindir', + dst_url=os.path.join(train_url, 'ternarybert.mindir')) + else: + mox.file.copy(src_url='ternarybert.air', + dst_url=os.path.join(train_url, 'ternarybert.air')) + +def export_MODEL(args_opt): + """ + start modelarts export + """ + class Task: + """ + Encapsulation class of get the task parameter. 
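+        num_labels and seq_length fall back to DEFAULT_NUM_LABELS and
+        DEFAULT_SEQ_LENGTH when task_cfg does not define them for the task.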
+ """ + + def __init__(self, task_name): + self.task_name = task_name + + @property + def num_labels(self): + if self.task_name in task_cfg and "num_labels" in task_cfg[self.task_name]: + return task_cfg[self.task_name]["num_labels"] + return DEFAULT_NUM_LABELS + + @property + def seq_length(self): + if self.task_name in task_cfg and "seq_length" in task_cfg[self.task_name]: + return task_cfg[self.task_name]["seq_length"] + return DEFAULT_SEQ_LENGTH + + task = Task(args_opt.task_name) + student_net_cfg.seq_length = task.seq_length + student_net_cfg.batch_size = DEFAULT_BS + student_net_cfg.do_quant = False + + ckpt_file = os.path.join(args_opt.result_dir, args_opt.task_name, WEIGHTS_NAME) + eval_model = BertModelCLS(student_net_cfg, False, task.num_labels, 0.0, phase_type='student') + param_dict = load_checkpoint(ckpt_file) + new_param_dict = {} + for key, value in param_dict.items(): + new_key = re.sub('tinybert_', 'bert_', key) + new_key = re.sub('^bert.', '', new_key) + new_param_dict[new_key] = value + load_param_into_net(eval_model, new_param_dict) + eval_model.set_train(False) + input_ids = Tensor(np.zeros((student_net_cfg.batch_size, task.seq_length), np.int32)) + token_type_id = Tensor(np.zeros((student_net_cfg.batch_size, task.seq_length), np.int32)) + input_mask = Tensor(np.zeros((student_net_cfg.batch_size, task.seq_length), np.int32)) + + input_data = [input_ids, token_type_id, input_mask] + + export(eval_model, *input_data, file_name=args_opt.file_name, file_format=args_opt.file_format) + + +def run_task_distill(args_opt): + """ + run task distill + """ + if args_opt.enable_modelarts: + args.student_model_dir = os.path.join(args.modelarts_data_dir, args.student_model_dir) #args.student_model_dir = '/data/weights/student_model/' + args.teacher_model_dir = os.path.join(args.modelarts_data_dir, args.teacher_model_dir) #args.teacher_model_dir = '/data/weights/teacher_model/' + args.data_dir = os.path.join(args.modelarts_data_dir, args.data_dir) #args.data_dir = '/data' + args.output_dir = args.result_dir + task = task_cfg[args_opt.task_name] + teacher_net_cfg.seq_length = task.seq_length + student_net_cfg.seq_length = task.seq_length + train_cfg.batch_size = args_opt.train_batch_size + eval_cfg.batch_size = args_opt.eval_batch_size + teacher_ckpt = os.path.join(args_opt.teacher_model_dir, args_opt.task_name, WEIGHTS_NAME) + student_ckpt = os.path.join(args_opt.student_model_dir, args_opt.task_name, WEIGHTS_NAME) + train_data_dir = os.path.join(args_opt.data_dir, args_opt.task_name, TRAIN_DATA_NAME) + eval_data_dir = os.path.join(args_opt.data_dir, args_opt.task_name, EVAL_DATA_NAME) + save_ckpt_dir = os.path.join(args_opt.output_dir, args_opt.task_name) + if args_opt.distribute == "true": + device_id = int(os.getenv("DEVICE_ID")) + context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, device_id=device_id) + D.init() + device_num = args_opt.device_num + rank = device_id % device_num + print("device_id is {}, rank_id is {}".format(device_id, rank)) + context.reset_auto_parallel_context() + context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True, + device_num=device_num) + save_ckpt_dir = save_ckpt_dir + '_ckpt_' + str(rank) + else: + if args_opt.device_target == "Ascend" or args_opt.device_target == "GPU": + context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, + device_id=args_opt.device_id) + else: + raise Exception("Target error, GPU or Ascend is supported.") + rank = 0 + device_num 
= 1 + train_dataset = create_dataset(batch_size=train_cfg.batch_size, device_num=device_num, rank=rank, + do_shuffle=args_opt.do_shuffle, data_dir=train_data_dir, + data_type=args_opt.dataset_type, seq_length=task.seq_length, + task_type=task.task_type, drop_remainder=True) + dataset_size = train_dataset.get_dataset_size() + print('train dataset size:', dataset_size) + eval_dataset = create_dataset(batch_size=eval_cfg.batch_size, device_num=1, rank=0, + do_shuffle=args_opt.do_shuffle, data_dir=eval_data_dir, + data_type=args_opt.dataset_type, seq_length=task.seq_length, + task_type=task.task_type, drop_remainder=False) + print('eval dataset size:', eval_dataset.get_dataset_size()) + repeat_count = args_opt.epoch_size + time_monitor_steps = dataset_size + netwithloss = BertNetworkWithLoss(teacher_config=teacher_net_cfg, teacher_ckpt=teacher_ckpt, + student_config=student_net_cfg, student_ckpt=student_ckpt, + is_training=True, task_type=task.task_type, num_labels=task.num_labels) + params = netwithloss.trainable_params() + optimizer_cfg = train_cfg.optimizer_cfg + lr_schedule = BertLearningRate(learning_rate=optimizer_cfg.AdamWeightDecay.learning_rate, + end_learning_rate=optimizer_cfg.AdamWeightDecay.end_learning_rate, + warmup_steps=int(dataset_size * args_opt.epoch_size * + optimizer_cfg.AdamWeightDecay.warmup_ratio), + decay_steps=int(dataset_size * args_opt.epoch_size), + power=optimizer_cfg.AdamWeightDecay.power) + decay_params = list(filter(optimizer_cfg.AdamWeightDecay.decay_filter, params)) + other_params = list(filter(lambda x: not optimizer_cfg.AdamWeightDecay.decay_filter(x), params)) + group_params = [{'params': decay_params, 'weight_decay': optimizer_cfg.AdamWeightDecay.weight_decay}, + {'params': other_params, 'weight_decay': 0.0}, {'order_params': params}] + optimizer = AdamWeightDecay(group_params, learning_rate=lr_schedule, eps=optimizer_cfg.AdamWeightDecay.eps) + update_cell = DynamicLossScaleUpdateCell(loss_scale_value=2 ** 20, scale_factor=2.0, scale_window=1000) + netwithgrads = BertTrainOneStepWithLossScaleCell(netwithloss, optimizer=optimizer, scale_update_cell=update_cell) + callback_size = dataset_size + if args_opt.do_eval: + eval_dataset = list(eval_dataset.create_dict_iterator()) + callback = [TimeMonitor(time_monitor_steps), LossMonitor(callback_size), + EvalCallBack(network=netwithloss.bert, dataset=eval_dataset, + eval_ckpt_step=dataset_size, + save_ckpt_dir=save_ckpt_dir, + embedding_bits=student_net_cfg.embedding_bits, + weight_bits=student_net_cfg.weight_bits, + clip_value=student_net_cfg.weight_clip_value, + metrics=task.metrics)] + else: + callback = [TimeMonitor(time_monitor_steps), StepCallBack(), LossMonitor(callback_size), + ModelSaveCkpt(network=netwithloss.bert, save_ckpt_step=args_opt.save_ckpt_step, + max_ckpt_num=args_opt.max_ckpt_num, output_dir=save_ckpt_dir, + embedding_bits=student_net_cfg.embedding_bits, + weight_bits=student_net_cfg.weight_bits, + clip_value=student_net_cfg.weight_clip_value)] + model = Model(netwithgrads) + model.train(repeat_count, train_dataset, callbacks=callback, + dataset_sink_mode=args_opt.enable_data_sink) + + +if __name__ == '__main__': + args = parse_args() + set_seed(args.seed) + if args.enable_modelarts: + obs_data2modelarts(args) + run_task_distill(args) + print("===========training success================") + if args.enable_modelarts: + ## start export air + export_MODEL(args) + print("===========export success================") + ## copy result from modelarts to obs + modelarts_result2obs(args) + 
print("===========Done!!!!!================") diff --git a/research/nlp/ternarybert/scripts/docker_start.sh b/research/nlp/ternarybert/scripts/docker_start.sh new file mode 100644 index 0000000000000000000000000000000000000000..eff8f5260602cd3ed58ee589d4123e4048dedd50 --- /dev/null +++ b/research/nlp/ternarybert/scripts/docker_start.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.mitations under the License. + +docker_image=$1 +data_dir=$2 +model_dir=$3 + +docker run -it -u root --ipc=host \ + --device=/dev/davinci0 \ + --device=/dev/davinci1 \ + --device=/dev/davinci2 \ + --device=/dev/davinci3 \ + --device=/dev/davinci4 \ + --device=/dev/davinci5 \ + --device=/dev/davinci6 \ + --device=/dev/davinci7 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \ + -v ${model_dir}:${model_dir} \ + -v ${data_dir}:${data_dir} \ + -v /root/ascend/log:/root/ascend/log ${docker_image} /bin/bash