#!/bin/bash

# Copyright(C) 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Start an interactive container as root with /dev/davinci0-7 and the Ascend
# management devices passed through, and the driver, add-ons, log, data and
# model directories bind-mounted at the same paths as on the host.
#
# Usage: bash docker_start.sh <docker_image> <data_dir> <model_dir>
#   docker_image  image to run
#   data_dir      host directory mounted into the container (dataset)
#   model_dir     host directory mounted into the container (model/code)

docker_image=$1
data_dir=$2
model_dir=$3

# Refuse to continue with a missing argument: an empty value would otherwise
# reach docker as a malformed "-v :" option or as a missing image name.
if [ -z "${docker_image}" ]; then
  echo "please input docker_image" >&2
  exit 1
fi

if [ -z "${data_dir}" ]; then
  echo "please input data_dir" >&2
  exit 1
fi

if [ -z "${model_dir}" ]; then
  echo "please input model_dir" >&2
  exit 1
fi

# Expansions are quoted so that paths containing spaces stay one argument.
docker run -it -u root --ipc=host \
  --device=/dev/davinci0 \
  --device=/dev/davinci1 \
  --device=/dev/davinci2 \
  --device=/dev/davinci3 \
  --device=/dev/davinci4 \
  --device=/dev/davinci5 \
  --device=/dev/davinci6 \
  --device=/dev/davinci7 \
  --device=/dev/davinci_manager \
  --device=/dev/devmm_svm \
  --device=/dev/hisi_hdc \
  -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
  -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \
  -v "${model_dir}:${model_dir}" \
  -v "${data_dir}:${data_dir}" \
  -v /root/ascend/log:/root/ascend/log \
  "${docker_image}" \
  /bin/bash
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +model_path=$1 +output_model_name=$2 + +atc --model=$model_path \ + --framework=1 \ + --output=$output_model_name \ + --input_format=NCHW \ + --soc_version=Ascend310 \ + --output_type=FP32 \ No newline at end of file diff --git a/official/audio/ecapa_tdnn/infer/data/config/ecapa_tdnn.pipeline b/official/audio/ecapa_tdnn/infer/data/config/ecapa_tdnn.pipeline new file mode 100644 index 0000000000000000000000000000000000000000..c5f24f84c5b7396fedd0023d35fd40af9ef55de7 --- /dev/null +++ b/official/audio/ecapa_tdnn/infer/data/config/ecapa_tdnn.pipeline @@ -0,0 +1,33 @@ +{ + "ecapa_tdnn": { + "stream_config": { + "deviceId": "0" + }, + "appsrc0": { + "props": { + "blocksize": "409600" + }, + "factory": "appsrc", + "next": "mxpi_tensorinfer0" + }, + "mxpi_tensorinfer0": { + "props": { + "dataSource": "appsrc0", + "modelPath": "../data/model/ecapatdnn.om", + "outputDeviceId": "-1" + }, + "factory": "mxpi_tensorinfer", + "next": "mxpi_dataserialize0" + }, + "mxpi_dataserialize0": { + "props": { + "outputDataKeys": "mxpi_tensorinfer0" + }, + "factory": "mxpi_dataserialize", + "next": "appsink0" + }, + "appsink0": { + "factory": "appsink" + } + } +} diff --git a/official/audio/ecapa_tdnn/infer/docker_start_infer.sh b/official/audio/ecapa_tdnn/infer/docker_start_infer.sh new file mode 100644 index 0000000000000000000000000000000000000000..05c1f51b0965d290a8b5f33c7aa7d89b061a2013 --- /dev/null +++ b/official/audio/ecapa_tdnn/infer/docker_start_infer.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# 
#!/bin/bash

# Copyright(C) 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Start an interactive inference container as root with /dev/davinci0 and the
# Ascend management devices passed through, and the driver directory plus the
# model directory bind-mounted at the same paths as on the host.
#
# Usage: bash docker_start_infer.sh <docker_image> <model_dir>
#   docker_image  image to run
#   model_dir     existing host directory mounted into the container

docker_image=$1
model_dir=$2

if [ -z "${docker_image}" ]; then
  echo "please input docker_image" >&2
  exit 1
fi

if [ ! -d "${model_dir}" ]; then
  echo "please input model_dir" >&2
  exit 1
fi

# Expansions are quoted so that a model_dir containing spaces stays one
# argument to "-v".
docker run -it -u root \
  --device=/dev/davinci0 \
  --device=/dev/davinci_manager \
  --device=/dev/devmm_svm \
  --device=/dev/hisi_hdc \
  -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
  -v "${model_dir}:${model_dir}" \
  "${docker_image}" \
  /bin/bash
#!/bin/bash

# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

# Directory containing this script; the build tree is created beneath it.
path_cur=$(dirname "$0")

# Export a default for each of ASCEND_HOME, ASCEND_VERSION and ARCH_PATTERN
# that the caller has not already set, and report the value in effect.
function check_env()
{
    # set ASCEND_HOME to /usr/local/Ascend/ when it was not specified by user
    if [ -z "${ASCEND_HOME}" ]; then
        export ASCEND_HOME=/usr/local/Ascend/
        echo "Set ASCEND_HOME to the default value: ${ASCEND_HOME}"
    else
        echo "ASCEND_HOME is set to ${ASCEND_HOME} by user"
    fi

    # set ASCEND_VERSION to ascend-toolkit/latest when it was not specified by user
    if [ -z "${ASCEND_VERSION}" ]; then
        export ASCEND_VERSION=ascend-toolkit/latest
        echo "Set ASCEND_VERSION to the default value: ${ASCEND_VERSION}"
    else
        echo "ASCEND_VERSION is set to ${ASCEND_VERSION} by user"
    fi

    # set ARCH_PATTERN to ./ when it was not specified by user
    if [ -z "${ARCH_PATTERN}" ]; then
        export ARCH_PATTERN=./
        echo "ARCH_PATTERN is set to the default value: ${ARCH_PATTERN}"
    else
        echo "ARCH_PATTERN is set to ${ARCH_PATTERN} by user"
    fi
}

# Configure, build and install ecapa_tdnn in a fresh "build" directory next to
# this script. Exits with the failing tool's status on any error.
function build_ecapatdnn()
{
    local ret

    # Stop if the script directory cannot be entered; otherwise the
    # "rm -rf build" below would run in the caller's working directory.
    cd "${path_cur}" || exit 1
    rm -rf build
    mkdir -p build
    cd build || exit 1

    cmake ..
    ret=$?
    if [ ${ret} -ne 0 ]; then
        echo "Failed to configure ecapa_tdnn."
        exit ${ret}
    fi

    make
    ret=$?
    if [ ${ret} -ne 0 ]; then
        echo "Failed to build ecapa_tdnn."
        exit ${ret}
    fi

    make install
    ret=$?
    if [ ${ret} -ne 0 ]; then
        echo "Failed to install ecapa_tdnn."
        exit ${ret}
    fi
}

check_env
build_ecapatdnn
+ */ +#include "Ecapa_tdnn.h" +#include <unistd.h> +#include <sys/stat.h> +#include <map> +#include <fstream> +#include "MxBase/DeviceManager/DeviceManager.h" +#include "MxBase/Log/Log.h" + +APP_ERROR ECAPATDNN::Init(const InitParam &initParam) { + this->deviceId_ = initParam.deviceId; + this->outputDataPath_ = initParam.outputDataPath; + APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); + if (ret != APP_ERR_OK) { + LogError << "Init devices failed, ret=" << ret << "."; + return ret; + } + + ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); + if (ret != APP_ERR_OK) { + LogError << "Set context failed, ret=" << ret << "."; + return ret; + } + + this->model_ = std::make_shared<MxBase::ModelInferenceProcessor>(); + ret = this->model_->Init(initParam.modelPath, this->modelDesc_); + if (ret != APP_ERR_OK) { + LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; + return ret; + } + uint32_t input_data_size = 1; + for (size_t j = 0; j < this->modelDesc_.inputTensors[0].tensorDims.size(); ++j) { + this->inputDataShape_[j] = (uint32_t)this->modelDesc_.inputTensors[0].tensorDims[j]; + input_data_size *= this->inputDataShape_[j]; + } + this->inputDataSize_ = input_data_size; + + return APP_ERR_OK; +} + +APP_ERROR ECAPATDNN::DeInit() { + this->model_->DeInit(); + MxBase::DeviceManager::GetInstance()->DestroyDevices(); + return APP_ERR_OK; +} + +APP_ERROR ECAPATDNN::ReadTensorFromFile(const std::string &file, float *data) { + if (data == NULL) { + LogError << "input data is invalid."; + return APP_ERR_COMM_INVALID_POINTER; + } + + std::ifstream infile; + // open data file + infile.open(file, std::ios_base::in | std::ios_base::binary); + // check data file validity + if (infile.fail()) { + LogError << "Failed to open data file: " << file << "."; + return APP_ERR_COMM_OPEN_FAIL; + } + infile.read(reinterpret_cast<char*>(data), sizeof(float) * this->inputDataSize_); + infile.close(); + return APP_ERR_OK; +} + 
+APP_ERROR ECAPATDNN::ReadInputTensor(const std::string &fileName, std::vector<MxBase::TensorBase> *inputs) { + float data[this->inputDataSize_] = {0}; + + APP_ERROR ret = ReadTensorFromFile(fileName, data); + + if (ret != APP_ERR_OK) { + LogError << "ReadTensorFromFile failed."; + return ret; + } + + const uint32_t dataSize = this->modelDesc_.inputTensors[0].tensorSize; + MxBase::MemoryData memoryDataDst(dataSize, MxBase::MemoryData::MEMORY_DEVICE, this->deviceId_); + MxBase::MemoryData memoryDataSrc(reinterpret_cast<void*>(data), dataSize, MxBase::MemoryData::MEMORY_HOST_MALLOC); + + LogInfo << "========== datasize ---> " << dataSize; + ret = MxBase::MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Memory malloc and copy failed."; + return ret; + } + + inputs->push_back(MxBase::TensorBase(memoryDataDst, false, this->inputDataShape_, MxBase::TENSOR_DTYPE_FLOAT32)); + return APP_ERR_OK; +} + + +APP_ERROR ECAPATDNN::Inference(const std::vector<MxBase::TensorBase> &inputs, + std::vector<MxBase::TensorBase> *outputs) { + auto dtypes = this->model_->GetOutputDataType(); + for (size_t i = 0; i < this->modelDesc_.outputTensors.size(); ++i) { + std::vector<uint32_t> shape = {}; + for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { + shape.push_back((uint32_t)this->modelDesc_.outputTensors[i].tensorDims[j]); + } + MxBase::TensorBase tensor(shape, dtypes[i], MxBase::MemoryData::MemoryType::MEMORY_DEVICE, this->deviceId_); + APP_ERROR ret = MxBase::TensorBase::TensorBaseMalloc(tensor); + if (ret != APP_ERR_OK) { + LogError << "TensorBaseMalloc failed, ret=" << ret << "."; + return ret; + } + outputs->push_back(tensor); + } + + MxBase::DynamicInfo dynamicInfo = {}; + dynamicInfo.dynamicType = MxBase::DynamicType::STATIC_BATCH; + auto startTime = std::chrono::high_resolution_clock::now(); + APP_ERROR ret = this->model_->ModelInference(inputs, *outputs, dynamicInfo); + auto endTime 
= std::chrono::high_resolution_clock::now(); + double costMs = std::chrono::duration<double, std::milli>(endTime - startTime).count(); + g_inferCost.push_back(costMs); + + if (ret != APP_ERR_OK) { + LogError << "ModelInference failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR ECAPATDNN::WriteResult(const std::string &imageFile, std::vector<MxBase::TensorBase> outputs) { + APP_ERROR ret = outputs[0].ToHost(); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "tohost fail."; + return ret; + } + auto dataptr = (float *)outputs[0].GetBuffer(); // NOLINT + int pos = imageFile.rfind('/'); + std::string fileName(imageFile, pos + 1); + fileName.replace(fileName.find('.'), fileName.size() - fileName.find('.'), ".txt"); + std::string outFileName = this->outputDataPath_ + "/" + fileName; + + LogInfo << "file path for saving result: " << outFileName; + std::ofstream tfile(outFileName); + if (tfile.fail()) { + LogError << "Failed to open result file"; + return APP_ERR_COMM_FAILURE; + } + for (size_t i = 0; i < 512; ++i) { + tfile << *(dataptr + i) << std::endl; + } + tfile.close(); + return APP_ERR_OK; +} + +APP_ERROR ECAPATDNN::Process(const std::string &inferPath, const std::string &fileName) { + std::vector<MxBase::TensorBase> inputs = {}; + std::string inputIdsFile = inferPath + fileName; + APP_ERROR ret = ReadInputTensor(inputIdsFile, &inputs); + if (ret != APP_ERR_OK) { + LogError << "Read input ids failed, ret=" << ret << "."; + return ret; + } + std::vector<MxBase::TensorBase> outputs = {}; + ret = Inference(inputs, &outputs); + if (ret != APP_ERR_OK) { + LogError << "Inference failed, ret=" << ret << "."; + return ret; + } + ret = WriteResult(fileName, outputs); + if (ret != APP_ERR_OK) { + LogError << "Write result failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} diff --git a/official/audio/ecapa_tdnn/infer/mxbase/src/Ecapa_tdnn.h b/official/audio/ecapa_tdnn/infer/mxbase/src/Ecapa_tdnn.h new file mode 
/*
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MXBASE_ECAPATDNN_H
#define MXBASE_ECAPATDNN_H

#include <memory>
#include <utility>
#include <vector>
#include <string>
#include <map>
#include "MxBase/ModelInfer/ModelInferenceProcessor.h"
#include "MxBase/Tensor/TensorContext/TensorContext.h"

// Per-inference wall-clock cost in milliseconds; defined in main.cpp and
// appended to by ECAPATDNN::Inference.
extern std::vector<double> g_inferCost;

// Parameters handed to ECAPATDNN::Init.
struct InitParam {
    uint32_t deviceId;           // device the tensor context is bound to
    std::string modelPath;       // path of the model file to load
    std::string outputDataPath;  // directory result files are written to
};

// Wraps model loading, input reading, inference and result writing for the
// ECAPA-TDNN model on top of MxBase.
class ECAPATDNN {
 public:
    // Initialise devices and model; must succeed before Process/Inference.
    APP_ERROR Init(const InitParam &initParam);
    // Release the model and destroy the devices.
    APP_ERROR DeInit();
    // Run the model on `inputs`, appending the output tensors to `outputs`.
    APP_ERROR Inference(const std::vector<MxBase::TensorBase> &inputs, std::vector<MxBase::TensorBase> *outputs);
    // Read inferPath + fileName, run inference and write the result file.
    APP_ERROR Process(const std::string &inferPath, const std::string &fileName);

 protected:
    // Read inputDataSize_ floats of raw binary data from `file` into `data`.
    APP_ERROR ReadTensorFromFile(const std::string &file, float *data);
    // Load `fileName` and append it to `inputs` as a device tensor.
    APP_ERROR ReadInputTensor(const std::string &fileName, std::vector<MxBase::TensorBase> *inputs);
    // Write outputs[0] as text to a file named after `imageFile`.
    APP_ERROR WriteResult(const std::string &imageFile, std::vector<MxBase::TensorBase> outputs);

 private:
    std::shared_ptr<MxBase::ModelInferenceProcessor> model_;
    MxBase::ModelDesc modelDesc_ = {};
    uint32_t deviceId_ = 0;
    std::string outputDataPath_ = "./result";
    // Defaults only: Init() overwrites shape and size from the loaded model's
    // first input tensor. 24080 == 1 * 301 * 80.
    std::vector<uint32_t> inputDataShape_ = {1, 301, 80};

    uint32_t inputDataSize_ = 24080;
};

#endif
inputDataShape_ = {1, 301, 80}; + + uint32_t inputDataSize_ = 24080; +}; + +#endif diff --git a/official/audio/ecapa_tdnn/infer/mxbase/src/main.cpp b/official/audio/ecapa_tdnn/infer/mxbase/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f4511d484946f516b581df5ea71b3b032976604d --- /dev/null +++ b/official/audio/ecapa_tdnn/infer/mxbase/src/main.cpp @@ -0,0 +1,102 @@ + +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <unistd.h> +#include <dirent.h> +#include <iostream> +#include <fstream> +#include <vector> +#include "Ecapa_tdnn.h" +#include "MxBase/Log/Log.h" + +std::vector<double> g_inferCost; + +void InitProtonetParam(InitParam* initParam, const std::string &model_path, const std::string &output_data_path) { + initParam->deviceId = 0; + initParam->modelPath = model_path; + initParam->outputDataPath = output_data_path; +} +APP_ERROR ReadFilesFromPath(const std::string &path, std::vector<std::string> *files) { + DIR *dirPtr = opendir(path.c_str()); + if (dirPtr == nullptr) { + LogError << "opendir failed. dir:" << path; + return APP_ERR_INTERNAL_ERROR; + } + dirent *direntPtr = nullptr; + while ((direntPtr = readdir(dirPtr)) != nullptr) { + std::string fileName = direntPtr->d_name; + if (fileName == "." 
|| fileName == "..") { + continue; + } + + files->push_back(fileName); + } + closedir(dirPtr); + return APP_ERR_OK; +} +int main(int argc, char* argv[]) { + LogInfo << "======================================= !!!Parameters setting!!!" << \ + "========================================"; + std::string model_path = argv[1]; + LogInfo << "========== loading model weights from: " << model_path; + + std::string input_data_path = argv[2]; + LogInfo << "========== input data path = " << input_data_path; + + std::string output_data_path = argv[3]; + LogInfo << "========== output data path = " << output_data_path << \ + " WARNING: please make sure that this folder is created in advance!!!"; + + LogInfo << "======================================== !!!Parameters setting!!! " << \ + "========================================"; + + InitParam initParam; + InitProtonetParam(&initParam, model_path, output_data_path); + auto ecapatdnn = std::make_shared<ECAPATDNN>(); + APP_ERROR ret = ecapatdnn->Init(initParam); + if (ret != APP_ERR_OK) { + LogError << "ecapatdnn init failed, ret=" << ret << "."; + return ret; + } + std::vector<std::string> files; + ret = ReadFilesFromPath(input_data_path, &files); + if (ret != APP_ERR_OK) { + LogError << "Read files from path failed, ret=" << ret << "."; + return ret; + } + + // do infer + for (uint32_t i = 0; i < files.size(); i++) { + LogInfo << "Processing: " + std::to_string(i+1) + "/" + std::to_string(files.size()) + " ---> " + files[i]; + ret = ecapatdnn->Process(input_data_path, files[i]); + if (ret != APP_ERR_OK) { + LogError << "ecapatdnn process failed, ret=" << ret << "."; + ecapatdnn->DeInit(); + return ret; + } + } + LogInfo << "infer succeed and write the result data with binary file !"; + ecapatdnn->DeInit(); + double costSum = 0; + for (uint32_t i = 0; i < g_inferCost.size(); i++) { + costSum += g_inferCost[i]; + } + LogInfo << "Infer images sum " << g_inferCost.size() << ", cost total time: " << costSum << " ms."; + LogInfo << "The 
throughput: " << g_inferCost.size() * 1000 / costSum << " bin/sec."; + LogInfo << "========== The infer result has been saved in ---> " << output_data_path; + return APP_ERR_OK; +} diff --git a/official/audio/ecapa_tdnn/infer/sdk/main.py b/official/audio/ecapa_tdnn/infer/sdk/main.py new file mode 100644 index 0000000000000000000000000000000000000000..891e893175a7b10bd6995b7cc234ceeaaf4366d3 --- /dev/null +++ b/official/audio/ecapa_tdnn/infer/sdk/main.py @@ -0,0 +1,143 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Run the ecapa_tdnn stream on every evaluation feature and store the embeddings."""

import argparse
import os
import pickle
import sys
from datetime import datetime
import numpy as np
import MxpiDataType_pb2 as MxpiDataType
from StreamManagerApi import StreamManagerApi, InProtobufVector, MxProtobufIn, StringVector, MxDataInput

# Upper bound on the number of utterances processed in one run.
MAX_UTTERANCES = 50000


class DatasetGenerator:
    """Index-based access to the feature/label .npy pairs of a data directory.

    ``<data_dir>/fea.lst`` and ``<data_dir>/label.lst`` each hold one path per
    line, relative to ``data_dir``. With ``drop=True`` the last entry of both
    lists is discarded.
    """

    def __init__(self, data_dir, drop=True):
        self.data = []
        self.label = []
        filelist = os.path.join(data_dir, "fea.lst")
        labellist = os.path.join(data_dir, "label.lst")
        with open(filelist, 'r') as fp:
            for fpa in fp:
                self.data.append(os.path.join(data_dir, fpa.strip()))
        with open(labellist, 'r') as fp:
            for lab in fp:
                self.label.append(os.path.join(data_dir, lab.strip()))
        if drop:
            self.data.pop()
            self.label.pop()
        print("dataset init ok, total len:", len(self.data))

    def __getitem__(self, ind):
        """Return ``(feature array, first element of the label array)``."""
        npdata = np.load(self.data[ind])
        nplabel = np.load(self.label[ind]).tolist()
        return npdata, nplabel[0]

    def __len__(self):
        return len(self.data)


def inference(input_tensor):
    """Send one tensor through the stream and return the model output.

    Uses the module-level ``stream_manager_api`` and ``stream_name`` set up in
    the ``__main__`` section.

    Args:
        input_tensor: numpy array; its bytes and shape are sent to ``appsrc0``.

    Returns:
        1-D float32 numpy array decoded from the first output tensor.

    Exits the process with status 1 if sending or receiving fails.
    """
    tensor_bytes = input_tensor.tobytes()
    in_plugin_id = 0
    tensorPackageList = MxpiDataType.MxpiTensorPackageList()
    tensorPackage = tensorPackageList.tensorPackageVec.add()
    dataInput = MxDataInput()
    dataInput.data = tensor_bytes
    tensorVec = tensorPackage.tensorVec.add()
    tensorVec.deviceId = 0
    tensorVec.memType = 0
    for t in input_tensor.shape:
        tensorVec.tensorShape.append(t)
    tensorVec.dataStr = dataInput.data
    tensorVec.tensorDataSize = len(tensor_bytes)
    # add feature data end
    key = "appsrc{}".format(in_plugin_id).encode('utf-8')
    protobufVec = InProtobufVector()
    protobuf = MxProtobufIn()
    protobuf.key = key
    protobuf.type = b'MxTools.MxpiTensorPackageList'
    protobuf.protobuf = tensorPackageList.SerializeToString()
    protobufVec.push_back(protobuf)
    unique_id = stream_manager_api.SendProtobuf(stream_name, in_plugin_id, protobufVec)
    if unique_id < 0:
        print("Failed to send data to stream.")
        sys.exit(1)
    # Obtain the inference result by specifying streamName and uniqueId.
    keyVec = StringVector()
    keyVec.push_back(b'mxpi_tensorinfer0')
    infer_result = stream_manager_api.GetProtobuf(stream_name, in_plugin_id, keyVec)
    if infer_result.size() == 0:
        print("inferResult is null")
        sys.exit(1)
    if infer_result[0].errorCode != 0:
        print("GetProtobuf error. errorCode=%d" % (infer_result[0].errorCode))
        sys.exit(1)
    # get infer result
    result = MxpiDataType.MxpiTensorPackageList()
    result.ParseFromString(infer_result[0].messageBuf)
    # convert the inference result to Numpy array
    out = np.frombuffer(result.tensorPackageVec[0].tensorVec[0].dataStr, dtype=np.float32)
    return out


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--pipeline_path', type=str, default='../data/config/ecapa_tdnn.pipeline')
    parser.add_argument('--eval_data_path', type=str, default='../data/feat_eval/')
    parser.add_argument('--output_path', type=str, default='../output/')
    parser.add_argument('--npy_path', type=str, default='../npy/')
    hparams = parser.parse_args()
    # init stream manager
    stream_manager_api = StreamManagerApi()
    ret = stream_manager_api.InitManager()
    if ret != 0:
        print("Failed to init Stream manager, ret=%s" % str(ret))
        sys.exit(1)

    # create streams by pipeline config file
    with open(hparams.pipeline_path, 'rb') as f:
        pipelineStr = f.read()
    ret = stream_manager_api.CreateMultipleStreams(pipelineStr)
    if ret != 0:
        print("Failed to create Stream, ret=%s" % str(ret))
        sys.exit(1)
    os.makedirs(hparams.output_path, exist_ok=True)
    os.makedirs(hparams.npy_path, exist_ok=True)
    stream_name = b'ecapa_tdnn'
    dataset_enroll = DatasetGenerator(hparams.eval_data_path, False)
    steps_per_epoch_enroll = len(dataset_enroll)
    print("size of enroll, test:", steps_per_epoch_enroll)
    fpath = os.path.join(hparams.npy_path, 'enroll_dict_bleeched.npy')
    enroll_dict = dict()
    # Stop at the end of the dataset by bounding the range. The previous
    # in-loop exit() ended the whole program there, so the embeddings were
    # never dumped and the streams never destroyed.
    for index in range(min(MAX_UTTERANCES, steps_per_epoch_enroll)):
        # Load each sample once; every dataset_enroll[index] re-reads the files.
        feature, label = dataset_enroll[index]
        batchdata = feature[:, :301, :]
        if index % 1000 == 0:
            print(f"{datetime.now()}, iter-{index}")
        embs = inference(batchdata)
        # copy() returns a new numpy.ndarray holding the values copied from
        # this array.
        with open(os.path.join(hparams.output_path, str(index) + '.txt'), 'w') as f_write:
            f_write.write(str({label: embs.copy()}))
        enroll_dict[label] = embs.copy()
    with open(fpath, "wb") as f_dump:
        pickle.dump(enroll_dict, f_dump)

    # destroy streams
    stream_manager_api.DestroyAllStreams()
+# ============================================================================ + +import argparse +import numpy as np +from scipy.spatial.distance import cosine + +def evaluate(spk2emb, utt2emb, trials): + # Evaluate EER given utterance to embedding mapping and trials file + scores, labels = [], [] + with open(trials, "r") as f: + for trial in f: + trial = trial.strip() + label, spk, test = trial.split(" ") + spk = spk[:-4] + if label == '1': + labels.append(1) + else: + labels.append(0) + enroll_emb = spk2emb[spk] + test_emb = utt2emb[test[:-4]] + scores.append(1 - cosine(enroll_emb, test_emb)) + + return get_EER_from_scores(scores, labels)[0] + +def get_EER_from_scores(scores, labels, pos_label=1): + """Compute EER given scores and labels + """ + P_fa, P_miss, thresholds = compute_fa_miss(scores, labels, pos_label, return_thresholds=True) + eer, thresh_eer = get_EER(P_fa, P_miss, thresholds) + return eer, thresh_eer + +def compute_fa_miss(scores, labels, pos_label=1, return_thresholds=True): + """Returns P_fa, P_miss, [thresholds] + """ + from sklearn.metrics import roc_curve + fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=pos_label) + P_fa = fpr[::-1] + P_miss = 1. - tpr[::-1] + thresholds = thresholds[::-1] + if return_thresholds: + return P_fa, P_miss, thresholds + return P_fa, P_miss + +def get_EER(P_fa, P_miss, thresholds=None): + """Compute EER given false alarm and miss probabilities + """ + from scipy.optimize import brentq + from scipy.interpolate import interp1d + eer = brentq(lambda x: x - interp1d(P_fa, P_miss)(x), 0., 1.) 
+ eer = float(eer) + if thresholds is None: + return eer + thresh_eer = interp1d(P_fa, thresholds)(eer) + thresh_eer = float(thresh_eer) + return eer, thresh_eer + +def emb_mean(g_mean, increment, emb_dict): + emb_dict_mean = dict() + for utt in emb_dict: + if increment == 0: + g_mean = emb_dict[utt] + else: + weight = 1 / (increment + 1) + g_mean = ( + 1 - weight + ) * g_mean + weight * emb_dict[utt] + emb_dict_mean[utt] = emb_dict[utt] - g_mean + increment += 1 + if increment % 3000 == 0: + print('processing ', increment) + return emb_dict_mean, g_mean, increment + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--npy_path', type=str, default='../output/enroll_dict_bleeched.npy') + parser.add_argument('--veri_file_path', type=str, default='../feat_eval/veri_test_bleeched.txt') + hparams = parser.parse_args() + npy_path = hparams.npy_path + veri_file_path = hparams.veri_file_path + enroll_dict = np.load(npy_path, allow_pickle=True) + eer1 = evaluate(enroll_dict, enroll_dict, veri_file_path) + print("eer baseline:", eer1) + print("Sub mean...") + glob_mean = np.zeros(8) + cnt = 0 + enroll_dict_mean, glob_mean, cnt = emb_mean(glob_mean, cnt, enroll_dict) + enroll_dict_mean, glob_mean, cnt = emb_mean(glob_mean, cnt, enroll_dict) + enroll_dict_mean, glob_mean, cnt = emb_mean(glob_mean, cnt, enroll_dict) + eer2 = evaluate(enroll_dict_mean, enroll_dict_mean, veri_file_path) + print("eer with sub mean:", eer2) diff --git a/official/audio/ecapa_tdnn/infer/utils/preprocess.py b/official/audio/ecapa_tdnn/infer/utils/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..4c4b4a773b108f59e71ddbc1badefbf476b4b451 --- /dev/null +++ b/official/audio/ecapa_tdnn/infer/utils/preprocess.py @@ -0,0 +1,43 @@ +import os +import sys +import numpy as np + +class DatasetGenerator: + def __init__(self, data_dir, drop=True): + self.data = [] + self.label = [] + filelist = os.path.join(data_dir, "fea.lst") + labellist = 
"""Cut the evaluation features to 301 frames and dump them as raw .bin files."""
import os
import sys
import numpy as np


class DatasetGenerator:
    """Index-based access to the feature/label .npy pairs of a data directory.

    ``<data_dir>/fea.lst`` and ``<data_dir>/label.lst`` each hold one path per
    line, relative to ``data_dir``. With ``drop=True`` the last entry of both
    lists is discarded.
    """

    def __init__(self, data_dir, drop=True):
        self.data = []
        self.label = []
        filelist = os.path.join(data_dir, "fea.lst")
        labellist = os.path.join(data_dir, "label.lst")
        with open(filelist, 'r') as fp:
            for fpath in fp:
                self.data.append(os.path.join(data_dir, fpath.strip()))
        with open(labellist, 'r') as fp:
            for label in fp:
                self.label.append(os.path.join(data_dir, label.strip()))
        if drop:
            self.data.pop()
            self.label.pop()
        print("dataset init ok, total len:", len(self.data))

    def __getitem__(self, index):
        """Return ``(feature array, first element of the label array)``."""
        npdata = np.load(self.data[index])
        nplabel = np.load(self.label[index]).tolist()
        return npdata, nplabel[0]

    def __len__(self):
        return len(self.data)


def export_eval_bins(data_path, output_path="testdata/"):
    """Write every sample of ``data_path`` as ``<label with / -> _>.bin``.

    For each sample the slice ``[0, :301, :]`` of the feature array is written
    with ``ndarray.tofile`` into ``output_path``, which is created if missing.

    Returns:
        Number of samples written.
    """
    dataset_eval = DatasetGenerator(data_path, False)
    steps_per_epoch_enroll = len(dataset_eval)
    print("size of eval data:", steps_per_epoch_enroll)

    os.makedirs(output_path, exist_ok=True)

    for idx in range(steps_per_epoch_enroll):
        # Load each sample once; every dataset_eval[idx] re-reads both files.
        feature, label = dataset_eval[idx]
        datacut = feature[0, :301, :]
        savename = os.path.join(output_path, label.replace('/', '_') + '.bin')
        datacut.tofile(savename)
    return steps_per_epoch_enroll


if __name__ == "__main__":
    if len(sys.argv) < 2:
        print("Usage: python preprocess.py <data_path>")
        sys.exit(1)
    export_eval_bins(sys.argv[1])
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +train model +""" +import argparse +import os +import time +import ast +from datetime import datetime +import math +import numpy as np +import mindspore as ms +import mindspore.nn as nn +from mindspore import Tensor +import mindspore.dataset as ds +from mindspore.nn import FixedLossScaleUpdateCell +from mindspore import context, load_checkpoint, load_param_into_net, export +from mindspore.train.callback import ModelCheckpoint +from mindspore.train.callback import CheckpointConfig +from mindspore.train.callback import RunContext, _InternalCallbackParam +from mindspore.context import ParallelMode +from mindspore.communication.management import init, get_rank, get_group_size +from src.ecapa_tdnn import ECAPA_TDNN, Classifier +from src.reader import DatasetGeneratorBatch as DatasetGenerator +from src.util import AdditiveAngularMargin +from src.loss_scale import TrainOneStepWithLossScaleCellv2 as TrainOneStepWithLossScaleCell +from src.model_utils.config import config as hparams +from src.sampler import DistributedSampler + +parser = argparse.ArgumentParser(description='ecapatdnn', formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument('--data_url', type=str, default=None, help='Location of Data') +parser.add_argument('--train_url', type=str, default='', help='Location of training outputs') +parser.add_argument('--enable_modelarts', type=ast.literal_eval, default=True, help='choose modelarts') +args, unknown = parser.parse_known_args() + +def save_ckpt_to_air(save_ckpt_path, path): + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + + in_channels = 80 + channels = 1024 + emb_size = 192 + net = ECAPA_TDNN(in_channels, channels=[channels, channels, channels, channels, channels * 3], + lin_neurons=emb_size, global_context=False) + + # assert 
def create_dataset(cfg, data_home, shuffle=False):
    """
    Build the generator-backed dataset used for training.

    Args:
        cfg: configuration object. ``run_distribute`` and ``group_size`` are
            always read; ``rank`` is read only when ``run_distribute`` is truthy.
        data_home(string): the path of dataset, handed to DatasetGenerator.
        shuffle(bool): forwarded to GeneratorDataset. Default: False

    Returns:
        tuple ``(dataset, cnt)`` where ``cnt`` is the generator length divided
        by ``cfg.group_size``, truncated to int.
    """
    generator = DatasetGenerator(data_home)
    total = len(generator)

    # NOTE: the distributed sampler always shuffles, independent of `shuffle`.
    sampler = None
    if cfg.run_distribute:
        sampler = DistributedSampler(total, cfg.group_size, cfg.rank, shuffle=True)

    dataset = ds.GeneratorDataset(generator, ["data", "label"], shuffle=shuffle, sampler=sampler)
    return dataset, int(total / cfg.group_size)
def update_average(loss_, avg_loss, step):
    """Fold one more loss value into a running average.

    Computes ``avg_loss - avg_loss / step + loss_ / step`` using augmented
    assignments, in that order.

    Args:
        loss_: the newest loss value.
        avg_loss: the running average before this update.
        step: divisor applied to both terms; must be non-zero.

    Returns:
        The updated running average.
    """
    old_share = avg_loss / step
    avg_loss -= old_share
    new_share = loss_ / step
    avg_loss += new_share
    return avg_loss
def triangular():
    """
    Amplitude scale of the `triangular` cyclic LR policy: always 1.0.
    https://arxiv.org/abs/1506.01186
    """
    return 1.0


def triangular2(cycle):
    """
    Amplitude scale of the `triangular2` cyclic LR policy: 1.0 for cycle 1,
    halved for every following cycle. https://arxiv.org/abs/1506.01186
    """
    exponent = cycle - 1
    return 1.0 / (2. ** exponent)


def learning_rate_clr_triangle_function(step_size, max_lr, base_lr, clr_iterations):
    """
    Learning rate of the triangular cyclic LR schedule at one iteration.
    https://arxiv.org/abs/1506.01186

    Args:
        step_size: number of iterations in half a cycle; must be non-zero.
        max_lr: learning rate at the peak of a cycle.
        base_lr: learning rate at the start and end of a cycle.
        clr_iterations: zero-based iteration index.

    Returns:
        ``base_lr`` plus the triangular fraction of ``max_lr - base_lr``.
    """
    # 1-based index of the cycle this iteration falls into.
    cycle_index = math.floor(1 + clr_iterations / (2 * step_size))
    # Normalised distance from the peak of that cycle: 0 at the peak, 1 at the ends.
    distance = abs(clr_iterations / step_size - 2 * cycle_index + 1)
    ramp = max(0, (1 - distance))
    amplitude = max_lr - base_lr
    return base_lr + amplitude * ramp * triangular()
FixedLossScaleUpdateCell(loss_scale_value=2**14) + model_constructed = BuildTrainNetwork(mymodel, my_classifier, aam, loss, minibatch_size, class_num) + opt = nn.Adam(model_constructed.trainable_params(), learning_rate=lr_list, weight_decay=weight_decay) + model_constructed = TrainOneStepWithLossScaleCell(model_constructed, opt, + scale_sense=loss_scale_manager) + + if hparams.pre_trained: + pre_trained_model = os.path.join(ckpt_save_dir, hparams.checkpoint_path) + param_dict = load_checkpoint(pre_trained_model) + # load parameter to the network + load_param_into_net(model_constructed, param_dict) + # CheckPoint CallBack definition + save_steps = int(steps_per_epoch_train/10) + config_ck = CheckpointConfig(save_checkpoint_steps=save_steps, + keep_checkpoint_max=hparams.keep_checkpoint_max) + ckpoint_cb = ModelCheckpoint(prefix="train_ecapa_vox12", + directory=ckpt_save_dir, config=config_ck) + + train_net(hparams.rank, model_constructed, num_epochs, ds_train, ckpoint_cb, steps_per_epoch_train, minibatch_size) + print("============== End Training ==============") + path = os.path.join(ckpt_save_dir, 'train_ecapa_vox12-0_936.ckpt') + print("ckpt_save_dir ", ckpt_save_dir, "path ", path) + save_ckpt_to_air(ckpt_save_dir, path) + +if __name__ == "__main__": + train()