diff --git a/.jenkins/check/config/filter_cpplint.txt b/.jenkins/check/config/filter_cpplint.txt index 313125d94c16be396051efe60f6418b66f0796ff..a260d89e5daecacd455fe2e13e79c8df39a21678 100644 --- a/.jenkins/check/config/filter_cpplint.txt +++ b/.jenkins/check/config/filter_cpplint.txt @@ -139,6 +139,10 @@ "models/official/cv/retinanet/infer/mxbase/retinanetDetection/RetinanetDetection.h" "runtime/references" "models/official/cv/retinanet/infer/mxbase/retinanetDetection/RetinanetDetection.cpp" "runtime/references" +"models/official/audio/melgan/infer/mxbase/src/main.cpp" "runtime/references" +"models/official/audio/melgan/infer/mxbase/src/Melgan.h" "runtime/references" +"models/official/audio/melgan/infer/mxbase/src/Melgan.cpp" "runtime/references" + "models/official/audio/lpcnet/ascend310_infer/inc/lpcnet.h" "runtime/int" "models/official/audio/lpcnet/ascend310_infer/src/main.cc" "build/include_subdir" diff --git a/official/audio/melgan/infer/convert/convert.sh b/official/audio/melgan/infer/convert/convert.sh new file mode 100644 index 0000000000000000000000000000000000000000..88d611edb9e578952033ce82af47c3b57be6c395 --- /dev/null +++ b/official/audio/melgan/infer/convert/convert.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
# ============================================================================

# Convert an exported MelGAN model into an Ascend310 offline model with atc.
#
# Usage: bash convert.sh MODEL_PATH OUTPUT_PATH
#   MODEL_PATH   model file passed to atc through --model
#   OUTPUT_PATH  output name passed to atc through --output

if [ "$#" -lt 2 ]; then
    echo "Usage: bash convert.sh MODEL_PATH OUTPUT_PATH" >&2
    exit 1
fi

model_path=$1
output_path=$2

if [ ! -f "${model_path}" ]; then
    echo "Model file not found: ${model_path}" >&2
    exit 1
fi

# Quote both paths so that directories containing spaces reach atc as a single option value.
atc --model="${model_path}" \
    --framework=1 \
    --output="${output_path}" \
    --input_format=NCHW \
    --log=error \
    --soc_version=Ascend310
docker_image=$1
model_dir=$2


# Print the command-line synopsis of this script.
function show_help() {
    echo "Usage: docker_start_infer.sh docker_image model_dir"
}

# Exit with status 1 (after printing the synopsis) when a required positional
# argument is missing.
function param_check() {
    if [ -z "${docker_image}" ]; then
        echo "please input docker_image"
        show_help
        exit 1
    fi

    if [ -z "${model_dir}" ]; then
        echo "please input model_dir"
        show_help
        exit 1
    fi
}

param_check

# model_dir is mounted at the same path inside the container. The expansions are
# quoted so that a directory name containing spaces is not split into several arguments.
docker run -it -u root \
    --device=/dev/davinci0 \
    --device=/dev/davinci_manager \
    --device=/dev/devmm_svm \
    --device=/dev/hisi_hdc \
    -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
    -v "${model_dir}:${model_dir}" \
    "${docker_image}" \
    /bin/bash
path_cur=$(dirname "$0")

# Export default values for ASCEND_HOME, ASCEND_VERSION and ARCH_PATTERN when the
# caller has not set them, and report which value is in effect.
function check_env()
{
    # set ASCEND_HOME to /usr/local/Ascend/ when it was not specified by user
    if [ -z "${ASCEND_HOME}" ]; then
        export ASCEND_HOME=/usr/local/Ascend/
        echo "Set ASCEND_HOME to the default value: ${ASCEND_HOME}"
    else
        echo "ASCEND_HOME is set to ${ASCEND_HOME} by user"
    fi

    # set ASCEND_VERSION to nnrt/latest when it was not specified by user
    if [ -z "${ASCEND_VERSION}" ]; then
        export ASCEND_VERSION=nnrt/latest
        echo "Set ASCEND_VERSION to the default value: ${ASCEND_VERSION}"
    else
        echo "ASCEND_VERSION is set to ${ASCEND_VERSION} by user"
    fi

    if [ -z "${ARCH_PATTERN}" ]; then
        # set ARCH_PATTERN to ./ when it was not specified by user
        export ARCH_PATTERN=./
        echo "ARCH_PATTERN is set to the default value: ${ARCH_PATTERN}"
    else
        echo "ARCH_PATTERN is set to ${ARCH_PATTERN} by user"
    fi
}

# Configure, compile and install the project in a fresh ./build directory next
# to this script. Every step is checked so that a failure stops the script with
# the failing command's exit status instead of continuing with stale artifacts.
function build_melgan()
{
    cd "${path_cur}" || exit 1
    rm -rf build
    mkdir -p build
    cd build || exit 1

    cmake ..
    ret=$?
    if [ ${ret} -ne 0 ]; then
        echo "Failed to configure melgan."
        exit ${ret}
    fi

    make
    ret=$?
    if [ ${ret} -ne 0 ]; then
        echo "Failed to build melgan."
        exit ${ret}
    fi

    make install
    ret=$?
    if [ ${ret} -ne 0 ]; then
        echo "Failed to install melgan."
        exit ${ret}
    fi
}

check_env
build_melgan
+ */ + +#include "Melgan.h" +#include <sys/stat.h> +#include <unistd.h> +#include <algorithm> +#include <fstream> +#include <string> +#include <memory> +#include <map> +#include <vector> +#include "acl/acl.h" +#include "MxBase/DeviceManager/DeviceManager.h" +#include "MxBase/Log/Log.h" + +APP_ERROR MELGAN::Init(const InitParam &initParam) { + deviceId_ = initParam.deviceId; + APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); + if (ret != APP_ERR_OK) { + LogError << "Init devices failed, ret=" << ret << "."; + return ret; + } + ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); + if (ret != APP_ERR_OK) { + LogError << "Set context failed, ret=" << ret << "."; + return ret; + } + model_ = std::make_shared<MxBase::ModelInferenceProcessor>(); + ret = model_->Init(initParam.modelPath, modelDesc_); + if (ret != APP_ERR_OK) { + LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR MELGAN::DeInit() { + dvppWrapper_->DeInit(); + model_->DeInit(); + MxBase::DeviceManager::GetInstance()->DestroyDevices(); + return APP_ERR_OK; +} + +APP_ERROR MELGAN::VectorToTensorBase(const std::vector<std::vector<std::vector<float>>> &input_x, + MxBase::TensorBase *tensorBase) { + const uint32_t dataSize = modelDesc_.inputTensors[0].tensorSize / 4; + float *metaFeatureData = new float[dataSize]; + uint32_t idx = 0; + for (size_t bs = 0; bs < input_x.size(); bs++) { + for (size_t c = 0; c < input_x[0].size(); c++) { + for (size_t d = 0; d < input_x[0][0].size(); d++) { + metaFeatureData[idx++] = input_x[bs][c][d]; + } + } + } + MxBase::MemoryData memoryDataDst(dataSize * 4, MxBase::MemoryData::MEMORY_DEVICE, deviceId_); + MxBase::MemoryData memoryDataSrc(reinterpret_cast<void *>(metaFeatureData), + dataSize * 4, MxBase::MemoryData::MEMORY_HOST_MALLOC); + + APP_ERROR ret = MxBase::MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); + if (ret != APP_ERR_OK) { + LogError 
<< GetError(ret) << "Memory malloc failed."; + return ret; + } + + std::vector<uint32_t> shape = {1, 80, 240}; + *tensorBase = MxBase::TensorBase(memoryDataDst, false, shape, MxBase::TENSOR_DTYPE_FLOAT32); + return APP_ERR_OK; +} + +APP_ERROR MELGAN::Inference(const std::vector<MxBase::TensorBase> &inputs, + std::vector<MxBase::TensorBase> *outputs) { + auto dtypes = model_->GetOutputDataType(); + for (size_t i = 0; i < modelDesc_.outputTensors.size(); i++) { + std::vector<uint32_t> shape = {}; + for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); j++) { + shape.push_back((uint32_t) modelDesc_.outputTensors[i].tensorDims[j]); + } + MxBase::TensorBase tensor(shape, dtypes[i], MxBase::MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); + APP_ERROR ret = MxBase::TensorBase::TensorBaseMalloc(tensor); + if (ret != APP_ERR_OK) { + LogError << "TensorBaseMalloc failed, ret=" << ret << "."; + return ret; + } + (*outputs).push_back(tensor); + } + + MxBase::DynamicInfo dynamicInfo = {}; + dynamicInfo.dynamicType = MxBase::DynamicType::STATIC_BATCH; + auto startTime = std::chrono::high_resolution_clock::now(); + APP_ERROR ret = model_->ModelInference(inputs, *outputs, dynamicInfo); + auto endTime = std::chrono::high_resolution_clock::now(); + double costMs = std::chrono::duration<double, std::milli>(endTime - startTime).count(); + inferCostTimeMilliSec += costMs; + if (ret != APP_ERR_OK) { + LogError << "ModelInference failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR MELGAN::SaveInferResult(std::vector<float> *outputs, std::vector<MxBase::TensorBase> *inputs) { + MxBase::TensorBase &tensor = inputs->at(0); + APP_ERROR ret = tensor.ToHost(); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Tensor deploy to host failed."; + return ret; + } + + // check tensor is available + auto outputShape = tensor.GetShape(); + uint32_t length = outputShape[2]; + LogInfo << "output shape is: (" << outputShape[0] << ',' << 
outputShape[1] << ',' << outputShape[2] << ')'; + void *data = tensor.GetBuffer(); + for (uint32_t i = 0; i < length; i++) { + float value = *(reinterpret_cast<float *>(data) + i); + outputs->emplace_back(value); + } + return APP_ERR_OK; +} + + +APP_ERROR MELGAN::Process(const std::string &fileName, const std::vector<std::vector<std::vector<float>>> &input_x, + InitParam &initParam, std::vector<float> output) { + std::vector<MxBase::TensorBase> inputs = {}; + std::vector<MxBase::TensorBase> outputs; + MxBase::TensorBase tensorBase; + auto ret = VectorToTensorBase(input_x, &tensorBase); + if (ret != APP_ERR_OK) { + LogError << "ToTensorBase failed, ret=" << ret << "."; + return ret; + } + inputs.push_back(tensorBase); + auto startTime = std::chrono::high_resolution_clock::now(); + ret = Inference(inputs, &outputs); + + auto endTime = std::chrono::high_resolution_clock::now(); + double costMs = std::chrono::duration<double, std::milli>(endTime - startTime).count(); + inferCostTimeMilliSec += costMs; + if (ret != APP_ERR_OK) { + LogError << "Inference failed, ret=" << ret << "."; + return ret; + } + + ret = SaveInferResult(&output, &outputs); + if (ret != APP_ERR_OK) { + LogError << "Save model infer results into file failed. 
ret = " << ret << "."; + return ret; + } + + ret = WriteResult(fileName, output); + if (ret != APP_ERR_OK) { + LogError << "WriteResult failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR MELGAN::WriteResult(const std::string &fileName, + const std::vector<float> &output) { + std::string resultPathName = "output"; + // create result directory when it does not exit + if (access(resultPathName.c_str(), 0) != 0) { + int ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR); + if (ret != 0) { + LogError << "Failed to create result directory: " << resultPathName + << ", ret = " << ret; + return APP_ERR_COMM_OPEN_FAIL; + } + } + // create result file under result directory + resultPathName = resultPathName + "/restruction_" + fileName; + std::ofstream tfile(resultPathName, std::ofstream::app); + if (tfile.fail()) { + LogError << "Failed to open result file: " << resultPathName; + return APP_ERR_COMM_OPEN_FAIL; + } + + for (uint32_t i = 0; i < output.size(); i++) { + tfile << std::to_string(output[i]) << " "; + } + tfile.close(); + return APP_ERR_OK; +} diff --git a/official/audio/melgan/infer/mxbase/src/Melgan.h b/official/audio/melgan/infer/mxbase/src/Melgan.h new file mode 100644 index 0000000000000000000000000000000000000000..ca56ba669c6662f9a95d2479034b24c26657e725 --- /dev/null +++ b/official/audio/melgan/infer/mxbase/src/Melgan.h @@ -0,0 +1,53 @@ +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MxBase_STGCN_H +#define MxBase_STGCN_H +#include <memory> +#include <string> +#include <vector> +#include "acl/acl.h" +#include "MxBase/DvppWrapper/DvppWrapper.h" +#include "MxBase/ModelInfer/ModelInferenceProcessor.h" +#include "MxBase/Tensor/TensorContext/TensorContext.h" + +struct InitParam { + uint32_t deviceId; + bool checkTensor; + std::string modelPath; +}; + +class MELGAN { + public: + APP_ERROR Init(const InitParam &initParam); + APP_ERROR DeInit(); + APP_ERROR VectorToTensorBase(const std::vector<std::vector<std::vector<float>>> &input_x, + MxBase::TensorBase *tensorBase); + APP_ERROR Inference(const std::vector<MxBase::TensorBase> &inputs, std::vector<MxBase::TensorBase> *outputs); + APP_ERROR Process(const std::string &fileName, const std::vector<std::vector<std::vector<float>>> &input_x, + InitParam &initParam, std::vector<float> output); + APP_ERROR SaveInferResult(std::vector<float> *outputs, std::vector<MxBase::TensorBase> *inputs); + APP_ERROR WriteResult(const std::string &fileName, const std::vector<float> &output); + double GetInferCostMilliSec() const { return inferCostTimeMilliSec; } + + private: + std::shared_ptr<MxBase::DvppWrapper> dvppWrapper_; + std::shared_ptr<MxBase::ModelInferenceProcessor> model_; + MxBase::ModelDesc modelDesc_; + uint32_t deviceId_ = 0; + double inferCostTimeMilliSec = 0.0; +}; +#endif diff --git a/official/audio/melgan/infer/mxbase/src/main.cpp b/official/audio/melgan/infer/mxbase/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f2fc568506650e759e4aba922836c841a74c1834 --- /dev/null +++ b/official/audio/melgan/infer/mxbase/src/main.cpp @@ -0,0 +1,116 @@ +/* + * Copyright 2022 Huawei Technologies Co., Ltd. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <dirent.h> +#include <fstream> +#include <string> +#include <sstream> +#include <cstdlib> +#include <vector> +#include <cmath> +#include <cstdio> +#include "Melgan.h" +#include "MxBase/Log/Log.h" + +int eval_length = 240; +int hop_size = 256; +int repeat_frame = 30; +int sample = 22050; + + +APP_ERROR ReadTxt(const std::string &path, std::vector<std::vector<std::vector<float>>> *dataset) { + std::ifstream fp(path); + std::string line; + std::vector<std::vector<float>> data; + int count = 0; + while (std::getline(fp, line)) { + std::vector<float> data_line; + std::string number; + std::istringstream readstr(line); + for (int j = 0; j < 240; j++) { + std::getline(readstr, number, ' '); + data_line.push_back(static_cast<float>(atof(number.c_str()))); + } + data.push_back(data_line); + count++; + if (count % 80 == 0) { + std::vector<std::vector<float>> dataseg; + for (int i = count - 80; i < count; i++) { + dataseg.push_back(data[i]); + } + dataset->push_back(dataseg); + } + } + return APP_ERR_OK; +} + + +int main(int argc, char *argv[]) { + std::string model_path = argv[1]; + std::string eval_data_path = argv[2]; + std::string list_filename = argv[3]; + + InitParam initParam = {}; + initParam.deviceId = 0; + initParam.checkTensor = true; + initParam.modelPath = model_path; + + auto melgan = std::make_shared<MELGAN>(); + printf("Start running\n"); + APP_ERROR ret = melgan->Init(initParam); + if (ret != APP_ERR_OK) { + melgan->DeInit(); + LogError << "melgan init failed, ret=" << ret << "."; + return ret; + } + + // get test data 
filename + std::string path = eval_data_path + "/" + list_filename; + std::ifstream fp(path); + std::string filename; + while (std::getline(fp, filename)) { + LogInfo << "Start inference " << filename << std::endl; + std::string dataPath = eval_data_path + "/" + filename; + std::vector<std::vector<std::vector<float>>> test_data; + + ret = ReadTxt(dataPath, &test_data); + if (ret != APP_ERR_OK) { + melgan->DeInit(); + LogError << "read test_data failed, ret=" << ret << "."; + return ret; + } + + int data_seg = test_data.size(); + int data_row = test_data[0].size(); + int data_col = test_data[0][0].size(); + LogInfo << filename << "data shape: (" << data_seg << ',' << data_row << ',' << data_col << ')'; + for (int iter = 0; iter < data_seg; iter++) { + std::vector<float> output; + std::vector<std::vector<std::vector<float>>> data; + data.push_back(test_data[iter]); + ret = melgan->Process(filename, data, initParam, output); + if (ret != APP_ERR_OK) { + LogError << "melgan process failed, ret=" << ret << "."; + melgan->DeInit(); + return ret; + } + } + LogInfo << "File " << filename << " inference successfully!"; + } + + melgan->DeInit(); + return APP_ERR_OK; +} diff --git a/official/audio/melgan/infer/sdk/main.py b/official/audio/melgan/infer/sdk/main.py new file mode 100644 index 0000000000000000000000000000000000000000..e8259845fe88953be0a3391c680fea563df384b9 --- /dev/null +++ b/official/audio/melgan/infer/sdk/main.py @@ -0,0 +1,135 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# ============================================================================
"""Run MelGAN inference on preprocessed mel text files through a MindX SDK stream."""
import os
import sys
import argparse

import MxpiDataType_pb2 as MxpiDataType
import numpy as np
from StreamManagerApi import StreamManagerApi, InProtobufVector, MxProtobufIn, StringVector, MxDataInput


def inference(input_tensor):
    """Send one tensor through the stream and return the result as a flat float32 array.

    Reads the module-level ``stream_manager_api`` and ``stream_name`` that the
    ``__main__`` section defines before the first call.

    Args:
        input_tensor: numpy array; its bytes and shape are sent to plugin ``appsrc0``.

    Returns:
        1-D float32 numpy array built from the first tensor of the first
        returned tensor package.

    Raises:
        SystemExit: with status 1 when sending fails or no valid result comes back.
    """
    tensor_bytes = input_tensor.tobytes()
    in_plugin_id = 0
    tensorPackageList = MxpiDataType.MxpiTensorPackageList()
    tensorPackage = tensorPackageList.tensorPackageVec.add()
    dataInput = MxDataInput()
    dataInput.data = tensor_bytes
    tensorVec = tensorPackage.tensorVec.add()
    tensorVec.deviceId = 0
    tensorVec.memType = 0
    for t in input_tensor.shape:
        tensorVec.tensorShape.append(t)
    tensorVec.dataStr = dataInput.data
    tensorVec.tensorDataSize = len(tensor_bytes)
    # add feature data end
    key = "appsrc{}".format(in_plugin_id).encode('utf-8')
    protobufVec = InProtobufVector()
    protobuf = MxProtobufIn()
    protobuf.key = key
    protobuf.type = b'MxTools.MxpiTensorPackageList'
    protobuf.protobuf = tensorPackageList.SerializeToString()
    protobufVec.push_back(protobuf)
    unique_id = stream_manager_api.SendProtobuf(stream_name, in_plugin_id, protobufVec)
    if unique_id < 0:
        print("Failed to send data to stream.")
        sys.exit(1)
    # Obtain the inference result by specifying streamName and uniqueId.
    keyVec = StringVector()
    keyVec.push_back(b'mxpi_tensorinfer0')
    infer_result = stream_manager_api.GetProtobuf(stream_name, in_plugin_id, keyVec)
    if infer_result.size() == 0:
        print("inferResult is null")
        sys.exit(1)
    if infer_result[0].errorCode != 0:
        print("GetProtobuf error. errorCode=%d" % (
            infer_result[0].errorCode))
        sys.exit(1)
    # get infer result
    result = MxpiDataType.MxpiTensorPackageList()
    result.ParseFromString(infer_result[0].messageBuf)
    # convert the inference result to Numpy array
    out = np.frombuffer(result.tensorPackageVec[0].tensorVec[0].dataStr, dtype=np.float32).ravel()
    return out


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--eval_path', type=str, default='../data/input/',
                        help="input data path")
    # The default used to be './output', i.e. the output directory, which can
    # never be opened as a pipeline file.
    parser.add_argument('--pipeline_path', type=str, default='../data/config/melgan.pipeline',
                        help='pipeline path')
    parser.add_argument('--output_path', type=str, default='./output',
                        help='output data path')
    parser.add_argument('--eval_length', type=int, default=240,
                        help='eval length')
    parser.add_argument('--hop_size', type=int, default=256,
                        help='hop size')
    parser.add_argument('--sample', type=int, default=22050,
                        help='sample')
    opts = parser.parse_args()
    eval_path = opts.eval_path
    output_path = opts.output_path
    pipeline_path = opts.pipeline_path

    stream_name = b'im_melgan'

    # init stream manager
    stream_manager_api = StreamManagerApi()
    ret = stream_manager_api.InitManager()
    if ret != 0:
        print("Failed to init Stream manager, ret=%s" % str(ret))
        sys.exit(1)

    try:
        # create streams by pipeline config file
        with open(pipeline_path, 'rb') as f:
            pipelineStr = f.read()
        ret = stream_manager_api.CreateMultipleStreams(pipelineStr)

        if ret != 0:
            print("Failed to create Stream, ret=%s" % str(ret))
            sys.exit(1)

        if not os.path.exists(output_path):
            os.makedirs(output_path)

        # Construct the input of the stream
        for file_name in sorted(os.listdir(eval_path)):
            if not file_name.endswith("_test.txt"):
                continue
            data_path = os.path.join(eval_path, file_name)
            all_test_data = np.loadtxt(data_path, dtype=np.float32)
            all_test_data = all_test_data.reshape((-1, 1, 80, 240))

            # The leading empty float64 array keeps the saved data in float64,
            # as the incremental concatenation did; joining once avoids
            # re-copying the whole waveform for every segment.
            chunks = [np.array([])]
            for idx in range(all_test_data.shape[0]):
                tensor = all_test_data[idx].reshape((1, 80, 240))
                chunks.append(inference(tensor))
            wav_data = np.concatenate(chunks)

            # save as txt file
            out_path = os.path.join(output_path, 'restruction_' + file_name)
            np.savetxt(out_path, wav_data.reshape(-1), fmt='%.18e')
            print("File " + file_name + " inference successfully!")
    finally:
        # destroy streams, also when an error aborts the run
        stream_manager_api.DestroyAllStreams()
# ============================================================================

# Run the MindX SDK MelGAN inference script.
#
# Usage: bash run.sh PIPELINE_PATH EVAL_PATH OUTPUT_PATH

set -e

pipeline_path=$1
eval_path=$2
output_path=$3

info() { echo -e "\033[1;34m[INFO ][MxStream] $1\033[1;37m" ; }
warn() { echo >&2 -e "\033[1;31m[WARN ][MxStream] $1\033[1;37m" ; }

if [ "$#" -lt 3 ]; then
    warn "Usage: bash run.sh PIPELINE_PATH EVAL_PATH OUTPUT_PATH"
    exit 1
fi

# Every path exported below is derived from MX_SDK_HOME.
if [ -z "${MX_SDK_HOME}" ]; then
    warn "MX_SDK_HOME is not set"
    exit 1
fi

export LD_LIBRARY_PATH=${MX_SDK_HOME}/lib:${MX_SDK_HOME}/opensource/lib:${MX_SDK_HOME}/opensource/lib64:/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64:${LD_LIBRARY_PATH}
export GST_PLUGIN_SCANNER=${MX_SDK_HOME}/opensource/libexec/gstreamer-1.0/gst-plugin-scanner
export GST_PLUGIN_PATH=${MX_SDK_HOME}/opensource/lib/gstreamer-1.0:${MX_SDK_HOME}/lib/plugins
export PYTHONPATH=$PYTHONPATH:${MX_SDK_HOME}/python

# Quoted so that paths containing spaces stay single arguments.
python3 main.py --pipeline_path "$pipeline_path" --eval_path "$eval_path" --output_path "$output_path"
exit 0
+# ============================================================================ +"""MelGAN eval""" +import argparse +import os + +import numpy as np +from scipy.io.wavfile import write + +if __name__ == '__main__': + parser = argparse.ArgumentParser() + parser.add_argument('--data_path', type=str, default='../data/input/', + help="input data path") + parser.add_argument('--output_path', type=str, default='../mxbase/output', + help='output data path') + parser.add_argument('--eval_length', type=int, default=240, + help='eval length') + parser.add_argument('--hop_size', type=int, default=256, + help='hop size') + parser.add_argument('--sample', type=int, default=22050, + help='sample') + opts = parser.parse_args() + data_path = opts.data_path + output_path = opts.output_path + eval_length = opts.eval_length + hop_size = opts.hop_size + sample = opts.sample + + data_list = os.listdir(output_path) + print(data_list) + for data_name in data_list: + if 'test.txt' in data_name: + txt_data = np.loadtxt(os.path.join(output_path, data_name), dtype=np.float32).reshape((-1, 61440)) + melname = data_name.replace('txt', 'npy').replace('restruction_', '').replace('_test', '') + meldata = np.load(os.path.join(data_path, melname)).reshape((80, -1)) + + pad_node = 0 + if meldata.shape[1] < eval_length: + pad_node = eval_length - meldata.shape[1] + + # first frame + wav_data = np.array([]) + output = txt_data[0].ravel() + wav_data = np.concatenate((wav_data, output)) + + # initialization parameters + repeat_frame = eval_length // 8 + i = eval_length - repeat_frame + length = eval_length + num_weights = i + interval = (hop_size * repeat_frame) // num_weights + weights = np.linspace(0.0, 1.0, num_weights) + + while i < meldata.shape[1]: + meldata_s = meldata[:, i:i + length] + if meldata_s.shape[1] != eval_length: + pad_node = hop_size * (eval_length - meldata_s.shape[1]) + i = i + length - repeat_frame + + for idx in range(1, txt_data.shape[0]): + # i-th frame + output = 
txt_data[idx].ravel() + lenwav = hop_size * repeat_frame + lenout = 0 + # overlap + for j in range(num_weights - 1): + wav_data[-lenwav:-lenwav + interval] = weights[-j - 1] * wav_data[-lenwav:-lenwav + interval] + \ + weights[j] * output[lenout:lenout + interval] + lenwav = lenwav - interval + lenout = lenout + interval + wav_data[-lenwav:] = weights[-num_weights] * wav_data[-lenwav:] + \ + weights[num_weights - 1] * output[lenout:lenout + lenwav] + wav_data = np.concatenate((wav_data, output[hop_size * repeat_frame:])) + i = i + length - repeat_frame + + if pad_node != 0: + wav_data = wav_data[:-pad_node] + + # save as wav file + wav_data = 32768.0 * wav_data + out_path = os.path.join(output_path, 'restruction_' + data_name.replace('txt', 'wav')) + write(out_path, sample, wav_data.astype('int16')) diff --git a/official/audio/melgan/infer/utils/infer_preprocess.py b/official/audio/melgan/infer/utils/infer_preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..b4a3aff102a42f31e8db731407900c717e0d9c18 --- /dev/null +++ b/official/audio/melgan/infer/utils/infer_preprocess.py @@ -0,0 +1,76 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+"""MelGAN inference preprocess: cut mel spectrograms into fixed-length segments saved as text."""
+import os
+import argparse
+
+import numpy as np
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--data_path', type=str, default='../data/input/',
+                        help="input data path")
+    parser.add_argument('--eval_length', type=int, default=240,
+                        help='eval length')
+    # --hop_size is not needed by this script; it stays accepted so existing command lines keep working.
+    parser.add_argument('--hop_size', type=int, default=256,
+                        help='hop size')
+    opts = parser.parse_args()
+    data_path = opts.data_path
+    eval_length = opts.eval_length
+
+    # Neighbouring segments overlap by repeat_frame frames, so the window advances by `stride`.
+    repeat_frame = eval_length // 8
+    stride = eval_length - repeat_frame
+
+    data_list = []
+    # sorted() makes the order of data_list.txt reproducible; os.listdir order is arbitrary.
+    for data_name in sorted(os.listdir(data_path)):
+        # Match the extension, not a substring, so only real .npy files are loaded.
+        if data_name.endswith('.npy'):
+            print(data_name)
+            npypath = os.path.join(data_path, data_name)
+
+            # data preprocessing: same (x + 5) / 5 normalisation as applied in training
+            meldata = np.load(npypath)
+            meldata = (meldata + 5.0) / 5.0
+
+            # Inputs shorter than one segment are zero-padded along the frame axis.
+            if meldata.shape[1] < eval_length:
+                pad_node = eval_length - meldata.shape[1]
+                meldata = np.pad(meldata, ((0, 0), (0, pad_node)), mode='constant', constant_values=0.0)
+            new_data = meldata[np.newaxis, :, 0:eval_length]
+
+            i = stride
+            while i < meldata.shape[1]:
+                meldata_s = meldata[:, i:i + eval_length]
+                # The last window may be short: repeat its final frame up to eval_length.
+                if meldata_s.shape[1] != eval_length:
+                    meldata_s = np.pad(meldata_s, ((0, 0), (0, eval_length - meldata_s.shape[1])), mode='edge')
+                meldata_s = meldata_s[np.newaxis, :, :]
+                # Segments are stacked along axis 1 and flattened below into rows of eval_length values.
+                new_data = np.concatenate((new_data, meldata_s), axis=1)
+                i = i + stride
+
+            rows = new_data.reshape((-1, eval_length))
+            # splitext() touches only the file extension; str.replace('.npy', ...) would also
+            # rewrite any '.npy' occurring earlier in the path.
+            out_name = os.path.splitext(data_name)[0] + '_test.txt'
+            np.savetxt(os.path.join(data_path, out_name), rows, fmt='%.18e')
+            data_list.append(out_name)
+            print(rows.shape)
+
+    data_list_str = "\n".join(data_list)
+    print(data_list_str)
+    # The context manager closes the file even if the write fails.
+    with open(os.path.join(data_path, 'data_list.txt'), 'w') as f:
+        f.write(data_list_str)
diff --git a/official/audio/melgan/modelarts/train_modelarts.py b/official/audio/melgan/modelarts/train_modelarts.py
new file mode 100644
index 0000000000000000000000000000000000000000..681db402e3a735ce3bf93e031dc84484c89f7fc9
--- /dev/null
+++ b/official/audio/melgan/modelarts/train_modelarts.py
@@ -0,0 +1,177 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""MelGAN train"""
+import os
+import time
+
+import numpy as np
+import mindspore as ms
+import mindspore.common.dtype as mstype
+import mindspore.context as context
+import mindspore.dataset as de
+import mindspore.nn as nn
+from mindspore.common import set_seed
+from mindspore.common.tensor import Tensor
+from mindspore.communication.management import init, get_rank, get_group_size
+from mindspore.context import ParallelMode
+from mindspore.train.callback import RunContext, ModelCheckpoint, CheckpointConfig, _InternalCallbackParam
+from mindspore.train.loss_scale_manager import DynamicLossScaleManager
+from mindspore.train.serialization import load_checkpoint, load_param_into_net, export
+from src.dataset import Generator1D
+from src.loss import MelganLoss_G, MelganLoss_D
+from src.model import MultiDiscriminator, Generator
+from src.model_utils.config import config as cfg
+from src.model_utils.moxing_adapter import moxing_wrapper
+from src.sampler import DistributedSampler
+from src.trainonestep import TrainOneStepCellGEN, TrainOneStepCellDIS
+
+set_seed(1)
+
+
+class BuildGenNetwork(nn.Cell):
+    """Generator wrapper: runs the wrapped network on the input and returns its output.
+
+    The criterion is stored on the cell but is not applied in construct().
+    """
+
+    def __init__(self, network, criterion):
+        super(BuildGenNetwork, self).__init__(auto_prefix=False)
+        self.network = network
+        self.criterion = criterion
+
+    def construct(self, data):
+        fake_wav = self.network(data)
+        return fake_wav
+
+
+class BuildDisNetwork(nn.Cell):
+    """Discriminator wrapper: scores a generated and a reference waveform and returns the criterion loss."""
+
+    def __init__(self, network, criterion):
+        super(BuildDisNetwork, self).__init__(auto_prefix=False)
+        self.network = network
+        self.criterion = criterion
+
+    def construct(self, fake_wav, wav):
+        y1 = self.network(fake_wav)
+        y2 = self.network(wav)
+        loss = self.criterion(y1, y2)
+        return loss
+
+
+@moxing_wrapper()
+def train():
+    """Train the MelGAN generator and discriminator, checkpoint on rank 0, then export the generator as AIR.
+
+    All settings are read from the project config object `cfg`; the function takes no
+    arguments and returns nothing.
+    """
+    # init distributed
+    if cfg.run_distribute:
+        device_id = int(os.getenv('DEVICE_ID', '0'))
+        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id)
+        init()
+        cfg.rank = get_rank()
+        cfg.group_size = get_group_size()
+        context.reset_auto_parallel_context()
+        # Use the actual communication group size instead of a hard-coded 8 so the
+        # parallel context also matches jobs launched on a different number of devices.
+        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
+                                          device_num=cfg.group_size, parameter_broadcast=True)
+    else:
+        cfg.rank = 0
+        cfg.group_size = 1
+        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=cfg.device_id)
+    # get network and init
+    net_D = MultiDiscriminator()
+    net_G = Generator(alpha=cfg.leaky_alpha)
+
+    criterion_G = MelganLoss_G()
+    criterion_D = MelganLoss_D()
+
+    gen_network_train = BuildGenNetwork(net_G, criterion_G)
+    gen_network_train.set_train()
+    # The same discriminator is wrapped twice: once with the generator loss (used by the
+    # generator step) and once with the discriminator loss (used by the discriminator step).
+    dis_network_train_1 = BuildDisNetwork(net_D, criterion_G)
+    dis_network_train_1.set_train()
+    dis_network_train_2 = BuildDisNetwork(net_D, criterion_D)
+    dis_network_train_2.set_train()
+    scale_manager = DynamicLossScaleManager(init_loss_scale=2 ** 10, scale_factor=2, scale_window=2000)
+
+    # optimizer
+    opt_G = nn.Adam(params=net_G.trainable_params(), learning_rate=cfg.lr_g, beta1=cfg.beta1, beta2=cfg.beta2,
+                    weight_decay=cfg.weight_decay)
+    opt_D = nn.Adam(params=net_D.trainable_params(), learning_rate=cfg.lr_d, beta1=cfg.beta1, beta2=cfg.beta2,
+                    weight_decay=cfg.weight_decay)
+    if cfg.pre_trained:
+        # One checkpoint dict is offered to both networks.
+        param_dict = load_checkpoint(cfg.checkpoint_path)
+        load_param_into_net(net_G, param_dict)
+        load_param_into_net(net_D, param_dict)
+
+    gen_network_train_wrap = TrainOneStepCellGEN(gen_network_train, opt_G, dis_network_train_1, criterion_G)
+    dis_network_train_wrap = TrainOneStepCellDIS(gen_network_train, dis_network_train_2, opt_D, criterion_D)
+
+    # dataloader
+    Wavmeldataset = Generator1D(cfg.data_path, cfg.train_length, cfg.hop_size)
+    distributed_sampler = DistributedSampler(len(Wavmeldataset), cfg.group_size, cfg.rank, shuffle=True)
+    dataset = de.GeneratorDataset(Wavmeldataset, ["data", "wav", "datad", "wavd"], sampler=distributed_sampler)
+    dataset = dataset.batch(cfg.batch_size, drop_remainder=True)
+
+    # checkpoint save: the callback is driven by hand because the loop below is custom
+    config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_steps, keep_checkpoint_max=100000)
+    ckpt_cb = ModelCheckpoint(prefix=cfg.save_checkpoint_name, directory=cfg.train_url, config=config_ck)
+    cb_params = _InternalCallbackParam()
+    cb_params.train_network = gen_network_train_wrap
+    cb_params.epoch_num = cfg.epoch_size
+    run_context = RunContext(cb_params)
+    ckpt_cb.begin(run_context)
+
+    i = 1
+    print(cfg.epoch_size)
+    epoch_t = time.perf_counter()
+
+    # epoch loop
+    for epoch in range(cfg.epoch_size):
+        cb_params.cur_epoch_num = epoch + 1
+        for data, wav, datad, wavd in dataset.create_tuple_iterator():
+            scaling_sens = Tensor(scale_manager.get_loss_scale(), dtype=mstype.float32)
+            start = time.perf_counter()
+            # Same (x + 5) / 5 normalisation of the mel input as used by the inference preprocess.
+            data = (data + 5.0) / 5.0
+            datad = (datad + 5.0) / 5.0
+
+            _, loss_G, cond_g = gen_network_train_wrap(Tensor(wav, mstype.float32), Tensor(data, mstype.float32),
+                                                       scaling_sens)
+
+            _, loss_D, cond_d = dis_network_train_wrap(Tensor(datad, mstype.float32), Tensor(wavd, mstype.float32),
+                                                       scaling_sens)
+            # Feed the flag returned by each step to the loss-scale manager; a falsy flag
+            # is passed on as a plain Python False.
+            if cond_g:
+                scale_manager.update_loss_scale(cond_g)
+            else:
+                scale_manager.update_loss_scale(False)
+            if cond_d:
+                scale_manager.update_loss_scale(cond_d)
+            else:
+                scale_manager.update_loss_scale(False)
+            duration = time.perf_counter() - start
+
+            print(
+                '{}epoch {}iter loss_G={} loss_D={} {:.2f}s/it'.format(epoch + 1, i, loss_G.asnumpy(), loss_D.asnumpy(),
+                                                                       duration))
+
+            i = i + 1
+            # Only rank 0 advances the checkpoint callback.
+            if cfg.rank == 0:
+                cb_params.cur_step_num = i
+                cb_params.batch_num = i
+                ckpt_cb.step_end(run_context)
+
+    duration = time.perf_counter() - epoch_t
+    print('finish in {:.2f}mins'.format(duration / 60))
+
+    # Export the trained generator with a dummy input of shape (1, 80, 240).
+    input_arr = Tensor(np.random.uniform(0.0, 1.0, size=[1, 80, 240]), ms.float32)
+    export(net_G, input_arr, file_name=os.path.join(cfg.train_url, 'melgan_final'), file_format="AIR")
+
+
+if __name__ == "__main__":
+    train()
diff --git a/official/audio/melgan/scripts/docker_start.sh b/official/audio/melgan/scripts/docker_start.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6b452b3d3f4b1596501ed63d6047052717115c0f
--- /dev/null
+++ b/official/audio/melgan/scripts/docker_start.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Copyright (c) 2022. Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+docker_image=$1
+data_dir=$2
+model_dir=$3
+
+docker run -it -u root --ipc=host \
+               --device=/dev/davinci0 \
+               --device=/dev/davinci1 \
+               --device=/dev/davinci2 \
+               --device=/dev/davinci3 \
+               --device=/dev/davinci4 \
+               --device=/dev/davinci5 \
+               --device=/dev/davinci6 \
+               --device=/dev/davinci7 \
+               --device=/dev/davinci_manager \
+               --device=/dev/devmm_svm \
+               --device=/dev/hisi_hdc \
+               --privileged \
+               -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
+               -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons \
+               -v ${data_dir}:${data_dir} \
+               -v ${model_dir}:${model_dir} \
+               -v /root/ascend/log:/root/ascend/log ${docker_image} /bin/bash
\ No newline at end of file