Commit 6a6fb843 authored by cdemo123

[Zhejiang University][University Contribution][Mindspore][melgan] - high-performance pretrained model submission, accuracy target met

parent 6246e0ca
Showing 1131 additions and 0 deletions
@@ -139,6 +139,10 @@
"models/official/cv/retinanet/infer/mxbase/retinanetDetection/RetinanetDetection.h" "runtime/references"
"models/official/cv/retinanet/infer/mxbase/retinanetDetection/RetinanetDetection.cpp" "runtime/references"
"models/official/audio/melgan/infer/mxbase/src/main.cpp" "runtime/references"
"models/official/audio/melgan/infer/mxbase/src/Melgan.h" "runtime/references"
"models/official/audio/melgan/infer/mxbase/src/Melgan.cpp" "runtime/references"
"models/official/audio/lpcnet/ascend310_infer/inc/lpcnet.h" "runtime/int"
"models/official/audio/lpcnet/ascend310_infer/src/main.cc" "build/include_subdir"
#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
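# Convert a MindSpore AIR model to an Ascend offline (OM) model with ATC.
# Usage: bash <this script> <model_path> <output_path>
# e.g. (illustrative paths, not fixed by this repo):
#     bash convert_om.sh ../data/models/melgan.air ../data/models/melgan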
model_path=$1
output_path=$2

# quote the paths so the conversion also works when they contain spaces
atc --model="$model_path" \
    --framework=1 \
    --output="$output_path" \
    --input_format=NCHW \
    --log=error \
    --soc_version=Ascend310
{
    "im_melgan": {
        "stream_config": {
            "deviceId": "0"
        },
        "appsrc0": {
            "props": {
                "blocksize": "409600"
            },
            "factory": "appsrc",
            "next": "mxpi_tensorinfer0"
        },
        "mxpi_tensorinfer0": {
            "props": {
                "dataSource": "appsrc0",
                "modelPath": "../data/models/melgan.om",
                "outputDeviceId": "-1"
            },
            "factory": "mxpi_tensorinfer",
            "next": "mxpi_dataserialize0"
        },
        "mxpi_dataserialize0": {
            "props": {
                "outputDataKeys": "mxpi_tensorinfer0"
            },
            "factory": "mxpi_dataserialize",
            "next": "appsink0"
        },
        "appsink0": {
            "factory": "appsink"
        }
    }
}
#!/usr/bin/env bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
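# Start an inference container on a single Ascend device.
# Usage: bash docker_start.sh <docker_image> <model_dir>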
docker_image=$1
model_dir=$2
function show_help() {
    echo "Usage: docker_start.sh docker_image model_dir"
}

function param_check() {
    if [ -z "${docker_image}" ]; then
        echo "please input docker_image"
        show_help
        exit 1
    fi
    if [ -z "${model_dir}" ]; then
        echo "please input model_dir"
        show_help
        exit 1
    fi
}
param_check
docker run -it -u root \
--device=/dev/davinci0 \
--device=/dev/davinci_manager \
--device=/dev/devmm_svm \
--device=/dev/hisi_hdc \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v ${model_dir}:${model_dir} \
${docker_image} \
/bin/bash
cmake_minimum_required(VERSION 3.5.2)
project(Melgan)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
set(TARGET_MAIN Melgan)
set(ACL_LIB_PATH $ENV{ASCEND_HOME}/ascend-toolkit/latest/acllib)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories($ENV{MX_SDK_HOME}/include)
include_directories($ENV{MX_SDK_HOME}/opensource/include)
include_directories($ENV{MX_SDK_HOME}/opensource/include/opencv4)
include_directories($ENV{MX_SDK_HOME}/opensource/include/gstreamer-1.0)
include_directories($ENV{MX_SDK_HOME}/opensource/include/glib-2.0)
include_directories($ENV{MX_SDK_HOME}/opensource/lib/glib-2.0/include)
link_directories($ENV{MX_SDK_HOME}/lib)
link_directories($ENV{MX_SDK_HOME}/opensource/lib/)
add_compile_options(-std=c++11 -fPIC -fstack-protector-all -pie -Wno-deprecated-declarations)
add_compile_options("-DPLUGIN_NAME=${PLUGIN_NAME}")
add_compile_options("-Dgoogle=mindxsdk_private")
add_definitions(-DENABLE_DVPP_INTERFACE)
include_directories(${ACL_LIB_PATH}/include)
link_directories(${ACL_LIB_PATH}/lib64/)
add_executable(${TARGET_MAIN} src/main.cpp src/Melgan.cpp)
target_link_libraries(${TARGET_MAIN} ${TARGET_LIBRARY} glog cpprest mxbase libascendcl.so)
install(TARGETS ${TARGET_MAIN} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/)
#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
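# Build the MxBase inference executable (Melgan) out of tree in ./build.
# ASCEND_HOME, ASCEND_VERSION and ARCH_PATTERN may be pre-set by the caller;
# check_env() below fills in defaults otherwise.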
path_cur=$(dirname "$0")

function check_env()
{
    # set ASCEND_HOME to /usr/local/Ascend/ when it was not specified by user
    if [ ! "${ASCEND_HOME}" ]; then
        export ASCEND_HOME=/usr/local/Ascend/
        echo "Set ASCEND_HOME to the default value: ${ASCEND_HOME}"
    else
        echo "ASCEND_HOME is set to ${ASCEND_HOME} by user"
    fi
    # set ASCEND_VERSION to nnrt/latest when it was not specified by user
    if [ ! "${ASCEND_VERSION}" ]; then
        export ASCEND_VERSION=nnrt/latest
        echo "Set ASCEND_VERSION to the default value: ${ASCEND_VERSION}"
    else
        echo "ASCEND_VERSION is set to ${ASCEND_VERSION} by user"
    fi
    # set ARCH_PATTERN to ./ when it was not specified by user
    if [ ! "${ARCH_PATTERN}" ]; then
        export ARCH_PATTERN=./
        echo "ARCH_PATTERN is set to the default value: ${ARCH_PATTERN}"
    else
        echo "ARCH_PATTERN is set to ${ARCH_PATTERN} by user"
    fi
}
function build_melgan()
{
    cd "$path_cur" || exit 1
    rm -rf build
    mkdir -p build
    cd build || exit 1
    cmake ..
    make
    ret=$?
    if [ ${ret} -ne 0 ]; then
        echo "Failed to build melgan."
        exit ${ret}
    fi
    make install
}

check_env
build_melgan
/*
* Copyright 2022 Huawei Technologies Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Melgan.h"
#include <sys/stat.h>
#include <unistd.h>
#include <algorithm>
#include <fstream>
#include <string>
#include <memory>
#include <map>
#include <vector>
#include "acl/acl.h"
#include "MxBase/DeviceManager/DeviceManager.h"
#include "MxBase/Log/Log.h"
APP_ERROR MELGAN::Init(const InitParam &initParam) {
    deviceId_ = initParam.deviceId;
    APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices();
    if (ret != APP_ERR_OK) {
        LogError << "Init devices failed, ret=" << ret << ".";
        return ret;
    }
    ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId);
    if (ret != APP_ERR_OK) {
        LogError << "Set context failed, ret=" << ret << ".";
        return ret;
    }
    model_ = std::make_shared<MxBase::ModelInferenceProcessor>();
    ret = model_->Init(initParam.modelPath, modelDesc_);
    if (ret != APP_ERR_OK) {
        LogError << "ModelInferenceProcessor init failed, ret=" << ret << ".";
        return ret;
    }
    return APP_ERR_OK;
}
APP_ERROR MELGAN::DeInit() {
    // dvppWrapper_ is never created in Init(), so guard against a null pointer
    if (dvppWrapper_ != nullptr) {
        dvppWrapper_->DeInit();
    }
    model_->DeInit();
    MxBase::DeviceManager::GetInstance()->DestroyDevices();
    return APP_ERR_OK;
}
APP_ERROR MELGAN::VectorToTensorBase(const std::vector<std::vector<std::vector<float>>> &input_x,
                                     MxBase::TensorBase *tensorBase) {
    const uint32_t dataSize = modelDesc_.inputTensors[0].tensorSize / sizeof(float);
    // flatten the (batch, channel, frame) nested vectors into one contiguous host
    // buffer; std::vector avoids the leak of a raw new[] that was never freed
    std::vector<float> metaFeatureData(dataSize);
    uint32_t idx = 0;
    for (size_t bs = 0; bs < input_x.size(); bs++) {
        for (size_t c = 0; c < input_x[0].size(); c++) {
            for (size_t d = 0; d < input_x[0][0].size(); d++) {
                metaFeatureData[idx++] = input_x[bs][c][d];
            }
        }
    }
    MxBase::MemoryData memoryDataDst(dataSize * sizeof(float), MxBase::MemoryData::MEMORY_DEVICE, deviceId_);
    MxBase::MemoryData memoryDataSrc(reinterpret_cast<void *>(metaFeatureData.data()),
                                     dataSize * sizeof(float), MxBase::MemoryData::MEMORY_HOST_MALLOC);
    APP_ERROR ret = MxBase::MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc);
    if (ret != APP_ERR_OK) {
        LogError << GetError(ret) << "Memory malloc failed.";
        return ret;
    }
    std::vector<uint32_t> shape = {1, 80, 240};
    *tensorBase = MxBase::TensorBase(memoryDataDst, false, shape, MxBase::TENSOR_DTYPE_FLOAT32);
    return APP_ERR_OK;
}
APP_ERROR MELGAN::Inference(const std::vector<MxBase::TensorBase> &inputs,
                            std::vector<MxBase::TensorBase> *outputs) {
    auto dtypes = model_->GetOutputDataType();
    for (size_t i = 0; i < modelDesc_.outputTensors.size(); i++) {
        std::vector<uint32_t> shape = {};
        for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); j++) {
            shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]);
        }
        MxBase::TensorBase tensor(shape, dtypes[i], MxBase::MemoryData::MemoryType::MEMORY_DEVICE, deviceId_);
        APP_ERROR ret = MxBase::TensorBase::TensorBaseMalloc(tensor);
        if (ret != APP_ERR_OK) {
            LogError << "TensorBaseMalloc failed, ret=" << ret << ".";
            return ret;
        }
        (*outputs).push_back(tensor);
    }
    MxBase::DynamicInfo dynamicInfo = {};
    dynamicInfo.dynamicType = MxBase::DynamicType::STATIC_BATCH;
    auto startTime = std::chrono::high_resolution_clock::now();
    APP_ERROR ret = model_->ModelInference(inputs, *outputs, dynamicInfo);
    auto endTime = std::chrono::high_resolution_clock::now();
    double costMs = std::chrono::duration<double, std::milli>(endTime - startTime).count();
    inferCostTimeMilliSec += costMs;
    if (ret != APP_ERR_OK) {
        LogError << "ModelInference failed, ret=" << ret << ".";
        return ret;
    }
    return APP_ERR_OK;
}
// note: despite the parameter names, "inputs" holds the model output tensors and
// "outputs" receives their flattened float values
APP_ERROR MELGAN::SaveInferResult(std::vector<float> *outputs, std::vector<MxBase::TensorBase> *inputs) {
    MxBase::TensorBase &tensor = inputs->at(0);
    APP_ERROR ret = tensor.ToHost();
    if (ret != APP_ERR_OK) {
        LogError << GetError(ret) << "Tensor deploy to host failed.";
        return ret;
    }
    // the output tensor is expected to be 3-dimensional: (batch, channel, samples)
    auto outputShape = tensor.GetShape();
    uint32_t length = outputShape[2];
    LogInfo << "output shape is: (" << outputShape[0] << ',' << outputShape[1] << ',' << outputShape[2] << ')';
    void *data = tensor.GetBuffer();
    for (uint32_t i = 0; i < length; i++) {
        float value = *(reinterpret_cast<float *>(data) + i);
        outputs->emplace_back(value);
    }
    return APP_ERR_OK;
}
APP_ERROR MELGAN::Process(const std::string &fileName, const std::vector<std::vector<std::vector<float>>> &input_x,
                          InitParam &initParam, std::vector<float> output) {
    std::vector<MxBase::TensorBase> inputs = {};
    std::vector<MxBase::TensorBase> outputs;
    MxBase::TensorBase tensorBase;
    auto ret = VectorToTensorBase(input_x, &tensorBase);
    if (ret != APP_ERR_OK) {
        LogError << "ToTensorBase failed, ret=" << ret << ".";
        return ret;
    }
    inputs.push_back(tensorBase);
    // Inference() already accumulates its own cost into inferCostTimeMilliSec;
    // timing it again here would double-count the inference time
    ret = Inference(inputs, &outputs);
    if (ret != APP_ERR_OK) {
        LogError << "Inference failed, ret=" << ret << ".";
        return ret;
    }
    ret = SaveInferResult(&output, &outputs);
    if (ret != APP_ERR_OK) {
        LogError << "Save model infer results into file failed. ret = " << ret << ".";
        return ret;
    }
    ret = WriteResult(fileName, output);
    if (ret != APP_ERR_OK) {
        LogError << "WriteResult failed, ret=" << ret << ".";
        return ret;
    }
    return APP_ERR_OK;
}
APP_ERROR MELGAN::WriteResult(const std::string &fileName,
                              const std::vector<float> &output) {
    std::string resultPathName = "output";
    // create result directory when it does not exist
    if (access(resultPathName.c_str(), 0) != 0) {
        int ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR);
        if (ret != 0) {
            LogError << "Failed to create result directory: " << resultPathName
                     << ", ret = " << ret;
            return APP_ERR_COMM_OPEN_FAIL;
        }
    }
    // create result file under result directory
    resultPathName = resultPathName + "/restruction_" + fileName;
    std::ofstream tfile(resultPathName, std::ofstream::app);
    if (tfile.fail()) {
        LogError << "Failed to open result file: " << resultPathName;
        return APP_ERR_COMM_OPEN_FAIL;
    }
    for (uint32_t i = 0; i < output.size(); i++) {
        tfile << std::to_string(output[i]) << " ";
    }
    tfile.close();
    return APP_ERR_OK;
}
/*
* Copyright 2022 Huawei Technologies Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MxBase_MELGAN_H
#define MxBase_MELGAN_H

#include <memory>
#include <string>
#include <vector>
#include "acl/acl.h"
#include "MxBase/DvppWrapper/DvppWrapper.h"
#include "MxBase/ModelInfer/ModelInferenceProcessor.h"
#include "MxBase/Tensor/TensorContext/TensorContext.h"

struct InitParam {
    uint32_t deviceId;
    bool checkTensor;
    std::string modelPath;
};

class MELGAN {
 public:
    APP_ERROR Init(const InitParam &initParam);
    APP_ERROR DeInit();
    APP_ERROR VectorToTensorBase(const std::vector<std::vector<std::vector<float>>> &input_x,
                                 MxBase::TensorBase *tensorBase);
    APP_ERROR Inference(const std::vector<MxBase::TensorBase> &inputs, std::vector<MxBase::TensorBase> *outputs);
    APP_ERROR Process(const std::string &fileName, const std::vector<std::vector<std::vector<float>>> &input_x,
                      InitParam &initParam, std::vector<float> output);
    APP_ERROR SaveInferResult(std::vector<float> *outputs, std::vector<MxBase::TensorBase> *inputs);
    APP_ERROR WriteResult(const std::string &fileName, const std::vector<float> &output);
    double GetInferCostMilliSec() const { return inferCostTimeMilliSec; }

 private:
    std::shared_ptr<MxBase::DvppWrapper> dvppWrapper_;
    std::shared_ptr<MxBase::ModelInferenceProcessor> model_;
    MxBase::ModelDesc modelDesc_;
    uint32_t deviceId_ = 0;
    double inferCostTimeMilliSec = 0.0;
};
#endif  // MxBase_MELGAN_H
/*
* Copyright 2022 Huawei Technologies Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <dirent.h>
#include <fstream>
#include <string>
#include <sstream>
#include <cstdlib>
#include <vector>
#include <cmath>
#include <cstdio>
#include "Melgan.h"
#include "MxBase/Log/Log.h"
// segment parameters, kept consistent with the Python pre/post-processing scripts
int eval_length = 240;   // mel frames per inference segment
int hop_size = 256;      // waveform samples per mel frame
int repeat_frame = 30;   // frames overlapped between consecutive segments
int sample = 22050;      // output sample rate in Hz
APP_ERROR ReadTxt(const std::string &path, std::vector<std::vector<std::vector<float>>> *dataset) {
    std::ifstream fp(path);
    std::string line;
    std::vector<std::vector<float>> data;
    int count = 0;
    while (std::getline(fp, line)) {
        std::vector<float> data_line;
        std::string number;
        std::istringstream readstr(line);
        for (int j = 0; j < eval_length; j++) {  // each row holds eval_length (240) mel values
            std::getline(readstr, number, ' ');
            data_line.push_back(static_cast<float>(atof(number.c_str())));
        }
        data.push_back(data_line);
        count++;
        if (count % 80 == 0) {  // every 80 rows form one (80, 240) mel segment
            std::vector<std::vector<float>> dataseg;
            for (int i = count - 80; i < count; i++) {
                dataseg.push_back(data[i]);
            }
            dataset->push_back(dataseg);
        }
    }
    return APP_ERR_OK;
}
int main(int argc, char *argv[]) {
    if (argc < 4) {
        LogError << "Usage: " << argv[0] << " <model_path> <eval_data_path> <list_filename>";
        return APP_ERR_COMM_INVALID_PARAM;
    }
    std::string model_path = argv[1];
    std::string eval_data_path = argv[2];
    std::string list_filename = argv[3];
    InitParam initParam = {};
    initParam.deviceId = 0;
    initParam.checkTensor = true;
    initParam.modelPath = model_path;
    auto melgan = std::make_shared<MELGAN>();
    printf("Start running\n");
    APP_ERROR ret = melgan->Init(initParam);
    if (ret != APP_ERR_OK) {
        melgan->DeInit();
        LogError << "melgan init failed, ret=" << ret << ".";
        return ret;
    }
    // get test data filename
    std::string path = eval_data_path + "/" + list_filename;
    std::ifstream fp(path);
    std::string filename;
    while (std::getline(fp, filename)) {
        LogInfo << "Start inference " << filename << std::endl;
        std::string dataPath = eval_data_path + "/" + filename;
        std::vector<std::vector<std::vector<float>>> test_data;
        ret = ReadTxt(dataPath, &test_data);
        if (ret != APP_ERR_OK) {
            melgan->DeInit();
            LogError << "read test_data failed, ret=" << ret << ".";
            return ret;
        }
        int data_seg = test_data.size();
        int data_row = test_data[0].size();
        int data_col = test_data[0][0].size();
        LogInfo << filename << " data shape: (" << data_seg << ',' << data_row << ',' << data_col << ')';
        for (int iter = 0; iter < data_seg; iter++) {
            std::vector<float> output;
            std::vector<std::vector<std::vector<float>>> data;
            data.push_back(test_data[iter]);
            ret = melgan->Process(filename, data, initParam, output);
            if (ret != APP_ERR_OK) {
                LogError << "melgan process failed, ret=" << ret << ".";
                melgan->DeInit();
                return ret;
            }
        }
        LogInfo << "File " << filename << " inference successfully!";
    }
    melgan->DeInit();
    return APP_ERR_OK;
}
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
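"""MelGAN MindX SDK inference: feed preprocessed mel segments through the
im_melgan stream defined in the pipeline file and save the raw waveform outputs."""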
import os
import argparse
import MxpiDataType_pb2 as MxpiDataType
import numpy as np
from StreamManagerApi import StreamManagerApi, InProtobufVector, MxProtobufIn, StringVector, MxDataInput
def inference(input_tensor):
    tensor_bytes = input_tensor.tobytes()
    in_plugin_id = 0
    tensorPackageList = MxpiDataType.MxpiTensorPackageList()
    tensorPackage = tensorPackageList.tensorPackageVec.add()
    dataInput = MxDataInput()
    dataInput.data = tensor_bytes
    tensorVec = tensorPackage.tensorVec.add()
    tensorVec.deviceId = 0
    tensorVec.memType = 0
    for t in input_tensor.shape:
        tensorVec.tensorShape.append(t)
    tensorVec.dataStr = dataInput.data
    tensorVec.tensorDataSize = len(tensor_bytes)
    # add feature data end
    key = "appsrc{}".format(in_plugin_id).encode('utf-8')
    protobufVec = InProtobufVector()
    protobuf = MxProtobufIn()
    protobuf.key = key
    protobuf.type = b'MxTools.MxpiTensorPackageList'
    protobuf.protobuf = tensorPackageList.SerializeToString()
    protobufVec.push_back(protobuf)
    unique_id = stream_manager_api.SendProtobuf(stream_name, in_plugin_id, protobufVec)
    if unique_id < 0:
        print("Failed to send data to stream.")
        exit()
    # Obtain the inference result by specifying streamName and uniqueId.
    keyVec = StringVector()
    keyVec.push_back(b'mxpi_tensorinfer0')
    infer_result = stream_manager_api.GetProtobuf(stream_name, in_plugin_id, keyVec)
    if infer_result.size() == 0:
        print("inferResult is null")
        exit()
    if infer_result[0].errorCode != 0:
        print("GetProtobuf error. errorCode=%d" % (
            infer_result[0].errorCode))
        exit()
    # get infer result
    result = MxpiDataType.MxpiTensorPackageList()
    result.ParseFromString(infer_result[0].messageBuf)
    # convert the inference result to Numpy array
    out = np.frombuffer(result.tensorPackageVec[0].tensorVec[0].dataStr, dtype=np.float32).ravel()
    return out
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--eval_path', type=str, default='../data/input/',
                        help="input data path")
    parser.add_argument('--pipeline_path', type=str, default='./output',
                        help='pipeline path')
    parser.add_argument('--output_path', type=str, default='./output',
                        help='output data path')
    parser.add_argument('--eval_length', type=int, default=240,
                        help='eval length')
    parser.add_argument('--hop_size', type=int, default=256,
                        help='hop size')
    parser.add_argument('--sample', type=int, default=22050,
                        help='sample rate')
    opts = parser.parse_args()
    eval_path = opts.eval_path
    output_path = opts.output_path
    pipeline_path = opts.pipeline_path
    eval_length = opts.eval_length
    hop_size = opts.hop_size
    sample = opts.sample
    repeat_frame = eval_length // 8
    # init stream manager
    stream_manager_api = StreamManagerApi()
    ret = stream_manager_api.InitManager()
    if ret != 0:
        print("Failed to init Stream manager, ret=%s" % str(ret))
        exit()
    # create streams by pipeline config file
    with open(pipeline_path, 'rb') as f:
        pipelineStr = f.read()
    ret = stream_manager_api.CreateMultipleStreams(pipelineStr)
    if ret != 0:
        print("Failed to create Stream, ret=%s" % str(ret))
        exit()
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    # Construct the input of the stream
    infer_total_time = 0
    files = os.listdir(eval_path)
    for file_name in files:
        if "_test.txt" in file_name:
            data_path = os.path.join(eval_path, file_name)
            all_test_data = np.loadtxt(data_path, dtype=np.float32)
            stream_name = b'im_melgan'
            all_test_data = all_test_data.reshape((-1, 1, 80, 240))
            num = all_test_data.shape[0]
            # run the segments through the stream one by one
            wav_data = np.array([])
            for idx in range(num):
                tensor = all_test_data[idx].reshape((1, 80, 240))
                output = inference(tensor)
                wav_data = np.concatenate((wav_data, output))
            # save as txt file
            out_path = os.path.join(output_path, 'restruction_' + file_name)
            np.savetxt(out_path, wav_data.reshape(-1), fmt='%.18e')
            print("File " + file_name + " inference successfully!")
    # destroy streams
    stream_manager_api.DestroyAllStreams()
#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
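# Run SDK inference through main.py.
# Usage: bash run.sh <pipeline_file> <eval_data_dir> <output_dir>
# MX_SDK_HOME must point at a MindX SDK installation.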
set -e
pipeline_path=$1
eval_path=$2
output_path=$3
info() { echo -e "\033[1;34m[INFO ][MxStream] $1\033[1;37m" ; }
warn() { echo >&2 -e "\033[1;31m[WARN ][MxStream] $1\033[1;37m" ; }
export LD_LIBRARY_PATH=${MX_SDK_HOME}/lib:${MX_SDK_HOME}/opensource/lib:${MX_SDK_HOME}/opensource/lib64:/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64:${LD_LIBRARY_PATH}
export GST_PLUGIN_SCANNER=${MX_SDK_HOME}/opensource/libexec/gstreamer-1.0/gst-plugin-scanner
export GST_PLUGIN_PATH=${MX_SDK_HOME}/opensource/lib/gstreamer-1.0:${MX_SDK_HOME}/lib/plugins
export PYTHONPATH=$PYTHONPATH:${MX_SDK_HOME}/python
python3 main.py --pipeline_path "$pipeline_path" --eval_path "$eval_path" --output_path "$output_path"
exit 0
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MelGAN eval"""
import argparse
import os
import numpy as np
from scipy.io.wavfile import write
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, default='../data/input/',
                        help="input data path")
    parser.add_argument('--output_path', type=str, default='../mxbase/output',
                        help='output data path')
    parser.add_argument('--eval_length', type=int, default=240,
                        help='eval length')
    parser.add_argument('--hop_size', type=int, default=256,
                        help='hop size')
    parser.add_argument('--sample', type=int, default=22050,
                        help='sample rate')
    opts = parser.parse_args()
    data_path = opts.data_path
    output_path = opts.output_path
    eval_length = opts.eval_length
    hop_size = opts.hop_size
    sample = opts.sample
    data_list = os.listdir(output_path)
    print(data_list)
    for data_name in data_list:
        if 'test.txt' in data_name:
            txt_data = np.loadtxt(os.path.join(output_path, data_name), dtype=np.float32).reshape((-1, 61440))
            melname = data_name.replace('txt', 'npy').replace('restruction_', '').replace('_test', '')
            meldata = np.load(os.path.join(data_path, melname)).reshape((80, -1))
            pad_node = 0
            if meldata.shape[1] < eval_length:
                pad_node = eval_length - meldata.shape[1]
            # first frame
            wav_data = np.array([])
            output = txt_data[0].ravel()
            wav_data = np.concatenate((wav_data, output))
            # initialization parameters
            repeat_frame = eval_length // 8
            i = eval_length - repeat_frame
            length = eval_length
            num_weights = i
            interval = (hop_size * repeat_frame) // num_weights
            weights = np.linspace(0.0, 1.0, num_weights)
            # walk the mel frames once to find how much padding the last segment needed
            while i < meldata.shape[1]:
                meldata_s = meldata[:, i:i + length]
                if meldata_s.shape[1] != eval_length:
                    pad_node = hop_size * (eval_length - meldata_s.shape[1])
                i = i + length - repeat_frame
            for idx in range(1, txt_data.shape[0]):
                # idx-th segment
                output = txt_data[idx].ravel()
                lenwav = hop_size * repeat_frame
                lenout = 0
                # cross-fade the overlapped region: ramp the old tail down while
                # ramping the new segment's head up
                for j in range(num_weights - 1):
                    wav_data[-lenwav:-lenwav + interval] = weights[-j - 1] * wav_data[-lenwav:-lenwav + interval] + \
                                                           weights[j] * output[lenout:lenout + interval]
                    lenwav = lenwav - interval
                    lenout = lenout + interval
                wav_data[-lenwav:] = weights[-num_weights] * wav_data[-lenwav:] + \
                                     weights[num_weights - 1] * output[lenout:lenout + lenwav]
                wav_data = np.concatenate((wav_data, output[hop_size * repeat_frame:]))
            if pad_node != 0:
                wav_data = wav_data[:-pad_node]
            # save as wav file
            wav_data = 32768.0 * wav_data
            out_path = os.path.join(output_path, 'restruction_' + data_name.replace('txt', 'wav'))
            write(out_path, sample, wav_data.astype('int16'))
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MelGAN eval"""
import os
import argparse
import numpy as np
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, default='../data/input/',
                        help="input data path")
    parser.add_argument('--eval_length', type=int, default=240,
                        help='eval length')
    parser.add_argument('--hop_size', type=int, default=256,
                        help='hop size')
    opts = parser.parse_args()
    data_path = opts.data_path
    eval_length = opts.eval_length
    hop_size = opts.hop_size
    file_list = os.listdir(data_path)
    data_list = []
    for data_name in file_list:
        if '.npy' in data_name:
            print(data_name)
            npypath = os.path.join(data_path, data_name)
            # data preprocessing: shift/scale the mel spectrogram
            meldata = np.load(npypath)
            meldata = (meldata + 5.0) / 5.0
            pad_node = 0
            if meldata.shape[1] < eval_length:
                pad_node = eval_length - meldata.shape[1]
                meldata = np.pad(meldata, ((0, 0), (0, pad_node)), mode='constant', constant_values=0.0)
            meldata_s = meldata[np.newaxis, :, 0:eval_length]
            new_data = meldata_s
            repeat_frame = eval_length // 8
            i = eval_length - repeat_frame
            length = eval_length
            while i < meldata.shape[1]:
                # slice the next overlapping segment, edge-padding the last one if short
                meldata_s = meldata[:, i:i + length]
                if meldata_s.shape[1] != eval_length:
                    pad_node = hop_size * (eval_length - meldata_s.shape[1])
                    meldata_s = np.pad(meldata_s, ((0, 0), (0, eval_length - meldata_s.shape[1])), mode='edge')
                meldata_s = meldata_s[np.newaxis, :, :]
                new_data = np.concatenate((new_data, meldata_s), axis=1)
                i = i + length - repeat_frame
            out_file = npypath.replace('.npy', '_test.txt')
            np.savetxt(out_file, new_data.reshape((-1, eval_length)), fmt='%.18e')
            d = np.loadtxt(out_file, dtype=np.float32)  # sanity check that the dump reloads cleanly
            data_list.append(data_name.replace('.npy', '_test.txt'))
            print((new_data.reshape((-1, eval_length))).shape)
    data_list_str = "\n".join(data_list)
    print(data_list_str)
    with open(os.path.join(data_path, 'data_list.txt'), 'w') as f:
        f.write(data_list_str)
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MelGAN train"""
import os
import time
import numpy as np
import mindspore as ms
import mindspore.common.dtype as mstype
import mindspore.context as context
import mindspore.dataset as de
import mindspore.nn as nn
from mindspore.common import set_seed
from mindspore.common.tensor import Tensor
from mindspore.communication.management import init, get_rank, get_group_size
from mindspore.context import ParallelMode
from mindspore.train.callback import RunContext, ModelCheckpoint, CheckpointConfig, _InternalCallbackParam
from mindspore.train.loss_scale_manager import DynamicLossScaleManager
from mindspore.train.serialization import load_checkpoint, load_param_into_net, export
from src.dataset import Generator1D
from src.loss import MelganLoss_G, MelganLoss_D
from src.model import MultiDiscriminator, Generator
from src.model_utils.config import config as cfg
from src.model_utils.moxing_adapter import moxing_wrapper
from src.sampler import DistributedSampler
from src.trainonestep import TrainOneStepCellGEN, TrainOneStepCellDIS
set_seed(1)
class BuildGenNetwork(nn.Cell):
    """build generator"""
    def __init__(self, network, criterion):
        super(BuildGenNetwork, self).__init__(auto_prefix=False)
        self.network = network
        self.criterion = criterion

    def construct(self, data):
        fake_wav = self.network(data)
        return fake_wav


class BuildDisNetwork(nn.Cell):
    """build discriminator"""
    def __init__(self, network, criterion):
        super(BuildDisNetwork, self).__init__(auto_prefix=False)
        self.network = network
        self.criterion = criterion

    def construct(self, fake_wav, wav):
        y1 = self.network(fake_wav)
        y2 = self.network(wav)
        loss = self.criterion(y1, y2)
        return loss
@moxing_wrapper()
def train():
    """main train process"""
    # init distributed
    if cfg.run_distribute:
        device_id = int(os.getenv('DEVICE_ID', '0'))
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id)
        init()
        cfg.rank = get_rank()
        cfg.group_size = get_group_size()
        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                          device_num=8, parameter_broadcast=True)
    else:
        cfg.rank = 0
        cfg.group_size = 1
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=cfg.device_id)
    # get network and init
    net_D = MultiDiscriminator()
    net_G = Generator(alpha=cfg.leaky_alpha)
    criterion_G = MelganLoss_G()
    criterion_D = MelganLoss_D()
    gen_network_train = BuildGenNetwork(net_G, criterion_G)
    gen_network_train.set_train()
    dis_network_train_1 = BuildDisNetwork(net_D, criterion_G)
    dis_network_train_1.set_train()
    dis_network_train_2 = BuildDisNetwork(net_D, criterion_D)
    dis_network_train_2.set_train()
    scale_manager = DynamicLossScaleManager(init_loss_scale=2 ** 10, scale_factor=2, scale_window=2000)
    # optimizer
    opt_G = nn.Adam(params=net_G.trainable_params(), learning_rate=cfg.lr_g, beta1=cfg.beta1, beta2=cfg.beta2,
                    weight_decay=cfg.weight_decay)
    opt_D = nn.Adam(params=net_D.trainable_params(), learning_rate=cfg.lr_d, beta1=cfg.beta1, beta2=cfg.beta2,
                    weight_decay=cfg.weight_decay)
    if cfg.pre_trained:
        param_dict = load_checkpoint(cfg.checkpoint_path)
        load_param_into_net(net_G, param_dict)
        load_param_into_net(net_D, param_dict)
    gen_network_train_wrap = TrainOneStepCellGEN(gen_network_train, opt_G, dis_network_train_1, criterion_G)
    dis_network_train_wrap = TrainOneStepCellDIS(gen_network_train, dis_network_train_2, opt_D, criterion_D)
    # dataloader
    Wavmeldataset = Generator1D(cfg.data_path, cfg.train_length, cfg.hop_size)
    distributed_sampler = DistributedSampler(len(Wavmeldataset), cfg.group_size, cfg.rank, shuffle=True)
    dataset = de.GeneratorDataset(Wavmeldataset, ["data", "wav", "datad", "wavd"], sampler=distributed_sampler)
    dataset = dataset.batch(cfg.batch_size, drop_remainder=True)
    # checkpoint save
    config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_steps, keep_checkpoint_max=100000)
    ckpt_cb = ModelCheckpoint(prefix=cfg.save_checkpoint_name, directory=cfg.train_url, config=config_ck)
    cb_params = _InternalCallbackParam()
    cb_params.train_network = gen_network_train_wrap
    cb_params.epoch_num = cfg.epoch_size
    run_context = RunContext(cb_params)
    ckpt_cb.begin(run_context)
    i = 1
    print(cfg.epoch_size)
    epoch_t = time.perf_counter()
    # epoch loop
    for epoch in range(cfg.epoch_size):
        cb_params.cur_epoch_num = epoch + 1
        for data, wav, datad, wavd in dataset.create_tuple_iterator():
            scaling_sens = Tensor(scale_manager.get_loss_scale(), dtype=mstype.float32)
            start = time.perf_counter()
            data = (data + 5.0) / 5.0
            datad = (datad + 5.0) / 5.0
            _, loss_G, cond_g = gen_network_train_wrap(Tensor(wav, mstype.float32), Tensor(data, mstype.float32),
                                                       scaling_sens)
            _, loss_D, cond_d = dis_network_train_wrap(Tensor(datad, mstype.float32), Tensor(wavd, mstype.float32),
                                                       scaling_sens)
            if cond_g:
                scale_manager.update_loss_scale(cond_g)
            else:
                scale_manager.update_loss_scale(False)
            if cond_d:
                scale_manager.update_loss_scale(cond_d)
            else:
                scale_manager.update_loss_scale(False)
            duration = time.perf_counter() - start
            print('{}epoch {}iter loss_G={} loss_D={} {:.2f}s/it'.format(epoch + 1, i, loss_G.asnumpy(),
                                                                         loss_D.asnumpy(), duration))
            i = i + 1
            if cfg.rank == 0:
                cb_params.cur_step_num = i
                cb_params.batch_num = i
                ckpt_cb.step_end(run_context)
    duration = time.perf_counter() - epoch_t
    print('finish in {:.2f}mins'.format(duration / 60))
    input_arr = Tensor(np.random.uniform(0.0, 1.0, size=[1, 80, 240]), ms.float32)
    export(net_G, input_arr, file_name=os.path.join(cfg.train_url, 'melgan_final'), file_format="AIR")


if __name__ == "__main__":
    train()
#!/bin/bash
# Copyright (c) 2022. Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
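# Start a training container with all eight Ascend devices mapped in.
# Usage: bash <this script> <docker_image> <data_dir> <model_dir>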
docker_image=$1
data_dir=$2
model_dir=$3
docker run -it -u root --ipc=host \
--device=/dev/davinci0 \
--device=/dev/davinci1 \
--device=/dev/davinci2 \
--device=/dev/davinci3 \
--device=/dev/davinci4 \
--device=/dev/davinci5 \
--device=/dev/davinci6 \
--device=/dev/davinci7 \
--device=/dev/davinci_manager \
--device=/dev/devmm_svm \
--device=/dev/hisi_hdc \
--privileged \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons \
-v ${data_dir}:${data_dir} \
-v ${model_dir}:${model_dir} \
-v /root/ascend/log:/root/ascend/log ${docker_image} /bin/bash