Commit 6a6fb843 authored by cdemo123

[Zhejiang University][University Contribution][Mindspore][melgan] - high-performance pretrained model submission, accuracy target met

parent 6246e0ca
Showing 1131 additions and 0 deletions
@@ -139,6 +139,10 @@
"models/official/cv/retinanet/infer/mxbase/retinanetDetection/RetinanetDetection.h" "runtime/references"
"models/official/cv/retinanet/infer/mxbase/retinanetDetection/RetinanetDetection.cpp" "runtime/references"
"models/official/audio/melgan/infer/mxbase/src/main.cpp" "runtime/references"
"models/official/audio/melgan/infer/mxbase/src/Melgan.h" "runtime/references"
"models/official/audio/melgan/infer/mxbase/src/Melgan.cpp" "runtime/references"
"models/official/audio/lpcnet/ascend310_infer/inc/lpcnet.h" "runtime/int"
"models/official/audio/lpcnet/ascend310_infer/src/main.cc" "build/include_subdir"
#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
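# Convert a MindSpore AIR model to an Ascend offline (OM) model with ATC.
# Usage: bash <this script> <model_path> <output_path>
# e.g. (illustrative paths, not fixed by this repo):
#     bash convert_om.sh ../data/models/melgan.air ../data/models/melgan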
model_path=$1
output_path=$2

# quote the paths so the conversion also works when they contain spaces
atc --model="$model_path" \
    --framework=1 \
    --output="$output_path" \
    --input_format=NCHW \
    --log=error \
    --soc_version=Ascend310
{
    "im_melgan": {
        "stream_config": {
            "deviceId": "0"
        },
        "appsrc0": {
            "props": {
                "blocksize": "409600"
            },
            "factory": "appsrc",
            "next": "mxpi_tensorinfer0"
        },
        "mxpi_tensorinfer0": {
            "props": {
                "dataSource": "appsrc0",
                "modelPath": "../data/models/melgan.om",
                "outputDeviceId": "-1"
            },
            "factory": "mxpi_tensorinfer",
            "next": "mxpi_dataserialize0"
        },
        "mxpi_dataserialize0": {
            "props": {
                "outputDataKeys": "mxpi_tensorinfer0"
            },
            "factory": "mxpi_dataserialize",
            "next": "appsink0"
        },
        "appsink0": {
            "factory": "appsink"
        }
    }
}
#!/usr/bin/env bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
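# Start an inference container on a single Ascend device.
# Usage: bash docker_start.sh <docker_image> <model_dir>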
docker_image=$1
model_dir=$2
function show_help() {
    echo "Usage: docker_start.sh docker_image model_dir"
}

function param_check() {
    if [ -z "${docker_image}" ]; then
        echo "please input docker_image"
        show_help
        exit 1
    fi
    if [ -z "${model_dir}" ]; then
        echo "please input model_dir"
        show_help
        exit 1
    fi
}
param_check
docker run -it -u root \
--device=/dev/davinci0 \
--device=/dev/davinci_manager \
--device=/dev/devmm_svm \
--device=/dev/hisi_hdc \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v ${model_dir}:${model_dir} \
${docker_image} \
/bin/bash
cmake_minimum_required(VERSION 3.5.2)
project(Melgan)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
set(TARGET_MAIN Melgan)
set(ACL_LIB_PATH $ENV{ASCEND_HOME}/ascend-toolkit/latest/acllib)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories($ENV{MX_SDK_HOME}/include)
include_directories($ENV{MX_SDK_HOME}/opensource/include)
include_directories($ENV{MX_SDK_HOME}/opensource/include/opencv4)
include_directories($ENV{MX_SDK_HOME}/opensource/include/gstreamer-1.0)
include_directories($ENV{MX_SDK_HOME}/opensource/include/glib-2.0)
include_directories($ENV{MX_SDK_HOME}/opensource/lib/glib-2.0/include)
link_directories($ENV{MX_SDK_HOME}/lib)
link_directories($ENV{MX_SDK_HOME}/opensource/lib/)
add_compile_options(-std=c++11 -fPIC -fstack-protector-all -pie -Wno-deprecated-declarations)
add_compile_options("-DPLUGIN_NAME=${PLUGIN_NAME}")
add_compile_options("-Dgoogle=mindxsdk_private")
add_definitions(-DENABLE_DVPP_INTERFACE)
include_directories(${ACL_LIB_PATH}/include)
link_directories(${ACL_LIB_PATH}/lib64/)
add_executable(${TARGET_MAIN} src/main.cpp src/Melgan.cpp)
target_link_libraries(${TARGET_MAIN} ${TARGET_LIBRARY} glog cpprest mxbase libascendcl.so)
install(TARGETS ${TARGET_MAIN} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/)
#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
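# Build the MxBase inference executable (Melgan) out of tree in ./build.
# ASCEND_HOME, ASCEND_VERSION and ARCH_PATTERN may be pre-set by the caller;
# check_env() below fills in defaults otherwise.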
path_cur=$(dirname "$0")

function check_env()
{
    # set ASCEND_HOME to /usr/local/Ascend/ when it was not specified by user
    if [ ! "${ASCEND_HOME}" ]; then
        export ASCEND_HOME=/usr/local/Ascend/
        echo "Set ASCEND_HOME to the default value: ${ASCEND_HOME}"
    else
        echo "ASCEND_HOME is set to ${ASCEND_HOME} by user"
    fi
    # set ASCEND_VERSION to nnrt/latest when it was not specified by user
    if [ ! "${ASCEND_VERSION}" ]; then
        export ASCEND_VERSION=nnrt/latest
        echo "Set ASCEND_VERSION to the default value: ${ASCEND_VERSION}"
    else
        echo "ASCEND_VERSION is set to ${ASCEND_VERSION} by user"
    fi
    # set ARCH_PATTERN to ./ when it was not specified by user
    if [ ! "${ARCH_PATTERN}" ]; then
        export ARCH_PATTERN=./
        echo "ARCH_PATTERN is set to the default value: ${ARCH_PATTERN}"
    else
        echo "ARCH_PATTERN is set to ${ARCH_PATTERN} by user"
    fi
}
function build_melgan()
{
    cd "$path_cur" || exit 1
    rm -rf build
    mkdir -p build
    cd build || exit 1
    cmake ..
    make
    ret=$?
    if [ ${ret} -ne 0 ]; then
        echo "Failed to build melgan."
        exit ${ret}
    fi
    make install
}

check_env
build_melgan
/*
* Copyright 2022 Huawei Technologies Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Melgan.h"
#include <sys/stat.h>
#include <unistd.h>
#include <algorithm>
#include <fstream>
#include <string>
#include <memory>
#include <map>
#include <vector>
#include "acl/acl.h"
#include "MxBase/DeviceManager/DeviceManager.h"
#include "MxBase/Log/Log.h"
APP_ERROR MELGAN::Init(const InitParam &initParam) {
    deviceId_ = initParam.deviceId;
    APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices();
    if (ret != APP_ERR_OK) {
        LogError << "Init devices failed, ret=" << ret << ".";
        return ret;
    }
    ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId);
    if (ret != APP_ERR_OK) {
        LogError << "Set context failed, ret=" << ret << ".";
        return ret;
    }
    model_ = std::make_shared<MxBase::ModelInferenceProcessor>();
    ret = model_->Init(initParam.modelPath, modelDesc_);
    if (ret != APP_ERR_OK) {
        LogError << "ModelInferenceProcessor init failed, ret=" << ret << ".";
        return ret;
    }
    return APP_ERR_OK;
}
APP_ERROR MELGAN::DeInit() {
    // dvppWrapper_ is never created in Init(), so guard against a null pointer
    if (dvppWrapper_ != nullptr) {
        dvppWrapper_->DeInit();
    }
    model_->DeInit();
    MxBase::DeviceManager::GetInstance()->DestroyDevices();
    return APP_ERR_OK;
}
APP_ERROR MELGAN::VectorToTensorBase(const std::vector<std::vector<std::vector<float>>> &input_x,
                                     MxBase::TensorBase *tensorBase) {
    const uint32_t dataSize = modelDesc_.inputTensors[0].tensorSize / sizeof(float);
    // flatten the (batch, channel, frame) nested vectors into one contiguous host
    // buffer; std::vector avoids the leak of a raw new[] that was never freed
    std::vector<float> metaFeatureData(dataSize);
    uint32_t idx = 0;
    for (size_t bs = 0; bs < input_x.size(); bs++) {
        for (size_t c = 0; c < input_x[0].size(); c++) {
            for (size_t d = 0; d < input_x[0][0].size(); d++) {
                metaFeatureData[idx++] = input_x[bs][c][d];
            }
        }
    }
    MxBase::MemoryData memoryDataDst(dataSize * sizeof(float), MxBase::MemoryData::MEMORY_DEVICE, deviceId_);
    MxBase::MemoryData memoryDataSrc(reinterpret_cast<void *>(metaFeatureData.data()),
                                     dataSize * sizeof(float), MxBase::MemoryData::MEMORY_HOST_MALLOC);
    APP_ERROR ret = MxBase::MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc);
    if (ret != APP_ERR_OK) {
        LogError << GetError(ret) << "Memory malloc failed.";
        return ret;
    }
    std::vector<uint32_t> shape = {1, 80, 240};
    *tensorBase = MxBase::TensorBase(memoryDataDst, false, shape, MxBase::TENSOR_DTYPE_FLOAT32);
    return APP_ERR_OK;
}
APP_ERROR MELGAN::Inference(const std::vector<MxBase::TensorBase> &inputs,
                            std::vector<MxBase::TensorBase> *outputs) {
    auto dtypes = model_->GetOutputDataType();
    for (size_t i = 0; i < modelDesc_.outputTensors.size(); i++) {
        std::vector<uint32_t> shape = {};
        for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); j++) {
            shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]);
        }
        MxBase::TensorBase tensor(shape, dtypes[i], MxBase::MemoryData::MemoryType::MEMORY_DEVICE, deviceId_);
        APP_ERROR ret = MxBase::TensorBase::TensorBaseMalloc(tensor);
        if (ret != APP_ERR_OK) {
            LogError << "TensorBaseMalloc failed, ret=" << ret << ".";
            return ret;
        }
        (*outputs).push_back(tensor);
    }
    MxBase::DynamicInfo dynamicInfo = {};
    dynamicInfo.dynamicType = MxBase::DynamicType::STATIC_BATCH;
    auto startTime = std::chrono::high_resolution_clock::now();
    APP_ERROR ret = model_->ModelInference(inputs, *outputs, dynamicInfo);
    auto endTime = std::chrono::high_resolution_clock::now();
    double costMs = std::chrono::duration<double, std::milli>(endTime - startTime).count();
    inferCostTimeMilliSec += costMs;
    if (ret != APP_ERR_OK) {
        LogError << "ModelInference failed, ret=" << ret << ".";
        return ret;
    }
    return APP_ERR_OK;
}
// note: despite the parameter names, "inputs" holds the model output tensors and
// "outputs" receives their flattened float values
APP_ERROR MELGAN::SaveInferResult(std::vector<float> *outputs, std::vector<MxBase::TensorBase> *inputs) {
    MxBase::TensorBase &tensor = inputs->at(0);
    APP_ERROR ret = tensor.ToHost();
    if (ret != APP_ERR_OK) {
        LogError << GetError(ret) << "Tensor deploy to host failed.";
        return ret;
    }
    // the output tensor is expected to be 3-dimensional: (batch, channel, samples)
    auto outputShape = tensor.GetShape();
    uint32_t length = outputShape[2];
    LogInfo << "output shape is: (" << outputShape[0] << ',' << outputShape[1] << ',' << outputShape[2] << ')';
    void *data = tensor.GetBuffer();
    for (uint32_t i = 0; i < length; i++) {
        float value = *(reinterpret_cast<float *>(data) + i);
        outputs->emplace_back(value);
    }
    return APP_ERR_OK;
}
APP_ERROR MELGAN::Process(const std::string &fileName, const std::vector<std::vector<std::vector<float>>> &input_x,
                          InitParam &initParam, std::vector<float> output) {
    std::vector<MxBase::TensorBase> inputs = {};
    std::vector<MxBase::TensorBase> outputs;
    MxBase::TensorBase tensorBase;
    auto ret = VectorToTensorBase(input_x, &tensorBase);
    if (ret != APP_ERR_OK) {
        LogError << "ToTensorBase failed, ret=" << ret << ".";
        return ret;
    }
    inputs.push_back(tensorBase);
    // Inference() already accumulates its own cost into inferCostTimeMilliSec;
    // timing it again here would double-count the inference time
    ret = Inference(inputs, &outputs);
    if (ret != APP_ERR_OK) {
        LogError << "Inference failed, ret=" << ret << ".";
        return ret;
    }
    ret = SaveInferResult(&output, &outputs);
    if (ret != APP_ERR_OK) {
        LogError << "Save model infer results into file failed. ret = " << ret << ".";
        return ret;
    }
    ret = WriteResult(fileName, output);
    if (ret != APP_ERR_OK) {
        LogError << "WriteResult failed, ret=" << ret << ".";
        return ret;
    }
    return APP_ERR_OK;
}
APP_ERROR MELGAN::WriteResult(const std::string &fileName,
                              const std::vector<float> &output) {
    std::string resultPathName = "output";
    // create result directory when it does not exist
    if (access(resultPathName.c_str(), 0) != 0) {
        int ret = mkdir(resultPathName.c_str(), S_IRUSR | S_IWUSR | S_IXUSR);
        if (ret != 0) {
            LogError << "Failed to create result directory: " << resultPathName
                     << ", ret = " << ret;
            return APP_ERR_COMM_OPEN_FAIL;
        }
    }
    // create result file under result directory
    resultPathName = resultPathName + "/restruction_" + fileName;
    std::ofstream tfile(resultPathName, std::ofstream::app);
    if (tfile.fail()) {
        LogError << "Failed to open result file: " << resultPathName;
        return APP_ERR_COMM_OPEN_FAIL;
    }
    for (uint32_t i = 0; i < output.size(); i++) {
        tfile << std::to_string(output[i]) << " ";
    }
    tfile.close();
    return APP_ERR_OK;
}
/*
* Copyright 2022 Huawei Technologies Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MxBase_MELGAN_H
#define MxBase_MELGAN_H

#include <memory>
#include <string>
#include <vector>
#include "acl/acl.h"
#include "MxBase/DvppWrapper/DvppWrapper.h"
#include "MxBase/ModelInfer/ModelInferenceProcessor.h"
#include "MxBase/Tensor/TensorContext/TensorContext.h"

struct InitParam {
    uint32_t deviceId;
    bool checkTensor;
    std::string modelPath;
};

class MELGAN {
 public:
    APP_ERROR Init(const InitParam &initParam);
    APP_ERROR DeInit();
    APP_ERROR VectorToTensorBase(const std::vector<std::vector<std::vector<float>>> &input_x,
                                 MxBase::TensorBase *tensorBase);
    APP_ERROR Inference(const std::vector<MxBase::TensorBase> &inputs, std::vector<MxBase::TensorBase> *outputs);
    APP_ERROR Process(const std::string &fileName, const std::vector<std::vector<std::vector<float>>> &input_x,
                      InitParam &initParam, std::vector<float> output);
    APP_ERROR SaveInferResult(std::vector<float> *outputs, std::vector<MxBase::TensorBase> *inputs);
    APP_ERROR WriteResult(const std::string &fileName, const std::vector<float> &output);
    double GetInferCostMilliSec() const { return inferCostTimeMilliSec; }

 private:
    std::shared_ptr<MxBase::DvppWrapper> dvppWrapper_;
    std::shared_ptr<MxBase::ModelInferenceProcessor> model_;
    MxBase::ModelDesc modelDesc_;
    uint32_t deviceId_ = 0;
    double inferCostTimeMilliSec = 0.0;
};
#endif  // MxBase_MELGAN_H
/*
* Copyright 2022 Huawei Technologies Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <dirent.h>
#include <fstream>
#include <string>
#include <sstream>
#include <cstdlib>
#include <vector>
#include <cmath>
#include <cstdio>
#include "Melgan.h"
#include "MxBase/Log/Log.h"
// segment parameters, kept consistent with the Python pre/post-processing scripts
int eval_length = 240;   // mel frames per inference segment
int hop_size = 256;      // waveform samples per mel frame
int repeat_frame = 30;   // frames overlapped between consecutive segments
int sample = 22050;      // output sample rate in Hz
APP_ERROR ReadTxt(const std::string &path, std::vector<std::vector<std::vector<float>>> *dataset) {
    std::ifstream fp(path);
    std::string line;
    std::vector<std::vector<float>> data;
    int count = 0;
    while (std::getline(fp, line)) {
        std::vector<float> data_line;
        std::string number;
        std::istringstream readstr(line);
        for (int j = 0; j < eval_length; j++) {  // each row holds eval_length (240) mel values
            std::getline(readstr, number, ' ');
            data_line.push_back(static_cast<float>(atof(number.c_str())));
        }
        data.push_back(data_line);
        count++;
        if (count % 80 == 0) {  // every 80 rows form one (80, 240) mel segment
            std::vector<std::vector<float>> dataseg;
            for (int i = count - 80; i < count; i++) {
                dataseg.push_back(data[i]);
            }
            dataset->push_back(dataseg);
        }
    }
    return APP_ERR_OK;
}
int main(int argc, char *argv[]) {
    if (argc < 4) {
        LogError << "Usage: " << argv[0] << " <model_path> <eval_data_path> <list_filename>";
        return APP_ERR_COMM_INVALID_PARAM;
    }
    std::string model_path = argv[1];
    std::string eval_data_path = argv[2];
    std::string list_filename = argv[3];
    InitParam initParam = {};
    initParam.deviceId = 0;
    initParam.checkTensor = true;
    initParam.modelPath = model_path;
    auto melgan = std::make_shared<MELGAN>();
    printf("Start running\n");
    APP_ERROR ret = melgan->Init(initParam);
    if (ret != APP_ERR_OK) {
        melgan->DeInit();
        LogError << "melgan init failed, ret=" << ret << ".";
        return ret;
    }
    // get test data filename
    std::string path = eval_data_path + "/" + list_filename;
    std::ifstream fp(path);
    std::string filename;
    while (std::getline(fp, filename)) {
        LogInfo << "Start inference " << filename << std::endl;
        std::string dataPath = eval_data_path + "/" + filename;
        std::vector<std::vector<std::vector<float>>> test_data;
        ret = ReadTxt(dataPath, &test_data);
        if (ret != APP_ERR_OK) {
            melgan->DeInit();
            LogError << "read test_data failed, ret=" << ret << ".";
            return ret;
        }
        int data_seg = test_data.size();
        int data_row = test_data[0].size();
        int data_col = test_data[0][0].size();
        LogInfo << filename << " data shape: (" << data_seg << ',' << data_row << ',' << data_col << ')';
        for (int iter = 0; iter < data_seg; iter++) {
            std::vector<float> output;
            std::vector<std::vector<std::vector<float>>> data;
            data.push_back(test_data[iter]);
            ret = melgan->Process(filename, data, initParam, output);
            if (ret != APP_ERR_OK) {
                LogError << "melgan process failed, ret=" << ret << ".";
                melgan->DeInit();
                return ret;
            }
        }
        LogInfo << "File " << filename << " inference successfully!";
    }
    melgan->DeInit();
    return APP_ERR_OK;
}
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
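"""MelGAN MindX SDK inference: feed preprocessed mel segments through the
im_melgan stream defined in the pipeline file and save the raw waveform outputs."""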
import os
import argparse
import MxpiDataType_pb2 as MxpiDataType
import numpy as np
from StreamManagerApi import StreamManagerApi, InProtobufVector, MxProtobufIn, StringVector, MxDataInput
def inference(input_tensor):
    tensor_bytes = input_tensor.tobytes()
    in_plugin_id = 0
    tensorPackageList = MxpiDataType.MxpiTensorPackageList()
    tensorPackage = tensorPackageList.tensorPackageVec.add()
    dataInput = MxDataInput()
    dataInput.data = tensor_bytes
    tensorVec = tensorPackage.tensorVec.add()
    tensorVec.deviceId = 0
    tensorVec.memType = 0
    for t in input_tensor.shape:
        tensorVec.tensorShape.append(t)
    tensorVec.dataStr = dataInput.data
    tensorVec.tensorDataSize = len(tensor_bytes)
    # add feature data end
    key = "appsrc{}".format(in_plugin_id).encode('utf-8')
    protobufVec = InProtobufVector()
    protobuf = MxProtobufIn()
    protobuf.key = key
    protobuf.type = b'MxTools.MxpiTensorPackageList'
    protobuf.protobuf = tensorPackageList.SerializeToString()
    protobufVec.push_back(protobuf)
    unique_id = stream_manager_api.SendProtobuf(stream_name, in_plugin_id, protobufVec)
    if unique_id < 0:
        print("Failed to send data to stream.")
        exit()
    # Obtain the inference result by specifying streamName and uniqueId.
    keyVec = StringVector()
    keyVec.push_back(b'mxpi_tensorinfer0')
    infer_result = stream_manager_api.GetProtobuf(stream_name, in_plugin_id, keyVec)
    if infer_result.size() == 0:
        print("inferResult is null")
        exit()
    if infer_result[0].errorCode != 0:
        print("GetProtobuf error. errorCode=%d" % (
            infer_result[0].errorCode))
        exit()
    # get infer result
    result = MxpiDataType.MxpiTensorPackageList()
    result.ParseFromString(infer_result[0].messageBuf)
    # convert the inference result to Numpy array
    out = np.frombuffer(result.tensorPackageVec[0].tensorVec[0].dataStr, dtype=np.float32).ravel()
    return out
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--eval_path', type=str, default='../data/input/',
                        help="input data path")
    parser.add_argument('--pipeline_path', type=str, default='./output',
                        help='pipeline path')
    parser.add_argument('--output_path', type=str, default='./output',
                        help='output data path')
    parser.add_argument('--eval_length', type=int, default=240,
                        help='eval length')
    parser.add_argument('--hop_size', type=int, default=256,
                        help='hop size')
    parser.add_argument('--sample', type=int, default=22050,
                        help='sample rate')
    opts = parser.parse_args()
    eval_path = opts.eval_path
    output_path = opts.output_path
    pipeline_path = opts.pipeline_path
    eval_length = opts.eval_length
    hop_size = opts.hop_size
    sample = opts.sample
    repeat_frame = eval_length // 8
    # init stream manager
    stream_manager_api = StreamManagerApi()
    ret = stream_manager_api.InitManager()
    if ret != 0:
        print("Failed to init Stream manager, ret=%s" % str(ret))
        exit()
    # create streams by pipeline config file
    with open(pipeline_path, 'rb') as f:
        pipelineStr = f.read()
    ret = stream_manager_api.CreateMultipleStreams(pipelineStr)
    if ret != 0:
        print("Failed to create Stream, ret=%s" % str(ret))
        exit()
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    # Construct the input of the stream
    infer_total_time = 0
    files = os.listdir(eval_path)
    for file_name in files:
        if "_test.txt" in file_name:
            data_path = os.path.join(eval_path, file_name)
            all_test_data = np.loadtxt(data_path, dtype=np.float32)
            stream_name = b'im_melgan'
            all_test_data = all_test_data.reshape((-1, 1, 80, 240))
            num = all_test_data.shape[0]
            # run the segments through the stream one by one
            wav_data = np.array([])
            for idx in range(num):
                tensor = all_test_data[idx].reshape((1, 80, 240))
                output = inference(tensor)
                wav_data = np.concatenate((wav_data, output))
            # save as txt file
            out_path = os.path.join(output_path, 'restruction_' + file_name)
            np.savetxt(out_path, wav_data.reshape(-1), fmt='%.18e')
            print("File " + file_name + " inference successfully!")
    # destroy streams
    stream_manager_api.DestroyAllStreams()
#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
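# Run SDK inference through main.py.
# Usage: bash run.sh <pipeline_file> <eval_data_dir> <output_dir>
# MX_SDK_HOME must point at a MindX SDK installation.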
set -e
pipeline_path=$1
eval_path=$2
output_path=$3
info() { echo -e "\033[1;34m[INFO ][MxStream] $1\033[1;37m" ; }
warn() { echo >&2 -e "\033[1;31m[WARN ][MxStream] $1\033[1;37m" ; }
export LD_LIBRARY_PATH=${MX_SDK_HOME}/lib:${MX_SDK_HOME}/opensource/lib:${MX_SDK_HOME}/opensource/lib64:/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64:${LD_LIBRARY_PATH}
export GST_PLUGIN_SCANNER=${MX_SDK_HOME}/opensource/libexec/gstreamer-1.0/gst-plugin-scanner
export GST_PLUGIN_PATH=${MX_SDK_HOME}/opensource/lib/gstreamer-1.0:${MX_SDK_HOME}/lib/plugins
export PYTHONPATH=$PYTHONPATH:${MX_SDK_HOME}/python
python3 main.py --pipeline_path "$pipeline_path" --eval_path "$eval_path" --output_path "$output_path"
exit 0
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MelGAN eval"""
import argparse
import os
import numpy as np
from scipy.io.wavfile import write
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, default='../data/input/',
                        help="input data path")
    parser.add_argument('--output_path', type=str, default='../mxbase/output',
                        help='output data path')
    parser.add_argument('--eval_length', type=int, default=240,
                        help='eval length')
    parser.add_argument('--hop_size', type=int, default=256,
                        help='hop size')
    parser.add_argument('--sample', type=int, default=22050,
                        help='sample rate')
    opts = parser.parse_args()
    data_path = opts.data_path
    output_path = opts.output_path
    eval_length = opts.eval_length
    hop_size = opts.hop_size
    sample = opts.sample
    data_list = os.listdir(output_path)
    print(data_list)
    for data_name in data_list:
        if 'test.txt' in data_name:
            txt_data = np.loadtxt(os.path.join(output_path, data_name), dtype=np.float32).reshape((-1, 61440))
            melname = data_name.replace('txt', 'npy').replace('restruction_', '').replace('_test', '')
            meldata = np.load(os.path.join(data_path, melname)).reshape((80, -1))
            pad_node = 0
            if meldata.shape[1] < eval_length:
                pad_node = eval_length - meldata.shape[1]
            # first frame
            wav_data = np.array([])
            output = txt_data[0].ravel()
            wav_data = np.concatenate((wav_data, output))
            # initialization parameters
            repeat_frame = eval_length // 8
            i = eval_length - repeat_frame
            length = eval_length
            num_weights = i
            interval = (hop_size * repeat_frame) // num_weights
            weights = np.linspace(0.0, 1.0, num_weights)
            # walk the mel frames once to find how much padding the last segment needed
            while i < meldata.shape[1]:
                meldata_s = meldata[:, i:i + length]
                if meldata_s.shape[1] != eval_length:
                    pad_node = hop_size * (eval_length - meldata_s.shape[1])
                i = i + length - repeat_frame
            for idx in range(1, txt_data.shape[0]):
                # idx-th segment
                output = txt_data[idx].ravel()
                lenwav = hop_size * repeat_frame
                lenout = 0
                # cross-fade the overlapped region: ramp the old tail down while
                # ramping the new segment's head up
                for j in range(num_weights - 1):
                    wav_data[-lenwav:-lenwav + interval] = weights[-j - 1] * wav_data[-lenwav:-lenwav + interval] + \
                                                           weights[j] * output[lenout:lenout + interval]
                    lenwav = lenwav - interval
                    lenout = lenout + interval
                wav_data[-lenwav:] = weights[-num_weights] * wav_data[-lenwav:] + \
                                     weights[num_weights - 1] * output[lenout:lenout + lenwav]
                wav_data = np.concatenate((wav_data, output[hop_size * repeat_frame:]))
            if pad_node != 0:
                wav_data = wav_data[:-pad_node]
            # save as wav file
            wav_data = 32768.0 * wav_data
            out_path = os.path.join(output_path, 'restruction_' + data_name.replace('txt', 'wav'))
            write(out_path, sample, wav_data.astype('int16'))
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MelGAN eval"""
import os
import argparse
import numpy as np
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', type=str, default='../data/input/',
                        help="input data path")
    parser.add_argument('--eval_length', type=int, default=240,
                        help='eval length')
    parser.add_argument('--hop_size', type=int, default=256,
                        help='hop size')
    opts = parser.parse_args()
    data_path = opts.data_path
    eval_length = opts.eval_length
    hop_size = opts.hop_size
    file_list = os.listdir(data_path)
    data_list = []
    for data_name in file_list:
        if '.npy' in data_name:
            print(data_name)
            npypath = os.path.join(data_path, data_name)
            # data preprocessing: shift/scale the mel spectrogram
            meldata = np.load(npypath)
            meldata = (meldata + 5.0) / 5.0
            pad_node = 0
            if meldata.shape[1] < eval_length:
                pad_node = eval_length - meldata.shape[1]
                meldata = np.pad(meldata, ((0, 0), (0, pad_node)), mode='constant', constant_values=0.0)
            meldata_s = meldata[np.newaxis, :, 0:eval_length]
            new_data = meldata_s
            repeat_frame = eval_length // 8
            i = eval_length - repeat_frame
            length = eval_length
            while i < meldata.shape[1]:
                # slice the next overlapping segment, edge-padding the last one if short
                meldata_s = meldata[:, i:i + length]
                if meldata_s.shape[1] != eval_length:
                    pad_node = hop_size * (eval_length - meldata_s.shape[1])
                    meldata_s = np.pad(meldata_s, ((0, 0), (0, eval_length - meldata_s.shape[1])), mode='edge')
                meldata_s = meldata_s[np.newaxis, :, :]
                new_data = np.concatenate((new_data, meldata_s), axis=1)
                i = i + length - repeat_frame
            out_file = npypath.replace('.npy', '_test.txt')
            np.savetxt(out_file, new_data.reshape((-1, eval_length)), fmt='%.18e')
            d = np.loadtxt(out_file, dtype=np.float32)  # sanity check that the dump reloads cleanly
            data_list.append(data_name.replace('.npy', '_test.txt'))
            print((new_data.reshape((-1, eval_length))).shape)
    data_list_str = "\n".join(data_list)
    print(data_list_str)
    with open(os.path.join(data_path, 'data_list.txt'), 'w') as f:
        f.write(data_list_str)
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""MelGAN train"""
import os
import time
import numpy as np
import mindspore as ms
import mindspore.common.dtype as mstype
import mindspore.context as context
import mindspore.dataset as de
import mindspore.nn as nn
from mindspore.common import set_seed
from mindspore.common.tensor import Tensor
from mindspore.communication.management import init, get_rank, get_group_size
from mindspore.context import ParallelMode
from mindspore.train.callback import RunContext, ModelCheckpoint, CheckpointConfig, _InternalCallbackParam
from mindspore.train.loss_scale_manager import DynamicLossScaleManager
from mindspore.train.serialization import load_checkpoint, load_param_into_net, export
from src.dataset import Generator1D
from src.loss import MelganLoss_G, MelganLoss_D
from src.model import MultiDiscriminator, Generator
from src.model_utils.config import config as cfg
from src.model_utils.moxing_adapter import moxing_wrapper
from src.sampler import DistributedSampler
from src.trainonestep import TrainOneStepCellGEN, TrainOneStepCellDIS
set_seed(1)
class BuildGenNetwork(nn.Cell):
    """build generator"""
    def __init__(self, network, criterion):
        super(BuildGenNetwork, self).__init__(auto_prefix=False)
        self.network = network
        self.criterion = criterion

    def construct(self, data):
        fake_wav = self.network(data)
        return fake_wav


class BuildDisNetwork(nn.Cell):
    """build discriminator"""
    def __init__(self, network, criterion):
        super(BuildDisNetwork, self).__init__(auto_prefix=False)
        self.network = network
        self.criterion = criterion

    def construct(self, fake_wav, wav):
        y1 = self.network(fake_wav)
        y2 = self.network(wav)
        loss = self.criterion(y1, y2)
        return loss
@moxing_wrapper()
def train():
    """main train process"""
    # init distributed
    if cfg.run_distribute:
        device_id = int(os.getenv('DEVICE_ID', '0'))
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id)
        init()
        cfg.rank = get_rank()
        cfg.group_size = get_group_size()
        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True,
                                          device_num=8, parameter_broadcast=True)
    else:
        cfg.rank = 0
        cfg.group_size = 1
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=cfg.device_id)
    # get network and init
    net_D = MultiDiscriminator()
    net_G = Generator(alpha=cfg.leaky_alpha)
    criterion_G = MelganLoss_G()
    criterion_D = MelganLoss_D()
    gen_network_train = BuildGenNetwork(net_G, criterion_G)
    gen_network_train.set_train()
    dis_network_train_1 = BuildDisNetwork(net_D, criterion_G)
    dis_network_train_1.set_train()
    dis_network_train_2 = BuildDisNetwork(net_D, criterion_D)
    dis_network_train_2.set_train()
    scale_manager = DynamicLossScaleManager(init_loss_scale=2 ** 10, scale_factor=2, scale_window=2000)
    # optimizer
    opt_G = nn.Adam(params=net_G.trainable_params(), learning_rate=cfg.lr_g, beta1=cfg.beta1, beta2=cfg.beta2,
                    weight_decay=cfg.weight_decay)
    opt_D = nn.Adam(params=net_D.trainable_params(), learning_rate=cfg.lr_d, beta1=cfg.beta1, beta2=cfg.beta2,
                    weight_decay=cfg.weight_decay)
    if cfg.pre_trained:
        param_dict = load_checkpoint(cfg.checkpoint_path)
        load_param_into_net(net_G, param_dict)
        load_param_into_net(net_D, param_dict)
    gen_network_train_wrap = TrainOneStepCellGEN(gen_network_train, opt_G, dis_network_train_1, criterion_G)
    dis_network_train_wrap = TrainOneStepCellDIS(gen_network_train, dis_network_train_2, opt_D, criterion_D)
    # dataloader
    Wavmeldataset = Generator1D(cfg.data_path, cfg.train_length, cfg.hop_size)
    distributed_sampler = DistributedSampler(len(Wavmeldataset), cfg.group_size, cfg.rank, shuffle=True)
    dataset = de.GeneratorDataset(Wavmeldataset, ["data", "wav", "datad", "wavd"], sampler=distributed_sampler)
    dataset = dataset.batch(cfg.batch_size, drop_remainder=True)
    # checkpoint save
    config_ck = CheckpointConfig(save_checkpoint_steps=cfg.save_steps, keep_checkpoint_max=100000)
    ckpt_cb = ModelCheckpoint(prefix=cfg.save_checkpoint_name, directory=cfg.train_url, config=config_ck)
    cb_params = _InternalCallbackParam()
    cb_params.train_network = gen_network_train_wrap
    cb_params.epoch_num = cfg.epoch_size
    run_context = RunContext(cb_params)
    ckpt_cb.begin(run_context)
    i = 1
    print(cfg.epoch_size)
    epoch_t = time.perf_counter()
    # epoch loop
    for epoch in range(cfg.epoch_size):
        cb_params.cur_epoch_num = epoch + 1
        for data, wav, datad, wavd in dataset.create_tuple_iterator():
            scaling_sens = Tensor(scale_manager.get_loss_scale(), dtype=mstype.float32)
            start = time.perf_counter()
            data = (data + 5.0) / 5.0
            datad = (datad + 5.0) / 5.0
            _, loss_G, cond_g = gen_network_train_wrap(Tensor(wav, mstype.float32), Tensor(data, mstype.float32),
                                                       scaling_sens)
            _, loss_D, cond_d = dis_network_train_wrap(Tensor(datad, mstype.float32), Tensor(wavd, mstype.float32),
                                                       scaling_sens)
            if cond_g:
                scale_manager.update_loss_scale(cond_g)
            else:
                scale_manager.update_loss_scale(False)
            if cond_d:
                scale_manager.update_loss_scale(cond_d)
            else:
                scale_manager.update_loss_scale(False)
            duration = time.perf_counter() - start
            print('{}epoch {}iter loss_G={} loss_D={} {:.2f}s/it'.format(epoch + 1, i, loss_G.asnumpy(),
                                                                         loss_D.asnumpy(), duration))
            i = i + 1
            if cfg.rank == 0:
                cb_params.cur_step_num = i
                cb_params.batch_num = i
                ckpt_cb.step_end(run_context)
    duration = time.perf_counter() - epoch_t
    print('finish in {:.2f}mins'.format(duration / 60))
    input_arr = Tensor(np.random.uniform(0.0, 1.0, size=[1, 80, 240]), ms.float32)
    export(net_G, input_arr, file_name=os.path.join(cfg.train_url, 'melgan_final'), file_format="AIR")


if __name__ == "__main__":
    train()
#!/bin/bash
# Copyright (c) 2022. Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
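# Start a training container with all eight Ascend devices mapped in.
# Usage: bash <this script> <docker_image> <data_dir> <model_dir>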
docker_image=$1
data_dir=$2
model_dir=$3
docker run -it -u root --ipc=host \
--device=/dev/davinci0 \
--device=/dev/davinci1 \
--device=/dev/davinci2 \
--device=/dev/davinci3 \
--device=/dev/davinci4 \
--device=/dev/davinci5 \
--device=/dev/davinci6 \
--device=/dev/davinci7 \
--device=/dev/davinci_manager \
--device=/dev/devmm_svm \
--device=/dev/hisi_hdc \
--privileged \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons \
-v ${data_dir}:${data_dir} \
-v ${model_dir}:${model_dir} \
-v /root/ascend/log:/root/ascend/log ${docker_image} /bin/bash