Unverified commit e0330a22, authored by i-robot, committed by Gitee

!3159 [Zhejiang University][University Contribution][MindSpore][fasttext] - High-performance pre-trained model submission, accuracy meets the target

Merge pull request !3159 from 张璇/fasttext
parents f25a150a 75297ef8
Showing 1240 additions and 0 deletions
#!/usr/bin/env bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
model_type=$1
output_name=$2
atc --model=${model_type} \
--framework=1 \
--output=${output_name} \
--soc_version=Ascend310
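A minimal invocation sketch for the conversion step above, assuming the script is saved as convert_om.sh and the AIR model exported from training sits under ../data/model (both names are illustrative, not taken from this commit):

# Convert the exported AIR model into an offline OM model for Ascend 310;
# the output path matches the modelPath used in the pipeline config below.
bash convert_om.sh ../data/model/fasttext_agnews.air ../data/model/fasttext_agnews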
{
"fasttext": {
"stream_config": {
"deviceId": "0"
},
"appsrc0": {
"props": {
"blocksize": "409600"
},
"factory": "appsrc",
"next": "mxpi_tensorinfer0:0"
},
"appsrc1": {
"props": {
"blocksize": "409600"
},
"factory": "appsrc",
"next": "mxpi_tensorinfer0:1"
},
"mxpi_tensorinfer0": {
"props": {
"dataSource":"appsrc0,appsrc1",
"modelPath": "../data/model/fasttext_agnews.om"
},
"factory": "mxpi_tensorinfer",
"next": "mxpi_dataserialize0"
},
"mxpi_dataserialize0": {
"props": {
"outputDataKeys": "mxpi_tensorinfer0"
},
"factory": "mxpi_dataserialize",
"next": "appsink0"
},
"appsink0": {
"props": {
"blocksize": "4096000"
},
"factory": "appsink"
}
}
}
#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
docker_image=$1
share_dir=$2
data_dir=$3
echo "$1"
echo "$2"
if [ -z "${docker_image}" ]; then
echo "please input docker_image"
exit 1
fi
if [ ! -d "${share_dir}" ]; then
echo "please input share directory that contains dataset, models and codes"
exit 1
fi
docker run -it -u root \
--device=/dev/davinci0 \
--device=/dev/davinci_manager \
--device=/dev/devmm_svm \
--device=/dev/hisi_hdc \
--privileged \
-v /usr/local/bin/npu-smi:/usr/local/bin/npu-smi \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v ${data_dir}:${data_dir} \
-v ${share_dir}:${share_dir} \
${docker_image} \
/bin/bash
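A hypothetical launch of the inference container via the script above; the script name, image tag, and host directories are assumptions, not taken from this commit:

# args: <docker_image> <share_dir with code/models/dataset> <data_dir>
bash docker_start_infer.sh mindx-sdk:latest /home/fasttext /home/fasttext/data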
cmake_minimum_required(VERSION 3.10.0)
project(fasttext)
set(TARGET fasttext)
add_definitions(-DENABLE_DVPP_INTERFACE)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
add_definitions(-Dgoogle=mindxsdk_private)
add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall)
add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie)
# Check environment variable
if(NOT DEFINED ENV{ASCEND_HOME})
message(FATAL_ERROR "please define environment variable:ASCEND_HOME")
endif()
if(NOT DEFINED ENV{ASCEND_VERSION})
message(WARNING "please define environment variable:ASCEND_VERSION")
endif()
if(NOT DEFINED ENV{ARCH_PATTERN})
message(WARNING "please define environment variable:ARCH_PATTERN")
endif()
set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include)
set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64)
set(MXBASE_ROOT_DIR $ENV{MX_SDK_HOME})
set(MXBASE_INC ${MXBASE_ROOT_DIR}/include)
set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/lib)
set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/lib/modelpostprocessors)
set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/include/MxBase/postprocess/include)
if(DEFINED ENV{MXSDK_OPENSOURCE_DIR})
set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR})
else()
set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource)
endif()
include_directories(${ACL_INC_DIR})
include_directories(${OPENSOURCE_DIR}/include)
include_directories(${OPENSOURCE_DIR}/include/opencv4)
include_directories(${MXBASE_INC})
include_directories(${MXBASE_POST_PROCESS_DIR})
link_directories(${ACL_LIB_DIR})
link_directories(${OPENSOURCE_DIR}/lib)
link_directories(${MXBASE_LIB_DIR})
link_directories(${MXBASE_POST_LIB_DIR})
add_executable(${TARGET} src/main.cpp src/Fasttext.cpp)
target_link_libraries(${TARGET} glog cpprest mxbase opencv_world stdc++fs)
install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/)
#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
path_cur=$(dirname $0)
# set ASCEND_VERSION to ascend-toolkit/latest when it was not specified by user
if [ ! "${ASCEND_VERSION}" ]; then
export ASCEND_VERSION=ascend-toolkit/latest
echo "Set ASCEND_VERSION to the default value: ${ASCEND_VERSION}"
else
echo "ASCEND_VERSION is set to ${ASCEND_VERSION} by user"
fi
if [ ! "${ARCH_PATTERN}" ]; then
# set ARCH_PATTERN to ./ when it was not specified by user
export ARCH_PATTERN=./
echo "ARCH_PATTERN is set to the default value: ${ARCH_PATTERN}"
else
echo "ARCH_PATTERN is set to ${ARCH_PATTERN} by user"
fi
cd "$path_cur" || exit
rm -rf build
mkdir -p build
cd build
cmake ..
make
ret=$?
if [ ${ret} -ne 0 ]; then
echo "Failed to build fasttext."
exit ${ret}
fi
make install
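A sketch of the environment the CMake build above expects, assuming the build script is saved as build.sh, an Ascend toolkit is installed under /usr/local/Ascend, and the MindX SDK under /usr/local/sdk_home/mxManufacture (all paths are assumptions):

export ASCEND_HOME=/usr/local/Ascend
export ASCEND_VERSION=ascend-toolkit/latest
export ARCH_PATTERN=x86_64-linux
export MX_SDK_HOME=/usr/local/sdk_home/mxManufacture
bash build.sh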
#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
modelPath=$1
inferSrcTokensPath=$2
resultName=$3
# run
./build/fasttext ${modelPath} ${inferSrcTokensPath} ${resultName}
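A hypothetical run of the MxBase executable through the wrapper above (the script name run.sh is an assumption; the arguments mirror the defaults hard-coded in src/main.cpp):

bash run.sh ../data/model/fasttext_agnews.om ../data/input/src_tokens.txt mxbase_predictions_sens.txt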
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "Fasttext.h"
#include <sys/stat.h>
#include <unistd.h>
#include <algorithm>
#include <chrono>
#include <fstream>
#include <map>
#include <sstream>
#include "MxBase/DeviceManager/DeviceManager.h"
#include "MxBase/Log/Log.h"
APP_ERROR FasttextNerBase::Init(const InitParam &initParam) {
deviceId_ = initParam.deviceId;
resultName_ = initParam.resultName;
APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices();
if (ret != APP_ERR_OK) {
LogError << "Init devices failed, ret=" << ret << ".";
return ret;
}
ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId);
if (ret != APP_ERR_OK) {
LogError << "Set context failed, ret=" << ret << ".";
return ret;
}
dvppWrapper_ = std::make_shared<MxBase::DvppWrapper>();
ret = dvppWrapper_->Init();
if (ret != APP_ERR_OK) {
LogError << "DvppWrapper init failed, ret=" << ret << ".";
return ret;
}
model_ = std::make_shared<MxBase::ModelInferenceProcessor>();
ret = model_->Init(initParam.modelPath, modelDesc_);
if (ret != APP_ERR_OK) {
LogError << "ModelInferenceProcessor init failed, ret=" << ret << ".";
return ret;
}
return APP_ERR_OK;
}
APP_ERROR FasttextNerBase::DeInit() {
dvppWrapper_->DeInit();
model_->DeInit();
MxBase::DeviceManager::GetInstance()->DestroyDevices();
return APP_ERR_OK;
}
APP_ERROR FasttextNerBase::ReadInputTensor(int32_t *data, uint32_t index, std::vector<MxBase::TensorBase> *inputs,
const uint32_t size) {
const uint32_t dataSize = modelDesc_.inputTensors[index].tensorSize;
MxBase::MemoryData memoryDataDst(dataSize, MxBase::MemoryData::MEMORY_DEVICE, deviceId_);
MxBase::MemoryData memoryDataSrc(reinterpret_cast<void *>(data), dataSize,
MxBase::MemoryData::MEMORY_HOST_MALLOC);
APP_ERROR ret = MxBase::MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc);
    if (ret != APP_ERR_OK) {
        LogError << GetError(ret) << "Memory malloc and copy failed.";
        delete[] data;
        return ret;
    }
std::vector<uint32_t> shape = {1, size};
inputs->push_back(MxBase::TensorBase(memoryDataDst, false, shape, MxBase::TENSOR_DTYPE_INT32));
delete[] data;
return APP_ERR_OK;
}
APP_ERROR FasttextNerBase::Inference(const std::vector<MxBase::TensorBase> &inputs,
std::vector<MxBase::TensorBase> *outputs) {
auto dtypes = model_->GetOutputDataType();
for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) {
std::vector<uint32_t> shape = {};
for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) {
shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]);
}
MxBase::TensorBase tensor(shape, dtypes[i], MxBase::MemoryData::MemoryType::MEMORY_DEVICE, deviceId_);
APP_ERROR ret = MxBase::TensorBase::TensorBaseMalloc(tensor);
if (ret != APP_ERR_OK) {
LogError << "TensorBaseMalloc failed, ret=" << ret << ".";
return ret;
}
outputs->push_back(tensor);
}
MxBase::DynamicInfo dynamicInfo = {};
dynamicInfo.dynamicType = MxBase::DynamicType::STATIC_BATCH;
auto startTime = std::chrono::high_resolution_clock::now();
APP_ERROR ret = model_->ModelInference(inputs, *outputs, dynamicInfo);
auto endTime = std::chrono::high_resolution_clock::now();
double costMs = std::chrono::duration<double, std::milli>(endTime - startTime).count();
g_inferCost.push_back(costMs);
if (ret != APP_ERR_OK) {
LogError << "ModelInference failed, ret=" << ret << ".";
return ret;
}
return APP_ERR_OK;
}
APP_ERROR FasttextNerBase::PostProcess(std::vector<MxBase::TensorBase> *outputs,
std::vector<uint32_t> *predict) {
MxBase::TensorBase &tensor = outputs->at(0);
APP_ERROR ret = tensor.ToHost();
if (ret != APP_ERR_OK) {
LogError << GetError(ret) << "Tensor deploy to host failed.";
return ret;
}
// check tensor is available
auto outputShape = tensor.GetShape();
uint32_t length = outputShape[0];
void *data = tensor.GetBuffer();
for (uint32_t i = 0; i < length; i++) {
int32_t value = *(reinterpret_cast<int32_t *>(data) + i);
predict->push_back(value);
}
return APP_ERR_OK;
}
APP_ERROR FasttextNerBase::WriteResult(const std::string &fileName, const std::vector<uint32_t> &predict) {
std::ofstream tfile(fileName, std::ofstream::app);
if (tfile.fail()) {
LogError << "Failed to open result file: " << fileName;
return APP_ERR_COMM_OPEN_FAIL;
}
// write inference result into file
LogInfo << "==============================================================";
LogInfo << "Infer finished!";
tfile << predict[0];
tfile << std::endl;
LogInfo << "==============================================================";
tfile.close();
return APP_ERR_OK;
}
APP_ERROR FasttextNerBase::Process(const std::string &inferSrcTokensPath, const std::string &fileName) {
APP_ERROR ret;
std::ifstream fp1(inferSrcTokensPath);
std::string line1;
while (std::getline(fp1, line1)) {
int32_t *data1 = new int32_t[maxLength_];
int32_t *data2 = new int32_t[1];
std::vector<MxBase::TensorBase> inputs = {};
std::vector<MxBase::TensorBase> outputs = {};
std::string number1;
std::istringstream readstr1(line1);
for (uint32_t j = 0; j < maxLength_; j++) {
std::getline(readstr1, number1, ' ');
data1[j] = atoi(number1.c_str());
}
data2[0] = maxLength_;
ret = ReadInputTensor(data1, INPUT_SRCTOKENS, &inputs, maxLength_);
if (ret != APP_ERR_OK) {
LogError << "Read input src_tokens failed, ret=" << ret << ".";
return ret;
}
ret = ReadInputTensor(data2, INPUT_SRCTOKENSLENGTH, &inputs, 1);
if (ret != APP_ERR_OK) {
LogError << "Read input src_tokens_length file failed, ret=" << ret << ".";
return ret;
}
ret = Inference(inputs, &outputs);
if (ret != APP_ERR_OK) {
LogError << "Inference failed, ret=" << ret << ".";
return ret;
}
std::vector<uint32_t> predict;
ret = PostProcess(&outputs, &predict);
if (ret != APP_ERR_OK) {
LogError << "PostProcess failed, ret=" << ret << ".";
return ret;
}
ret = WriteResult(fileName, predict);
if (ret != APP_ERR_OK) {
LogError << "save result failed, ret=" << ret << ".";
return ret;
}
}
return APP_ERR_OK;
}
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MXBASE_FASTTEXTBASE_H
#define MXBASE_FASTTEXTBASE_H
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "MxBase/DvppWrapper/DvppWrapper.h"
#include "MxBase/ModelInfer/ModelInferenceProcessor.h"
#include "MxBase/Tensor/TensorContext/TensorContext.h"
extern std::vector<double> g_inferCost;
struct InitParam {
uint32_t deviceId;
std::string modelPath;
std::string inferSrcTokensPath;
std::string resultName;
};
enum DataIndex {
INPUT_SRCTOKENS = 0,
INPUT_SRCTOKENSLENGTH = 1,
};
class FasttextNerBase {
public:
APP_ERROR Init(const InitParam &initParam);
APP_ERROR DeInit();
APP_ERROR Inference(const std::vector<MxBase::TensorBase> &inputs, std::vector<MxBase::TensorBase> *outputs);
APP_ERROR Process(const std::string &inferSrcTokensPath, const std::string &fileName);
APP_ERROR PostProcess(std::vector<MxBase::TensorBase> *outputs, std::vector<uint32_t> *predict);
protected:
APP_ERROR ReadInputTensor(int32_t *data, uint32_t index, std::vector<MxBase::TensorBase> *inputs,
const uint32_t size);
APP_ERROR WriteResult(const std::string &fileName, const std::vector<uint32_t> &predict);
private:
std::shared_ptr<MxBase::DvppWrapper> dvppWrapper_;
std::shared_ptr<MxBase::ModelInferenceProcessor> model_;
MxBase::ModelDesc modelDesc_ = {};
uint32_t deviceId_ = 0;
uint32_t maxLength_ = 467;
std::string resultName_ = "";
};
#endif
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <dirent.h>
#include <unistd.h>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <vector>
#include "Fasttext.h"
#include "MxBase/Log/Log.h"
std::vector<double> g_inferCost;
void InitFasttextParam(InitParam* initParam) {
initParam->deviceId = 0;
initParam->modelPath = "../data/model/fasttext_agnews.om";
initParam->inferSrcTokensPath = "../data/input/src_tokens.txt";
initParam->resultName = "mxbase_predictions_sens.txt";
}
int main(int argc, char* argv[]) {
    if (argc < 4) {
LogWarn << "Please input model_path, infer_src_tokens_path and result_name.";
return APP_ERR_OK;
}
InitParam initParam;
InitFasttextParam(&initParam);
initParam.modelPath = argv[1];
initParam.inferSrcTokensPath = argv[2];
initParam.resultName = argv[3];
auto fasttextBase = std::make_shared<FasttextNerBase>();
APP_ERROR ret = fasttextBase->Init(initParam);
if (ret != APP_ERR_OK) {
LogError << "Fasttextbase init failed, ret=" << ret << ".";
return ret;
}
// process
ret = fasttextBase->Process(initParam.inferSrcTokensPath, initParam.resultName);
if (ret != APP_ERR_OK) {
LogError << "Fasttextbase process failed, ret=" << ret << ".";
fasttextBase->DeInit();
return ret;
}
fasttextBase->DeInit();
double costSum = 0;
for (uint32_t i = 0; i < g_inferCost.size(); i++) {
costSum += g_inferCost[i];
}
LogInfo << "Infer texts sum " << g_inferCost.size()
<< ", cost total time: " << costSum << " ms.";
LogInfo << "The throughput: " << g_inferCost.size() * 1000 / costSum
<< " bin/sec.";
return APP_ERR_OK;
}
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
sample script of CLUE infer using SDK run in docker
"""
import argparse
import os
import datetime
import numpy as np
import MxpiDataType_pb2 as MxpiDataType
from StreamManagerApi import StreamManagerApi, InProtobufVector, MxProtobufIn, StringVector, MxDataInput
def parse_args():
"""set and check parameters."""
parser = argparse.ArgumentParser(description="Mass process")
parser.add_argument("--pipeline", type=str, default="../data/config/fasttext.pipline", help="SDK infer pipeline")
parser.add_argument("--data_dir", type=str, default="../data/input")
args_opt = parser.parse_args()
return args_opt
def send_source_data(tensor, tensor_bytes, name, manager_api, in_plugin_id):
"""
Construct the input of the stream,
send inputs data to a specified stream based on streamName.
Returns:
bool: send data success or not
"""
tensorPackageList = MxpiDataType.MxpiTensorPackageList()
tensorPackage = tensorPackageList.tensorPackageVec.add()
dataInput = MxDataInput()
dataInput.data = tensor_bytes
tensorVec = tensorPackage.tensorVec.add()
tensorVec.deviceId = 0
tensorVec.memType = 0
for t in tensor.shape:
tensorVec.tensorShape.append(t)
tensorVec.dataStr = dataInput.data
tensorVec.tensorDataSize = len(tensor_bytes)
key = "appsrc{}".format(in_plugin_id).encode('utf-8')
protobufVec = InProtobufVector()
protobuf = MxProtobufIn()
protobuf.key = key
protobuf.type = b'MxTools.MxpiTensorPackageList'
protobuf.protobuf = tensorPackageList.SerializeToString()
protobufVec.push_back(protobuf)
unique_id = manager_api.SendProtobuf(name, in_plugin_id, protobufVec)
if unique_id < 0:
print("Failed to send data to stream.")
exit()
def w2txt(file_path, data):
with open(file_path, "w") as file:
for i in range(data.shape[0]):
s = ' '.join(str(num) for num in data[i])
file.write(s+"\n")
if __name__ == '__main__':
args = parse_args()
# init stream manager
stream_manager_api = StreamManagerApi()
ret = stream_manager_api.InitManager()
if ret != 0:
print("Failed to init Stream manager, ret=%s" % str(ret))
exit()
# create streams by pipeline config file
with open(args.pipeline, 'rb') as f:
pipelineStr = f.read()
ret = stream_manager_api.CreateMultipleStreams(pipelineStr)
if ret != 0:
print("Failed to create Stream, ret=%s" % str(ret))
exit()
# Construct the input of the stream
infer_total_time = 0
src_tokens_path = os.path.join(args.data_dir, "src_tokens.txt")
target_sens_path = os.path.join(args.data_dir, "target_sens.txt")
src_tokens = np.loadtxt(src_tokens_path, dtype=np.int32).reshape(-1, 467)
target_sens = np.loadtxt(target_sens_path, dtype=np.int32).reshape(-1, 1)
output_name = "predict_sens.txt"
stream_name = b'fasttext'
num = src_tokens.shape[0]
predictions = []
for idx in range(num):
tensor0 = src_tokens[idx]
tensor0 = np.expand_dims(tensor0, 0)
tensor_bytes0 = tensor0.tobytes()
send_source_data(tensor0, tensor_bytes0, stream_name, stream_manager_api, 0)
tensor1 = np.array([len(src_tokens[idx])]).astype(np.int32)
tensor1 = np.expand_dims(tensor1, 0)
tensor_bytes1 = tensor1.tobytes()
send_source_data(tensor1, tensor_bytes1, stream_name, stream_manager_api, 1)
# Obtain the inference result by specifying streamName and uniqueId.
start_time = datetime.datetime.now()
keyVec = StringVector()
keyVec.push_back(b'mxpi_tensorinfer0')
        infer_result = stream_manager_api.GetProtobuf(stream_name, 0, keyVec)
        infer_total_time += (datetime.datetime.now() - start_time).total_seconds()
if infer_result.size() == 0:
print("inferResult is null")
exit()
if infer_result[0].errorCode != 0:
print("GetProtobuf error. errorCode=%d" % (
infer_result[0].errorCode))
exit()
# get infer result
result = MxpiDataType.MxpiTensorPackageList()
result.ParseFromString(infer_result[0].messageBuf)
# convert the inference result to Numpy array
output = np.frombuffer(result.tensorPackageVec[0].tensorVec[0].dataStr, dtype=np.int32)
predictions.append(output)
predictions_sents = np.array(predictions).astype(np.int32)
w2txt("sdk_predictions_sens.txt", predictions_sents)
# computer accuracy
from sklearn.metrics import accuracy_score, classification_report
target_sens = np.array(target_sens).flatten()
merge_target_sens = []
target_label1 = ['0', '1', '2', '3']
for target_sen in target_sens:
merge_target_sens.extend([target_sen])
target_sens = merge_target_sens
predictions = np.array(predictions).flatten()
merge_predictions = []
for prediction in predictions:
merge_predictions.extend([prediction])
predictions = merge_predictions
acc = accuracy_score(target_sens, predictions)
result_report = classification_report(target_sens, predictions, target_names=target_label1)
print("********Accuracy: ", acc)
print(result_report)
# destroy streams
stream_manager_api.DestroyAllStreams()
#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -e
# Simple log helper functions
info() { echo -e "\033[1;34m[INFO ][MxStream] $1\033[1;37m" ; }
warn() { echo >&2 -e "\033[1;31m[WARN ][MxStream] $1\033[1;37m" ; }
export LD_LIBRARY_PATH=${MX_SDK_HOME}/lib:${MX_SDK_HOME}/opensource/lib:${MX_SDK_HOME}/opensource/lib64:/usr/local/Ascend/ascend-toolkit/latest/acllib/lib64:${LD_LIBRARY_PATH}
export GST_PLUGIN_SCANNER=${MX_SDK_HOME}/opensource/libexec/gstreamer-1.0/gst-plugin-scanner
export GST_PLUGIN_PATH=${MX_SDK_HOME}/opensource/lib/gstreamer-1.0:${MX_SDK_HOME}/lib/plugins
#to set PYTHONPATH, import the StreamManagerApi.py
export PYTHONPATH=$PYTHONPATH:${MX_SDK_HOME}/python
python3 main.py
exit 0
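A hypothetical SDK run, assuming the MindX SDK is installed under /usr/local/sdk_home/mxManufacture (path is an assumption) and the wrapper above is saved as run.sh next to main.py:

export MX_SDK_HOME=/usr/local/sdk_home/mxManufacture
bash run.sh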
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""postprocess_infer data"""
import argparse
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
parser = argparse.ArgumentParser(description='Postprocess of Fasttext Inference')
parser.add_argument('--target_label_path', type=str)
parser.add_argument('--predict_label_path', type=str)
args = parser.parse_args()
target_sens = np.loadtxt(args.target_label_path, dtype=np.int32).reshape(-1, 1)
predictions = np.loadtxt(args.predict_label_path, dtype=np.int32).reshape(-1, 1)
target_sens = np.array(target_sens).flatten()
merge_target_sens = []
target_label1 = ['0', '1', '2', '3']
for target_sen in target_sens:
merge_target_sens.extend([target_sen])
target_sens = merge_target_sens
predictions = np.array(predictions).flatten()
merge_predictions = []
for prediction in predictions:
merge_predictions.extend([prediction])
predictions = merge_predictions
acc = accuracy_score(target_sens, predictions)
result_report = classification_report(target_sens, predictions, target_names=target_label1)
print("********Accuracy: ", acc)
print(result_report)
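A hypothetical invocation of this post-processing step, assuming it is saved as postprocess.py; target_sens.txt comes from the preprocessing export below and mxbase_predictions_sens.txt is the default result name written by the MxBase program:

python3 postprocess.py --target_label_path=../data/input/target_sens.txt --predict_label_path=mxbase_predictions_sens.txt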
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FastText for Evaluation"""
import argparse
import os
import mindspore.common.dtype as mstype
import mindspore.dataset as ds
import mindspore.dataset.transforms.c_transforms as deC
from mindspore import context
import numpy as np
parser = argparse.ArgumentParser(description='FastText Classification')
parser.add_argument('--data_name', type=str, default='ag')
parser.add_argument('--device_target', default='CPU', type=str)
parser.add_argument('--batch_size', default=512, type=int)
parser.add_argument('--dataset_path', type=str)
parser.add_argument('--test_buckets', default=[467], type=int, nargs='+')
parser.add_argument('--outputdir', default='', type=str)
args = parser.parse_args()
if args.data_name == "ag":
target_label1 = ['0', '1', '2', '3']
elif args.data_name == 'dbpedia':
target_label1 = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13']
elif args.data_name == 'yelp_p':
target_label1 = ['0', '1']
context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target=args.device_target)
def load_infer_dataset(batch_size, datafile, bucket):
"""data loader for infer"""
def batch_per_bucket(bucket_length, input_file):
        input_file = input_file + '/test_dataset_bs_' + str(bucket_length) + '.mindrecord'
        if not os.path.exists(input_file):
            raise FileNotFoundError("Preprocessed MindRecord file not found: " + input_file)
data_set = ds.MindDataset(input_file,
columns_list=['src_tokens', 'src_tokens_length', 'label_idx'])
type_cast_op = deC.TypeCast(mstype.int32)
data_set = data_set.map(operations=type_cast_op, input_columns="src_tokens")
data_set = data_set.map(operations=type_cast_op, input_columns="src_tokens_length")
data_set = data_set.map(operations=type_cast_op, input_columns="label_idx")
data_set = data_set.batch(batch_size, drop_remainder=False)
return data_set
for i, _ in enumerate(bucket):
bucket_len = bucket[i]
ds_per = batch_per_bucket(bucket_len, datafile)
if i == 0:
data_set = ds_per
else:
data_set = data_set + ds_per
return data_set
def w2txt(file, data):
with open(file, "w") as f:
for i in range(data.shape[0]):
s = ' '.join(str(num) for num in data[i])
f.write(s+"\n")
if __name__ == '__main__':
dataset = load_infer_dataset(batch_size=args.batch_size, datafile=args.dataset_path, bucket=args.test_buckets)
src_tokens_sents = []
target_sens_sents = []
for batch in dataset.create_dict_iterator(output_numpy=True, num_epochs=1):
src_tokens = batch['src_tokens'].astype(np.int32)
target_sens = batch['label_idx'].astype(np.int32)
src_tokens_shape = src_tokens.shape
target_sens_shape = target_sens.shape
for index in range(src_tokens_shape[0]):
src_tokens_sents.append(src_tokens[index].astype(np.int32))
for index in range(target_sens_shape[0]):
target_sens_sents.append(target_sens[index].astype(np.int32))
src_tokens_sents = np.array(src_tokens_sents).astype(np.int32)
target_sens_sents = np.array(target_sens_sents).astype(np.int32)
w2txt(os.path.join(args.outputdir, "src_tokens.txt"), src_tokens_sents)
w2txt(os.path.join(args.outputdir, "target_sens.txt"), target_sens_sents)
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""FastText for train"""
import os
import time
import numpy as np
from mindspore import context
import mindspore.nn as nn
import mindspore.ops.operations as P
from mindspore.nn.optim import Adam
from mindspore.common import set_seed
from mindspore.train.model import Model
import mindspore.common.dtype as mstype
from mindspore.common.tensor import Tensor
from mindspore.context import ParallelMode
from mindspore.train.callback import Callback, TimeMonitor
from mindspore.communication import management as MultiDevice
from mindspore.train.callback import CheckpointConfig, ModelCheckpoint
from mindspore.train.serialization import load_checkpoint, export, load_param_into_net
from src.load_dataset import load_dataset
from src.lr_schedule import polynomial_decay_scheduler
from src.fasttext_train import FastTextTrainOneStepCell, FastTextNetWithLoss
from src.fasttext_model import FastText
from model_utils.config import config
from model_utils.moxing_adapter import moxing_wrapper
from model_utils.device_adapter import get_device_id, get_device_num
def get_ms_timestamp():
t = time.time()
return int(round(t * 1000))
set_seed(5)
time_stamp_init = False
time_stamp_first = 0
context.set_context(mode=context.GRAPH_MODE, save_graphs=False, device_target=config.device_target)
if config.data_name == "ag":
target_label1 = ['0', '1', '2', '3']
elif config.data_name == 'dbpedia':
target_label1 = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13']
elif config.data_name == 'yelp_p':
target_label1 = ['0', '1']
class LossCallBack(Callback):
"""
Monitor the loss in training.
If the loss is NAN or INF terminating training.
Note:
If per_print_times is 0 do not print loss.
Args:
per_print_times (int): Print loss every times. Default: 1.
"""
def __init__(self, per_print_times=1, rank_ids=0):
super(LossCallBack, self).__init__()
if not isinstance(per_print_times, int) or per_print_times < 0:
raise ValueError("print_step must be int and >= 0.")
self._per_print_times = per_print_times
self.rank_id = rank_ids
global time_stamp_init, time_stamp_first
if not time_stamp_init:
time_stamp_first = get_ms_timestamp()
time_stamp_init = True
def step_end(self, run_context):
"""Monitor the loss in training."""
global time_stamp_first
time_stamp_current = get_ms_timestamp()
cb_params = run_context.original_args()
print("time: {}, epoch: {}, step: {}, outputs are {}".format(time_stamp_current - time_stamp_first,
cb_params.cur_epoch_num,
cb_params.cur_step_num,
str(cb_params.net_outputs)))
with open("./loss_{}.log".format(self.rank_id), "a+") as f:
f.write("time: {}, epoch: {}, step: {}, loss: {}".format(
time_stamp_current - time_stamp_first,
cb_params.cur_epoch_num,
cb_params.cur_step_num,
str(cb_params.net_outputs.asnumpy())))
f.write('\n')
class FastTextInferExportCell(nn.Cell):
"""
Encapsulation class of FastText network infer.
Args:
network (nn.Cell): FastText model.
Returns:
Tuple[Tensor, Tensor], predicted_ids
"""
def __init__(self, network):
super(FastTextInferExportCell, self).__init__(auto_prefix=False)
self.network = network
self.argmax = P.ArgMaxWithValue(axis=1, keep_dims=True)
self.log_softmax = nn.LogSoftmax(axis=1)
def construct(self, src_tokens, src_tokens_lengths):
"""construct fasttext infer cell"""
prediction = self.network(src_tokens, src_tokens_lengths)
predicted_idx = self.log_softmax(prediction)
predicted_idx, _ = self.argmax(predicted_idx)
return predicted_idx
def _build_training_pipeline(pre_dataset, run_distribute=False):
"""
Build training pipeline
Args:
pre_dataset: preprocessed dataset
"""
net_with_loss = FastTextNetWithLoss(config.vocab_size, config.embedding_dims, config.num_class)
net_with_loss.init_parameters_data()
if config.pretrain_ckpt_dir:
parameter_dict = load_checkpoint(config.pretrain_ckpt_dir)
load_param_into_net(net_with_loss, parameter_dict)
if pre_dataset is None:
raise ValueError("pre-process dataset must be provided")
#get learning rate
update_steps = config.epoch * pre_dataset.get_dataset_size()
decay_steps = pre_dataset.get_dataset_size()
rank_size = os.getenv("RANK_SIZE")
if isinstance(rank_size, int):
raise ValueError("RANK_SIZE must be integer")
if rank_size is not None and int(rank_size) > 1:
base_lr = config.lr
else:
base_lr = config.lr / 10
print("+++++++++++Total update steps ", update_steps)
lr = Tensor(polynomial_decay_scheduler(lr=base_lr,
min_lr=config.min_lr,
decay_steps=decay_steps,
total_update_num=update_steps,
warmup_steps=config.warmup_steps,
power=config.poly_lr_scheduler_power), dtype=mstype.float32)
optimizer = Adam(net_with_loss.trainable_params(), lr, beta1=0.9, beta2=0.999)
net_with_grads = FastTextTrainOneStepCell(net_with_loss, optimizer=optimizer)
net_with_grads.set_train(True)
model = Model(net_with_grads)
loss_monitor = LossCallBack(rank_ids=config.rank_id)
dataset_size = pre_dataset.get_dataset_size()
time_monitor = TimeMonitor(data_size=dataset_size)
ckpt_config = CheckpointConfig(save_checkpoint_steps=decay_steps * config.epoch,
keep_checkpoint_max=config.keep_ckpt_max)
callbacks = [time_monitor, loss_monitor]
if not run_distribute:
ckpt_callback = ModelCheckpoint(prefix='fasttext',
directory=os.path.join(config.save_ckpt_dir,
'ckpt_{}'.format(os.getenv("DEVICE_ID"))),
config=ckpt_config)
callbacks.append(ckpt_callback)
if run_distribute and MultiDevice.get_rank() % 8 == 0:
ckpt_callback = ModelCheckpoint(prefix='fasttext',
directory=os.path.join(config.save_ckpt_dir,
'ckpt_{}'.format(os.getenv("DEVICE_ID"))),
config=ckpt_config)
callbacks.append(ckpt_callback)
print("Prepare to Training....")
epoch_size = pre_dataset.get_repeat_count()
print("Epoch size ", epoch_size)
if run_distribute:
print(f" | Rank {MultiDevice.get_rank()} Call model train.")
model.train(epoch=config.epoch, train_dataset=pre_dataset, callbacks=callbacks, dataset_sink_mode=False)
def train_single(input_file_path):
"""
Train model on single device
Args:
input_file_path: preprocessed dataset path
"""
print("Staring training on single device.")
preprocessed_data = load_dataset(dataset_path=input_file_path,
batch_size=config.batch_size,
epoch_count=config.epoch_count,
bucket=config.buckets)
_build_training_pipeline(preprocessed_data)
def set_parallel_env():
context.reset_auto_parallel_context()
MultiDevice.init()
context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
device_num=MultiDevice.get_group_size(),
gradients_mean=True)
def train_parallel(input_file_path):
"""
Train model on multi device
Args:
input_file_path: preprocessed dataset path
"""
set_parallel_env()
print("Starting traning on multiple devices. |~ _ ~| |~ _ ~| |~ _ ~| |~ _ ~|")
batch_size = config.batch_size
if config.device_target == 'GPU':
batch_size = config.distribute_batch_size_gpu
preprocessed_data = load_dataset(dataset_path=input_file_path,
batch_size=batch_size,
epoch_count=config.epoch_count,
rank_size=MultiDevice.get_group_size(),
rank_id=MultiDevice.get_rank(),
bucket=config.buckets,
shuffle=False)
_build_training_pipeline(preprocessed_data, True)
def modelarts_pre_process():
'''modelarts pre process function.'''
def unzip(zip_file, save_dir):
import zipfile
s_time = time.time()
if not os.path.exists(os.path.join(save_dir, config.modelarts_dataset_unzip_name)):
zip_isexist = zipfile.is_zipfile(zip_file)
if zip_isexist:
fz = zipfile.ZipFile(zip_file, 'r')
data_num = len(fz.namelist())
print("Extract Start...")
print("unzip file num: {}".format(data_num))
data_print = int(data_num / 100) if data_num > 100 else 1
i = 0
for file in fz.namelist():
if i % data_print == 0:
print("unzip percent: {}%".format(int(i * 100 / data_num)), flush=True)
i += 1
fz.extract(file, save_dir)
print("cost time: {}min:{}s.".format(int((time.time() - s_time) / 60),
int(int(time.time() - s_time) % 60)))
print("Extract Done.")
else:
print("This is not zip.")
else:
print("Zip has been extracted.")
if config.need_modelarts_dataset_unzip:
zip_file_1 = os.path.join(config.data_path, config.modelarts_dataset_unzip_name + ".zip")
save_dir_1 = os.path.join(config.data_path)
sync_lock = "/tmp/unzip_sync.lock"
        # Each server contains at most 8 devices.
if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
print("Zip file path: ", zip_file_1)
print("Unzip file save dir: ", save_dir_1)
unzip(zip_file_1, save_dir_1)
print("===Finish extract data synchronization===")
try:
os.mknod(sync_lock)
except IOError:
pass
while True:
if os.path.exists(sync_lock):
break
time.sleep(1)
print("Device: {}, Finish sync unzip data from {} to {}.".format(get_device_id(), zip_file_1, save_dir_1))
config.save_ckpt_dir = os.path.join(config.output_path, config.save_ckpt_dir)
@moxing_wrapper(pre_process=modelarts_pre_process)
def run_train():
'''run train.'''
config.rank_id = int(os.environ.get("RANK_ID", "0"))
if config.run_distribute:
        train_parallel(config.dataset_path)
else:
train_single(config.dataset_path)
def run_fasttext_export():
"""export function"""
fasttext_model = FastText(config.vocab_size, config.embedding_dims, config.num_class)
print("================config.ckpt_file===========")
ckpt_dir = os.path.join(config.train_url, 'ckpt_{}'.format(os.getenv("DEVICE_ID")))
config.ckpt_file = os.path.join(ckpt_dir, 'fasttext-5_35.ckpt')
parameter_dict = load_checkpoint(config.ckpt_file)
load_param_into_net(fasttext_model, parameter_dict)
ft_infer = FastTextInferExportCell(fasttext_model)
batch_size = 1
if config.data_name == "ag":
src_tokens_shape = [batch_size, 467]
src_tokens_length_shape = [batch_size, 1]
elif config.data_name == 'dbpedia':
src_tokens_shape = [batch_size, 1120]
src_tokens_length_shape = [batch_size, 1]
elif config.data_name == 'yelp_p':
src_tokens_shape = [batch_size, 2955]
src_tokens_length_shape = [batch_size, 1]
file_name = os.path.join(config.train_url, config.file_name + '_' + config.data_name)
src_tokens = Tensor(np.ones((src_tokens_shape)).astype(np.int32))
src_tokens_length = Tensor(np.ones((src_tokens_length_shape)).astype(np.int32))
export(ft_infer, src_tokens, src_tokens_length, file_name=file_name, file_format='AIR')
if __name__ == "__main__":
run_train()
run_fasttext_export()
#!/bin/bash
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
docker_image=$1
data_dir=$2
model_dir=$3
docker run -it --ipc=host \
--device=/dev/davinci0 \
--device=/dev/davinci1 \
--device=/dev/davinci2 \
--device=/dev/davinci3 \
--device=/dev/davinci4 \
--device=/dev/davinci5 \
--device=/dev/davinci6 \
--device=/dev/davinci7 \
--device=/dev/davinci_manager \
--device=/dev/devmm_svm --device=/dev/hisi_hdc \
-v /usr/local/Ascend/driver:/usr/local/Ascend/driver \
-v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons/ \
-v ${model_dir}:${model_dir} \
-v ${data_dir}:${data_dir} \
-v /var/log/npu/conf/slog/slog.conf:/var/log/npu/conf/slog/slog.conf \
-v /var/log/npu/slog/:/var/log/npu/slog -v /var/log/npu/profiling/:/var/log/npu/profiling \
-v /var/log/npu/dump/:/var/log/npu/dump -v /var/log/npu/:/usr/slog ${docker_image} \
/bin/bash
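A hypothetical launch of the training container via the script above; the script name, image tag, and host paths are assumptions, not taken from this commit:

# args: <docker_image> <data_dir> <model_dir>
bash docker_start.sh mindspore-ascend:latest /home/fasttext/data /home/fasttext/model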