diff --git a/official/nlp/lstm/README.md b/official/nlp/lstm/README.md
index 1e54c8e53131ad9c41ffde78cd44531597fdf1bd..01c3055c812a3842c01b666bd0d43c2671b11d72 100644
--- a/official/nlp/lstm/README.md
+++ b/official/nlp/lstm/README.md
@@ -42,8 +42,8 @@ LSTM contains embedding, encoder and decoder modules. Encoder module consists of
 Note that you can run the scripts based on the dataset mentioned in original paper or widely used in relevant domain/network architecture. In the following sections, we will introduce how to run the scripts using the related dataset below.

-- aclImdb_v1 for training evaluation.[Large Movie Review Dataset](http://ai.stanford.edu/~amaas/data/sentiment/)
-- GloVe: Vector representations for words.[GloVe: Global Vectors for Word Representation](https://nlp.stanford.edu/projects/glove/)
+- aclImdb_v1 for training and evaluation. [Large Movie Review Dataset](https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz)
+- GloVe: vector representations for words. [GloVe: Global Vectors for Word Representation](https://nlp.stanford.edu/data/glove.6B.zip)
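+
+For reference, both archives can be fetched and unpacked as follows (a sketch; it assumes `wget`, `tar`, and `unzip` are available and that you run it from the model's root directory):
+
+```shell
+wget https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz
+tar -xzf aclImdb_v1.tar.gz          # unpacks to ./aclImdb
+wget https://nlp.stanford.edu/data/glove.6B.zip
+unzip glove.6B.zip -d glove         # glove.6B.*.txt files land in ./glove
+```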

 # [Environment Requirements](#contents)

@@ -198,6 +198,21 @@ Note that you can run the scripts based on the dataset mentioned in original pap
 .
 ├── lstm
     ├── README.md                  # descriptions about LSTM
+    ├── aclimdb                    # aclImdb dataset
+    │   ├── test                   # test set
+    │   │   ├── neg
+    │   │   ├── pos
+    │   │   ├── ...
+    │   ├── train                  # train set
+    │   │   ├── neg
+    │   │   ├── pos
+    │   │   ├── ...
+    │   ├── ...
+    ├── glove                      # GloVe pretrained word vector files
+    │   ├── glove.6B.50d.txt
+    │   ├── glove.6B.100d.txt
+    │   ├── glove.6B.200d.txt
+    │   ├── glove.6B.300d.txt
     ├── script
     │   ├── run_eval_gpu.sh        # shell script for evaluation on GPU
     │   ├── run_eval_ascend.sh     # shell script for evaluation on Ascend
@@ -217,10 +232,12 @@ Note that you can run the scripts based on the dataset mentioned in original pap
     │   ├── local_adapter.py       # Get local ID
     │   └── moxing_adapter.py      # Parameter processing
     ├── default_config.yaml        # Training parameter profile (CPU/GPU)
+    ├── onnx_infer_config.yaml     # ONNX inference parameter profile (CPU/GPU)
     ├── config_ascend.yaml         # Training parameter profile (Ascend)
     ├── config_ascend_8p.yaml      # Training parameter profile (Ascend 8P)
     ├── eval.py                    # evaluation script on GPU, CPU and Ascend
-    └── train.py                   # training script on GPU, CPU and Ascend
+    ├── train.py                   # training script on GPU, CPU and Ascend
+    └── eval_onnx.py               # ONNX inference script on GPU and CPU
 ```

 ## [Script Parameters](#contents)

@@ -402,8 +419,9 @@ Ascend:
 python export.py --ckpt_file [CKPT_PATH] --file_name [FILE_NAME] --file_format [FILE_FORMAT] --config_path [YAML_CONFIG_PATH]
 ```

+- `weight.txt` is required; generate it by running preprocess.py, after which the file appears under ./preprocess. A full example is shown after this list.
 - `ckpt_file` parameter is required.
-- `FILE_FORMAT` should be in ["AIR", "MINDIR"].
+- `FILE_FORMAT` should be in ["AIR", "MINDIR", "ONNX"].
 - `YAML_CONFIG_PATH` default is `default_config.yaml`.
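+
+For example, a complete ONNX export could look like this (a sketch: `lstm-20_390.ckpt` is the checkpoint name used in the evaluation examples, and it assumes preprocess.py reads its paths from the same YAML config):
+
+```shell
+python preprocess.py --config_path=default_config.yaml   # writes weight.txt under ./preprocess
+python export.py --ckpt_file lstm-20_390.ckpt --file_name lstm --file_format ONNX --config_path default_config.yaml
+```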

 ## [Inference Process](#contents)

@@ -421,6 +439,14 @@ bash run_infer_310.sh [MINDIR_PATH] [DATASET_PATH] [NEED_PREPROCESS] [DEVICE_TARGET] [DEVICE_ID]
 `NEED_PREPROCESS` indicates whether preprocessing is needed; its value is 'y' or 'n'.
 `DEVICE_ID` is optional; the default value is 0.

+### ONNX Model Eval
+
+```shell
+ bash run_infer_onnx.sh [DEVICE_ID]
+ # example: bash run_infer_onnx.sh 0
+ # Note: the default config file is onnx_infer_config.yaml; see that file for detailed parameters.
+```
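+
+You can also bypass the wrapper script and call the Python entry point directly, as the script's own usage message suggests (edit the paths in onnx_infer_config.yaml first and run from the model's root directory):
+
+```shell
+python eval_onnx.py --config_path=onnx_infer_config.yaml
+```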
+
 #### result

 The inference result is saved in the current path; you can find it in the acc.log file.
diff --git a/official/nlp/lstm/README_CN.md b/official/nlp/lstm/README_CN.md
index f92b2c43c51d5a1689695255ac21bf99bba4f426..7972a64fb59a999a2f4e9553c6b02867b29fc85b 100644
--- a/official/nlp/lstm/README_CN.md
+++ b/official/nlp/lstm/README_CN.md
@@ -43,8 +43,8 @@ The LSTM model contains embedding, encoder and decoder modules. The encoder module
 # Dataset

-- aclImdb_v1 for training and evaluation. [Large Movie Review Dataset](http://ai.stanford.edu/~amaas/data/sentiment/)
-- GloVe: global vectors for word representation. [GloVe](https://nlp.stanford.edu/projects/glove/)
+- aclImdb_v1 for training and evaluation. [Large Movie Review Dataset](https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz)
+- GloVe: global vectors for word representation. [GloVe](https://nlp.stanford.edu/data/glove.6B.zip)

 # Environment Requirements

@@ -199,6 +199,21 @@ The LSTM model contains embedding, encoder and decoder modules. The encoder module
 .
 ├── lstm
     ├── README.md                  # description of LSTM
+    ├── aclimdb                    # aclImdb dataset
+    │   ├── test                   # test set
+    │   │   ├── neg
+    │   │   ├── pos
+    │   │   ├── ...
+    │   ├── train                  # train set
+    │   │   ├── neg
+    │   │   ├── pos
+    │   │   ├── ...
+    │   ├── ...
+    ├── glove                      # GloVe pretrained word vector files
+    │   ├── glove.6B.50d.txt
+    │   ├── glove.6B.100d.txt
+    │   ├── glove.6B.200d.txt
+    │   ├── glove.6B.300d.txt
     ├── script
     │   ├── run_eval_ascend.sh     # shell script for evaluation on Ascend
     │   ├── run_eval_gpu.sh        # shell script for evaluation on GPU
@@ -206,7 +221,8 @@ The LSTM model contains embedding, encoder and decoder modules. The encoder module
     │   ├── run_train_ascend.sh    # shell script for training on Ascend
     │   ├── run_train_gpu.sh       # shell script for training on GPU
     │   ├── run_train_cpu.sh       # shell script for training on CPU
-    │   └── run_infer_310.sh       # shell script for inference on Ascend 310
+    │   ├── run_infer_310.sh       # shell script for inference on Ascend 310
+    │   └── run_infer_onnx.sh      # shell script for ONNX model inference
     ├── src
     │   ├── lstm.py                # sentiment model
     │   ├── dataset.py             # dataset preprocessing
@@ -218,10 +234,12 @@ The LSTM model contains embedding, encoder and decoder modules. The encoder module
     │   ├── local_adapter.py       # get local ID
     │   └── moxing_adapter.py      # data preparation on the cloud
     ├── default_config.yaml        # training parameter profile (CPU/GPU)
+    ├── onnx_infer_config.yaml     # ONNX inference parameter profile (CPU/GPU)
     ├── config_ascend.yaml         # training parameter profile (Ascend)
     ├── config_ascend_8p.yaml      # training parameter profile (Ascend 8P)
     ├── eval.py                    # evaluation script on GPU, CPU and Ascend
-    └── train.py                   # training script on GPU, CPU and Ascend
+    ├── train.py                   # training script on GPU, CPU and Ascend
+    └── eval_onnx.py               # ONNX inference script on GPU and CPU
 ```

 ## Script Parameters

@@ -397,14 +415,15 @@ Ascend:
 bash run_eval_cpu.sh 0 ./aclimdb ./glove_dir lstm-20_390.ckpt
 ```

-## Export MindIR Model
+## Export Model

 ```shell
 python export.py --ckpt_file [CKPT_PATH] --file_name [FILE_NAME] --file_format [FILE_FORMAT] --config_path [YAML_CONFIG_PATH]
 ```

+- `weight.txt` is used by the export script; generate it by running preprocess.py.
 - `ckpt_file` is required.
-- `FILE_FORMAT` must be chosen from ["AIR", "MINDIR"].
+- `FILE_FORMAT` must be chosen from ["AIR", "MINDIR", "ONNX"].
 - `YAML_CONFIG_PATH` defaults to `default_config.yaml`.

 ## Inference Process

@@ -422,6 +441,14 @@ bash run_infer_310.sh [MINDIR_PATH] [DATASET_PATH] [NEED_PREPROCESS] [DEVICE_TARGET] [DEVICE_ID]
 - `NEED_PREPROCESS` indicates whether the data needs preprocessing; its value is 'y' or 'n'.
 - `DEVICE_ID` is optional; the default value is 0.

+### ONNX Model Evaluation
+
+```shell
+ bash run_infer_onnx.sh [DEVICE_ID]
+ # example: bash run_infer_onnx.sh 0
+ # Note: this inference uses the onnx_infer_config.yaml config file; see that file for detailed parameters.
+```
+
 ### Result

 Inference results are saved in the current path; the final accuracy can be found in acc.log.
diff --git a/official/nlp/lstm/default_config.yaml b/official/nlp/lstm/default_config.yaml
index 5dbbf148d91a80219126e6e7aced647821f141fb..acedac2946c10c1f2a2629149e1ba7425a12da4c 100644
--- a/official/nlp/lstm/default_config.yaml
+++ b/official/nlp/lstm/default_config.yaml
@@ -76,4 +76,4 @@ enable_graph_kernel: 'Accelerate by graph kernel, default is true.'
 device_target: ['Ascend', 'GPU', 'CPU']
 distribute: ['true', 'false']
 enable_graph_kernel: ['true', 'false']
-file_format: ['AIR', 'MINDIR']
+file_format: ['AIR', 'MINDIR', 'ONNX']
diff --git a/official/nlp/lstm/eval_onnx.py b/official/nlp/lstm/eval_onnx.py
new file mode 100644
index 0000000000000000000000000000000000000000..f36075e12470c82aaf90ba98e7def63459ea9a8f
--- /dev/null
+++ b/official/nlp/lstm/eval_onnx.py
@@ -0,0 +1,56 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+#################Inference on ONNX########################
+"""
+import onnxruntime as ort
+import mindspore.nn as nn
+from src.model_utils.config import config
+from src.dataset import lstm_create_dataset
+
+
+def create_session(onnx_checkpoint_path, target_device):
+    """Create an ONNX Runtime session for the requested device."""
+    if target_device == 'GPU':
+        providers = ['CUDAExecutionProvider']
+    elif target_device == 'CPU':
+        providers = ['CPUExecutionProvider']
+    else:
+        raise ValueError(
+            f'Unsupported target device {target_device}, '
+            f'expected one of: "CPU", "GPU"'
+        )
+    session = ort.InferenceSession(onnx_checkpoint_path, providers=providers)
+    input_name = session.get_inputs()[0].name
+    return session, input_name
+
+
+def eval_lstm():
+    """Evaluate the exported LSTM ONNX model on the preprocessed test set."""
+    print('\neval_onnx.py config: \n', config)
+    session, input_name = create_session(config.onnx_file, config.onnx_target)
+    dataset = lstm_create_dataset(config.preprocess_path, config.batch_size, training=False)
+    eval_metrics = {'acc': nn.Accuracy(), 'recall': nn.Recall(), 'f1': nn.F1()}
+
+    for batch in dataset:
+        # batch[0] holds the token-id features, batch[1] the sentiment labels.
+        y_pred = session.run(None, {input_name: batch[0].asnumpy()})[0]
+        for metric in eval_metrics.values():
+            metric.update(y_pred, batch[1].asnumpy())
+    return {name: metric.eval() for name, metric in eval_metrics.items()}
+
+
+if __name__ == '__main__':
+    result = eval_lstm()
+    print("=================================Inference Result=================================")
+    for name, value in result.items():
+        print(name, value)
+    print("=================================================================================")
diff --git a/official/nlp/lstm/onnx_infer_config.yaml b/official/nlp/lstm/onnx_infer_config.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..740cee801d8437c8a1824bc74f124ec53bf0c7ef
--- /dev/null
+++ b/official/nlp/lstm/onnx_infer_config.yaml
@@ -0,0 +1,9 @@
+# ONNX Runtime inference
+# onnx model path
+onnx_file: '/home/mindspore/lstm/biLSTM/lstm/lstm.onnx'
+onnx_target: 'GPU'
+embed_size: 300
+aclimdb_path: "/home/mindspore/lstm/biLSTM/aclImdb"
+batch_size: 64
+preprocess_path: "/home/mindspore/lstm/preprocess"
+glove_path: "/home/mindspore/lstm/biLSTM/glove"
\ No newline at end of file
diff --git a/official/nlp/lstm/preprocess.py b/official/nlp/lstm/preprocess.py
index eca269da4111d1053d22a5ec9b4782bbab2f4b80..6a1b82e98f3a1c3a3c8e7913d686fb339377a53f 100644
--- a/official/nlp/lstm/preprocess.py
+++ b/official/nlp/lstm/preprocess.py
@@ -18,11 +18,13 @@ import os

 import numpy as np

-from src.dataset import lstm_create_dataset
+from src.dataset import lstm_create_dataset, convert_to_mindrecord
 from src.model_utils.config import config

 if __name__ == '__main__':
+    print("============== Starting Data Pre-processing ==============")
+    convert_to_mindrecord(config.embed_size, config.aclimdb_path, config.preprocess_path, config.glove_path)
     dataset = lstm_create_dataset(config.preprocess_path, config.batch_size, training=False)
     img_path = os.path.join(config.result_path, "00_data")
     os.makedirs(img_path)
diff --git a/official/nlp/lstm/requirements.txt b/official/nlp/lstm/requirements.txt
index 78dd8a1d053cba2d525309a2cfc7c3661dc0d1d6..4241f622801b9ca1074dd8d46a2c0abfbd8c366a 100644
--- a/official/nlp/lstm/requirements.txt
+++ b/official/nlp/lstm/requirements.txt
@@ -1,3 +1,4 @@
 gensim
 numpy
 pyyaml
+onnxruntime-gpu
\ No newline at end of file
diff --git a/official/nlp/lstm/scripts/run_infer_onnx.sh b/official/nlp/lstm/scripts/run_infer_onnx.sh
new file mode 100644
index 0000000000000000000000000000000000000000..2e1f0206f1833b3978fe74825337b7643dc87315
--- /dev/null
+++ b/official/nlp/lstm/scripts/run_infer_onnx.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Print usage and exit when the single required argument is missing.
+if [[ $# -ne 1 ]]; then
+  echo "Usage: bash run_infer_onnx.sh [DEVICE_ID]
+  DEVICE_ID means device id, it can be set by environment variable DEVICE_ID.
+  for example: bash run_infer_onnx.sh 0
+  You can also run 'python eval_onnx.py --config_path=../onnx_infer_config.yaml' to run the script.
+  Please check the file paths in onnx_infer_config.yaml and make sure they are correct."
+  exit 1
+fi
+
+DEVICE_ID=$1
+export CUDA_VISIBLE_DEVICES=$DEVICE_ID
+
+mkdir -p ms_log
+CUR_DIR=$(pwd)
+export GLOG_log_dir=${CUR_DIR}/ms_log
+export GLOG_logtostderr=0
+
+BASE_PATH=$(cd ./"$(dirname "$0")" || exit; pwd)
+CONFIG_FILE="${BASE_PATH}/../onnx_infer_config.yaml"
+python ../eval_onnx.py \
+    --config_path=$CONFIG_FILE > onnx_infer.txt
diff --git a/official/nlp/lstm/src/imdb.py b/official/nlp/lstm/src/imdb.py
index ed0bb180c150171902d3f87aa85c87cb1228356c..7e1976ed7d34b3b77724e74a85ca8a99fe14f3ee 100644
--- a/official/nlp/lstm/src/imdb.py
+++ b/official/nlp/lstm/src/imdb.py
@@ -1,4 +1,4 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
+# Copyright 2020-2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -47,7 +47,7 @@ class ImdbParser():
         """
         parse imdb data to memory
         """
-        self.__wvmodel = gensim.models.KeyedVectors.load_word2vec_format(self.__glove_file)
+        self.__wvmodel = gensim.models.KeyedVectors.load_word2vec_format(self.__glove_file, no_header=True)  # raw GloVe text has no word2vec header line (gensim >= 4.0)

         for seg in self.__segs:
             self.__parse_imdb_datas(seg)