diff --git a/official/nlp/fasttext/README.md b/official/nlp/fasttext/README.md index 262b194a8873243a4ffba8ade60d2fc02beca8dc..542eb24338ceb94903c99991fe76fa9133d75b7f 100644 --- a/official/nlp/fasttext/README.md +++ b/official/nlp/fasttext/README.md @@ -223,14 +223,7 @@ Parameters for both training and evaluation can be set in config.py. All the dat ```bash cd ./scripts - bash run_standalone_train_gpu.sh [DATASET_PATH] - ``` - - - Running scripts for distributed training of FastText. Task training on multiple device and run the following command in bash to be executed in `scripts/`: - - ```bash - cd ./scripts - bash run_distribute_train_gpu.sh [DATASET_PATH] [NUM_OF_DEVICES] + bash run_standalone_train_gpu.sh [DATASET_PATH] [DATANAME] ``` ### [Inference Process](#content) diff --git a/official/nlp/fasttext/scripts/run_distribute_train_gpu.sh b/official/nlp/fasttext/scripts/run_distribute_train_gpu.sh deleted file mode 100644 index 878c68e6bfb20b6d9d2fc7d50003440b4876d9f0..0000000000000000000000000000000000000000 --- a/official/nlp/fasttext/scripts/run_distribute_train_gpu.sh +++ /dev/null @@ -1,55 +0,0 @@ -#!/bin/bash -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -echo "==============================================================================================================" -echo "Please run the script as: " -echo "sh run_distributed_train_gpu.sh DATASET_PATH DEVICE_NUM" -echo "for example: sh run_distributed_train_gpu.sh /home/workspace/ag 8" -echo "It is better to use absolute path." -echo "==============================================================================================================" -get_real_path(){ - if [ "${1:0:1}" == "/" ]; then - echo "$1" - else - echo "$(realpath -m $PWD/$1)" - fi -} - -DATASET=$(get_real_path $1) -echo $DATASET -DATANAME=$(basename $DATASET) - -echo $DATANAME - -config_path="./${DATANAME}_config.yaml" -echo "config path is : ${config_path}" - -if [ -d "distribute_train" ]; -then - rm -rf ./distribute_train -fi -mkdir ./distribute_train -cp ../*.py ./distribute_train -cp ../*.yaml ./distribute_train -cp -r ../src ./distribute_train -cp -r ../model_utils ./distribute_train -cp -r ../scripts/*.sh ./distribute_train -cd ./distribute_train || exit -echo "start training for $2 GPU devices" - -mpirun -n $2 --allow-run-as-root --output-filename log_output --merge-stderr-to-stdout \ -python ../../train.py --config_path $config_path --device_target GPU --run_distribute True --dataset_path $DATASET --data_name $DATANAME -cd .. diff --git a/official/nlp/fasttext/scripts/run_standalone_train_gpu.sh b/official/nlp/fasttext/scripts/run_standalone_train_gpu.sh index 920afca8d7f780327d85c734b114dca8a132f993..59aa1958c8f7c5769a25fcb7d5542d06bd67620c 100644 --- a/official/nlp/fasttext/scripts/run_standalone_train_gpu.sh +++ b/official/nlp/fasttext/scripts/run_standalone_train_gpu.sh @@ -15,8 +15,8 @@ # ============================================================================ echo "==============================================================================================================" echo "Please run the script as: " -echo "sh run_standalone_train_gpu.sh DATASET_PATH" -echo "for example: sh run_standalone_train_gpu.sh /home/workspace/ag" +echo "sh run_standalone_train_gpu.sh DATASET_PATH DATASET_NAME" +echo "for example: sh run_standalone_train_gpu.sh /home/workspace/ag ag" echo "It is better to use absolute path." echo "==============================================================================================================" get_real_path(){ @@ -28,9 +28,9 @@ get_real_path(){ } DATASET=$(get_real_path $1) -echo $DATASET -DATANAME=$(basename $DATASET) -echo $DATANAME +echo DATASET_PATH=$DATASET +DATANAME=$2 +echo DATANAME=$DATANAME config_path="./${DATANAME}_config.yaml" echo "config path is : ${config_path}" diff --git a/official/nlp/fasttext/train.py b/official/nlp/fasttext/train.py index 9e102c203c64b66f7ec994e4a8ebeb088c35f88b..f7a1b99bc3c364f2c6f5f9daa392daf29d6495de 100644 --- a/official/nlp/fasttext/train.py +++ b/official/nlp/fasttext/train.py @@ -16,7 +16,6 @@ import os import time from mindspore import context -from mindspore.communication.management import init, get_rank from mindspore.nn.optim import Adam from mindspore.common import set_seed from mindspore.train.model import Model @@ -249,14 +248,7 @@ def modelarts_pre_process(): @moxing_wrapper(pre_process=modelarts_pre_process) def run_train(): '''run train.''' - if config.device_target == "Ascend": - config.rank_id = get_device_id() - elif config.device_target == "GPU": - init("nccl") - config.rank_id = get_rank() - else: - raise ValueError("Not support device target: {}".format(config.device_target)) - + config.rank_id = int(os.environ.get("RANK_ID", "0")) if config.run_distribute: train_paralle(config.dataset_path) else: