Fix standalone (single-device) run error for the GPT model

ece8f5e9 · huangxinjing · 2b24802f · ece8f5e9 · ece8f5e9
Commit ece8f5e9 authored 2 years ago by huangxinjing
--- a/official/nlp/gpt/scripts/run_standalone_train.sh
+++ b/official/nlp/gpt/scripts/run_standalone_train.sh
@@ -30,4 +30,5 @@ python train.py  \
    --epoch_size=$EPOCH_SIZE \
    --device_id=$DEVICE_ID \
    --data_path=$DATA_DIR \
+    --model_parallel=1 \
    --optimizer="adam" > training_log.txt 2>&1 &
--- a/official/nlp/pangu_alpha/README.md
+++ b/official/nlp/pangu_alpha/README.md
@@ -241,7 +241,7 @@ Training 60B model using 8 NPU in one server requires that the server has at lea
 ```bash
 # run distributed training example in one ascend machine

-bash run_distributed_train_moe_host_device.sh /path/dataset /path/hccl.json 8 fp32 2.6B 1 1 1 0 8 36 0
+bash run_distributed_train_moe_host_device.sh /path/dataset /path/hccl.json 8 fp32 2.6B 1 1 2 0 8 36 0
 ```

 #### Training on homogeneous