diff --git a/official/nlp/bert/pretrain_config.yaml b/official/nlp/bert/pretrain_config.yaml
index 55603ee1565da6345f71f6d471111a84729c63d4..d4469561bd5b9732c6df31e18f382e5dba449ae2 100644
--- a/official/nlp/bert/pretrain_config.yaml
+++ b/official/nlp/bert/pretrain_config.yaml
@@ -32,7 +32,7 @@ save_checkpoint_num: 1
 data_dir: ''
 schema_dir: ''
 dataset_format: "mindrecord"
-num_samples: None # is the option which could be set by user to specify steps
+num_samples: None # is the option which could be set by user to specify steps when bert_network is base
 
 # ==============================================================================
 # pretrain related
diff --git a/official/nlp/bert/pretrain_config_Ascend_Boost.yaml b/official/nlp/bert/pretrain_config_Ascend_Boost.yaml
index 77be3be95f74e0569950e575c70c7b277095ef64..0a96802354e7f6c2321496bfa5383d500748e210 100644
--- a/official/nlp/bert/pretrain_config_Ascend_Boost.yaml
+++ b/official/nlp/bert/pretrain_config_Ascend_Boost.yaml
@@ -32,7 +32,7 @@ save_checkpoint_num: 1
 data_dir: ''
 schema_dir: ''
 dataset_format: "mindrecord"
-num_samples: None # is the option which could be set by user to specify steps
+num_samples: None # is the option which could be set by user to specify steps when bert_network is base
 
 # ==============================================================================
 # pretrain related
diff --git a/official/nlp/bert/pretrain_config_Ascend_Thor.yaml b/official/nlp/bert/pretrain_config_Ascend_Thor.yaml
index 666ab18314100311b642fa21fbb7363b4ffe92e3..31ac77f8a8c6f3d9d854865ab4e05cdb1eed940b 100644
--- a/official/nlp/bert/pretrain_config_Ascend_Thor.yaml
+++ b/official/nlp/bert/pretrain_config_Ascend_Thor.yaml
@@ -32,7 +32,7 @@ save_checkpoint_num: 5
 data_dir: ''
 schema_dir: ''
 dataset_format: "mindrecord"
-num_samples: None # is the option which could be set by user to specify steps
+num_samples: None # is the option which could be set by user to specify steps when bert_network is base
 
 # ==============================================================================
 # pretrain related
diff --git a/official/nlp/bert/scripts/run_distributed_pretrain_for_gpu.sh b/official/nlp/bert/scripts/run_distributed_pretrain_for_gpu.sh
index 770dab31195c38fd8a1e77bc93c5cc643399b20e..a03d18387346dfcbcfbd62b1c539e0145231bf97 100644
--- a/official/nlp/bert/scripts/run_distributed_pretrain_for_gpu.sh
+++ b/official/nlp/bert/scripts/run_distributed_pretrain_for_gpu.sh
@@ -16,8 +16,8 @@
 
 echo "=============================================================================================================="
 echo "Please run the script as: "
-echo "bash scripts/run_distributed_pretrain.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR"
-echo "for example: bash scripts/run_distributed_pretrain.sh 8 40 /path/zh-wiki/ [/path/Schema.json](optional)"
+echo "bash scripts/run_distributed_pretrain_for_gpu.sh DEVICE_NUM EPOCH_SIZE DATA_DIR SCHEMA_DIR"
+echo "for example: bash scripts/run_distributed_pretrain_for_gpu.sh 8 40 /path/zh-wiki/ [/path/Schema.json](optional)"
 echo "It is better to use absolute path."
 echo "=============================================================================================================="
 
diff --git a/official/nlp/bert/src/dataset.py b/official/nlp/bert/src/dataset.py
index f1277646b698bbf2c83e2cb8744ce69680764a77..01f9659e6b97ae16f9c00cf65fa5a92d68e04d6b 100644
--- a/official/nlp/bert/src/dataset.py
+++ b/official/nlp/bert/src/dataset.py
@@ -126,7 +126,7 @@ def create_bert_dataset(device_num=1, rank=0, do_shuffle="true", data_dir=None,
                 (dataset_format == "mindrecord" and "mindrecord" in file_name and "mindrecord.db" not in file_name):
             data_files.append(os.path.join(data_dir, file_name))
     if dataset_format == "mindrecord":
-        if num_samples is not None:
+        if str(num_samples).lower() != "none":
             data_set = ds.MindDataset(data_files,
                                       columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
                                                     "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"],
@@ -279,10 +279,15 @@ def create_eval_dataset(batchsize=32, device_num=1, rank=0, data_dir=None, schem
     else:
         data_files.append(data_dir)
     if dataset_format == "mindrecord":
-        data_set = ds.MindDataset(data_files,
-                                  columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
-                                                "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"],
-                                  num_samples=num_samples)
+        if str(num_samples).lower() != "none":
+            data_set = ds.MindDataset(data_files,
+                                      columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
+                                                    "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"],
+                                      num_samples=num_samples)
+        else:
+            data_set = ds.MindDataset(data_files,
+                                      columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
+                                                    "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"])
     elif dataset_format == "tfrecord":
         data_set = ds.TFRecordDataset(data_files, schema_dir if schema_dir != "" else None,
                                       columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
@@ -312,10 +317,16 @@ def create_eval_dataset(batchsize=32, device_num=1, rank=0, data_dir=None, schem
         eval_ds.use_sampler(sampler)
     else:
         if dataset_format == "mindrecord":
-            eval_ds = ds.MindDataset(data_files,
-                                     columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
-                                                   "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"],
-                                     num_shards=device_num, shard_id=rank)
+            if str(num_samples).lower() != "none":
+                eval_ds = ds.MindDataset(data_files,
+                                         columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
+                                                       "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"],
+                                         num_shards=device_num, shard_id=rank, num_samples=num_samples)
+            else:
+                eval_ds = ds.MindDataset(data_files,
+                                         columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
+                                                       "masked_lm_positions", "masked_lm_ids", "masked_lm_weights"],
+                                         num_shards=device_num, shard_id=rank)
         elif dataset_format == "tfrecord":
             eval_ds = ds.TFRecordDataset(data_files, schema_dir if schema_dir != "" else None,
                                          columns_list=["input_ids", "input_mask", "segment_ids", "next_sentence_labels",
diff --git a/official/nlp/bert/src/tools/parallel_tfrecord_to_mindrecord.py b/official/nlp/bert/src/tools/parallel_tfrecord_to_mindrecord.py
index c65d5f8e52bdec2bee12e65db8f937d8a545027a..20f6e2085c2d5bf927be56561d2cc9cb2afcba6e 100644
--- a/official/nlp/bert/src/tools/parallel_tfrecord_to_mindrecord.py
+++ b/official/nlp/bert/src/tools/parallel_tfrecord_to_mindrecord.py
@@ -22,7 +22,8 @@ def tf_2_mr(item):
     item_path = item
     if not os.path.exists(args.output_mindrecord_dir):
         os.makedirs(args.output_mindrecord_dir, exist_ok=True)
-    mindrecord_path = args.output_mindrecord_dir + item[item.rfind('/') + 1:item.rfind('.')] + '.mindrecord'
+    mindrecord_path = os.path.join(args.output_mindrecord_dir,
+                                   item[item.rfind('/') + 1:item.rfind('.')] + '.mindrecord')
     print("Start convert {} to {}.".format(item_path, mindrecord_path))
     writer = FileWriter(file_name=mindrecord_path, shard_num=1, overwrite=True)
     nlp_schema = {"input_ids": {"type": "int64", "shape": [-1]},