diff --git a/official/cv/FCN8s/README.md b/official/cv/FCN8s/README.md
index 558112e36b8e8db4715e7bbdee03397cb860c00f..1ce8edd2a6240de9a5bb178e98d4c77f1c80b23a 100644
--- a/official/cv/FCN8s/README.md
+++ b/official/cv/FCN8s/README.md
@@ -197,11 +197,15 @@ ckpt_file: /home/FCN8s/ckpt/FCN8s_1-133_300.ckpt
 
 - build mindrecord training data
 
+Extract the downloaded benchmark.tgz and VOCtrainval_11-May-2012.tar files and place the extracted contents in the /path_to_data/fcn8s_data directory.
+
 ```python
+ python src/data/get_dataset_list.py --data_dir=/path_to_data/fcn8s_data
+
 bash build_data.sh
 or
- python src/data/build_seg_data.py --data_root=/home/sun/data/Mindspore/benchmark_RELEASE/dataset  \
-                                   --data_lst=/home/sun/data/Mindspore/benchmark_RELEASE/dataset/trainaug.txt  \
+ python src/data/build_seg_data.py --data_root=/path_to_data/fcn8s_data/benchmark_RELEASE/dataset  \
+                                   --data_lst=/path_to_data/fcn8s_data/vocaug_train_lst.txt  \
                                    --dst_path=dataset/MINDRECORED_NAME.mindrecord  \
                                    --num_shards=1  \
                                    --shuffle=True
diff --git a/official/cv/FCN8s/src/data/get_dataset_list.py b/official/cv/FCN8s/src/data/get_dataset_list.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c548900b6180dab942b7210916e83f349b6169c
--- /dev/null
+++ b/official/cv/FCN8s/src/data/get_dataset_list.py
@@ -0,0 +1,156 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import argparse
+import os
+
+import numpy as np
+import scipy.io
+from PIL import Image
+
+parser = argparse.ArgumentParser('dataset list generator')
+parser.add_argument("--data_dir", type=str, default='./', help='where dataset stored.')
+
+args, _ = parser.parse_known_args()
+
+data_dir = args.data_dir
+print("Data dir is:", data_dir)
+
+#
+VOC_IMG_DIR = os.path.join(data_dir, 'VOCdevkit/VOC2012/JPEGImages')
+VOC_ANNO_DIR = os.path.join(data_dir, 'VOCdevkit/VOC2012/SegmentationClass')
+VOC_ANNO_GRAY_DIR = os.path.join(data_dir, 'VOCdevkit/VOC2012/SegmentationClassGray')
+VOC_TRAIN_TXT = os.path.join(data_dir, 'VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt')
+VOC_VAL_TXT = os.path.join(data_dir, 'VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt')
+
+SBD_ANNO_DIR = os.path.join(data_dir, 'benchmark_RELEASE/dataset/cls')
+SBD_IMG_DIR = os.path.join(data_dir, 'benchmark_RELEASE/dataset/img')
+SBD_ANNO_PNG_DIR = os.path.join(data_dir, 'benchmark_RELEASE/dataset/cls_png')
+SBD_ANNO_GRAY_DIR = os.path.join(data_dir, 'benchmark_RELEASE/dataset/cls_png_gray')
+SBD_TRAIN_TXT = os.path.join(data_dir, 'benchmark_RELEASE/dataset/train.txt')
+SBD_VAL_TXT = os.path.join(data_dir, 'benchmark_RELEASE/dataset/val.txt')
+
+VOC_TRAIN_LST_TXT = os.path.join(data_dir, 'voc_train_lst.txt')
+VOC_VAL_LST_TXT = os.path.join(data_dir, 'voc_val_lst.txt')
+VOC_AUG_TRAIN_LST_TXT = os.path.join(data_dir, 'vocaug_train_lst.txt')
+
+
+def __get_data_list(data_list_file):
+    with open(data_list_file, mode='r') as f:
+        return f.readlines()
+
+
+def conv_voc_colorpng_to_graypng():
+    if not os.path.exists(VOC_ANNO_GRAY_DIR):
+        os.makedirs(VOC_ANNO_GRAY_DIR)
+
+    for ann in os.listdir(VOC_ANNO_DIR):
+        ann_im = Image.open(os.path.join(VOC_ANNO_DIR, ann))
+        ann_im = Image.fromarray(np.array(ann_im))
+        ann_im.save(os.path.join(VOC_ANNO_GRAY_DIR, ann))
+
+
+def __gen_palette(cls_nums=256):
+    palette = np.zeros((cls_nums, 3), dtype=np.uint8)
+    for i in range(cls_nums):
+        lbl = i
+        j = 0
+        while lbl:
+            palette[i, 0] |= (((lbl >> 0) & 1) << (7 - j))
+            palette[i, 1] |= (((lbl >> 1) & 1) << (7 - j))
+            palette[i, 2] |= (((lbl >> 2) & 1) << (7 - j))
+            lbl >>= 3
+            j += 1
+    return palette.flatten()
+
+
+def conv_sbd_mat_to_png():
+    if not os.path.exists(SBD_ANNO_PNG_DIR):
+        os.makedirs(SBD_ANNO_PNG_DIR)
+    if not os.path.exists(SBD_ANNO_GRAY_DIR):
+        os.makedirs(SBD_ANNO_GRAY_DIR)
+
+    palette = __gen_palette()
+    for an in os.listdir(SBD_ANNO_DIR):
+        img_id = an[:-4]
+        mat = scipy.io.loadmat(os.path.join(SBD_ANNO_DIR, an))
+        anno = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
+        anno_png = Image.fromarray(anno)
+        # save to gray png
+        anno_png.save(os.path.join(SBD_ANNO_GRAY_DIR, img_id + '.png'))
+        # save to color png use palette
+        anno_png.putpalette(palette)
+        anno_png.save(os.path.join(SBD_ANNO_PNG_DIR, img_id + '.png'))
+
+
+def create_voc_train_lst_txt():
+    voc_train_data_lst = __get_data_list(VOC_TRAIN_TXT)
+    with open(VOC_TRAIN_LST_TXT, mode='w') as f:
+        for id_ in voc_train_data_lst:
+            id_ = id_.strip()
+            img_ = os.path.join(VOC_IMG_DIR, id_ + '.jpg').replace('./', '')
+            anno_ = os.path.join(VOC_ANNO_GRAY_DIR, id_ + '.png').replace('./', '')
+            f.write(img_ + ' ' + anno_ + '\n')
+
+
+def create_voc_val_lst_txt():
+    voc_val_data_lst = __get_data_list(VOC_VAL_TXT)
+    with open(VOC_VAL_LST_TXT, mode='w') as f:
+        for id_ in voc_val_data_lst:
+            id_ = id_.strip()
+            img_ = os.path.join(VOC_IMG_DIR, id_ + '.jpg').replace('./', '')
+            anno_ = os.path.join(VOC_ANNO_GRAY_DIR, id_ + '.png').replace('./', '')
+            f.write(img_ + ' ' + anno_ + '\n')
+
+
+def create_voc_train_aug_lst_txt():
+    voc_train_data_lst = __get_data_list(VOC_TRAIN_TXT)
+    voc_val_data_lst = __get_data_list(VOC_VAL_TXT)
+
+    sbd_train_data_lst = __get_data_list(SBD_TRAIN_TXT)
+    sbd_val_data_lst = __get_data_list(SBD_VAL_TXT)
+
+    with open(VOC_AUG_TRAIN_LST_TXT, mode='w') as f:
+        for id_ in sbd_train_data_lst + sbd_val_data_lst:
+            if id_ in voc_train_data_lst + voc_val_data_lst:
+                continue
+            id_ = id_.strip()
+            img_ = os.path.join(SBD_IMG_DIR, id_ + '.jpg').replace('./', '')
+            anno_ = os.path.join(SBD_ANNO_GRAY_DIR, id_ + '.png').replace('./', '')
+            f.write(img_ + ' ' + anno_ + '\n')
+
+        for id_ in voc_train_data_lst:
+            id_ = id_.strip()
+            img_ = os.path.join(VOC_IMG_DIR, id_ + '.jpg').replace('./', '')
+            anno_ = os.path.join(VOC_ANNO_GRAY_DIR, id_ + '.png').replace('./', '')
+            f.write(img_ + ' ' + anno_ + '\n')
+
+
+if __name__ == '__main__':
+    print('converting voc color png to gray png ...')
+    conv_voc_colorpng_to_graypng()
+    print('converting done.')
+
+    create_voc_train_lst_txt()
+    print('generating voc train list success.')
+
+    create_voc_val_lst_txt()
+    print('generating voc val list success.')
+
+    print('converting sbd annotations to png ...')
+    conv_sbd_mat_to_png()
+    print('converting done')
+
+    create_voc_train_aug_lst_txt()
+    print('generating voc train aug list success.')
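
For reference, a minimal sketch of the full data-preparation flow described in the README change above. The script arguments, output list names, and the /path_to_data/fcn8s_data layout come from the diff; the tar invocations and the extracted directory names (VOCdevkit/, benchmark_RELEASE/) are assumptions about the standard PASCAL VOC 2012 and SBD archives.

```shell
# Assumed: both archives are extracted directly under /path_to_data/fcn8s_data
tar -xf VOCtrainval_11-May-2012.tar -C /path_to_data/fcn8s_data   # expected to yield VOCdevkit/VOC2012/...
tar -xzf benchmark.tgz -C /path_to_data/fcn8s_data                # expected to yield benchmark_RELEASE/dataset/...

# Convert annotations to gray PNGs and generate the list files
# (voc_train_lst.txt, voc_val_lst.txt, vocaug_train_lst.txt under --data_dir);
# each list line has the form "<image path> <gray annotation path>"
python src/data/get_dataset_list.py --data_dir=/path_to_data/fcn8s_data

# Build the MindRecord training data from the augmented train list
python src/data/build_seg_data.py --data_root=/path_to_data/fcn8s_data/benchmark_RELEASE/dataset \
                                  --data_lst=/path_to_data/fcn8s_data/vocaug_train_lst.txt \
                                  --dst_path=dataset/MINDRECORED_NAME.mindrecord \
                                  --num_shards=1 \
                                  --shuffle=True
```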