diff --git a/official/cv/FCN8s/README.md b/official/cv/FCN8s/README.md
index 558112e36b8e8db4715e7bbdee03397cb860c00f..1ce8edd2a6240de9a5bb178e98d4c77f1c80b23a 100644
--- a/official/cv/FCN8s/README.md
+++ b/official/cv/FCN8s/README.md
@@ -197,11 +197,15 @@ ckpt_file: /home/FCN8s/ckpt/FCN8s_1-133_300.ckpt
 
 - build mindrecord training data
 
+Extract the downloaded benchmark.tgz and VOCtrainval_11-May-2012.tar files and place the extracted contents in the /path_to_data/fcn8s_data directory.
+
 ```python
+ python src/data/get_dataset_list.py --data_dir=/path_to_data/fcn8s_data
+
 bash build_data.sh
 or
- python src/data/build_seg_data.py --data_root=/home/sun/data/Mindspore/benchmark_RELEASE/dataset  \
-                                   --data_lst=/home/sun/data/Mindspore/benchmark_RELEASE/dataset/trainaug.txt  \
+ python src/data/build_seg_data.py --data_root=/path_to_data/fcn8s_data/benchmark_RELEASE/dataset  \
+                                   --data_lst=/path_to_data/fcn8s_data/vocaug_train_lst.txt  \
                                    --dst_path=dataset/MINDRECORED_NAME.mindrecord  \
                                    --num_shards=1  \
                                    --shuffle=True
diff --git a/official/cv/FCN8s/src/data/get_dataset_list.py b/official/cv/FCN8s/src/data/get_dataset_list.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c548900b6180dab942b7210916e83f349b6169c
--- /dev/null
+++ b/official/cv/FCN8s/src/data/get_dataset_list.py
@@ -0,0 +1,156 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import argparse
+import os
+
+import numpy as np
+import scipy.io
+from PIL import Image
+
+parser = argparse.ArgumentParser('dataset list generator')
+parser.add_argument("--data_dir", type=str, default='./', help='where dataset stored.')
+
+args, _ = parser.parse_known_args()
+
+data_dir = args.data_dir
+print("Data dir is:", data_dir)
+
+#
+VOC_IMG_DIR = os.path.join(data_dir, 'VOCdevkit/VOC2012/JPEGImages')
+VOC_ANNO_DIR = os.path.join(data_dir, 'VOCdevkit/VOC2012/SegmentationClass')
+VOC_ANNO_GRAY_DIR = os.path.join(data_dir, 'VOCdevkit/VOC2012/SegmentationClassGray')
+VOC_TRAIN_TXT = os.path.join(data_dir, 'VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt')
+VOC_VAL_TXT = os.path.join(data_dir, 'VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt')
+
+SBD_ANNO_DIR = os.path.join(data_dir, 'benchmark_RELEASE/dataset/cls')
+SBD_IMG_DIR = os.path.join(data_dir, 'benchmark_RELEASE/dataset/img')
+SBD_ANNO_PNG_DIR = os.path.join(data_dir, 'benchmark_RELEASE/dataset/cls_png')
+SBD_ANNO_GRAY_DIR = os.path.join(data_dir, 'benchmark_RELEASE/dataset/cls_png_gray')
+SBD_TRAIN_TXT = os.path.join(data_dir, 'benchmark_RELEASE/dataset/train.txt')
+SBD_VAL_TXT = os.path.join(data_dir, 'benchmark_RELEASE/dataset/val.txt')
+
+VOC_TRAIN_LST_TXT = os.path.join(data_dir, 'voc_train_lst.txt')
+VOC_VAL_LST_TXT = os.path.join(data_dir, 'voc_val_lst.txt')
+VOC_AUG_TRAIN_LST_TXT = os.path.join(data_dir, 'vocaug_train_lst.txt')
+
+
+def __get_data_list(data_list_file):
+    with open(data_list_file, mode='r') as f:
+        return f.readlines()
+
+
+def conv_voc_colorpng_to_graypng():
+    if not os.path.exists(VOC_ANNO_GRAY_DIR):
+        os.makedirs(VOC_ANNO_GRAY_DIR)
+
+    for ann in os.listdir(VOC_ANNO_DIR):
+        ann_im = Image.open(os.path.join(VOC_ANNO_DIR, ann))
+        ann_im = Image.fromarray(np.array(ann_im))
+        ann_im.save(os.path.join(VOC_ANNO_GRAY_DIR, ann))
+
+
+def __gen_palette(cls_nums=256):
+    palette = np.zeros((cls_nums, 3), dtype=np.uint8)
+    for i in range(cls_nums):
+        lbl = i
+        j = 0
+        while lbl:
+            palette[i, 0] |= (((lbl >> 0) & 1) << (7 - j))
+            palette[i, 1] |= (((lbl >> 1) & 1) << (7 - j))
+            palette[i, 2] |= (((lbl >> 2) & 1) << (7 - j))
+            lbl >>= 3
+            j += 1
+    return palette.flatten()
+
+
+def conv_sbd_mat_to_png():
+    if not os.path.exists(SBD_ANNO_PNG_DIR):
+        os.makedirs(SBD_ANNO_PNG_DIR)
+    if not os.path.exists(SBD_ANNO_GRAY_DIR):
+        os.makedirs(SBD_ANNO_GRAY_DIR)
+
+    palette = __gen_palette()
+    for an in os.listdir(SBD_ANNO_DIR):
+        img_id = an[:-4]
+        mat = scipy.io.loadmat(os.path.join(SBD_ANNO_DIR, an))
+        anno = mat['GTcls'][0]['Segmentation'][0].astype(np.uint8)
+        anno_png = Image.fromarray(anno)
+        # save to gray png
+        anno_png.save(os.path.join(SBD_ANNO_GRAY_DIR, img_id + '.png'))
+        # save to color png use palette
+        anno_png.putpalette(palette)
+        anno_png.save(os.path.join(SBD_ANNO_PNG_DIR, img_id + '.png'))
+
+
+def create_voc_train_lst_txt():
+    voc_train_data_lst = __get_data_list(VOC_TRAIN_TXT)
+    with open(VOC_TRAIN_LST_TXT, mode='w') as f:
+        for id_ in voc_train_data_lst:
+            id_ = id_.strip()
+            img_ = os.path.join(VOC_IMG_DIR, id_ + '.jpg').replace('./', '')
+            anno_ = os.path.join(VOC_ANNO_GRAY_DIR, id_ + '.png').replace('./', '')
+            f.write(img_ + ' ' + anno_ + '\n')
+
+
+def create_voc_val_lst_txt():
+    voc_val_data_lst = __get_data_list(VOC_VAL_TXT)
+    with open(VOC_VAL_LST_TXT, mode='w') as f:
+        for id_ in voc_val_data_lst:
+            id_ = id_.strip()
+            img_ = os.path.join(VOC_IMG_DIR, id_ + '.jpg').replace('./', '')
+            anno_ = os.path.join(VOC_ANNO_GRAY_DIR, id_ + '.png').replace('./', '')
+            f.write(img_ + ' ' + anno_ + '\n')
+
+
+def create_voc_train_aug_lst_txt():
+    voc_train_data_lst = __get_data_list(VOC_TRAIN_TXT)
+    voc_val_data_lst = __get_data_list(VOC_VAL_TXT)
+
+    sbd_train_data_lst = __get_data_list(SBD_TRAIN_TXT)
+    sbd_val_data_lst = __get_data_list(SBD_VAL_TXT)
+
+    with open(VOC_AUG_TRAIN_LST_TXT, mode='w') as f:
+        for id_ in sbd_train_data_lst + sbd_val_data_lst:
+            if id_ in voc_train_data_lst + voc_val_data_lst:
+                continue
+            id_ = id_.strip()
+            img_ = os.path.join(SBD_IMG_DIR, id_ + '.jpg').replace('./', '')
+            anno_ = os.path.join(SBD_ANNO_GRAY_DIR, id_ + '.png').replace('./', '')
+            f.write(img_ + ' ' + anno_ + '\n')
+
+        for id_ in voc_train_data_lst:
+            id_ = id_.strip()
+            img_ = os.path.join(VOC_IMG_DIR, id_ + '.jpg').replace('./', '')
+            anno_ = os.path.join(VOC_ANNO_GRAY_DIR, id_ + '.png').replace('./', '')
+            f.write(img_ + ' ' + anno_ + '\n')
+
+
+if __name__ == '__main__':
+    print('converting voc color png to gray png ...')
+    conv_voc_colorpng_to_graypng()
+    print('converting done.')
+
+    create_voc_train_lst_txt()
+    print('generating voc train list success.')
+
+    create_voc_val_lst_txt()
+    print('generating voc val list success.')
+
+    print('converting sbd annotations to png ...')
+    conv_sbd_mat_to_png()
+    print('converting done')
+
+    create_voc_train_aug_lst_txt()
+    print('generating voc train aug list success.')
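
For reference, a minimal sketch of the full data-preparation flow described in the README change above. The script arguments, output list names, and the /path_to_data/fcn8s_data layout come from the diff; the tar invocations and the extracted directory names (VOCdevkit/, benchmark_RELEASE/) are assumptions about the standard PASCAL VOC 2012 and SBD archives.

```shell
# Assumed: both archives are extracted directly under /path_to_data/fcn8s_data
tar -xf VOCtrainval_11-May-2012.tar -C /path_to_data/fcn8s_data   # expected to yield VOCdevkit/VOC2012/...
tar -xzf benchmark.tgz -C /path_to_data/fcn8s_data                # expected to yield benchmark_RELEASE/dataset/...

# Convert annotations to gray PNGs and generate the list files
# (voc_train_lst.txt, voc_val_lst.txt, vocaug_train_lst.txt under --data_dir);
# each list line has the form "<image path> <gray annotation path>"
python src/data/get_dataset_list.py --data_dir=/path_to_data/fcn8s_data

# Build the MindRecord training data from the augmented train list
python src/data/build_seg_data.py --data_root=/path_to_data/fcn8s_data/benchmark_RELEASE/dataset \
                                  --data_lst=/path_to_data/fcn8s_data/vocaug_train_lst.txt \
                                  --dst_path=dataset/MINDRECORED_NAME.mindrecord \
                                  --num_shards=1 \
                                  --shuffle=True
```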