diff --git a/.jenkins/check/config/whitelizard.txt b/.jenkins/check/config/whitelizard.txt index a571f39f3f1561273618439722f250b731dbd806..d833e83952a9973b61170286d3b1e3ccfb0ad2f8 100644 --- a/.jenkins/check/config/whitelizard.txt +++ b/.jenkins/check/config/whitelizard.txt @@ -6,6 +6,7 @@ # models/official/recommend/wide_and_deep/src/wide_and_deep.py:__init__ models/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py:__init__ +models/research/cv/centernet/src/dataset.py:preprocess_fn models/research/cvtmodel/wide_resnet/src/wide_resnet101_2.py:__init__ models/research/cvtmodel/resnest/src/resnest200.py:__init__ models/research/cvtmodel/resnest/src/resnest200.py:construct diff --git a/official/cv/centerface/src/dataset.py b/official/cv/centerface/src/dataset.py index b2d0cf015ec90cb9bc59fa61038f1efc74578f9b..bd46470bb2faeb9c90b6211fc97ad0451e7efa82 100644 --- a/official/cv/centerface/src/dataset.py +++ b/official/cv/centerface/src/dataset.py @@ -43,7 +43,6 @@ def GetDataLoader(per_batch_size, columns = ['image', "hm", 'reg_mask', 'ind', 'wh', 'wight_mask', 'hm_offset', 'hps_mask', 'landmarks'] de_dataset = de_dataset.map(input_columns=["image", "anns"], output_columns=columns, - column_order=columns, operations=compose_map_func, num_parallel_workers=num_parallel_workers, python_multiprocessing=True) diff --git a/official/cv/cnn_direction_model/src/dataset.py b/official/cv/cnn_direction_model/src/dataset.py index b91ec59855333ce5d9e3bc01671d702d5134e739..a26564caebd6cd2909c5eda519d2cf071108ca67 100644 --- a/official/cv/cnn_direction_model/src/dataset.py +++ b/official/cv/cnn_direction_model/src/dataset.py @@ -195,7 +195,7 @@ def create_dataset_train(mindrecord_file_pos, config, dataset_name='ocr'): data_set = data_set.map(operations=crop_image((0, 150), (0, 150)), input_columns=["image"], num_parallel_workers=8) data_set = data_set.map(operations=create_label(), input_columns=["image"], output_columns=["image", "label"], - column_order=["image", "label"], num_parallel_workers=8) + num_parallel_workers=8) augmentor = Augmentor(config.augment_severity, config.augment_prob) operation = augmentor.process data_set = data_set.map(operations=operation, input_columns=["image"], @@ -252,7 +252,7 @@ def create_dataset_eval(mindrecord_file_pos, config, dataset_name='ocr'): data_set = data_set.map(operations=crop_image((0, 150), (0, 150)), input_columns=["image"], num_parallel_workers=8) data_set = data_set.map(operations=create_label(), input_columns=["image"], output_columns=["image", "label"], - column_order=["image", "label"], num_parallel_workers=8) + num_parallel_workers=8) global image_height global image_width image_height = config.im_size_h diff --git a/official/cv/crnn_seq2seq_ocr/src/dataset.py b/official/cv/crnn_seq2seq_ocr/src/dataset.py index 9d12f36c2e069254ba937ee17a4c968eb020f134..ff60738d3155259b62ec849cd57e474d2c48811c 100644 --- a/official/cv/crnn_seq2seq_ocr/src/dataset.py +++ b/official/cv/crnn_seq2seq_ocr/src/dataset.py @@ -114,8 +114,7 @@ def create_ocr_train_dataset(mindrecord_file, batch_size=32, rank_size=1, rank_i num_parallel_workers=num_parallel_workers) ds = ds.map(operations=ops.PadEnd([config.max_length], 0), input_columns=["decoder_target"]) ds = ds.map(operations=random_teacher_force, input_columns=["image", "decoder_input", "decoder_target"], - output_columns=["image", "decoder_input", "decoder_target", "teacher_force"], - column_order=["image", "decoder_input", "decoder_target", "teacher_force"]) + output_columns=["image", "decoder_input", 
"decoder_target", "teacher_force"]) type_cast_op_bool = ops.TypeCast(mstype.bool_) ds = ds.map(operations=type_cast_op_bool, input_columns="teacher_force") print("Train dataset size= %s" % (int(ds.get_dataset_size()))) diff --git a/official/cv/ctpn/src/dataset.py b/official/cv/ctpn/src/dataset.py index fc8b651b17cb78664f99bc773e6304fcbbc9d67a..53c2633cd0c75fa01fdb06b627663ecc623296d1 100644 --- a/official/cv/ctpn/src/dataset.py +++ b/official/cv/ctpn/src/dataset.py @@ -297,7 +297,6 @@ def create_ctpn_dataset(mindrecord_file, batch_size=1, device_num=1, rank_id=0, if is_training: ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], output_columns=["image", "box", "label", "valid_num", "image_shape"], - column_order=["image", "box", "label", "valid_num", "image_shape"], num_parallel_workers=num_parallel_workers, python_multiprocessing=True) ds = ds.map(operations=[normalize_op, type_cast0], input_columns=["image"], @@ -310,10 +309,8 @@ def create_ctpn_dataset(mindrecord_file, batch_size=1, device_num=1, rank_id=0, ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], output_columns=["image", "box", "label", "valid_num", "image_shape"], - column_order=["image", "box", "label", "valid_num", "image_shape"], num_parallel_workers=8, python_multiprocessing=True) - ds = ds.map(operations=[normalize_op, hwc_to_chw, type_cast1], input_columns=["image"], num_parallel_workers=8) # transpose_column from python to c diff --git a/official/cv/deeptext/src/dataset.py b/official/cv/deeptext/src/dataset.py index f85cf024154994fd36c071b2957bd9871faf2029..02f933e3df67988792e723c67b7bc9dfe19dac1a 100644 --- a/official/cv/deeptext/src/dataset.py +++ b/official/cv/deeptext/src/dataset.py @@ -488,7 +488,6 @@ def create_deeptext_dataset(mindrecord_file, batch_size=2, repeat_num=12, device if is_training: ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], num_parallel_workers=num_parallel_workers) flip = (np.random.rand() < config.flip_ratio) @@ -508,9 +507,7 @@ def create_deeptext_dataset(mindrecord_file, batch_size=2, repeat_num=12, device ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], num_parallel_workers=num_parallel_workers) - ds = ds.map(operations=[normalize_op, hwc_to_chw, type_cast1], input_columns=["image"], num_parallel_workers=24) diff --git a/official/cv/faster_rcnn/src/dataset.py b/official/cv/faster_rcnn/src/dataset.py index ab81ebd1f722350b0c75196cc70757fefcea27d8..c88f4cd282748b94b40ec01715dded653e6cbb5f 100644 --- a/official/cv/faster_rcnn/src/dataset.py +++ b/official/cv/faster_rcnn/src/dataset.py @@ -560,14 +560,12 @@ def create_fasterrcnn_dataset(config, mindrecord_file, batch_size=2, device_num= if is_training: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, python_multiprocessing=python_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) else: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - 
column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/official/cv/maskrcnn/src/dataset.py b/official/cv/maskrcnn/src/dataset.py index 1529ce9a7713c7dabfe6791bca045dcc61edf752..cf8f949acb780948e970fc449ede3005f870a70d 100644 --- a/official/cv/maskrcnn/src/dataset.py +++ b/official/cv/maskrcnn/src/dataset.py @@ -547,7 +547,6 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation", "mask", "mask_shape"], output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"], - column_order=["image", "image_shape", "box", "label", "valid_num", "mask"], python_multiprocessing=False, num_parallel_workers=num_parallel_workers) ds = ds.padded_batch(batch_size, drop_remainder=True, @@ -557,7 +556,6 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation", "mask", "mask_shape"], output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"], - column_order=["image", "image_shape", "box", "label", "valid_num", "mask"], num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/official/cv/maskrcnn_mobilenetv1/src/dataset.py b/official/cv/maskrcnn_mobilenetv1/src/dataset.py index eb065636f6dae21838d8ba9209cf6e75f6088c0b..bf297fb30574494cb24b70e9356c75b34c99a1c4 100644 --- a/official/cv/maskrcnn_mobilenetv1/src/dataset.py +++ b/official/cv/maskrcnn_mobilenetv1/src/dataset.py @@ -562,7 +562,6 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation", "mask", "mask_shape"], output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"], - column_order=["image", "image_shape", "box", "label", "valid_num", "mask"], python_multiprocessing=False, num_parallel_workers=num_parallel_workers) ds = ds.padded_batch(batch_size, drop_remainder=True, @@ -572,7 +571,6 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation", "mask", "mask_shape"], output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"], - column_order=["image", "image_shape", "box", "label", "valid_num", "mask"], num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/official/cv/pvnet/src/dataset.py b/official/cv/pvnet/src/dataset.py index 31b7bcb783969673b646396650f36d6caf30544e..b8e19518d5f274e4d3f1c6518e3152156a2b8af6 100644 --- a/official/cv/pvnet/src/dataset.py +++ b/official/cv/pvnet/src/dataset.py @@ -234,7 +234,6 @@ def create_dataset(cls_list, batch_size=16, workers=16, devices=1, rank=0, multi ds = ds.map(input_columns=["image", "mask", "farthest"], output_columns=["image", "mask", "vertex", "vertex_weight"], - column_order=["image", "mask", "vertex", "vertex_weight"], operations=preprocess_fn, num_parallel_workers=workers, python_multiprocessing=multi_process) img_transforms = C.Compose([ diff --git a/official/cv/retinaface_resnet50/src/dataset.py b/official/cv/retinaface_resnet50/src/dataset.py index ae522616804845ee2ba9d3de08cb6a67069d1145..235133703451f7c09e4714d4acd8512d351570a8 100644 --- a/official/cv/retinaface_resnet50/src/dataset.py 
+++ b/official/cv/retinaface_resnet50/src/dataset.py @@ -149,19 +149,16 @@ def create_dataset(data_dir, cfg, batch_size=32, repeat_num=1, shuffle=True, mul de_dataset = de_dataset.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], - column_order=["image", "annotation"], operations=read_data_from_dataset, python_multiprocessing=multiprocessing, num_parallel_workers=num_worker) de_dataset = de_dataset.map(input_columns=["image", "annotation"], output_columns=["image", "annotation"], - column_order=["image", "annotation"], operations=augmentation, python_multiprocessing=multiprocessing, num_parallel_workers=num_worker) de_dataset = de_dataset.map(input_columns=["image", "annotation"], output_columns=["image", "truths", "conf", "landm"], - column_order=["image", "truths", "conf", "landm"], operations=encode_data, python_multiprocessing=multiprocessing, num_parallel_workers=num_worker) diff --git a/official/cv/retinanet/src/dataset.py b/official/cv/retinanet/src/dataset.py index b0ce8681d0bd15767d112f4f62a65a6c424b2c5d..8658ce297128e947b5c12ecd3bdfb1e82b1138d2 100644 --- a/official/cv/retinanet/src/dataset.py +++ b/official/cv/retinanet/src/dataset.py @@ -534,7 +534,7 @@ def create_retinanet_dataset(mindrecord_file, batch_size, repeat_num, device_num output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=is_training, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=is_training, diff --git a/official/cv/simclr/src/dataset.py b/official/cv/simclr/src/dataset.py index ef913335924ee3facfa73fc08a1c2a1b5f2c5c58..3b668dcb28f9f20ebddde6f68431ac7580a60d15 100644 --- a/official/cv/simclr/src/dataset.py +++ b/official/cv/simclr/src/dataset.py @@ -87,7 +87,7 @@ def create_dataset(args, dataset_mode, repeat_num=1): data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) data_set = data_set.map(operations=copy_column, input_columns=["image", "label"], output_columns=["image1", "image2", "label"], - column_order=["image1", "image2", "label"], num_parallel_workers=8) + num_parallel_workers=8) data_set = data_set.map(operations=trans, input_columns=["image1"], num_parallel_workers=8) data_set = data_set.map(operations=trans, input_columns=["image2"], num_parallel_workers=8) # apply batch operations diff --git a/official/cv/ssd/src/dataset.py b/official/cv/ssd/src/dataset.py index 9e181e526e5ea26eec49629bef46b00eb5d94742..a09caf80e3e377fe65dbc6f9d27060d6456b5c5d 100644 --- a/official/cv/ssd/src/dataset.py +++ b/official/cv/ssd/src/dataset.py @@ -413,7 +413,7 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, device_num=1, rank=0, output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=use_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=use_multiprocessing, diff --git a/official/cv/unet/src/data_loader.py b/official/cv/unet/src/data_loader.py index 
bdbb0f3865ae5739a80b8f9f75ef1296459d5f48..f2fc3eb15e81f670d24a3e4006ba16b63d98398c 100644 --- a/official/cv/unet/src/data_loader.py +++ b/official/cv/unet/src/data_loader.py @@ -261,7 +261,7 @@ def create_multi_class_dataset(data_dir, img_size, repeat, batch_size, num_class compose_map_func = (lambda image, mask: preprocess_img_mask(image, mask, num_classes, tuple(img_size), augment and is_train, eval_resize)) dataset = dataset.map(operations=compose_map_func, input_columns=mc_dataset.column_names, - output_columns=mc_dataset.column_names, column_order=mc_dataset.column_names, + output_columns=mc_dataset.column_names, num_parallel_workers=num_parallel_workers) dataset = dataset.batch(batch_size, drop_remainder=is_train, num_parallel_workers=num_parallel_workers) return dataset diff --git a/official/cv/yolov3_darknet53/src/yolo_dataset.py b/official/cv/yolov3_darknet53/src/yolo_dataset.py index 82f06c7f4166b91c9f6c50b4e991f1b004bb533e..27b930fc2f0d2b49f1c4dc80586b1e236a5ea241 100644 --- a/official/cv/yolov3_darknet53/src/yolo_dataset.py +++ b/official/cv/yolov3_darknet53/src/yolo_dataset.py @@ -182,7 +182,6 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, device_num, rank, compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config)) dataset = dataset.map(operations=compose_map_func, input_columns=["image", "img_id"], output_columns=["image", "image_shape", "img_id"], - column_order=["image", "image_shape", "img_id"], num_parallel_workers=8) dataset = dataset.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8) dataset = dataset.batch(batch_size, drop_remainder=True) diff --git a/official/cv/yolov3_resnet18/src/dataset.py b/official/cv/yolov3_resnet18/src/dataset.py index e5d3f391d2f5a2d4ee6e7e84eae28edd51f66695..b5e5b41b1a2ce12d8eaac9219c72228830360ca4 100644 --- a/official/cv/yolov3_resnet18/src/dataset.py +++ b/official/cv/yolov3_resnet18/src/dataset.py @@ -296,7 +296,6 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, device_num=1, rank=0, hwc_to_chw = C.HWC2CHW() ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], - column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"], num_parallel_workers=num_parallel_workers) if "x86" in platform.machine(): ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers) @@ -306,6 +305,5 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, device_num=1, rank=0, else: ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"], output_columns=["image", "image_shape", "annotation"], - column_order=["image", "image_shape", "annotation"], num_parallel_workers=num_parallel_workers) return ds diff --git a/official/cv/yolov4/src/yolo_dataset.py b/official/cv/yolov4/src/yolo_dataset.py index f01e55021f6f4f233ff6da1bd9ff28c8692ad927..82d8ef33a646267322ab3f17a2de4d5e764d093f 100644 --- a/official/cv/yolov4/src/yolo_dataset.py +++ b/official/cv/yolov4/src/yolo_dataset.py @@ -275,7 +275,6 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num, compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, default_config)) ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"], output_columns=["image", "image_shape", "img_id"], - column_order=["image", "image_shape", "img_id"], num_parallel_workers=8) ds = ds.map(operations=hwc_to_chw, 
input_columns=["image"], num_parallel_workers=8) ds = ds.batch(batch_size, drop_remainder=True) @@ -337,7 +336,6 @@ def create_yolo_datasetv2(image_dir, compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, default_config)) ds = ds.map(input_columns=["image", "img_id"], output_columns=["image", "image_shape", "img_id"], - column_order=["image", "image_shape", "img_id"], operations=compose_map_func, num_parallel_workers=8) ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/official/cv/yolov5/src/yolo_dataset.py b/official/cv/yolov5/src/yolo_dataset.py index 01eb38dd2434c44d3311d1e24ad93f85f43898f5..c79cdc4a242f1aed408fd65ac80391d2da70d42b 100644 --- a/official/cv/yolov5/src/yolo_dataset.py +++ b/official/cv/yolov5/src/yolo_dataset.py @@ -260,11 +260,12 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, device_num, rank, dataset = ds.GeneratorDataset(yolo_dataset, column_names=dataset_column_names, sampler=distributed_sampler, python_multiprocessing=True, num_parallel_workers=min(4, num_parallel_workers)) dataset = dataset.map(operations=multi_scale_trans, input_columns=dataset_column_names, - output_columns=map1_out_column_names, column_order=map1_out_column_names, + output_columns=map1_out_column_names, num_parallel_workers=min(12, num_parallel_workers), python_multiprocessing=True) dataset = dataset.map(operations=PreprocessTrueBox(config), input_columns=map2_in_column_names, - output_columns=map2_out_column_names, column_order=output_column_names, + output_columns=map2_out_column_names, num_parallel_workers=min(4, num_parallel_workers), python_multiprocessing=False) + dataset = dataset.project(output_column_names) # Computed from random subset of ImageNet training images mean = [m * 255 for m in [0.485, 0.456, 0.406]] std = [s * 255 for s in [0.229, 0.224, 0.225]] @@ -284,7 +285,6 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, device_num, rank, compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config)) dataset = dataset.map(operations=compose_map_func, input_columns=["image", "img_id"], output_columns=["image", "image_shape", "img_id"], - column_order=["image", "image_shape", "img_id"], num_parallel_workers=8) dataset = dataset.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8) dataset = dataset.batch(batch_size, drop_remainder=True) diff --git a/official/nlp/bert/src/finetune_data_preprocess.py b/official/nlp/bert/src/finetune_data_preprocess.py index 6906fca7953186b0d26ebcdfeb59d799a98bae20..22bf018f2c0ee50b7163dbd71818b56a1f168bab 100644 --- a/official/nlp/bert/src/finetune_data_preprocess.py +++ b/official/nlp/bert/src/finetune_data_preprocess.py @@ -45,8 +45,8 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage usage=data_usage, shuffle=shuffle_dataset) ### Processing label if data_usage == 'test': - dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"], - column_order=["id", "label_id", "sentence"]) + dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"]) + dataset = dataset.project(["id", "label_id", "sentence"]) dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"]) else: label_vocab = text.Vocab.from_list(label_list) @@ -63,12 +63,12 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage dataset = dataset.map(operations=lookup, 
input_columns=["sentence"], output_columns=["text_ids"]) dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"]) dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"], - output_columns=["text_ids", "mask_ids"], - column_order=["text_ids", "mask_ids", "label_id"]) + output_columns=["text_ids", "mask_ids"]) + dataset = dataset.project(["text_ids", "mask_ids", "label_id"]) dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"]) dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"], - output_columns=["text_ids", "segment_ids"], - column_order=["text_ids", "mask_ids", "segment_ids", "label_id"]) + output_columns=["text_ids", "segment_ids"]) + dataset = dataset.project(["text_ids", "mask_ids", "segment_ids", "label_id"]) dataset = dataset.map(operations=ops.Fill(0), input_columns=["segment_ids"]) dataset = dataset.batch(batch_size, drop_remainder=drop_remainder) return dataset @@ -90,8 +90,8 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage usage=data_usage, shuffle=shuffle_dataset) ### Processing label if data_usage == 'test': - dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"], - column_order=["id", "label_id", "sentence1", "sentence2"]) + dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"]) + dataset = dataset.project(["id", "label_id", "sentence1", "sentence2"]) dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"]) else: label_vocab = text.Vocab.from_list(label_list) @@ -114,27 +114,27 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage input_columns=["sentence2"]) ### Generating segment_ids dataset = dataset.map(operations=ops.Duplicate(), input_columns=["sentence1"], - output_columns=["sentence1", "type_sentence1"], - column_order=["sentence1", "type_sentence1", "sentence2", "label_id"]) + output_columns=["sentence1", "type_sentence1"]) + dataset = dataset.project(["sentence1", "type_sentence1", "sentence2", "label_id"]) dataset = dataset.map(operations=ops.Duplicate(), - input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"], - column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"]) + input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"]) + dataset = dataset.project(["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"]) dataset = dataset.map(operations=[lookup, ops.Fill(0)], input_columns=["type_sentence1"]) dataset = dataset.map(operations=[lookup, ops.Fill(1)], input_columns=["type_sentence2"]) dataset = dataset.map(operations=ops.Concatenate(), - input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"], - column_order=["sentence1", "sentence2", "segment_ids", "label_id"]) + input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"]) + dataset = dataset.project(["sentence1", "sentence2", "segment_ids", "label_id"]) dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["segment_ids"]) ### Generating text_ids dataset = dataset.map(operations=ops.Concatenate(), - input_columns=["sentence1", "sentence2"], output_columns=["text_ids"], - column_order=["text_ids", "segment_ids", "label_id"]) + input_columns=["sentence1", "sentence2"], output_columns=["text_ids"]) + dataset = dataset.project(["text_ids", 
"segment_ids", "label_id"]) dataset = dataset.map(operations=lookup, input_columns=["text_ids"]) dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"]) ### Generating mask_ids dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"], - output_columns=["text_ids", "mask_ids"], - column_order=["text_ids", "mask_ids", "segment_ids", "label_id"]) + output_columns=["text_ids", "mask_ids"]) + dataset = dataset.project(["text_ids", "mask_ids", "segment_ids", "label_id"]) dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"]) dataset = dataset.batch(batch_size, drop_remainder=drop_remainder) return dataset @@ -214,12 +214,12 @@ def process_ner_msra_dataset(data_dir, label_list, bert_vocab_path, max_seq_len= dataset = dataset.map(operations=lookup, input_columns=["sentence"], output_columns=["input_ids"]) dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["input_ids"]) dataset = dataset.map(operations=ops.Duplicate(), input_columns=["input_ids"], - output_columns=["input_ids", "input_mask"], - column_order=["input_ids", "input_mask", "label_ids"]) + output_columns=["input_ids", "input_mask"]) + dataset = dataset.project(["input_ids", "input_mask", "label_ids"]) dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["input_mask"]) dataset = dataset.map(operations=ops.Duplicate(), input_columns=["input_ids"], - output_columns=["input_ids", "segment_ids"], - column_order=["input_ids", "input_mask", "segment_ids", "label_ids"]) + output_columns=["input_ids", "segment_ids"]) + dataset = dataset.project(["input_ids", "input_mask", "segment_ids", "label_ids"]) dataset = dataset.map(operations=ops.Fill(0), input_columns=["segment_ids"]) return dataset diff --git a/official/nlp/gru/src/dataset.py b/official/nlp/gru/src/dataset.py index 3700aba8263f392a0a29e3cd40c6eeb7c44a0e96..1973c8ba0c2d1824b38fb6cf4dddfddf6c6cf781 100644 --- a/official/nlp/gru/src/dataset.py +++ b/official/nlp/gru/src/dataset.py @@ -37,8 +37,7 @@ def create_gru_dataset(epoch_count=1, batch_size=1, rank_size=1, rank_id=0, do_s shuffle=do_shuffle, num_parallel_workers=10, num_shards=rank_size, shard_id=rank_id) operations = random_teacher_force ds = ds.map(operations=operations, input_columns=["source_ids", "target_ids", "target_mask"], - output_columns=["source_ids", "target_ids", "teacher_force"], - column_order=["source_ids", "target_ids", "teacher_force"]) + output_columns=["source_ids", "target_ids", "teacher_force"]) type_cast_op = deC.c_transforms.TypeCast(mstype.int32) type_cast_op_bool = deC.c_transforms.TypeCast(mstype.bool_) ds = ds.map(operations=type_cast_op, input_columns="source_ids") diff --git a/official/nlp/pangu_alpha/src/dataset.py b/official/nlp/pangu_alpha/src/dataset.py index 8e803d82d11195347f00cfd440650a152b052f39..70bcd8557cc26fd02fb56b3b26828d3335f7982d 100644 --- a/official/nlp/pangu_alpha/src/dataset.py +++ b/official/nlp/pangu_alpha/src/dataset.py @@ -125,8 +125,7 @@ def create_dataset(batch_size, data_path, device_num=1, rank=0, drop=True, full_ if eod_reset: dataset = dataset.batch(batch_size, drop_remainder=drop) dataset = dataset.map(operations=map_func, input_columns=[column_name], - output_columns=[column_name, "position_id", "attention_mask"], - column_order=[column_name, "position_id", "attention_mask"]) + output_columns=[column_name, "position_id", "attention_mask"]) dataset = dataset.map(input_columns="position_id", 
operations=type_cast_op) dataset = dataset.map(input_columns="attention_mask", operations=type_cast_op_float) else: diff --git a/official/recommend/deep_and_cross/src/datasets.py b/official/recommend/deep_and_cross/src/datasets.py index ff7ae55ac891fdac949062fda17918dc290ee742..cac71be0c9969b0620dce25d5c60ebfdf3cd262f 100644 --- a/official/recommend/deep_and_cross/src/datasets.py +++ b/official/recommend/deep_and_cross/src/datasets.py @@ -229,7 +229,7 @@ def _get_tf_dataset(data_dir, train_mode=True, epochs=1, batch_size=1000, ds = ds.map(operations=_padding_func(batch_size, manual_shape, target_column), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) + num_parallel_workers=8) ds = ds.repeat(epochs) return ds @@ -268,7 +268,6 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=10, batch_size=16 ds = ds.batch(int(batch_size / line_per_sample), drop_remainder=True) ds = ds.map(_padding_func(batch_size, manual_shape, target_column, target_column-1), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) ds = ds.repeat(epochs) return ds diff --git a/official/recommend/deepfm/infer/mindrecord2bin.py b/official/recommend/deepfm/infer/mindrecord2bin.py index fa9a80e9355a9e6317290c08ec4eb0097b8ecb46..a4cfbe8bd36c5e3f8ccf23f6618252fcec504b81 100644 --- a/official/recommend/deepfm/infer/mindrecord2bin.py +++ b/official/recommend/deepfm/infer/mindrecord2bin.py @@ -42,7 +42,6 @@ data_set = data_set.map(operations=(lambda x, y, z: (np.array(x).flatten().resha batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) d = data_set.create_dict_iterator() diff --git a/official/recommend/deepfm/src/dataset.py b/official/recommend/deepfm/src/dataset.py index c2d3f7e2bd6566e152388bd51e18c277d848a747..401a81d1c72a614efb26154025108d4e43af55f1 100644 --- a/official/recommend/deepfm/src/dataset.py +++ b/official/recommend/deepfm/src/dataset.py @@ -211,7 +211,6 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100 np.array(y).flatten().reshape(batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) data_set = data_set.repeat(epochs) return data_set @@ -259,7 +258,6 @@ def _get_tf_dataset(directory, train_mode=True, epochs=1, batch_size=1000, np.array(y).flatten().reshape(batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) data_set = data_set.repeat(epochs) return data_set diff --git a/official/recommend/fibinet/src/datasets.py b/official/recommend/fibinet/src/datasets.py index 61c85a514ed893633a3c352fc44cbaac60c0a9bf..f6c7c1a5da03a25ff0471c16265dca7e41043551 100644 --- a/official/recommend/fibinet/src/datasets.py +++ b/official/recommend/fibinet/src/datasets.py @@ -221,7 +221,7 @@ def _get_tf_dataset(data_dir, train_mode=True, batch_size=1000, data_set = data_set.map(operations=_padding_func(batch_size, manual_shape, target_column), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) + num_parallel_workers=8) return data_set @@ -259,7 +259,6 @@ def 
_get_mindrecord_dataset(directory, train_mode=True, batch_size=1000, data_set = data_set.batch(int(batch_size / line_per_sample), drop_remainder=True) data_set = data_set.map(_padding_func(batch_size, manual_shape, target_column), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) return data_set diff --git a/official/recommend/wide_and_deep/infer/mindrecord2bin.py b/official/recommend/wide_and_deep/infer/mindrecord2bin.py index f2ad4e9802ec892af1614b3bb3bf7ba1cccbf942..dc5c1f85fbfe74733001173b6ce7984813949e24 100644 --- a/official/recommend/wide_and_deep/infer/mindrecord2bin.py +++ b/official/recommend/wide_and_deep/infer/mindrecord2bin.py @@ -38,7 +38,6 @@ data_set = data_set.map(operations=(lambda x, y, z: (np.array(x).flatten().resha batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) d = data_set.create_dict_iterator() diff --git a/official/recommend/wide_and_deep/src/datasets.py b/official/recommend/wide_and_deep/src/datasets.py index e7664307a5b23751d9ec5d94f5e8879fe29f1230..c16d07e09cbd48ade6fdd042b78a6bd4736f00af 100644 --- a/official/recommend/wide_and_deep/src/datasets.py +++ b/official/recommend/wide_and_deep/src/datasets.py @@ -226,7 +226,7 @@ def _get_tf_dataset(data_dir, train_mode=True, batch_size=1000, data_set = data_set.map(operations=_padding_func(batch_size, manual_shape, target_column), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) + num_parallel_workers=8) return data_set @@ -264,7 +264,6 @@ def _get_mindrecord_dataset(directory, train_mode=True, batch_size=1000, data_set = data_set.batch(int(batch_size / line_per_sample), drop_remainder=True) data_set = data_set.map(_padding_func(batch_size, manual_shape, target_column), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) return data_set diff --git a/official/recommend/wide_and_deep_multitable/src/datasets.py b/official/recommend/wide_and_deep_multitable/src/datasets.py index bb40c700348f2e38d7c20544b112ef917ec57193..863d0fe75ac5c6d444431c7504e778107d86a9dc 100644 --- a/official/recommend/wide_and_deep_multitable/src/datasets.py +++ b/official/recommend/wide_and_deep_multitable/src/datasets.py @@ -270,18 +270,18 @@ def _get_tf_dataset(data_dir, 'multi_doc_ad_topic_id_mask', 'ad_id', 'display_ad_and_is_leak', 'display_id', 'is_leak' ], - column_order=[ - 'label', 'continue_val', 'indicator_id', 'emb_128_id', - 'emb_64_single_id', 'multi_doc_ad_category_id', - 'multi_doc_ad_category_id_mask', 'multi_doc_event_entity_id', - 'multi_doc_event_entity_id_mask', 'multi_doc_ad_entity_id', - 'multi_doc_ad_entity_id_mask', 'multi_doc_event_topic_id', - 'multi_doc_event_topic_id_mask', 'multi_doc_event_category_id', - 'multi_doc_event_category_id_mask', 'multi_doc_ad_topic_id', - 'multi_doc_ad_topic_id_mask', 'display_id', 'ad_id', - 'display_ad_and_is_leak', 'is_leak' - ], num_parallel_workers=8) + data_set = data_set.project([ + 'label', 'continue_val', 'indicator_id', 'emb_128_id', + 'emb_64_single_id', 'multi_doc_ad_category_id', + 'multi_doc_ad_category_id_mask', 'multi_doc_event_entity_id', + 'multi_doc_event_entity_id_mask', 'multi_doc_ad_entity_id', + 'multi_doc_ad_entity_id_mask', 'multi_doc_event_topic_id', + 'multi_doc_event_topic_id_mask', 
'multi_doc_event_category_id', + 'multi_doc_event_category_id_mask', 'multi_doc_ad_topic_id', + 'multi_doc_ad_topic_id_mask', 'display_id', 'ad_id', + 'display_ad_and_is_leak', 'is_leak' + ]) data_set = data_set.repeat(epochs) return data_set diff --git a/research/audio/jasper/src/dataset.py b/research/audio/jasper/src/dataset.py index fd210c8a3b7c02a332c2a1af53a7b614f7979a65..e3ae9e2283621af51fd8aa48dde36dd51c04cb8c 100644 --- a/research/audio/jasper/src/dataset.py +++ b/research/audio/jasper/src/dataset.py @@ -433,13 +433,10 @@ def create_train_dataset(mindrecord_files, labels, batch_size, train_mode, rank= batch_spect, batch_script, len(labels) - 1)) ds = ds.map(operations=compose_map_func, input_columns=["batch_spect", "batch_script"], output_columns=["inputs", "input_length", "targets"], - column_order=["inputs", "input_length", "targets"], num_parallel_workers=8) ds = ds.batch(batch_size, drop_remainder=True) ds = ds.map(operations=postprocess, input_columns=["inputs", "input_length", "targets"], - output_columns=["inputs", "input_length", - "target_indices", "targets"], - column_order=["inputs", "input_length", "target_indices", "targets"]) + output_columns=["inputs", "input_length", "target_indices", "targets"]) return ds diff --git a/research/cv/AVA_cifar/src/datasets.py b/research/cv/AVA_cifar/src/datasets.py index df2d7a6d39ffe00acfbdb75acbb43f090e21f930..f5a0c0cabb8c1099201231936cd9dab1214193a4 100644 --- a/research/cv/AVA_cifar/src/datasets.py +++ b/research/cv/AVA_cifar/src/datasets.py @@ -97,8 +97,8 @@ def makeup_train_dataset(ds1, ds2, ds3, batchsize, epoch): ds_new = ds_new.project(columns=['data1', 'data2']) ds_new = ds.zip((ds3, ds_new)) ds_new = ds_new.map(input_columns=['label'], output_columns=['label'], - column_order=['data3', 'data2', 'data1', 'label'], operations=lambda x: x) + ds_new = ds_new.project(['data3', 'data2', 'data1', 'label']) # to keep the order : data3 data2 data1 label # ds_new = ds_new.shuffle(ds_new.get_dataset_size()) @@ -164,11 +164,10 @@ def get_train_test_dataset(train_data_dir, test_data_dir, batchsize, epoch=1): func1 = lambda x, y: (x, y, np.array(1, dtype=np.int32)) input_cols = ["image", "label"] output_cols = ["image", "label", "training"] - cols_order = ["image", "label", "training"] cifar10_test_dataset = cifar10_test_dataset.map(input_columns=input_cols, output_columns=output_cols, - operations=func0, column_order=cols_order) + operations=func0) cifar10_train_dataset = cifar10_train_dataset.map(input_columns=input_cols, output_columns=output_cols, - operations=func1, column_order=cols_order) + operations=func1) concat_dataset = cifar10_train_dataset + cifar10_test_dataset concat_dataset = concat_dataset.batch(batchsize) concat_dataset = concat_dataset.repeat(epoch) diff --git a/research/cv/AlignedReID/src/dataset.py b/research/cv/AlignedReID/src/dataset.py index 9014b2467e379ca7c810279860b4c72eb292e4ab..87f33455ff5a77e41296f662ce4fd84627854df6 100644 --- a/research/cv/AlignedReID/src/dataset.py +++ b/research/cv/AlignedReID/src/dataset.py @@ -263,7 +263,7 @@ def create_dataset( istrain=True, return_len=False, ): - """ Crate dataloader for ReID + """ Create dataloader for ReID Args: image_folder: path to image folder @@ -328,7 +328,6 @@ def create_dataset( operations=compose_map_func, input_columns=["image", "label"], output_columns=["image", "label"], - column_order=["image", "label"], num_parallel_workers=num_parallel_workers, ) diff --git a/research/cv/CascadeRCNN/src/dataset.py b/research/cv/CascadeRCNN/src/dataset.py index 
8fd59ebd1aca423a05286dabc1e96e458f9d0e29..233517cea7b36f49a6013b8935a91d01445960d3 100644 --- a/research/cv/CascadeRCNN/src/dataset.py +++ b/research/cv/CascadeRCNN/src/dataset.py @@ -470,14 +470,12 @@ def create_cascadercnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank if is_training: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, python_multiprocessing=False, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) else: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/research/cv/EDSR/eval.py b/research/cv/EDSR/eval.py index 9e082b33c3703f9cc9d84af40605496f40a632ba..6475b98db9a49d0cf4f4d06436472f9b258d19c7 100644 --- a/research/cv/EDSR/eval.py +++ b/research/cv/EDSR/eval.py @@ -113,7 +113,6 @@ def create_dataset_benchmark(dataset_path, scale): # pre-process hr lr generator_dataset = generator_dataset.map(input_columns=column_names, output_columns=column_names, - column_order=column_names, operations=transform_img) # apply batch operations diff --git a/research/cv/EDSR/src/dataset.py b/research/cv/EDSR/src/dataset.py index d4e54d82c75acfc020ac1ea2aae621379330ccab..a1f4e7fd3a61e2c3a932088bac03d9faba1e304a 100644 --- a/research/cv/EDSR/src/dataset.py +++ b/research/cv/EDSR/src/dataset.py @@ -309,7 +309,6 @@ def create_dataset_DIV2K(config, dataset_type="train", num_parallel_workers=10, # pre-process hr lr generator_dataset = generator_dataset.map(input_columns=column_names, output_columns=column_names, - column_order=column_names, operations=transform_img) # apply batch operations diff --git a/research/cv/EfficientDet_d0/src/dataset.py b/research/cv/EfficientDet_d0/src/dataset.py index 5c5f87644c56548a8c8c7c1a46550e7b5057da57..d4378ebc1cd7878d83df6911255e96994199fa26 100644 --- a/research/cv/EfficientDet_d0/src/dataset.py +++ b/research/cv/EfficientDet_d0/src/dataset.py @@ -131,7 +131,7 @@ def create_EfficientDet_datasets(mindrecord_file, batch_size, repeat_num, device output_columns = ["image", "anno"] ds = ds.map(operations=preprocess_fn, input_columns=["image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=is_training, num_parallel_workers=num_parallel_workers) diff --git a/research/cv/FaceDetection/src/data_preprocess.py b/research/cv/FaceDetection/src/data_preprocess.py index 40081559b9bf3d760af2d6da2547708b62f405a5..871694614b7b14a35ac34ba9c19c2ee2fdbe0e0c 100644 --- a/research/cv/FaceDetection/src/data_preprocess.py +++ b/research/cv/FaceDetection/src/data_preprocess.py @@ -255,11 +255,6 @@ def create_dataset(args): 'conf_pos_mask_1', 'conf_neg_mask_1', 'cls_mask_1', 't_coord_1', 't_conf_1', 't_cls_1', 'gt_list_1', 'coord_mask_2', 'conf_pos_mask_2', 'conf_neg_mask_2', 'cls_mask_2', 't_coord_2', 't_conf_2', 't_cls_2', 'gt_list_2'], - column_order=["image", "annotation", 'coord_mask_0', 'conf_pos_mask_0', 'conf_neg_mask_0', - 'cls_mask_0', 't_coord_0', 't_conf_0', 't_cls_0', 'gt_list_0', 'coord_mask_1', - 'conf_pos_mask_1', 'conf_neg_mask_1', 'cls_mask_1', 't_coord_1', 't_conf_1', - 't_cls_1', 'gt_list_1', 
'coord_mask_2', 'conf_pos_mask_2', 'conf_neg_mask_2', - 'cls_mask_2', 't_coord_2', 't_conf_2', 't_cls_2', 'gt_list_2'], operations=compose_map_func, num_parallel_workers=get_num_parallel_workers(16), python_multiprocessing=True) diff --git a/research/cv/RefineDet/src/dataset.py b/research/cv/RefineDet/src/dataset.py index bac268b496c7c03a2796c267089d6100eea9853a..83ed0adfaf882ba11a079a1a6a6f9f421eb86cb1 100644 --- a/research/cv/RefineDet/src/dataset.py +++ b/research/cv/RefineDet/src/dataset.py @@ -425,7 +425,7 @@ def create_refinedet_dataset(config, mindrecord_file, batch_size=32, repeat_num= output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=use_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=use_multiprocessing, diff --git a/research/cv/Yolact++/src/dataset.py b/research/cv/Yolact++/src/dataset.py index 02c95b56689dea7a26c1423f52f5d16f5c4fa644..3b88b179d55a17d13e3daeff2261c9b2e762efde 100644 --- a/research/cv/Yolact++/src/dataset.py +++ b/research/cv/Yolact++/src/dataset.py @@ -479,19 +479,17 @@ def create_yolact_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id=0 ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation", "mask", "num_crowdses"], output_columns=["image", "box", "label", "crowd_box", "mask"], - column_order=["image", "box", "label", "crowd_box", "mask"], python_multiprocessing=False, num_parallel_workers=8) ds = ds.padded_batch(batch_size, drop_remainder=True, pad_info={"mask": ([cfg['max_instance_count'], None, None], 0)}) - else: ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation", "mask", "mask_shape"], output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"], - column_order=["image", "image_shape", "box", "label", "valid_num", "mask"], num_parallel_workers=num_parallel_workers) + ds = ds.project(["image", "image_shape", "box", "label", "valid_num", "mask"]) ds = ds.batch(batch_size, drop_remainder=True) return ds diff --git a/research/cv/centernet/src/dataset.py b/research/cv/centernet/src/dataset.py index df154bbbe8d259882c582b4296fcfe15334daf07..411e9302572c08a700e9a1d750a759fbb6d99cd1 100644 --- a/research/cv/centernet/src/dataset.py +++ b/research/cv/centernet/src/dataset.py @@ -411,8 +411,6 @@ class COCOHP(ds.Dataset): input_columns=["image", "num_objects", "keypoints", "bbox", "category_id"], output_columns=["image", "hm", "reg_mask", "ind", "wh", "kps", "kps_mask", "reg", "hm_hp", "hp_offset", "hp_ind", "hp_mask"], - column_order=["image", "hm", "reg_mask", "ind", "wh", "kps", "kps_mask", - "reg", "hm_hp", "hp_offset", "hp_ind", "hp_mask"], num_parallel_workers=num_parallel_workers, python_multiprocessing=True) data_set = data_set.batch(batch_size, drop_remainder=True, num_parallel_workers=8) diff --git a/research/cv/centernet_det/src/dataset.py b/research/cv/centernet_det/src/dataset.py index 4944082548d6c719102b4ee648734203cac61aa4..a180d5a69a865b0b74622077081b2d9f3950b03f 100644 --- a/research/cv/centernet_det/src/dataset.py +++ b/research/cv/centernet_det/src/dataset.py @@ -379,9 +379,9 @@ class COCOHP(ds.Dataset): data_set = data_set.map(operations=self.preprocess_fn, input_columns=["image", "num_objects", "bboxes", "category_id"], 
output_columns=["image", "hm", "reg_mask", "ind", "wh", "reg"], - column_order=["image", "hm", "reg_mask", "ind", "wh", "reg"], num_parallel_workers=num_parallel_workers, python_multiprocessing=True) + data_set = data_set.project(["image", "hm", "reg_mask", "ind", "wh", "reg"]) data_set = data_set.batch(batch_size, drop_remainder=True, num_parallel_workers=8) logger.info("data size: {}".format(data_set.get_dataset_size())) logger.info("repeat count: {}".format(data_set.get_repeat_count())) diff --git a/research/cv/centernet_resnet101/src/dataset.py b/research/cv/centernet_resnet101/src/dataset.py index adf33ca01c4bf8749db42dd672deefe5d6acabbe..30c8467bf79e5aed4ad73aadc43c1d21b879a946 100644 --- a/research/cv/centernet_resnet101/src/dataset.py +++ b/research/cv/centernet_resnet101/src/dataset.py @@ -379,9 +379,9 @@ class COCOHP(ds.Dataset): data_set = data_set.map(operations=self.preprocess_fn, input_columns=["image", "num_objects", "bboxes", "category_id"], output_columns=["image", "hm", "reg_mask", "ind", "wh", "reg"], - column_order=["image", "hm", "reg_mask", "ind", "wh", "reg"], num_parallel_workers=num_parallel_workers, python_multiprocessing=True) + data_set = data_set.project(["image", "hm", "reg_mask", "ind", "wh", "reg"]) data_set = data_set.batch(batch_size, drop_remainder=True, num_parallel_workers=8) logger.info("data size: {}".format(data_set.get_dataset_size())) logger.info("repeat count: {}".format(data_set.get_repeat_count())) diff --git a/research/cv/centernet_resnet50_v1/src/dataset.py b/research/cv/centernet_resnet50_v1/src/dataset.py index b6ab333cac67525e42b607fde76fc3f08d895dad..cfe7c1b1624fde0c368ee9c6c9f1a203fd1c6e19 100644 --- a/research/cv/centernet_resnet50_v1/src/dataset.py +++ b/research/cv/centernet_resnet50_v1/src/dataset.py @@ -379,9 +379,9 @@ class COCOHP(ds.Dataset): data_set = data_set.map(operations=self.preprocess_fn, input_columns=["image", "num_objects", "bboxes", "category_id"], output_columns=["image", "hm", "reg_mask", "ind", "wh", "reg"], - column_order=["image", "hm", "reg_mask", "ind", "wh", "reg"], num_parallel_workers=num_parallel_workers, python_multiprocessing=True) + data_set = data_set.project(["image", "hm", "reg_mask", "ind", "wh", "reg"]) data_set = data_set.batch(batch_size, drop_remainder=True, num_parallel_workers=8) logger.info("data size: {}".format(data_set.get_dataset_size())) logger.info("repeat count: {}".format(data_set.get_repeat_count())) diff --git a/research/cv/dcgan/src/dataset.py b/research/cv/dcgan/src/dataset.py index 4e0c06772b3a1d9f2c809fb14b362057e866fb1d..abb55984fb10cd4fb6ce509915b312d6e8df207a 100644 --- a/research/cv/dcgan/src/dataset.py +++ b/research/cv/dcgan/src/dataset.py @@ -62,9 +62,9 @@ def create_dataset_imagenet(dataset_path, num_parallel_workers=None): np.random.normal(size=(dcgan_imagenet_cfg.latent_size, 1, 1)).astype("float32") ), output_columns=["image", "latent_code"], - column_order=["image", "latent_code"], num_parallel_workers=num_parallel_workers ) + data_set = data_set.project(["image", "latent_code"]) data_set = data_set.batch(dcgan_imagenet_cfg.batch_size) diff --git a/research/cv/detr/src/dataset.py b/research/cv/detr/src/dataset.py index 000fb9dbb5d26d19cc03c0d0ed36cac78284c93e..55d24a79306761611cc8900d6236b6004a71987d 100755 --- a/research/cv/detr/src/dataset.py +++ b/research/cv/detr/src/dataset.py @@ -176,9 +176,9 @@ def build_dataset(cfg): partial(pad_image_to_max_size, max_size=cfg.max_img_size), input_columns=['image'], output_columns=['image', 'mask'], - column_order=['image', 
'mask', 'bboxes', 'labels', 'orig_sizes', 'n_boxes', 'img_id'], num_parallel_workers=cfg.num_workers ) + dataset = dataset.project(['image', 'mask', 'bboxes', 'labels', 'orig_sizes', 'n_boxes', 'img_id']) if cfg.eval: dataset = dataset.batch(cfg.batch_size) dataset = dataset.repeat(1) diff --git a/research/cv/faceboxes/src/dataset.py b/research/cv/faceboxes/src/dataset.py index 2223565539a93fc738447e984f740d8c05138c5a..6335410a36cd0322d30ca6a16fb239b28d2add0b 100644 --- a/research/cv/faceboxes/src/dataset.py +++ b/research/cv/faceboxes/src/dataset.py @@ -113,11 +113,9 @@ def create_dataset(data_dir, cfg, batch_size=32, repeat_num=1, shuffle=True, mul data_set = data_set.map(input_columns=["image", "annotation"], output_columns=["image", "truths", "conf"], - column_order=["image", "truths", "conf"], operations=union_data, python_multiprocessing=multiprocessing, num_parallel_workers=num_worker) - data_set = data_set.batch(batch_size, drop_remainder=True) data_set = data_set.repeat(repeat_num) diff --git a/research/cv/faster_rcnn_dcn/src/dataset.py b/research/cv/faster_rcnn_dcn/src/dataset.py index 72cba6b04f5b4e92acdd548fe5e58925bf0202c9..39fa733844a396a958da62abe4a801477eafb6ee 100644 --- a/research/cv/faster_rcnn_dcn/src/dataset.py +++ b/research/cv/faster_rcnn_dcn/src/dataset.py @@ -844,14 +844,12 @@ def create_fasterrcnn_dataset(config, mindrecord_file, batch_size=2, device_num= if is_training: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, python_multiprocessing=python_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) else: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/research/cv/gan/src/dataset.py b/research/cv/gan/src/dataset.py index 5872e2af5ca5c2e52786eb5b215dda8377b76d1a..4846c3a01ed140c0630f1b356b7779a5f14eef48 100644 --- a/research/cv/gan/src/dataset.py +++ b/research/cv/gan/src/dataset.py @@ -133,9 +133,9 @@ def create_dataset_train(batch_size=5, repeat_size=1, latent_size=100): x.astype("float32"), np.random.normal(size=(latent_size)).astype("float32") ), - output_columns=["image", "latent_code"], - column_order=["image", "latent_code"] + output_columns=["image", "latent_code"] ) + mnist_ds = mnist_ds.project(["image", "latent_code"]) mnist_ds = mnist_ds.batch(batch_size, True) mnist_ds = mnist_ds.repeat(1) return mnist_ds @@ -150,9 +150,9 @@ def create_dataset_train_dis(batch_size=5, repeat_size=1, latent_size=100): x.astype("float32"), np.random.normal(size=(latent_size)).astype("float32") ), - output_columns=["image", "latent_code"], - column_order=["image", "latent_code"] + output_columns=["image", "latent_code"] ) + mnist_ds = mnist_ds.project(["image", "latent_code"]) mnist_ds = mnist_ds.batch(batch_size, True) mnist_ds = mnist_ds.repeat(1) return mnist_ds @@ -167,10 +167,9 @@ def create_dataset_valid(batch_size=5, repeat_size=1, latent_size=100): x[-10000:].astype("float32"), np.random.normal(size=(latent_size)).astype("float32") ), - output_columns=["image", "latent_code"], - column_order=["image", "latent_code"] + output_columns=["image", "latent_code"] ) - + mnist_ds = 
mnist_ds.project(["image", "latent_code"]) mnist_ds = mnist_ds.batch(batch_size, True) mnist_ds = mnist_ds.repeat(1) return mnist_ds diff --git a/research/cv/m2det/src/dataset.py b/research/cv/m2det/src/dataset.py index 19f7b24a48834e94704a40ff24f75a331fe686d0..78518d37a5d51704f5ce993c32d1da1d0fa4b6b1 100644 --- a/research/cv/m2det/src/dataset.py +++ b/research/cv/m2det/src/dataset.py @@ -298,7 +298,7 @@ def get_dataset(cfg, dataset, priors, setname='train_sets', random_seed=None, di shuffle=shuffle) target_preprocess_function = (lambda img, annotation: target_preprocess(img, annotation, cfg, priors)) ds = ds.map(operations=target_preprocess_function, input_columns=['img', 'annotation'], - output_columns=['img', 'loc', 'conf'], column_order=['img', 'loc', 'conf']) + output_columns=['img', 'loc', 'conf']) ds = ds.batch(cfg.train_cfg['per_batch_size'], drop_remainder=True) return ds, generator diff --git a/research/cv/nas-fpn/src/dataset.py b/research/cv/nas-fpn/src/dataset.py index 239d7de558faff4287c2304cef90894dc9d6f1b4..7180de224eb82f65c5d6b159377944ac03f9dc0a 100644 --- a/research/cv/nas-fpn/src/dataset.py +++ b/research/cv/nas-fpn/src/dataset.py @@ -284,7 +284,7 @@ def create_retinanet_dataset(mindrecord_file, batch_size, repeat_num, device_num output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=is_training, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=is_training, diff --git a/research/cv/res2net_faster_rcnn/src/dataset.py b/research/cv/res2net_faster_rcnn/src/dataset.py index 7e7c421b707066d2a7782407cb12f54ed36a3afc..dac0d2f1c2dc95e1c53d43355b827dae00fc36e8 100644 --- a/research/cv/res2net_faster_rcnn/src/dataset.py +++ b/research/cv/res2net_faster_rcnn/src/dataset.py @@ -548,14 +548,12 @@ def create_fasterrcnn_dataset(config, mindrecord_file, batch_size=2, device_num= if is_training: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, python_multiprocessing=python_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) else: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/research/cv/res2net_yolov3/src/yolo_dataset.py b/research/cv/res2net_yolov3/src/yolo_dataset.py index b97fd5e83f9697c2eb5d2c385c36584150005c2f..bf5dc7403b3ccb8fec21ab188890d614fef76b69 100644 --- a/research/cv/res2net_yolov3/src/yolo_dataset.py +++ b/research/cv/res2net_yolov3/src/yolo_dataset.py @@ -184,7 +184,6 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num, compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config)) ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"], output_columns=["image", "image_shape", "img_id"], - column_order=["image", "image_shape", "img_id"], num_parallel_workers=8) ds = ds.map(operations=hwc_to_chw, 
input_columns=["image"], num_parallel_workers=8) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/research/cv/retinaface/src/dataset.py b/research/cv/retinaface/src/dataset.py index 8f97e935a8606d9fba70cc4a45c6613b48e6b1bd..2d07f2d6921fb4d68f301e229cf8c0e94ff547f6 100644 --- a/research/cv/retinaface/src/dataset.py +++ b/research/cv/retinaface/src/dataset.py @@ -144,11 +144,9 @@ def create_dataset(data_dir, cfg, batch_size=32, repeat_num=1, shuffle=True, mul de_dataset = de_dataset.map(input_columns=["image", "annotation"], output_columns=["image", "truths", "conf", "landm"], - column_order=["image", "truths", "conf", "landm"], operations=union_data, python_multiprocessing=multiprocessing, num_parallel_workers=num_worker) - de_dataset = de_dataset.batch(batch_size, drop_remainder=True) de_dataset = de_dataset.repeat(repeat_num) diff --git a/research/cv/retinanet_resnet101/src/dataset.py b/research/cv/retinanet_resnet101/src/dataset.py index d86f6d8fdc180fb0dd4e8503d7a261818128ed4e..a398ec61114a3429217541d87da66d01eac86d23 100644 --- a/research/cv/retinanet_resnet101/src/dataset.py +++ b/research/cv/retinanet_resnet101/src/dataset.py @@ -411,7 +411,7 @@ def create_retinanet_dataset(mindrecord_file, batch_size, repeat_num, device_num output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=is_training, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=is_training, diff --git a/research/cv/retinanet_resnet152/src/dataset.py b/research/cv/retinanet_resnet152/src/dataset.py index f9f186612bb290ddf1d739b8101dde24963c641a..c397af44eb8efa2a0cefe9115bebb622c7d854f1 100644 --- a/research/cv/retinanet_resnet152/src/dataset.py +++ b/research/cv/retinanet_resnet152/src/dataset.py @@ -411,7 +411,7 @@ def create_retinanet_dataset(mindrecord_file, batch_size, repeat_num, device_num output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=is_training, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=is_training, diff --git a/research/cv/rfcn/src/dataset.py b/research/cv/rfcn/src/dataset.py index dcc009b196ad9b45bb3f049e3e1c10ae550c20d1..043627039968d78d33ee2068c82473aec14d27b6 100644 --- a/research/cv/rfcn/src/dataset.py +++ b/research/cv/rfcn/src/dataset.py @@ -570,14 +570,12 @@ def create_rfcn_dataset(config, mindrecord_file, batch_size=2, device_num=1, ran if is_training: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, python_multiprocessing=python_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) else: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, 
num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/research/cv/ssc_resnet50/src/dataset.py b/research/cv/ssc_resnet50/src/dataset.py index b714dbee4a3ae85e55a3a42aff4dbff56462167a..c72ff19f7cb2aa329b68c386b664479f4a197f66 100644 --- a/research/cv/ssc_resnet50/src/dataset.py +++ b/research/cv/ssc_resnet50/src/dataset.py @@ -238,9 +238,7 @@ def create_comatch_dataset(args): ds = ds.batch(args.batch_size, num_parallel_workers=4, drop_remainder=True) ds = ds.map(num_parallel_workers=8, operations=[concate], - input_columns=["unlabel_weak", "unlabel_strong0", "unlabel_strong1"], - output_columns=["unlabel_weak", "unlabel_strong0", "unlabel_strong1"], - column_order=["label", "unlabel_weak", "unlabel_strong0", "unlabel_strong1", "target"]) + input_columns=["unlabel_weak", "unlabel_strong0", "unlabel_strong1"]) return ds, len(comatch_dataset) @@ -404,9 +402,7 @@ def create_select_dataset(args): shuffle=True, num_parallel_workers=1, shard_id=args.rank, num_shards=args.device_num) ds = ds.batch(args.batch_size, num_parallel_workers=1, drop_remainder=True) - ds = ds.map(operations=[concate_data], input_columns=["img_data"], - output_columns=["img_data"], - column_order=["img_data", "label_target", "label_path"]) + ds = ds.map(operations=[concate_data], input_columns=["img_data"]) return ds, len(comatch_dataset) diff --git a/research/cv/ssd_ghostnet/src/dataset.py b/research/cv/ssd_ghostnet/src/dataset.py index 312f14fcecc132d841526d0c2ba4c222160a62f4..ad9d04d69796d786ac02192581a94f777274f4a7 100644 --- a/research/cv/ssd_ghostnet/src/dataset.py +++ b/research/cv/ssd_ghostnet/src/dataset.py @@ -416,7 +416,7 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] ds = ds.map(input_columns=["img_id", "image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, operations=compose_map_func, python_multiprocessing=is_training, num_parallel_workers=num_parallel_workers) ds = ds.map(input_columns=["image"], operations=trans, python_multiprocessing=is_training, diff --git a/research/cv/ssd_inception_v2/src/dataset.py b/research/cv/ssd_inception_v2/src/dataset.py index ebeaf3774edd9e416b1a0249965e4e2d629b237b..32f58f3de7901606f34be8ed4c7b592974442383 100644 --- a/research/cv/ssd_inception_v2/src/dataset.py +++ b/research/cv/ssd_inception_v2/src/dataset.py @@ -523,7 +523,7 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=use_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=use_multiprocessing, diff --git a/research/cv/ssd_inceptionv2/src/dataset.py b/research/cv/ssd_inceptionv2/src/dataset.py index 1a0d2cb717d18532465ee3855e537abffe65f8ad..c054998a863b681050c7b52337b1d3de3dfba8fb 100644 --- a/research/cv/ssd_inceptionv2/src/dataset.py +++ b/research/cv/ssd_inceptionv2/src/dataset.py @@ -409,7 +409,7 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] 
ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=use_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=use_multiprocessing, diff --git a/research/cv/ssd_mobilenetV2/src/dataset.py b/research/cv/ssd_mobilenetV2/src/dataset.py index bbb564c654c900449e3a149006804820205aeaa5..b3093a1b51559613aba4c374eacb588ce241e9d3 100644 --- a/research/cv/ssd_mobilenetV2/src/dataset.py +++ b/research/cv/ssd_mobilenetV2/src/dataset.py @@ -413,7 +413,7 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=use_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=use_multiprocessing, diff --git a/research/cv/ssd_mobilenetV2_FPNlite/src/dataset.py b/research/cv/ssd_mobilenetV2_FPNlite/src/dataset.py index f0a96bb844b3198370fca7783063c29b039b7beb..82a1d1ddc42c2e05ebc6da77920cfa0346142048 100644 --- a/research/cv/ssd_mobilenetV2_FPNlite/src/dataset.py +++ b/research/cv/ssd_mobilenetV2_FPNlite/src/dataset.py @@ -412,7 +412,7 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=use_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=use_multiprocessing, diff --git a/research/cv/ssd_resnet34/src/dataset.py b/research/cv/ssd_resnet34/src/dataset.py index 26d042dec3379057509bc374d7817bda74fb52f6..fbe95bbd29007b9ee3b39eed0b6b6c646a189568 100644 --- a/research/cv/ssd_resnet34/src/dataset.py +++ b/research/cv/ssd_resnet34/src/dataset.py @@ -409,7 +409,7 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], - output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=use_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=use_multiprocessing, diff --git a/research/cv/ssd_resnet50/src/dataset.py b/research/cv/ssd_resnet50/src/dataset.py index eaed11ad0e4751ed1487c8a664094af01ab76103..2cbc5e803f90718becbf47b953f2c09112be5f52 100644 --- a/research/cv/ssd_resnet50/src/dataset.py +++ b/research/cv/ssd_resnet50/src/dataset.py @@ -409,7 +409,7 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num output_columns = ["img_id", "image", "image_shape"] trans = [normalize_op, change_swap_op] ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"], - 
output_columns=output_columns, column_order=output_columns, + output_columns=output_columns, python_multiprocessing=use_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=use_multiprocessing, diff --git a/research/cv/ssd_resnet_34/src/dataset.py b/research/cv/ssd_resnet_34/src/dataset.py index d6f8787052aaeadc26e1eb40b3bacf5d856620d5..009bf8d5d0cfe9f2f60d14ab51b943b798873d39 100644 --- a/research/cv/ssd_resnet_34/src/dataset.py +++ b/research/cv/ssd_resnet_34/src/dataset.py @@ -447,7 +447,6 @@ def create_ssd_dataset( operations=compose_map_func, input_columns=["img_id", "image", "annotation"], output_columns=output_columns, - column_order=output_columns, python_multiprocessing=use_multiprocessing, num_parallel_workers=num_parallel_workers ) diff --git a/research/cv/textfusenet/src/dataset.py b/research/cv/textfusenet/src/dataset.py index 70b9cd602963cfd6bd03e6133f50324f4b40d9f5..a2af9ec4b8519fc9c7d9ac70b945e260c61684a0 100755 --- a/research/cv/textfusenet/src/dataset.py +++ b/research/cv/textfusenet/src/dataset.py @@ -516,17 +516,14 @@ def create_textfusenet_dataset(mindrecord_file, batch_size=2, device_num=1, rank ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation", "mask", "mask_shape"], output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"], - column_order=["image", "image_shape", "box", "label", "valid_num", "mask"], python_multiprocessing=False, num_parallel_workers=num_parallel_workers) ds = ds.padded_batch(batch_size, drop_remainder=True, pad_info={"mask": ([config.max_instance_count, None, None], 0)}) - else: ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation", "mask", "mask_shape"], output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"], - column_order=["image", "image_shape", "box", "label", "valid_num", "mask"], num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/research/cv/tracktor++/src/dataset.py b/research/cv/tracktor++/src/dataset.py index 0e2afa850f90d9f44a74137709dac0a7a9144aba..af78e7cb3f960aa4a2425ff0c2f18661ef87c880 100644 --- a/research/cv/tracktor++/src/dataset.py +++ b/research/cv/tracktor++/src/dataset.py @@ -591,14 +591,12 @@ def create_fasterrcnn_dataset(config, mindrecord_file, batch_size=2, device_num= if is_training: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, python_multiprocessing=python_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) else: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/research/cv/tracktor/src/dataset.py b/research/cv/tracktor/src/dataset.py index d63c6b1f9d49b3f421b4337f91a8c36670ed1353..73da9718fa3ea736fa9bfda3cbd09cfb77202d3f 100644 --- a/research/cv/tracktor/src/dataset.py +++ b/research/cv/tracktor/src/dataset.py @@ -591,14 +591,12 @@ def create_fasterrcnn_dataset(config, mindrecord_file, batch_size=2, device_num= if is_training: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", 
"image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, python_multiprocessing=python_multiprocessing, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) else: ds = ds.map(input_columns=["image", "annotation"], output_columns=["image", "image_shape", "box", "label", "valid_num"], - column_order=["image", "image_shape", "box", "label", "valid_num"], operations=compose_map_func, num_parallel_workers=num_parallel_workers) ds = ds.batch(batch_size, drop_remainder=True) diff --git a/research/cv/yolov3_tiny/src/yolo_dataset.py b/research/cv/yolov3_tiny/src/yolo_dataset.py index 39ad5fc4aba26ebe0e82b1ffe43bb216f2436ee6..45ff1d63a07c4b263aae608a1286fe8aeb78d84f 100644 --- a/research/cv/yolov3_tiny/src/yolo_dataset.py +++ b/research/cv/yolov3_tiny/src/yolo_dataset.py @@ -281,7 +281,6 @@ def create_yolo_dataset( yolo_dataset.transforms = multi_scale_trans dataset_column_names = ["image", "annotation", "input_size", "mosaic_flag"] - output_column_names = ["image", "annotation", "bbox1", "bbox2", "gt_box1", "gt_box2"] map1_out_column_names = ["image", "annotation", "size"] map2_in_column_names = ["annotation", "size"] map2_out_column_names = ["annotation", "bbox1", "bbox2", "gt_box1", "gt_box2"] @@ -297,7 +296,6 @@ def create_yolo_dataset( operations=multi_scale_trans, input_columns=dataset_column_names, output_columns=map1_out_column_names, - column_order=map1_out_column_names, num_parallel_workers=16, python_multiprocessing=True ) @@ -305,10 +303,10 @@ def create_yolo_dataset( operations=PreprocessTrueBox(config), input_columns=map2_in_column_names, output_columns=map2_out_column_names, - column_order=output_column_names, num_parallel_workers=2, python_multiprocessing=False ) + ds = ds.project(["image", "annotation", "bbox1", "bbox2", "gt_box1", "gt_box2"]) mean = [m * 255 for m in [0.485, 0.456, 0.406]] std = [s * 255 for s in [0.229, 0.224, 0.225]] ds = ds.map( @@ -330,7 +328,6 @@ def create_yolo_dataset( operations=compose_map_func, input_columns=["image", "img_id"], output_columns=["image", "image_shape", "img_id"], - column_order=["image", "image_shape", "img_id"], num_parallel_workers=8 ) ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8) diff --git a/research/mm/wukong/src/dataset/dataset.py b/research/mm/wukong/src/dataset/dataset.py index a15635b4c9ef4cc4566e1b461797412a2852bc14..2eb2e241f4d85de3c882e5004ca5490936a2b86b 100644 --- a/research/mm/wukong/src/dataset/dataset.py +++ b/research/mm/wukong/src/dataset/dataset.py @@ -43,6 +43,6 @@ def get_dataset(dataset_path, batch_size): C.CenterCrop(224), C.HWC2CHW(), C2.TypeCast(mstype.float32)], - input_columns=["image"], output_columns=None, column_order=["image", "label"]) + input_columns=["image"]) val_dataset = val_dataset.batch(batch_size) return val_dataset diff --git a/research/nlp/soft_masked_bert/src/tokenization.py b/research/nlp/soft_masked_bert/src/tokenization.py index 9c8525a580e57dbc8a9317c4decba7bd881dba1d..49aecbd7d7b9e1cbe84b7758de862b2f1ba96ef9 100644 --- a/research/nlp/soft_masked_bert/src/tokenization.py +++ b/research/nlp/soft_masked_bert/src/tokenization.py @@ -118,15 +118,15 @@ class CscTokenizer: def get_token_ids(self, batch_size): dataset = get_dataset(self.fp, vocab_path=self.vocab_path, device_num=self.device_num, rank_id=self.rank_id) dataset = dataset.map(operations=self.tokenizer_op, input_columns=['original_tokens']) - dataset = 
dataset.map(operations=self.convert2id, input_columns=['original_tokens'], \ - output_columns=['original_tokens', 'original_tokens_mask', 'original_token_type_ids'], \ - column_order=['wrong_ids', 'original_tokens', 'original_tokens_mask', 'correct_tokens', \ - 'original_token_type_ids']) + dataset = dataset.map(operations=self.convert2id, input_columns=['original_tokens'], + output_columns=['original_tokens', 'original_tokens_mask', 'original_token_type_ids']) + dataset = dataset.project(['wrong_ids', 'original_tokens', 'original_tokens_mask', 'correct_tokens', + 'original_token_type_ids']) dataset = dataset.map(operations=self.tokenizer_op, input_columns=['correct_tokens']) - dataset = dataset.map(operations=self.convert2id, input_columns=['correct_tokens'], \ - output_columns=['correct_tokens', 'correct_tokens_mask', 'correct_token_type_ids'], \ - column_order=['wrong_ids', 'original_tokens', 'original_tokens_mask', 'correct_tokens', \ - 'correct_tokens_mask', 'original_token_type_ids', 'correct_token_type_ids']) + dataset = dataset.map(operations=self.convert2id, input_columns=['correct_tokens'], + output_columns=['correct_tokens', 'correct_tokens_mask', 'correct_token_type_ids']) + dataset = dataset.project(['wrong_ids', 'original_tokens', 'original_tokens_mask', 'correct_tokens', + 'correct_tokens_mask', 'original_token_type_ids', 'correct_token_type_ids']) dataset = dataset.map(operations=self.turn2int32, input_columns=['wrong_ids']) dataset = dataset.batch(batch_size=batch_size, drop_remainder=True) return dataset diff --git a/research/recommend/EDCN/src/dataset.py b/research/recommend/EDCN/src/dataset.py index c593e205a47d3f377ef57530933441e7ba8dacdc..41053af97f5c4fb0dd1b3c6aec1c44e61b05d546 100644 --- a/research/recommend/EDCN/src/dataset.py +++ b/research/recommend/EDCN/src/dataset.py @@ -212,7 +212,6 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100 np.array(y).flatten().reshape(batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) data_set = data_set.repeat(epochs) return data_set @@ -260,7 +259,6 @@ def _get_tf_dataset(directory, train_mode=True, epochs=1, batch_size=1000, np.array(y).flatten().reshape(batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) data_set = data_set.repeat(epochs) return data_set diff --git a/research/recommend/Fat-DeepFFM/src/dataset.py b/research/recommend/Fat-DeepFFM/src/dataset.py index 9ce77814f7c4a349e5ba491336815d23ab28e22d..4d4cbf5d17e010b7c7ced385bf4217ca407b16d3 100644 --- a/research/recommend/Fat-DeepFFM/src/dataset.py +++ b/research/recommend/Fat-DeepFFM/src/dataset.py @@ -39,7 +39,6 @@ def get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=1000 np.array(y).flatten().reshape(batch_size, 13), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['cats_vals', 'num_vals', 'label'], - column_order=['cats_vals', 'num_vals', 'label'], num_parallel_workers=8) data_set = data_set.repeat(epochs) return data_set diff --git a/research/recommend/autodis/infer/utils/preprocess_mindrecord.py b/research/recommend/autodis/infer/utils/preprocess_mindrecord.py index 2b0ae852266aeb3e8c1be7310bd7fbb59aaa674e..16ceaf647e68431793aff47d26fe97642fa5d86f 100644 --- a/research/recommend/autodis/infer/utils/preprocess_mindrecord.py +++ 
b/research/recommend/autodis/infer/utils/preprocess_mindrecord.py @@ -40,7 +40,6 @@ data_set = data_set.map(operations=(lambda x, y, z: (np.array(x).flatten().resha batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) d = data_set.create_dict_iterator() diff --git a/research/recommend/autodis/src/dataset.py b/research/recommend/autodis/src/dataset.py index 9a3149ad1cdb1ef62ec675f9f0b4e1600680e342..8248c1f117b5f67a590d2ee8796027f9099e6d20 100644 --- a/research/recommend/autodis/src/dataset.py +++ b/research/recommend/autodis/src/dataset.py @@ -213,7 +213,6 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100 np.array(y).flatten().reshape(batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) data_set = data_set.repeat(epochs) return data_set @@ -262,7 +261,6 @@ def _get_tf_dataset(directory, train_mode=True, epochs=1, batch_size=1000, np.array(y).flatten().reshape(batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) data_set = data_set.repeat(epochs) return data_set diff --git a/research/recommend/autodis/src/dataset_modelarts.py b/research/recommend/autodis/src/dataset_modelarts.py index 0a223a049f01001ef5572d2962d91bf826299d0c..7fafadc1a8cd5f29d25c1ff77d8e7ec6817c6293 100644 --- a/research/recommend/autodis/src/dataset_modelarts.py +++ b/research/recommend/autodis/src/dataset_modelarts.py @@ -212,7 +212,6 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100 np.array(y).flatten().reshape(batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) data_set = data_set.repeat(epochs) return data_set @@ -260,7 +259,6 @@ def _get_tf_dataset(directory, train_mode=True, epochs=1, batch_size=1000, np.array(y).flatten().reshape(batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) data_set = data_set.repeat(epochs) return data_set diff --git a/research/recommend/dlrm/src/dataset.py b/research/recommend/dlrm/src/dataset.py index 99de9fdd7cb0d34a2e69993c338235cc094ad1ff..58392a6039141e3ecfb4501111e644259c35f781 100644 --- a/research/recommend/dlrm/src/dataset.py +++ b/research/recommend/dlrm/src/dataset.py @@ -63,7 +63,6 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100 np.log(np.array(y).flatten().reshape(batch_size, 13) + 1), # deal with numerical features np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) data_set = data_set.repeat(epochs) return data_set diff --git a/research/recommend/final/infer/mindrecord2bin.py b/research/recommend/final/infer/mindrecord2bin.py index d055f87e3e5369c7aac3e14f8ddf00319654acbc..dd15eb9a4cfaa0a0ceec063c808204492b1ebb7c 100644 --- a/research/recommend/final/infer/mindrecord2bin.py +++ b/research/recommend/final/infer/mindrecord2bin.py @@ -42,7 +42,6 @@ data_set = 
data_set.map(operations=(lambda x, y, z: (np.array(x).flatten().resha batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) d = data_set.create_dict_iterator() diff --git a/research/recommend/final/src/dataset.py b/research/recommend/final/src/dataset.py index ac0f05354bb18c388c81bb44b5206dbd7ddebc73..5b72e77f644378ab0823ed2f2dd781458170f6a4 100644 --- a/research/recommend/final/src/dataset.py +++ b/research/recommend/final/src/dataset.py @@ -211,7 +211,6 @@ def _get_mindrecord_dataset(directory, train_mode=True, epochs=1, batch_size=100 np.array(y).flatten().reshape(batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) data_set = data_set.repeat(epochs) return data_set @@ -259,7 +258,6 @@ def _get_tf_dataset(directory, train_mode=True, epochs=1, batch_size=1000, np.array(y).flatten().reshape(batch_size, 39), np.array(z).flatten().reshape(batch_size, 1))), input_columns=['feat_ids', 'feat_vals', 'label'], - column_order=['feat_ids', 'feat_vals', 'label'], num_parallel_workers=8) data_set = data_set.repeat(epochs) return data_set
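Note on the pattern applied throughout the hunks above: `mindspore.dataset.Dataset.map()` dropped the `column_order` argument, so each call site keeps only `input_columns`/`output_columns`, and any column selection or reordering that `column_order` used to perform is expressed with a separate `Dataset.project()` call (as in the yolov3_tiny and soft_masked_bert hunks). The snippet below is a minimal, self-contained sketch of that before/after shape, assuming MindSpore 2.x; the generator source, column names, and `split_record` helper are made up for illustration and do not appear in any of the patched files.

# Minimal sketch (not part of the patch) of the migration pattern applied above,
# assuming MindSpore 2.x where Dataset.map() no longer accepts `column_order`.
# The generator source, column names, and split_record() are illustrative only.
import numpy as np
import mindspore.dataset as ds


def split_record(record):
    # Toy per-sample transform: one input column expands into three output columns.
    record = np.asarray(record, dtype=np.float32)
    return record, record[:2], np.array([record.sum()], dtype=np.float32)


def make_dataset(batch_size=4):
    source = [(np.arange(4, dtype=np.float32) + i,) for i in range(16)]
    data = ds.GeneratorDataset(source, column_names=["record"], shuffle=False)

    # Before: data.map(..., output_columns=[...], column_order=[...])
    # After:  map() only declares the new columns ...
    data = data.map(operations=split_record,
                    input_columns=["record"],
                    output_columns=["record", "head", "total"],
                    num_parallel_workers=1)
    # ... and an explicit project() selects and orders the columns that
    # downstream operations expect, which is what column_order used to do.
    data = data.project(["record", "head", "total"])
    return data.batch(batch_size, drop_remainder=True)


if __name__ == "__main__":
    for batch in make_dataset().create_dict_iterator(output_numpy=True):
        print({name: value.shape for name, value in batch.items()})

One caveat on the design choice: `project()` also drops any columns not listed, so it only matches the old `column_order` behaviour when every column needed downstream is included. The call sites above that merely delete `column_order` without adding `project()` appear to rely on the default behaviour of keeping all remaining columns in their existing order.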