Commit 87973033 authored by jonyguo

fix: remove column_order

parent 01abcae6
Showing 33 additions and 55 deletions
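Every hunk below applies the same migration: newer MindSpore releases removed the column_order argument from Dataset.map(), so the diff drops it, and where the final column selection or order still matters an explicit Dataset.project() call is added. A minimal, self-contained sketch of the pattern (the generator, compose_map_func, and column names are placeholders, not code from any file in this commit):

import numpy as np
import mindspore.dataset as ds

def source():
    # Toy two-column source standing in for the MindRecord/COCO readers below.
    for _ in range(4):
        yield np.zeros((8, 8, 3), np.float32), np.arange(6, dtype=np.float32)

def compose_map_func(image, annotation):
    # Hypothetical transform: split the annotation column into two outputs.
    return image, annotation[:4], annotation[4:]

dataset = ds.GeneratorDataset(source, column_names=["image", "annotation"])

# Old style (the removed lines): column_order on map() fixed the final column
# selection and order in one call.
#   dataset = dataset.map(operations=compose_map_func,
#                         input_columns=["image", "annotation"],
#                         output_columns=["image", "box", "label"],
#                         column_order=["image", "box", "label"])

# New style used throughout this commit: map() only declares its outputs, and
# project() (where it is still needed) selects and orders the columns.
dataset = dataset.map(operations=compose_map_func,
                      input_columns=["image", "annotation"],
                      output_columns=["image", "box", "label"])
dataset = dataset.project(["image", "box", "label"])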
@@ -6,6 +6,7 @@
#
models/official/recommend/wide_and_deep/src/wide_and_deep.py:__init__
models/official/recommend/wide_and_deep_multitable/src/wide_and_deep.py:__init__
models/research/cv/centernet/src/dataset.py:preprocess_fn
models/research/cvtmodel/wide_resnet/src/wide_resnet101_2.py:__init__
models/research/cvtmodel/resnest/src/resnest200.py:__init__
models/research/cvtmodel/resnest/src/resnest200.py:construct

@@ -43,7 +43,6 @@ def GetDataLoader(per_batch_size,
columns = ['image', "hm", 'reg_mask', 'ind', 'wh', 'wight_mask', 'hm_offset', 'hps_mask', 'landmarks']
de_dataset = de_dataset.map(input_columns=["image", "anns"],
output_columns=columns,
column_order=columns,
operations=compose_map_func,
num_parallel_workers=num_parallel_workers,
python_multiprocessing=True)

@@ -195,7 +195,7 @@ def create_dataset_train(mindrecord_file_pos, config, dataset_name='ocr'):
data_set = data_set.map(operations=crop_image((0, 150), (0, 150)),
input_columns=["image"], num_parallel_workers=8)
data_set = data_set.map(operations=create_label(), input_columns=["image"], output_columns=["image", "label"],
column_order=["image", "label"], num_parallel_workers=8)
num_parallel_workers=8)
augmentor = Augmentor(config.augment_severity, config.augment_prob)
operation = augmentor.process
data_set = data_set.map(operations=operation, input_columns=["image"],
@@ -252,7 +252,7 @@ def create_dataset_eval(mindrecord_file_pos, config, dataset_name='ocr'):
data_set = data_set.map(operations=crop_image((0, 150), (0, 150)),
input_columns=["image"], num_parallel_workers=8)
data_set = data_set.map(operations=create_label(), input_columns=["image"], output_columns=["image", "label"],
column_order=["image", "label"], num_parallel_workers=8)
num_parallel_workers=8)
global image_height
global image_width
image_height = config.im_size_h

@@ -114,8 +114,7 @@ def create_ocr_train_dataset(mindrecord_file, batch_size=32, rank_size=1, rank_i
num_parallel_workers=num_parallel_workers)
ds = ds.map(operations=ops.PadEnd([config.max_length], 0), input_columns=["decoder_target"])
ds = ds.map(operations=random_teacher_force, input_columns=["image", "decoder_input", "decoder_target"],
output_columns=["image", "decoder_input", "decoder_target", "teacher_force"],
column_order=["image", "decoder_input", "decoder_target", "teacher_force"])
output_columns=["image", "decoder_input", "decoder_target", "teacher_force"])
type_cast_op_bool = ops.TypeCast(mstype.bool_)
ds = ds.map(operations=type_cast_op_bool, input_columns="teacher_force")
print("Train dataset size= %s" % (int(ds.get_dataset_size())))

@@ -297,7 +297,6 @@ def create_ctpn_dataset(mindrecord_file, batch_size=1, device_num=1, rank_id=0,
if is_training:
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "box", "label", "valid_num", "image_shape"],
column_order=["image", "box", "label", "valid_num", "image_shape"],
num_parallel_workers=num_parallel_workers,
python_multiprocessing=True)
ds = ds.map(operations=[normalize_op, type_cast0], input_columns=["image"],
@@ -310,10 +309,8 @@ def create_ctpn_dataset(mindrecord_file, batch_size=1, device_num=1, rank_id=0,
ds = ds.map(operations=compose_map_func,
input_columns=["image", "annotation"],
output_columns=["image", "box", "label", "valid_num", "image_shape"],
column_order=["image", "box", "label", "valid_num", "image_shape"],
num_parallel_workers=8,
python_multiprocessing=True)
ds = ds.map(operations=[normalize_op, hwc_to_chw, type_cast1], input_columns=["image"],
num_parallel_workers=8)
# transpose_column from python to c

@@ -488,7 +488,6 @@ def create_deeptext_dataset(mindrecord_file, batch_size=2, repeat_num=12, device
if is_training:
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
num_parallel_workers=num_parallel_workers)
flip = (np.random.rand() < config.flip_ratio)
@@ -508,9 +507,7 @@ def create_deeptext_dataset(mindrecord_file, batch_size=2, repeat_num=12, device
ds = ds.map(operations=compose_map_func,
input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
num_parallel_workers=num_parallel_workers)
ds = ds.map(operations=[normalize_op, hwc_to_chw, type_cast1], input_columns=["image"],
num_parallel_workers=24)

@@ -560,14 +560,12 @@ def create_fasterrcnn_dataset(config, mindrecord_file, batch_size=2, device_num=
if is_training:
ds = ds.map(input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
operations=compose_map_func, python_multiprocessing=python_multiprocessing,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
else:
ds = ds.map(input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "box", "label", "valid_num"],
column_order=["image", "image_shape", "box", "label", "valid_num"],
operations=compose_map_func,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)

@@ -547,7 +547,6 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id
ds = ds.map(operations=compose_map_func,
input_columns=["image", "annotation", "mask", "mask_shape"],
output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
python_multiprocessing=False,
num_parallel_workers=num_parallel_workers)
ds = ds.padded_batch(batch_size, drop_remainder=True,
@@ -557,7 +556,6 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id
ds = ds.map(operations=compose_map_func,
input_columns=["image", "annotation", "mask", "mask_shape"],
output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)

@@ -562,7 +562,6 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id
ds = ds.map(operations=compose_map_func,
input_columns=["image", "annotation", "mask", "mask_shape"],
output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
python_multiprocessing=False,
num_parallel_workers=num_parallel_workers)
ds = ds.padded_batch(batch_size, drop_remainder=True,
@@ -572,7 +571,6 @@ def create_maskrcnn_dataset(mindrecord_file, batch_size=2, device_num=1, rank_id
ds = ds.map(operations=compose_map_func,
input_columns=["image", "annotation", "mask", "mask_shape"],
output_columns=["image", "image_shape", "box", "label", "valid_num", "mask"],
column_order=["image", "image_shape", "box", "label", "valid_num", "mask"],
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)

@@ -234,7 +234,6 @@ def create_dataset(cls_list, batch_size=16, workers=16, devices=1, rank=0, multi
ds = ds.map(input_columns=["image", "mask", "farthest"],
output_columns=["image", "mask", "vertex", "vertex_weight"],
column_order=["image", "mask", "vertex", "vertex_weight"],
operations=preprocess_fn, num_parallel_workers=workers, python_multiprocessing=multi_process)
img_transforms = C.Compose([

@@ -149,19 +149,16 @@ def create_dataset(data_dir, cfg, batch_size=32, repeat_num=1, shuffle=True, mul
de_dataset = de_dataset.map(input_columns=["image", "annotation"],
output_columns=["image", "annotation"],
column_order=["image", "annotation"],
operations=read_data_from_dataset,
python_multiprocessing=multiprocessing,
num_parallel_workers=num_worker)
de_dataset = de_dataset.map(input_columns=["image", "annotation"],
output_columns=["image", "annotation"],
column_order=["image", "annotation"],
operations=augmentation,
python_multiprocessing=multiprocessing,
num_parallel_workers=num_worker)
de_dataset = de_dataset.map(input_columns=["image", "annotation"],
output_columns=["image", "truths", "conf", "landm"],
column_order=["image", "truths", "conf", "landm"],
operations=encode_data,
python_multiprocessing=multiprocessing,
num_parallel_workers=num_worker)

@@ -534,7 +534,7 @@ def create_retinanet_dataset(mindrecord_file, batch_size, repeat_num, device_num
output_columns = ["img_id", "image", "image_shape"]
trans = [normalize_op, change_swap_op]
ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"],
output_columns=output_columns, column_order=output_columns,
output_columns=output_columns,
python_multiprocessing=is_training,
num_parallel_workers=num_parallel_workers)
ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=is_training,

@@ -87,7 +87,7 @@ def create_dataset(args, dataset_mode, repeat_num=1):
data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8)
data_set = data_set.map(operations=copy_column, input_columns=["image", "label"],
output_columns=["image1", "image2", "label"],
column_order=["image1", "image2", "label"], num_parallel_workers=8)
num_parallel_workers=8)
data_set = data_set.map(operations=trans, input_columns=["image1"], num_parallel_workers=8)
data_set = data_set.map(operations=trans, input_columns=["image2"], num_parallel_workers=8)
# apply batch operations

@@ -413,7 +413,7 @@ def create_ssd_dataset(mindrecord_file, batch_size=32, device_num=1, rank=0,
output_columns = ["img_id", "image", "image_shape"]
trans = [normalize_op, change_swap_op]
ds = ds.map(operations=compose_map_func, input_columns=["img_id", "image", "annotation"],
output_columns=output_columns, column_order=output_columns,
output_columns=output_columns,
python_multiprocessing=use_multiprocessing,
num_parallel_workers=num_parallel_workers)
ds = ds.map(operations=trans, input_columns=["image"], python_multiprocessing=use_multiprocessing,

@@ -261,7 +261,7 @@ def create_multi_class_dataset(data_dir, img_size, repeat, batch_size, num_class
compose_map_func = (lambda image, mask: preprocess_img_mask(image, mask, num_classes, tuple(img_size),
augment and is_train, eval_resize))
dataset = dataset.map(operations=compose_map_func, input_columns=mc_dataset.column_names,
output_columns=mc_dataset.column_names, column_order=mc_dataset.column_names,
output_columns=mc_dataset.column_names,
num_parallel_workers=num_parallel_workers)
dataset = dataset.batch(batch_size, drop_remainder=is_train, num_parallel_workers=num_parallel_workers)
return dataset
@@ -182,7 +182,6 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, device_num, rank,
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
dataset = dataset.map(operations=compose_map_func, input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"],
column_order=["image", "image_shape", "img_id"],
num_parallel_workers=8)
dataset = dataset.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
dataset = dataset.batch(batch_size, drop_remainder=True)

@@ -296,7 +296,6 @@ def create_yolo_dataset(mindrecord_dir, batch_size=32, device_num=1, rank=0,
hwc_to_chw = C.HWC2CHW()
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
column_order=["image", "bbox_1", "bbox_2", "bbox_3", "gt_box1", "gt_box2", "gt_box3"],
num_parallel_workers=num_parallel_workers)
if "x86" in platform.machine():
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=num_parallel_workers)
@@ -306,6 +305,5 @@
else:
ds = ds.map(operations=compose_map_func, input_columns=["image", "annotation"],
output_columns=["image", "image_shape", "annotation"],
column_order=["image", "image_shape", "annotation"],
num_parallel_workers=num_parallel_workers)
return ds
@@ -275,7 +275,6 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, default_config))
ds = ds.map(operations=compose_map_func, input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"],
column_order=["image", "image_shape", "img_id"],
num_parallel_workers=8)
ds = ds.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)
@@ -337,7 +336,6 @@ def create_yolo_datasetv2(image_dir,
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, default_config))
ds = ds.map(input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"],
column_order=["image", "image_shape", "img_id"],
operations=compose_map_func, num_parallel_workers=8)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=8)
ds = ds.batch(batch_size, drop_remainder=True)

@@ -260,11 +260,12 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, device_num, rank,
dataset = ds.GeneratorDataset(yolo_dataset, column_names=dataset_column_names, sampler=distributed_sampler,
python_multiprocessing=True, num_parallel_workers=min(4, num_parallel_workers))
dataset = dataset.map(operations=multi_scale_trans, input_columns=dataset_column_names,
output_columns=map1_out_column_names, column_order=map1_out_column_names,
output_columns=map1_out_column_names,
num_parallel_workers=min(12, num_parallel_workers), python_multiprocessing=True)
dataset = dataset.map(operations=PreprocessTrueBox(config), input_columns=map2_in_column_names,
output_columns=map2_out_column_names, column_order=output_column_names,
output_columns=map2_out_column_names,
num_parallel_workers=min(4, num_parallel_workers), python_multiprocessing=False)
dataset = dataset.project(output_column_names)
# Computed from random subset of ImageNet training images
mean = [m * 255 for m in [0.485, 0.456, 0.406]]
std = [s * 255 for s in [0.229, 0.224, 0.225]]
@@ -284,7 +285,6 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, device_num, rank,
compose_map_func = (lambda image, img_id: reshape_fn(image, img_id, config))
dataset = dataset.map(operations=compose_map_func, input_columns=["image", "img_id"],
output_columns=["image", "image_shape", "img_id"],
column_order=["image", "image_shape", "img_id"],
num_parallel_workers=8)
dataset = dataset.map(operations=hwc_to_chw, input_columns=["image"], num_parallel_workers=8)
dataset = dataset.batch(batch_size, drop_remainder=True)
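In the two-stage YOLO pipeline above, the commit appends dataset.project(output_column_names) after the map calls, because without column_order, map() by itself no longer controls the final column selection or order before batching. A toy illustration of what project() does, with made-up column names (not taken from the file above): it keeps only the listed columns, in the listed order, and drops the rest.

import numpy as np
import mindspore.dataset as ds

def source():
    for i in range(2):
        yield (np.full((2, 2), i, np.float32),   # "image"
               np.array(i, np.int32),            # "img_id"
               np.array(i + 100, np.int32))      # "scratch": throwaway helper column

dataset = ds.GeneratorDataset(source, column_names=["image", "img_id", "scratch"])

# Keep only img_id and image, in that order; the helper column is dropped,
# which is the selection and ordering job column_order used to do inside map().
dataset = dataset.project(["img_id", "image"])

for row in dataset.create_dict_iterator(output_numpy=True):
    print(sorted(row.keys()))   # ['image', 'img_id']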

@@ -45,8 +45,8 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
usage=data_usage, shuffle=shuffle_dataset)
### Processing label
if data_usage == 'test':
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"],
column_order=["id", "label_id", "sentence"])
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"])
dataset = dataset.project(["id", "label_id", "sentence"])
dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"])
else:
label_vocab = text.Vocab.from_list(label_list)
@@ -63,12 +63,12 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
dataset = dataset.map(operations=lookup, input_columns=["sentence"], output_columns=["text_ids"])
dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"])
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
output_columns=["text_ids", "mask_ids"],
column_order=["text_ids", "mask_ids", "label_id"])
output_columns=["text_ids", "mask_ids"])
dataset = dataset.project(["text_ids", "mask_ids", "label_id"])
dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"])
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
output_columns=["text_ids", "segment_ids"],
column_order=["text_ids", "mask_ids", "segment_ids", "label_id"])
output_columns=["text_ids", "segment_ids"])
dataset = dataset.project(["text_ids", "mask_ids", "segment_ids", "label_id"])
dataset = dataset.map(operations=ops.Fill(0), input_columns=["segment_ids"])
dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
return dataset
@@ -90,8 +90,8 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
usage=data_usage, shuffle=shuffle_dataset)
### Processing label
if data_usage == 'test':
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"],
column_order=["id", "label_id", "sentence1", "sentence2"])
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["id"], output_columns=["id", "label_id"])
dataset = dataset.project(["id", "label_id", "sentence1", "sentence2"])
dataset = dataset.map(operations=ops.Fill(0), input_columns=["label_id"])
else:
label_vocab = text.Vocab.from_list(label_list)
@@ -114,27 +114,27 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
input_columns=["sentence2"])
### Generating segment_ids
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["sentence1"],
output_columns=["sentence1", "type_sentence1"],
column_order=["sentence1", "type_sentence1", "sentence2", "label_id"])
output_columns=["sentence1", "type_sentence1"])
dataset = dataset.project(["sentence1", "type_sentence1", "sentence2", "label_id"])
dataset = dataset.map(operations=ops.Duplicate(),
input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"],
column_order=["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"])
input_columns=["sentence2"], output_columns=["sentence2", "type_sentence2"])
dataset = dataset.project(["sentence1", "type_sentence1", "sentence2", "type_sentence2", "label_id"])
dataset = dataset.map(operations=[lookup, ops.Fill(0)], input_columns=["type_sentence1"])
dataset = dataset.map(operations=[lookup, ops.Fill(1)], input_columns=["type_sentence2"])
dataset = dataset.map(operations=ops.Concatenate(),
input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"],
column_order=["sentence1", "sentence2", "segment_ids", "label_id"])
input_columns=["type_sentence1", "type_sentence2"], output_columns=["segment_ids"])
dataset = dataset.project(["sentence1", "sentence2", "segment_ids", "label_id"])
dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["segment_ids"])
### Generating text_ids
dataset = dataset.map(operations=ops.Concatenate(),
input_columns=["sentence1", "sentence2"], output_columns=["text_ids"],
column_order=["text_ids", "segment_ids", "label_id"])
input_columns=["sentence1", "sentence2"], output_columns=["text_ids"])
dataset = dataset.project(["text_ids", "segment_ids", "label_id"])
dataset = dataset.map(operations=lookup, input_columns=["text_ids"])
dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"])
### Generating mask_ids
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
output_columns=["text_ids", "mask_ids"],
column_order=["text_ids", "mask_ids", "segment_ids", "label_id"])
output_columns=["text_ids", "mask_ids"])
dataset = dataset.project(["text_ids", "mask_ids", "segment_ids", "label_id"])
dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["mask_ids"])
dataset = dataset.batch(batch_size, drop_remainder=drop_remainder)
return dataset
@@ -214,12 +214,12 @@ def process_ner_msra_dataset(data_dir, label_list, bert_vocab_path, max_seq_len=
dataset = dataset.map(operations=lookup, input_columns=["sentence"], output_columns=["input_ids"])
dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["input_ids"])
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["input_ids"],
output_columns=["input_ids", "input_mask"],
column_order=["input_ids", "input_mask", "label_ids"])
output_columns=["input_ids", "input_mask"])
dataset = dataset.project(["input_ids", "input_mask", "label_ids"])
dataset = dataset.map(operations=ops.Mask(ops.Relational.NE, 0, mstype.int32), input_columns=["input_mask"])
dataset = dataset.map(operations=ops.Duplicate(), input_columns=["input_ids"],
output_columns=["input_ids", "segment_ids"],
column_order=["input_ids", "input_mask", "segment_ids", "label_ids"])
output_columns=["input_ids", "segment_ids"])
dataset = dataset.project(["input_ids", "input_mask", "segment_ids", "label_ids"])
dataset = dataset.map(operations=ops.Fill(0), input_columns=["segment_ids"])
return dataset