diff --git a/official/cv/cnn_direction_model/README.md b/official/cv/cnn_direction_model/README.md index 9b39d8774c2ae1c4fc83d795713b0879e6b563fe..323508cf5e42caed8e03f5df6e31e928706f3b29 100644 --- a/official/cv/cnn_direction_model/README.md +++ b/official/cv/cnn_direction_model/README.md @@ -31,7 +31,7 @@ CNN Direction Model's composition consists of 1 convolutional layer and 4 residu # [Dataset](#contents) -Dataset used: [FSNS (French Street Name Signs)](https://arxiv.org/abs/1702.03970) +For training and evaluation, we use the [French Street Name Signs (FSNS)](https://arxiv.org/abs/1702.03970) dataset released by Google, which contains approximately 1 million training images and their corresponding ground truth words. Note that this dataset is very large. - Dataset size:~200GB,~1M 150*600 colored images with a label indicating the text within the image. - Train:200GB,1M, images @@ -39,28 +39,7 @@ Dataset used: [FSNS (French Street Name Signs)](https://arxiv.org/abs/1702.03970 - Data format:binary files - Note:Data will be processed in dataset.py -- Download the dataset, the recommended directory structure to have is as follows: - -Annotations for training and testing should be in test_annot and train_annot. -Training and Testing images should be in train and test. - -```shell -├─test -│ -└─test_annot -│ -└─train -│ -└─train_annot -``` - -- After downloading the data and converting it to it's raw format (.txt for annotations and .jpg, .jpeg, or .png for the images), add the image and annotations paths to the src/config.py file then cd to src and run: - -```python -python create_mindrecord.py -``` - -This will create two folders: train and test in the target directory you specify in config.py.
+you can find how to generate dataset in [crnn_seq2seq_ocr](https://gitee.com/mindspore/models/tree/master/official/cv/crnn_seq2seq_ocr#quick-start) # [Environment Requirements](#contents) diff --git a/official/cv/cnn_direction_model/src/create_mindrecord.py b/official/cv/cnn_direction_model/src/create_mindrecord.py deleted file mode 100644 index fe41ebc3596e0543dc22a3c387e303d3def9ef66..0000000000000000000000000000000000000000 --- a/official/cv/cnn_direction_model/src/create_mindrecord.py +++ /dev/null @@ -1,108 +0,0 @@ -# Copyright 2020-2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import os -from mindspore.mindrecord import FileWriter -from config import config1 as config - -FAIL = 1 -SUCCESS = 0 - -def get_images(image_dir, annot_files): - """ - Get file paths that are in image_dir, annotation file is used to get the file names. - - Args: - image_dir(string): images directory. - annot_files(list(string)) : annotation files. - - Returns: - status code(int), status of process(string), image ids(list(int)), image paths(dict(int,string)) - """ - print("Process [Get Images] started") - if not os.path.isdir(image_dir): - return FAIL, "{} is not a directory. 
Please check the src/config.py file.".format(image_dir), [], {} - image_files_dict = {} - images = [] - img_id = 0 - # create a dictionary of image file paths - for annot_file in annot_files: - if not os.path.exists(annot_file): - return FAIL, "{} was not found.".format(annot_file), [], {} - lines = open(annot_file, 'r').readlines() - for line in lines: - # extract file name - file_name = line.split('\t')[0] - image_path = os.path.join(image_dir, file_name) - if not os.path.isfile(image_path): - return FAIL, "{} is not a file.".format(image_path), [], {} - # add path to dictionary - images.append(img_id) - image_files_dict[img_id] = image_path - img_id += 1 - return SUCCESS, "Successfully retrieved {} images.".format(str(len(images))), images, image_files_dict - -def write_mindrecord_images(image_ids, image_dict, mindrecord_dir, data_schema, file_num=8): - writer = FileWriter(os.path.join(mindrecord_dir, config.dataset_name + ".mindrecord"), shard_num=file_num) - writer.add_schema(data_schema, config.dataset_name) - len_image_dict = len(image_dict) - sample_count = 0 - for img_id in image_ids: - image_path = image_dict[img_id] - with open(image_path, 'rb') as f: - img = f.read() - row = {"image": img} - sample_count += 1 - writer.write_raw_data([row]) - print("Progress {} / {}".format(str(sample_count), str(len_image_dict)), end='\r') - writer.commit() - -def create_mindrecord(): - - annot_files_train = [config.train_annotation_file] - annot_files_test = [config.test_annotation_file] - ret_code, ret_message, images_train, image_path_dict_train = get_images(image_dir=config.data_root_train, - annot_files=annot_files_train) - if ret_code != SUCCESS: - return ret_code, message, "", "" - ret_code, ret_message, images_test, image_path_dict_test = get_images(image_dir=config.data_root_test, - annot_files=annot_files_test) - if ret_code != SUCCESS: - return ret_code, ret_message, "", "" - data_schema = {"image": {"type": "bytes"}} - train_target = 
os.path.join(config.mindrecord_dir, "train") - test_target = os.path.join(config.mindrecord_dir, "test") - if not os.path.exists(train_target): - os.mkdir(train_target) - if not os.path.exists(test_target): - os.mkdir(test_target) - print("Creating training mindrecords: ") - write_mindrecord_images(images_train, image_path_dict_train, train_target, data_schema) - print("Creating test mindrecords: ") - write_mindrecord_images(images_test, image_path_dict_test, test_target, data_schema) - return SUCCESS, "Successful mindrecord creation.", train_target, test_target - - - - -if __name__ == "__main__": - # start creating mindrecords from raw images and annots - # provide root path to raw data in the config file - code, message, train_target_dir, test_target_dir = create_mindrecord() - if code != SUCCESS: - print("Process done with status code: {}. Error: {}".format(code, message)) - else: - print("Process done with status: {}. Training and testing data are saved to {} and {} respectively." - .format(message, train_target_dir, test_target_dir)) diff --git a/official/cv/crnn_seq2seq_ocr/README.md b/official/cv/crnn_seq2seq_ocr/README.md index 7e2c31c5bda1357168fe585c3232c8006ce4d409..4886fdc427b9b3a056fad383c02a20484484a8c9 100644 --- a/official/cv/crnn_seq2seq_ocr/README.md +++ b/official/cv/crnn_seq2seq_ocr/README.md @@ -37,7 +37,13 @@ CRNN-Seq2Seq-OCR applies a vgg structure to extract features from processed imag ## [Dataset](#content) -For training and evaluation, we use the French Street Name Signs (FSNS) released by Google as the training data, which contains approximately 1 million training images and their corresponding ground truth words. +For training and evaluation, we use the French Street Name Signs (FSNS) released by Google as the training data, which contains approximately 1 million training images and their corresponding ground truth words. Note that these datasets are very large. 
+ +- Dataset size:~200GB,~1M 150*600 colored images with a label indicating the text within the image. + - Train:200GB,1M images + - Test:4GB,24,404 images +- Data format:binary files + - Note:Data will be processed in dataset.py ## [Environment Requirements](#contents) @@ -54,9 +60,60 @@ For training and evaluation, we use the French Street Name Signs (FSNS) released - After the dataset is prepared, you may start running the training or the evaluation scripts as follows: - Preprocess FSNS dataset - - 1.download FSNS dataset from [here](https://rrc.cvc.uab.es/?ch=6&com=downloads) - - 2.Use tf2file_v3.py transform to intermediate dataset. + - 1.Download the FSNS dataset files listed below with "wget". + + https://download.tensorflow.org/data/fsns-20160927/test/test-00000-of-00064 + ... + https://download.tensorflow.org/data/fsns-20160927/test/test-00063-of-00064 + https://download.tensorflow.org/data/fsns-20160927/train/train-00000-of-00512 + ... + https://download.tensorflow.org/data/fsns-20160927/train/train-00511-of-00512 + The directory structure of the downloaded dataset is as follows: + + tfrecord + ├── test + ├── test-00000-of-00064 + ├── ... + └── test-00063-of-00064 + └── train + ├── train-00000-of-00512 + ├── ... + └── train-00511-of-00512 + - 2.Use tf2file_v3.py to convert the tfrecord files into an intermediate dataset (the original image and annotation files). + - Set the following parameters in tf2file_v3.py + + ```shell + phase: "train" or "test" + save_img_dir + save_annot_dir + tfrecord_dir: "tfrecord/train" or "tfrecord/test" + ``` + + - python tf2file_v3.py + After running this command, the directory structure of the intermediate dataset is as follows: + + ```shell + data + ├── test + ├── *.png + ├── test.txt + ├── train + ├── *.png + ├── train.txt + ``` + - 3.Use create_mindrecord_files.py to convert the intermediate dataset to the MindRecord dataset.
+ - set some parameters in default_config.yaml + + ```shell + mindrecord_dir + data_root: "data/train" + annotation_file: "data/train.txt" + val_data_root: "data/test" + val_annotation_file: "data/test.txt" + ``` + + - python create_mindrecord_files.py - Running on Ascend diff --git a/official/cv/crnn_seq2seq_ocr/src/create_mindrecord_files.py b/official/cv/crnn_seq2seq_ocr/src/create_mindrecord_files.py index 1058d55cdd60750727de8a986a370f5fce2a05c2..31e0b9e7f1beb140a5a23bca03848d6e23d48515 100644 --- a/official/cv/crnn_seq2seq_ocr/src/create_mindrecord_files.py +++ b/official/cv/crnn_seq2seq_ocr/src/create_mindrecord_files.py @@ -14,19 +14,47 @@ # ============================================================================ """Create FSNS MindRecord files.""" +import codecs +import logging import os import numpy as np from mindspore.mindrecord import FileWriter +from model_utils.config import config -from src.model_utils.config import config -from utils import initialize_vocabulary +def initialize_vocabulary(vocabulary_path): + """ + initialize vocabulary from file. 
+ assume the vocabulary is stored one-item-per-line + """ + characters_class = 9999 + if os.path.exists(vocabulary_path): + rev_vocab = [] + with codecs.open(vocabulary_path, 'r', encoding='utf-8') as voc_file: + rev_vocab = [line.strip() for line in voc_file] + + vocab = {x: y for (y, x) in enumerate(rev_vocab)} + + reserved_char_size = characters_class - len(rev_vocab) + if reserved_char_size < 0: + raise ValueError("Number of characters in vocabulary is equal or larger than config.characters_class") + + for _ in range(reserved_char_size): + rev_vocab.append('') + + # put space at the last position + vocab[' '] = len(rev_vocab) + rev_vocab.append(' ') + logging.info("Initializing vocabulary ends: %s", vocabulary_path) + return vocab, rev_vocab + + raise ValueError("Initializing vocabulary ends: %s" % vocabulary_path) def serialize_annotation(img_path, lex, vocab): - go_id = config.characters_dictionary.get("go_id") - eos_id = config.characters_dictionary.get("eos_id") + go_id = config.characters_dictionary.go_id + eos_id = config.characters_dictionary.eos_id word = [go_id] for special_label in config.labels_not_use: @@ -85,7 +113,10 @@ def fsns_train_data_to_mindrecord(mindrecord_dir, prefix="data_ocr.mindrecord", anno_file_dirs = [config.annotation_file] images, image_path_dict, image_anno_dict = create_fsns_label(image_dir=config.data_root, anno_file_dirs=anno_file_dirs) - vocab, _ = initialize_vocabulary(config.vocab_path) + + current_file_dir = os.path.dirname(os.path.realpath(__file__)) + vocab_path = os.path.dirname(current_file_dir) + "/" + config.vocab_path + vocab, _ = initialize_vocabulary(vocab_path) data_schema = {"image": {"type": "bytes"}, "label": {"type": "int32", "shape": [-1]}, @@ -128,7 +159,7 @@ def fsns_train_data_to_mindrecord(mindrecord_dir, prefix="data_ocr.mindrecord", decoder_input = (np.array(label).T).astype(np.int32) target_weight = (np.array(target_weight).T).astype(np.int32) - if not len(decoder_input) == len(target_weight): + if 
len(decoder_input) != len(target_weight): continue target = [decoder_input[i + 1] for i in range(len(decoder_input) - 1)] @@ -154,7 +185,10 @@ def fsns_val_data_to_mindrecord(mindrecord_dir, prefix="data_ocr.mindrecord", fi anno_file_dirs = [config.val_annotation_file] images, image_path_dict, image_anno_dict = create_fsns_label(image_dir=config.val_data_root, anno_file_dirs=anno_file_dirs) - vocab, _ = initialize_vocabulary(config.vocab_path) + + current_file_dir = os.path.dirname(os.path.realpath(__file__)) + vocab_path = os.path.dirname(current_file_dir) + "/" + config.vocab_path + vocab, _ = initialize_vocabulary(vocab_path) data_schema = {"image": {"type": "bytes"}, "decoder_input": {"type": "int32", "shape": [-1]},