Commit c19a8d2b authored by hemaohua

add preprocess_dataset.py for PSPNet

parent ef49e901
@@ -35,12 +35,12 @@ The pyramid pooling module fuses features under four different pyramid scales.
# [Dataset](#Content)
- [PASCAL VOC 2012 and SBD Dataset Website](http://home.bharathh.info/pubs/codes/SBD/download.html)
- [Semantic Boundaries Dataset](http://home.bharathh.info/pubs/codes/SBD/download.html)
- It contains 11,355 finely annotated images, split into training and validation sets of 8,498 and 2,857 images respectively.
- The path formats in voc_train_lst.txt and voc_val_lst.txt are different; you can run create_train_lst.py to generate train_lst.txt in the data dir for VOC2012, as follows:
- The paths in train.txt and val.txt are partial (bare image names), and the .mat annotation files in the cls directory need to be converted to images. Run preprocess_dataset.py to convert the .mat files and generate train_list.txt and val_list.txt, as follows (a quick spot-check sketch appears after this list):
```bash
python src/dataset/create_train_lst.py --data_dir [DATA_DIR]
python src/dataset/preprocess_dataset.py --data_dir [DATA_DIR]
```
- [ADE20K Dataset Website](http://groups.csail.mit.edu/vision/datasets/ADE20K/)
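After preprocessing, each line of the generated train_list.txt / val_list.txt pairs a relative image path with its relative annotation path. Below is a minimal spot-check sketch, assuming the SBD root passed as `--data_dir`; the directory path and file names are illustrative, not part of the repository:

```python
# Spot-check the generated train_list.txt (illustrative sketch, not repository code).
import os

data_dir = '/path/to/benchmark_RELEASE/dataset'   # assumption: the directory passed as --data_dir
with open(os.path.join(data_dir, 'train_list.txt')) as f:
    img_rel, anno_rel = f.readline().split()      # each line: '<img/xxx.jpg> <cls_png/xxx.png>'
assert os.path.exists(os.path.join(data_dir, img_rel)), img_rel
assert os.path.exists(os.path.join(data_dir, anno_rel)), anno_rel
print('OK:', img_rel, anno_rel)
```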
@@ -12,9 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""generate train_lst.txt"""
"""preprocess dataset"""
import os
import argparse
from PIL import Image
from scipy.io import loadmat
def _parser_args():
@@ -28,18 +30,55 @@ def _get_data_list(data_list_file):
        return f.readlines()
def _mat_to_arr(mat_path):
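    # 'GTcls' is a MATLAB struct; the positional index below pulls out the per-pixel
    # class-label array (SBD keeps it in the struct's 'Segmentation' field).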
    data = loadmat(mat_path)['GTcls']
    arr = data[0, 0][1]
    return arr
def main():
    args = _parser_args()
    data_dir = args.data_dir
    voc_train_lst_txt = os.path.join(data_dir, 'voc_train_lst.txt')
    train_lst_txt = os.path.join(data_dir, 'train_lst.txt')
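    # Convert every SBD .mat annotation in data_dir/cls into a PNG label map in data_dir/cls_png.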
    cls_path = os.path.join(data_dir, 'cls')
    cls_png_path = os.path.join(data_dir, 'cls_png')
    if not os.path.exists(cls_png_path):
        os.mkdir(cls_png_path)
    mat_list = os.listdir(cls_path)
    print('Start generating PNG annotations.')
    print("This takes a little time; please don't quit.")
    i = 0
    for mat in mat_list:
        mat_path = os.path.join(cls_path, mat)
        arr = _mat_to_arr(mat_path)
        png_path = os.path.join(cls_png_path, mat.replace('.mat', '.png'))
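        # arr holds class indices, so it is saved as a single-channel label PNG
        # (pixel value = class id) rather than a colorized image.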
        ann_im = Image.fromarray(arr)
        ann_im.save(png_path)
        i += 1
    print(f"Generated {i} PNG files in {cls_png_path}.")
    train_txt = os.path.join(data_dir, 'train.txt')
    train_list_txt = os.path.join(data_dir, 'train_list.txt')
    val_txt = os.path.join(data_dir, 'val.txt')
    val_list_txt = os.path.join(data_dir, 'val_list.txt')
    train_data_lst = _get_data_list(train_txt)
    with open(train_list_txt, 'w') as f:
        for line in train_data_lst:
            line = line.strip()
            img_ = os.path.join('img', line + '.jpg')
            anno_ = os.path.join('cls_png', line + '.png')
            f.write(f'{img_} {anno_}\n')
    print('Generated train_list.txt in data_dir.')
    voc_train_data_lst = _get_data_list(voc_train_lst_txt)
    with open(train_lst_txt, 'w') as f:
        for line in voc_train_data_lst:
            img_, anno_ = (os.path.join('VOCdevkit/VOC2012', i.strip()) for i in line.split())
    val_data_lst = _get_data_list(val_txt)
    with open(val_list_txt, 'w') as f:
        for line in val_data_lst:
            line = line.strip()
            img_ = os.path.join('img', line + '.jpg')
            anno_ = os.path.join('cls_png', line + '.png')
            f.write(f'{img_} {anno_}\n')
    print('generating voc train list success.')
    print('Generated val_list.txt in data_dir.')
    print('Finish.')
if __name__ == "__main__":
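For reference, a converted annotation can be inspected like this, assuming Pillow and NumPy are available; the directory path and file name below are illustrative:

```python
# Inspect one converted annotation (illustrative sketch; the file name is an assumption).
import os
import numpy as np
from PIL import Image

data_dir = '/path/to/benchmark_RELEASE/dataset'            # same --data_dir as above (assumption)
label = np.array(Image.open(os.path.join(data_dir, 'cls_png', '2008_000002.png')))
print(label.shape, label.dtype, np.unique(label))          # SBD class ids lie in 0..20 (0 = background)
```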