From e5364a010c6810b457f59dfc34f906528430b320 Mon Sep 17 00:00:00 2001
From: zhouneng <zhouneng2@huawei.com>
Date: Fri, 22 Oct 2021 16:12:54 +0800
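Subject: [PATCH] [yolov3_darknet] use c_transforms.Decode to speed up data
 processing

In training mode the dataset now returns the raw encoded image bytes and
the pipeline decodes them with c_transforms.Decode instead of PIL.
run_standalone_train.sh now takes the taskset CPU list from the NUMA
lines of lscpu, falling back to the previous CPU range when that is empty.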
---
 .../scripts/run_standalone_train.sh           | 20 +++++++++++--------
 .../cv/yolov3_darknet53/src/yolo_dataset.py   |  6 +++++-
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/official/cv/yolov3_darknet53/scripts/run_standalone_train.sh b/official/cv/yolov3_darknet53/scripts/run_standalone_train.sh
index f4faf6b22..a0bbe09eb 100644
--- a/official/cv/yolov3_darknet53/scripts/run_standalone_train.sh
+++ b/official/cv/yolov3_darknet53/scripts/run_standalone_train.sh
@@ -64,16 +64,20 @@ cd ./train || exit
 echo "start training for device $DEVICE_ID"
 env > env.log
 
-cpus=`cat /proc/cpuinfo| grep "processor"| wc -l`
-if [ $cpus -ge $CPU_BIND_NUM ]
+cmdopt=`lscpu | grep NUMA | tail -1 | awk '{print $4}'`  # 4th field of the last NUMA line (presumably that node's CPU list)
+if test -z "$cmdopt"  # no NUMA info from lscpu: fall back to a range computed from /proc/cpuinfo
 then
-  start=`expr $cpus - $CPU_BIND_NUM`
-  end=`expr $cpus - 1`
-else
-  start=0
-  end=`expr $cpus - 1`
+  cpus=`cat /proc/cpuinfo| grep "processor"| wc -l`
+  if [ "$cpus" -ge "$CPU_BIND_NUM" ]
+  then
+    start=`expr $cpus - $CPU_BIND_NUM`
+    end=`expr $cpus - 1`
+  else
+    start=0
+    end=`expr $cpus - 1`
+  fi
+  cmdopt=$start"-"$end
 fi
-cmdopt=$start"-"$end
 
 taskset -c $cmdopt python train.py \
     --data_dir=$DATASET_PATH \
diff --git a/official/cv/yolov3_darknet53/src/yolo_dataset.py b/official/cv/yolov3_darknet53/src/yolo_dataset.py
index 6cb7fdfaf..36faf1d5c 100644
--- a/official/cv/yolov3_darknet53/src/yolo_dataset.py
+++ b/official/cv/yolov3_darknet53/src/yolo_dataset.py
@@ -18,6 +18,7 @@ import os
 import multiprocessing
 import cv2
 from PIL import Image
+import numpy as np
 from pycocotools.coco import COCO
 import mindspore.dataset as de
 import mindspore.dataset.vision.c_transforms as CV
@@ -97,9 +98,10 @@ class COCOYoloDataset:
         coco = self.coco
         img_id = self.img_ids[index]
         img_path = coco.loadImgs(img_id)[0]["file_name"]
-        img = Image.open(os.path.join(self.root, img_path)).convert("RGB")
         if not self.is_training:
+            img = Image.open(os.path.join(self.root, img_path)).convert("RGB")
             return img, img_id
+        img = np.fromfile(os.path.join(self.root, img_path), dtype="uint8")  # encoded bytes for CV.Decode
 
         ann_ids = coco.getAnnIds(imgIds=img_id)
         target = coco.loadAnns(ann_ids)
@@ -168,10 +170,12 @@ def create_yolo_dataset(image_dir, anno_path, batch_size, max_epoch, device_num,
         if device_num != 8:
             ds = de.GeneratorDataset(yolo_dataset, column_names=dataset_column_names,
                                      sampler=distributed_sampler)
+            ds = ds.map(operations=CV.Decode(), input_columns=["image"])  # decode the encoded image bytes
             ds = ds.batch(batch_size, per_batch_map=multi_scale_trans, input_columns=dataset_column_names,
                           num_parallel_workers=min(32, num_parallel_workers), drop_remainder=True)
         else:
             ds = de.GeneratorDataset(yolo_dataset, column_names=dataset_column_names, sampler=distributed_sampler)
+            ds = ds.map(operations=CV.Decode(), input_columns=["image"])  # decode the encoded image bytes
             ds = ds.batch(batch_size, per_batch_map=multi_scale_trans, input_columns=dataset_column_names,
                           num_parallel_workers=min(8, num_parallel_workers), drop_remainder=True)
     else:
-- 
GitLab