From cf4f577b992064f33c985f682f0ffa9310e0ae08 Mon Sep 17 00:00:00 2001
From: Xiao Tianci <xiaotianci1@huawei.com>
Date: Mon, 29 Aug 2022 21:39:16 +0800
Subject: [PATCH] Update string handling: dataset iterators now yield numpy strings instead of bytes
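
MindSpore dataset iterators with output_numpy=True now return string
columns as numpy unicode strings (numpy.str_) rather than byte strings,
so the manual .decode('utf-8') / str(x, encoding="utf-8") conversions
are dropped and the dtype='S' byte arrays become default unicode arrays.

A minimal sketch of the behavior change (illustrative only: `ds` stands
for any MindSpore dataset with a string column named "image_name"):

    for row in ds.create_dict_iterator(output_numpy=True):
        name = row["image_name"][0]
        # before: numpy.bytes_ -> name.decode('utf-8') was required
        # now:    numpy.str_   -> a subclass of str, usable directly
        path = name[0:-4] + "_fake_B.jpg"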

---
 official/cv/crnn_seq2seq_ocr/eval.py              |  2 +-
 official/cv/psenet/infer_psenet_onnx.py           |  2 +-
 official/cv/psenet/test.py                        |  2 +-
 official/nlp/bert/src/finetune_data_preprocess.py | 14 +++++++-------
 research/cv/CycleGAN/eval.py                      |  4 ++--
 research/cv/CycleGAN/eval_onnx.py                 |  4 ++--
 research/cv/FaceDetection/eval.py                 |  6 +++---
 research/cv/FaceDetection/preprocess.py           |  8 ++++----
 research/cv/Pix2PixHD/precompute_feature_maps.py  |  2 +-
 research/cv/res2net/infer.py                      |  2 +-
 10 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/official/cv/crnn_seq2seq_ocr/eval.py b/official/cv/crnn_seq2seq_ocr/eval.py
index 4d756a3e2..3e28a5d74 100644
--- a/official/cv/crnn_seq2seq_ocr/eval.py
+++ b/official/cv/crnn_seq2seq_ocr/eval.py
@@ -167,7 +167,7 @@ def run_eval():
                 batch_decoded_label.append(ele.asnumpy())
 
             for b in range(config.eval_batch_size):
-                text = data["annotation"][b].decode("utf8")
+                text = data["annotation"][b]
                 text = text_standardization(text)
                 decoded_label = list(np.array(batch_decoded_label)[:, b])
                 decoded_words = []
diff --git a/official/cv/psenet/infer_psenet_onnx.py b/official/cv/psenet/infer_psenet_onnx.py
index 698e9130c..bb9335917 100644
--- a/official/cv/psenet/infer_psenet_onnx.py
+++ b/official/cv/psenet/infer_psenet_onnx.py
@@ -107,7 +107,7 @@ def test():
         # get data
         img, img_resized, img_name = data
         img = img[0].astype(np.uint8).copy()
-        img_name = img_name[0].decode('utf-8')
+        img_name = img_name[0]
 
         get_data_pts = time.time()
         get_data_time.update(get_data_pts - end_pts)
diff --git a/official/cv/psenet/test.py b/official/cv/psenet/test.py
index c9238d954..83ae226a8 100644
--- a/official/cv/psenet/test.py
+++ b/official/cv/psenet/test.py
@@ -126,7 +126,7 @@ def test():
         # get data
         img, img_resized, img_name = data
         img = img[0].astype(np.uint8).copy()
-        img_name = img_name[0].decode('utf-8')
+        img_name = img_name[0]
 
         get_data_pts = time.time()
         get_data_time.update(get_data_pts - end_pts)
diff --git a/official/nlp/bert/src/finetune_data_preprocess.py b/official/nlp/bert/src/finetune_data_preprocess.py
index 3f9f682b5..6906fca79 100644
--- a/official/nlp/bert/src/finetune_data_preprocess.py
+++ b/official/nlp/bert/src/finetune_data_preprocess.py
@@ -58,8 +58,8 @@ def process_tnews_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
     lookup = text.Lookup(vocab, unknown_token='[UNK]')
     dataset = dataset.map(operations=tokenizer, input_columns=["sentence"])
     dataset = dataset.map(operations=ops.Slice(slice(0, max_seq_len)), input_columns=["sentence"])
-    dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
-                                                     append=np.array(["[SEP]"], dtype='S')), input_columns=["sentence"])
+    dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"]),
+                                                     append=np.array(["[SEP]"])), input_columns=["sentence"])
     dataset = dataset.map(operations=lookup, input_columns=["sentence"], output_columns=["text_ids"])
     dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["text_ids"])
     dataset = dataset.map(operations=ops.Duplicate(), input_columns=["text_ids"],
@@ -107,10 +107,10 @@ def process_cmnli_clue_dataset(data_dir, label_list, bert_vocab_path, data_usage
     dataset = dataset.map(operations=text.TruncateSequencePair(max_seq_len - 3),
                           input_columns=["sentence1", "sentence2"])
     ### Adding special tokens
-    dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
-                                                     append=np.array(["[SEP]"], dtype='S')),
+    dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"]),
+                                                     append=np.array(["[SEP]"])),
                           input_columns=["sentence1"])
-    dataset = dataset.map(operations=ops.Concatenate(append=np.array(["[SEP]"], dtype='S')),
+    dataset = dataset.map(operations=ops.Concatenate(append=np.array(["[SEP]"])),
                           input_columns=["sentence2"])
     ### Generating segment_ids
     dataset = dataset.map(operations=ops.Duplicate(), input_columns=["sentence1"],
@@ -209,8 +209,8 @@ def process_ner_msra_dataset(data_dir, label_list, bert_vocab_path, max_seq_len=
     unicode_char_tokenizer = text.UnicodeCharTokenizer()
     dataset = dataset.map(operations=unicode_char_tokenizer, input_columns=["text"], output_columns=["sentence"])
     dataset = dataset.map(operations=ops.Slice(slice(0, max_seq_len-2)), input_columns=["sentence"])
-    dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"], dtype='S'),
-                                                     append=np.array(["[SEP]"], dtype='S')), input_columns=["sentence"])
+    dataset = dataset.map(operations=ops.Concatenate(prepend=np.array(["[CLS]"]),
+                                                     append=np.array(["[SEP]"])), input_columns=["sentence"])
     dataset = dataset.map(operations=lookup, input_columns=["sentence"], output_columns=["input_ids"])
     dataset = dataset.map(operations=ops.PadEnd([max_seq_len], 0), input_columns=["input_ids"])
     dataset = dataset.map(operations=ops.Duplicate(), input_columns=["input_ids"],
diff --git a/research/cv/CycleGAN/eval.py b/research/cv/CycleGAN/eval.py
index 62f688636..a4949825d 100644
--- a/research/cv/CycleGAN/eval.py
+++ b/research/cv/CycleGAN/eval.py
@@ -45,7 +45,7 @@ def predict():
     reporter.start_predict("A to B")
     for data in ds.create_dict_iterator(output_numpy=True):
         img_A = Tensor(data["image"])
-        path_A = str(data["image_name"][0], encoding="utf-8")
+        path_A = data["image_name"][0]
         path_B = path_A[0:-4] + "_fake_B.jpg"
         fake_B = G_A(img_A)
         save_image(fake_B, os.path.join(imgs_out, "fake_B", path_B))
@@ -58,7 +58,7 @@ def predict():
     reporter.start_predict("B to A")
     for data in ds.create_dict_iterator(output_numpy=True):
         img_B = Tensor(data["image"])
-        path_B = str(data["image_name"][0], encoding="utf-8")
+        path_B = data["image_name"][0]
         path_A = path_B[0:-4] + "_fake_A.jpg"
         fake_A = G_B(img_B)
         save_image(fake_A, os.path.join(imgs_out, "fake_A", path_A))
diff --git a/research/cv/CycleGAN/eval_onnx.py b/research/cv/CycleGAN/eval_onnx.py
index d896be42f..372650fd5 100644
--- a/research/cv/CycleGAN/eval_onnx.py
+++ b/research/cv/CycleGAN/eval_onnx.py
@@ -63,7 +63,7 @@ def predict():
     reporter.start_predict("A to B")
     for data in ds.create_dict_iterator(output_numpy=True):
         img_a = data["image"]
-        path_a = str(data["image_name"][0], encoding="utf-8")
+        path_a = data["image_name"][0]
         path_b = path_a[0:-4] + "_fake_B.jpg"
         [fake_b] = gen_a.run(None, {gen_a_input_name: img_a})
         save_image(fake_b, os.path.join(imgs_out, "fake_B", path_b))
@@ -77,7 +77,7 @@ def predict():
     reporter.start_predict("B to A")
     for data in ds.create_dict_iterator(output_numpy=True):
         img_b = data["image"]
-        path_b = str(data["image_name"][0], encoding="utf-8")
+        path_b = data["image_name"][0]
         path_a = path_b[0:-4] + "_fake_A.jpg"
         [fake_a] = gen_b.run(None, {gen_b_input_name: img_b})
         save_image(fake_a, os.path.join(imgs_out, "fake_A", path_a))
diff --git a/research/cv/FaceDetection/eval.py b/research/cv/FaceDetection/eval.py
index 3ded3b96e..95960072a 100644
--- a/research/cv/FaceDetection/eval.py
+++ b/research/cv/FaceDetection/eval.py
@@ -232,9 +232,9 @@ def run_eval():
                     single_dets.extend(tdets[op][b])
                 dets.append(single_dets)
 
-            det.update({batch_image_name[k].decode('UTF-8'): v for k, v in enumerate(dets)})
-            img_size.update({batch_image_name[k].decode('UTF-8'): v for k, v in enumerate(batch_image_size)})
-            img_anno.update({batch_image_name[k].decode('UTF-8'): v for k, v in enumerate(batch_labels)})
+            det.update({batch_image_name[k]: v for k, v in enumerate(dets)})
+            img_size.update({batch_image_name[k]: v for k, v in enumerate(batch_image_size)})
+            img_anno.update({batch_image_name[k]: v for k, v in enumerate(batch_labels)})
 
         print('eval times:', eval_times)
         print('batch size: ', config.batch_size)
diff --git a/research/cv/FaceDetection/preprocess.py b/research/cv/FaceDetection/preprocess.py
index 205d31531..35d3c56c0 100644
--- a/research/cv/FaceDetection/preprocess.py
+++ b/research/cv/FaceDetection/preprocess.py
@@ -78,9 +78,9 @@ def preprocess():
         images, labels, image_name, image_size = data[0:4]
         images = Image.fromarray(images[0].astype('uint8')).convert('RGB')
-        images.save(os.path.join(images_path, image_name[0].decode() + ".jpg"))
-        labels.tofile(os.path.join(labels_path, image_name[0].decode() + ".bin"))
-        image_name.tofile(os.path.join(image_name_path, image_name[0].decode() + ".bin"))
-        image_size.tofile(os.path.join(image_size_path, image_name[0].decode() + ".bin"))
+        images.save(os.path.join(images_path, image_name[0] + ".jpg"))
+        labels.tofile(os.path.join(labels_path, image_name[0] + ".bin"))
+        image_name.tofile(os.path.join(image_name_path, image_name[0] + ".bin"))
+        image_size.tofile(os.path.join(image_size_path, image_name[0] + ".bin"))
 
 
 if __name__ == '__main__':
diff --git a/research/cv/Pix2PixHD/precompute_feature_maps.py b/research/cv/Pix2PixHD/precompute_feature_maps.py
index 70a30684e..e17dd3768 100644
--- a/research/cv/Pix2PixHD/precompute_feature_maps.py
+++ b/research/cv/Pix2PixHD/precompute_feature_maps.py
@@ -47,6 +47,6 @@ for i, data in enumerate(data_loader):
     inst = ms.Tensor(data['inst'])
     feat_map = netE(image, inst)
     feat_map = resizeBilinear(feat_map, scale_factor=2)
-    save_path = data['path'][0].decode('utf-8').replace('/train_label', '/train_feat')
+    save_path = data['path'][0].replace('/train_label', '/train_feat')
     save_path = os.path.splitext(save_path)[0]
     save_image(feat_map, save_path, format_name='.png')
diff --git a/research/cv/res2net/infer.py b/research/cv/res2net/infer.py
index 52cff20a3..83f845353 100644
--- a/research/cv/res2net/infer.py
+++ b/research/cv/res2net/infer.py
@@ -40,7 +40,7 @@ def show_predict_info(label_list, prediction_list, filename_list, predict_ng):
     """show_predict_info"""
     label_index = 0
     for label_index, predict_index, filename in zip(label_list, prediction_list, filename_list):
-        filename = np.array(filename).tostring().decode('utf8')
+        filename = str(filename)
         if label_index == -1:
             print("file: '{}' predict class id is: {}".format(
                 filename, predict_index))
-- 
GitLab