diff --git a/research/cv/siamRPN/README_CN.md b/research/cv/siamRPN/README_CN.md index b451c4c06191290a98990124e1c40100a89177df..5c5e960ea8921854f24bd0f9cbf975a206c6ce58 100644 --- a/research/cv/siamRPN/README_CN.md +++ b/research/cv/siamRPN/README_CN.md @@ -16,6 +16,8 @@ - [分布式训练](#分布式训练) - [评估过程](#评估过程) - [评估](#评估) + - [910评估](#910评估) + - [310评估](#310评估) - [模型描述](#模型描述) - [性能](#性能) - [训练性能](#训练性能) @@ -91,9 +93,12 @@ Siam-RPN提出了一种基于RPN的孪生网络结构。由孪生子网络和RPN ├── cv ├── siamRPN ├── README_CN.md // googlenet相关说明 - ├── ascend310_infer // 实现310推理源代码 + ├── ascend_310_infer // 实现310推理源代码 ├── scripts │ ├──run.sh // 训练脚本 + │ ├──run_distribute_train.sh // 本地多卡训练脚本 + │ ├──run_eval.sh // 910评估脚本 + │ ├──run_infer_310.sh // 310推理评估脚本 ├── src │ ├──data_loader.py // 数据集加载处理脚本 │ ├──net.py // siamRPN架构 @@ -184,6 +189,8 @@ Siam-RPN提出了一种基于RPN的孪生网络结构。由孪生子网络和RPN ### 评估 +#### 910评估 + - 评估过程如下,需要vot数据集对应video的图片放于对应文件夹的color文件夹下,标签groundtruth.txt放于该目录下。 ```bash @@ -197,6 +204,21 @@ Siam-RPN提出了一种基于RPN的孪生网络结构。由孪生子网络和RPN {... 
"all_videos": {"accuracy": 0.5809545709441025, "robustness": 0.33422978326730364, "eao": 0.3102655908013835}} ``` +#### 310评估 + +- 评估过程如下,需要vot数据集对应video的图片放于对应文件夹的color文件夹下,标签groundtruth.txt放于该目录下,并进入scripts目录。 + +```bash +# 使用数据集 + bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [DATA_NAME] [DEVICE_ID] +``` + +查看评估结果命令如下: + +```bash +cat acc.log +``` + # 模型描述 ## 性能 @@ -210,7 +232,7 @@ Siam-RPN提出了一种基于RPN的孪生网络结构。由孪生子网络和RPN | 上传日期 | 2021-07-22 | | MindSpore版本 | 1.2.0-alpha | | 数据集 |VID-youtube-bb | -| 训练参数 |epoch=50, steps=1147, batch_size = 32 | +| 训练参数 |epoch=50, steps=1471, batch_size = 32 | | 优化器 | SGD | | 损失函数 | 自定义损失函数 | | 输出 | 目标框 |
diff --git a/research/cv/siamRPN/ascend_310_infer/CMakeLists.txt b/research/cv/siamRPN/ascend_310_infer/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..597443af012682c6043bfa47801cd7de77496ab7
--- /dev/null
+++ b/research/cv/siamRPN/ascend_310_infer/CMakeLists.txt
@@ -0,0 +1,19 @@
+cmake_minimum_required(VERSION 3.14.1)
+project(Ascend310Infer)
+# GEOS (C API) provides the polygon intersection used by the tracker's failure check.
+find_package(GEOS 3.8.0 REQUIRED)
+find_package(gflags REQUIRED)
+add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)
+# gflags is linked through target_link_libraries() below; a -l flag does not belong in the compile flags.
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -std=c++17 -Wall -fPIE -Wl,--allow-shlib-undefined")
+set(PROJECT_SRC_ROOT ${CMAKE_CURRENT_LIST_DIR}/)
+find_package(OpenCV 2 REQUIRED)
+# MINDSPORE_PATH is a directory supplied with -DMINDSPORE_PATH=...; option() only models ON/OFF switches,
+# so it is declared as a PATH cache entry instead. A value given on the command line is kept.
+set(MINDSPORE_PATH "" CACHE PATH "mindspore install path")
+include_directories(${MINDSPORE_PATH})
+include_directories(${MINDSPORE_PATH}/include)
+include_directories(${PROJECT_SRC_ROOT})
+include_directories(${OpenCV_INCLUDE_DIRS})
+message(${OpenCV_INCLUDE_DIRS})
+find_library(MS_LIB libmindspore.so ${MINDSPORE_PATH}/lib)
+file(GLOB_RECURSE MD_LIB ${MINDSPORE_PATH}/_c_dataengine*)
+add_executable(main src/main.cc src/utils.cc)
+target_link_libraries(main ${MS_LIB} ${MD_LIB} ${OpenCV_LIBS} gflags geos_c)
diff --git a/research/cv/siamRPN/ascend_310_infer/build.sh b/research/cv/siamRPN/ascend_310_infer/build.sh
new file mode 
100644
index 0000000000000000000000000000000000000000..9765335cb84b6e42ca3b46d57eaa594827400569
--- /dev/null
+++ b/research/cv/siamRPN/ascend_310_infer/build.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+# Configure against the installed mindspore-ascend package, then build.
+# Each step aborts the script on failure so a broken configure is not masked by the make step.
+cmake . -DMINDSPORE_PATH="$(pip3.7 show mindspore-ascend | grep Location | awk '{print $2"/mindspore"}' | xargs realpath)" || exit 1
+make || exit 1
+
diff --git a/research/cv/siamRPN/ascend_310_infer/inc/utils.h b/research/cv/siamRPN/ascend_310_infer/inc/utils.h
new file mode 100644
index 0000000000000000000000000000000000000000..1d500273983bace43e906675d836ce3064c803c0
--- /dev/null
+++ b/research/cv/siamRPN/ascend_310_infer/inc/utils.h
@@ -0,0 +1,38 @@
+/**
+ * Copyright 2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef MINDSPORE_INFERENCE_UTILS_H_ +#define MINDSPORE_INFERENCE_UTILS_H_ + +#include <sys/stat.h> +#include <dirent.h> +#include <vector> +#include <string> +#include <memory> +#include <fstream> +#include "include/api/types.h" + +using mindspore::MSTensor; + +std::vector<std::string> GetAllFiles(std::string_view dirName); +DIR *OpenDir(std::string_view dirName); +std::string RealPath(std::string_view path); +mindspore::MSTensor ReadFileToTensor(const std::string &file); +int WriteResult(const std::string& imageFile, float outputs[][4], int k, const std::string & dataset_name +, const std::string& seq); +std::vector<std::string> GetAlldir(const std::string& dir_name, const std::string_view& data_name); +#endif + diff --git a/research/cv/siamRPN/ascend_310_infer/src/main.cc b/research/cv/siamRPN/ascend_310_infer/src/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..f55e7383f612b3397de5c6aab0cc62b19e22e82d --- /dev/null +++ b/research/cv/siamRPN/ascend_310_infer/src/main.cc @@ -0,0 +1,567 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <dirent.h> +#include <geos_c.h> +#include <gflags/gflags.h> +#include <stdarg.h> +#include <stdio.h> +#include <sys/stat.h> +#include <sys/time.h> + +#include <algorithm> +#include <vector> +#include <cmath> +#include <fstream> +#include <iostream> +#include <sstream> +#include <string> +#include <opencv2/opencv.hpp> + +#include "../inc/utils.h" +#include "include/api/context.h" +#include "include/api/model.h" +#include "include/api/serialization.h" +#include "include/dataset/vision.h" +#include "include/dataset/transforms.h" +#include "include/dataset/execute.h" + + +namespace ms = mindspore; +DEFINE_string(siamRPN_file, "", "mindir path"); +DEFINE_string(image_path, "", "dataset path"); +DEFINE_string(dataset_name, "", "dataset name"); +DEFINE_int32(device_id, 0, "device id"); + +float min_box(float* bbox, int start, int step, int len) { + float min_value = bbox[start]; + for (int i = start; i < len; i = i + step) { + if (min_value > bbox[i]) { + min_value = bbox[i]; + } + } + return min_value; +} + +float max(float* bbox, int start, int step, int len) { + float max_value = bbox[start]; + for (int i = start; i < len; i = i + step) { + if (max_value < bbox[i]) { + max_value = bbox[i]; + } + } + return max_value; +} +void trans_box(float* bbox, float* box) { + float x1 = min_box(bbox, 0, 2, 8); + float x2 = max(bbox, 0, 2, 8); + float y1 = min_box(bbox, 1, 2, 8); + float y2 = max(bbox, 1, 2, 8); + float distance_1 = bbox[0] - bbox[2]; + float distance_2 = bbox[1] - bbox[3]; + float distance_3 = bbox[2] - bbox[4]; + float distance_4 = bbox[3] - bbox[5]; + float w = std::sqrt(distance_1 * distance_1 + distance_2 * distance_2); + float h = std::sqrt(distance_3 * distance_3 + distance_4 * distance_4); + float A1 = w * h; + float A2 = (x2 - x1) * (y2 - y1); + float s = std::sqrt(A1 / A2); + w = s * (x2 - x1) + 1; + h = s * (y2 - y1) + 1; + float x = x1; + float y = y1; + box[0] = x; + box[1] = y; + box[2] = w; + box[3] = h; +} + + +cv::Mat Pad(const 
cv::Mat& srcImageMat, int left, int bottom,
+            int right, int top) {  // pads srcImageMat with a constant border of the given widths
+  cv::Mat dstImageMat;
+  cv::Scalar tempVal = cv::mean(srcImageMat);  // per-channel mean, used as the border colour
+  tempVal.val[0] = static_cast<int>(tempVal.val[0]);  // truncate each channel mean to an integer
+  tempVal.val[1] = static_cast<int>(tempVal.val[1]);
+  tempVal.val[2] = static_cast<int>(tempVal.val[2]);
+  int borderType = cv::BORDER_CONSTANT;
+  copyMakeBorder(srcImageMat, dstImageMat, top, bottom, left, right, borderType,
+                 tempVal);  // note: copyMakeBorder takes top/bottom/left/right, unlike Pad's parameter order
+  return dstImageMat;
+}
+/* Crops img to area = {x, y, width, height} (upper-left corner plus size), clipped to the image
+ * bounds. The result is a view into img, not a copy. */
+cv::Mat Crop(const cv::Mat& img, const std::vector<int>& area) {
+  cv::Mat crop_img;
+  int crop_x1 = std::max(0, area[0]);
+  int crop_y1 = std::max(0, area[1]);
+  int crop_x2 = std::min(img.cols - 1, area[0] + area[2] - 1);  // inclusive right edge
+  int crop_y2 = std::min(img.rows - 1, area[1] + area[3] - 1);  // inclusive bottom edge
+  crop_img = img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1));  // cv::Range end is exclusive
+  return crop_img;
+}
+cv::Mat ResizeImage(const cv::Mat& srcImageMat, const std::vector<int>& size) {  // size = {width, height}
+  cv::Mat dstImageMat;
+  cv::resize(srcImageMat, dstImageMat, cv::Size(size[0], size[1]));
+  return dstImageMat;
+}
+cv::Mat get_template_Mat(const std::string &file_path, float* box, int resize_template,
+                         float context_amount) {  // builds the template input; box = {x, y, w, h}
+  cv::Mat srcImageMat;
+  srcImageMat = cv::imread(file_path, cv::IMREAD_COLOR);  // NOTE(review): an unreadable file yields an empty Mat; not checked
+  int w = srcImageMat.cols;
+  int h = srcImageMat.rows;
+  int cx = box[0] + box[2] / 2 - 1 / 2;  // `1 / 2` is integer division and evaluates to 0
+  int cy = box[1] + box[3] / 2 - 1 / 2;  // the float result is truncated to int
+  float w_template = box[2] + (box[2] + box[3]) * context_amount;
+  float h_template = box[3] + (box[2] + box[3]) * context_amount;
+  float s_x = std::sqrt(w_template * h_template);  // side length of the square crop
+
+  int left_x = cx - (s_x - 1) / 2 + w;  // + w: crop coordinates refer to the padded image built below
+  int top_y = cy - (s_x - 1) / 2 + h;  // + h: same shift for the vertical padding
+  std::vector<int> position = {left_x, top_y, static_cast<int>(s_x), static_cast<int>(s_x)};
+  std::vector<int> size = {resize_template, resize_template};
+  srcImageMat = Pad(srcImageMat, w, h, w, h);  // pad by a full image size on every side
+  srcImageMat = Crop(srcImageMat, position);
+  srcImageMat = 
ResizeImage(srcImageMat, size);
+  // HWC2CHW: split the interleaved image into planes and store them back to back
+  cv::Mat srcImageMat1;
+  srcImageMat.convertTo(srcImageMat1, CV_32FC3);
+  std::vector<float> dst_data;
+  std::vector<cv::Mat> bgrChannels(3);
+/* cv::split yields one single-channel plane per colour channel. */
+  cv::split(srcImageMat1, bgrChannels);
+  for (auto i = 0; i < bgrChannels.size(); i++) {  // NOTE(review): signed/unsigned comparison
+    std::vector<float> data = std::vector<float>(bgrChannels[i].reshape(1, 1));  // flatten the plane to one row
+    dst_data.insert(dst_data.end(), data.begin(), data.end());
+  }
+  srcImageMat1 = cv::Mat(dst_data, true);  // true: copy the vector's data into the Mat
+  cv::Mat dst = srcImageMat1.reshape(3, 127);  // NOTE(review): 127 is hard-coded rather than resize_template
+  return dst;
+}
+/* Builds the detection (search) input for the frame at file_path around box = {x, y, w, h}.
+ * Writes the crop-to-network scale factor to *scale_x. */
+cv::Mat get_detection_Mat(std::string file_path, float* box,
+                          int resize_template, int resize_detection,
+                          float context_amount, float* scale_x) {
+  cv::Mat srcImageMat;
+  srcImageMat = cv::imread(file_path, cv::IMREAD_COLOR);  // NOTE(review): an unreadable file yields an empty Mat; not checked
+  int w = srcImageMat.cols;
+  int h = srcImageMat.rows;
+  int cx = box[0] + box[2] / 2 - 1 / 2;  // `1 / 2` is integer division and evaluates to 0
+  int cy = box[1] + box[3] / 2 - 1 / 2;
+  float w_template = box[2] + (box[2] + box[3]) * context_amount;
+  float h_template = box[3] + (box[2] + box[3]) * context_amount;
+  float s_x = std::sqrt(w_template * h_template);
+  s_x = s_x * resize_detection / resize_template;  // enlarge the crop by the detection/template size ratio
+  *scale_x = resize_detection / static_cast<float>(s_x);  // computed before s_x is truncated
+  s_x = static_cast<int>(s_x);
+  int left_x = cx - (s_x - 1) / 2 + w;  // + w / + h: coordinates refer to the padded image built below
+  int top_y = cy - (s_x - 1) / 2 + h;
+
+  std::vector<int> position = {left_x, top_y, static_cast<int>(s_x), static_cast<int>(s_x)};
+  std::vector<int> size = {resize_detection, resize_detection};
+
+  srcImageMat = Pad(srcImageMat, w, h, w, h);
+  srcImageMat = Crop(srcImageMat, position);
+  srcImageMat = ResizeImage(srcImageMat, size);
+
+
+  // HWC2CHW: same plane-by-plane repacking as in get_template_Mat
+  cv::Mat srcImageMat1;
+  srcImageMat.convertTo(srcImageMat1, CV_32FC3);
+  std::vector<float> dst_data;
+  std::vector<cv::Mat> bgrChannels(3);
+
+  cv::split(srcImageMat1, bgrChannels);
+
+  for (auto i = 0; i < bgrChannels.size(); i++) {
+    std::vector<float> data = std::vector<float>(bgrChannels[i].reshape(1, 1));
+    dst_data.insert(dst_data.end(), data.begin(), data.end());
+  }
+ 
srcImageMat1 = cv::Mat(dst_data, true); + cv::Mat dst = srcImageMat1.reshape(3, resize_detection); + return dst; +} + +// postprocess using +float getrange(float num, float min, float max) { + float temp = num; + if (num > max) { + temp = max; + } else if (num < min) { + temp = min; + } + return temp; +} + +cv::Mat softmax(const cv::Mat& src) { + cv::Mat dst; + cv::Mat col1 = src.colRange(0, 1).clone(); + cv::Mat col2 = src.colRange(1, 2).clone(); + cv::exp(col1, col1); + cv::exp(col2, col2); + cv::add(col1, col2, col1); + cv::divide(col2, col1, dst); + return dst; +} + +std::vector<cv::Mat> box_transform_inv(const cv::Mat& src, const cv::Mat& offset) { + cv::Mat anchor_xctr = src.colRange(0, 1).clone(); + cv::Mat anchor_yctr = src.colRange(1, 2).clone(); + cv::Mat anchor_w = src.colRange(2, 3).clone(); + cv::Mat anchor_h = src.colRange(3, 4).clone(); + cv::Mat offset_x = offset.colRange(0, 1).clone(); + cv::Mat offset_y = offset.colRange(1, 2).clone(); + cv::Mat offset_w = offset.colRange(2, 3).clone(); + cv::Mat offset_h = offset.colRange(3, 4).clone(); + + cv::Mat box_cx, box_cy, box_w, box_h; + cv::multiply(anchor_w, offset_x, box_cx); + box_cx = box_cx + anchor_xctr; + cv::multiply(anchor_h, offset_y, box_cy); + box_cy = box_cy + anchor_yctr; + cv::exp((offset_w), offset_w); + cv::multiply(anchor_w, offset_w, box_w); + cv::exp((offset_h), offset_h); + cv::multiply(anchor_h, offset_h, box_h); + std::vector<cv::Mat> channels; + channels.push_back(box_cx); + channels.push_back(box_cy); + channels.push_back(box_w); + channels.push_back(box_h); + return channels; +} + +cv::Mat readMatFromFile(std::string path, int height, int width) { + std::ifstream inFile(path, std::ios::in | std::ios::binary); + cv::Mat im(height, width, CV_32FC1); + if (!inFile) { + std::cout << "error" << std::endl; + return im; + } + for (int r = 0; r < im.rows; r++) { + inFile.read(reinterpret_cast<char*>(im.ptr<uchar>(r)), im.cols * im.elemSize()); + } + inFile.close(); + return im; +} + 
+// Per-box size measure sqrt((w + p) * (h + p)) with p = (w + h) / 2; bbox = {cx, cy, w, h} columns.
+cv::Mat sz(const std::vector<cv::Mat>& bbox) {
+  const cv::Mat& width = bbox[2];
+  const cv::Mat& height = bbox[3];
+  cv::Mat pad = (width + height) * 0.5;
+  cv::Mat padded_area;
+  cv::multiply((width + pad), (height + pad), padded_area);
+  cv::Mat size;
+  cv::sqrt(padded_area, size);
+  return size;
+}
+
+// Scalar counterpart of sz() for a {w, h} pair, each first multiplied by scale.
+float sz_wh(float* wh, float scale) {
+  const float scaled_w = wh[0] * scale;
+  const float scaled_h = wh[1] * scale;
+  const float pad = (scaled_w + scaled_h) * 0.5;
+  const float padded_area = (scaled_w + pad) * (scaled_h + pad);
+  return sqrt(padded_area);
+}
+
+// Element-wise max(r, 1 / r).
+cv::Mat change(const cv::Mat& r) {
+  return cv::max(r, 1 / r);
+}
+
+// Aspect-ratio change of each predicted box relative to the target {w, h}, folded to be >= 1.
+// The scale argument is not used.
+cv::Mat get_rc(const std::vector<cv::Mat>& box_pred, const float* wh, const float scale) {
+  const float target_ratio = wh[0] / wh[1];
+  cv::Mat pred_ratio = box_pred[2] / box_pred[3];
+  cv::Mat relative = pred_ratio / target_ratio;
+  cv::Mat folded = cv::max(relative, 1 / relative);
+  return folded;
+}
+
+// Size change of each predicted box relative to the scaled target, folded to be >= 1.
+cv::Mat get_sc(const std::vector<cv::Mat>& box_pred, float* wh, float scale) {
+  cv::Mat pred_size = sz(box_pred);
+  const float target_size = sz_wh(wh, scale);
+  cv::Mat relative = pred_size / target_size;
+  cv::Mat folded = cv::max(relative, 1 / relative);
+  return folded;
+}
+
+// Penalty exp(-(s_c * r_c - 1) * penalty_k) per candidate box.
+cv::Mat get_penalty(const cv::Mat& s_c, const cv::Mat& r_c, float penalty_k) {
+  cv::Mat change_product;
+  cv::multiply(s_c, r_c, change_product);
+  cv::Mat exponent = -(change_product - 1) * penalty_k;
+  cv::Mat penalty;
+  cv::exp(exponent, penalty);
+  return penalty;
+}
+
+static void geos_message_handler(const char* fmt, ...) 
{
+  va_list ap;
+  va_start(ap, fmt);
+  vprintf(fmt, ap);
+  va_end(ap);
+}
+
+// Returns true when the predicted box overlaps the ground-truth polygon, false on a tracking failure.
+//   pred_box: {x1, y1, x2, y2} corners of the predicted axis-aligned box.
+//   gt_box:   eight values, the ground-truth polygon as four (x, y) points.
+// A polygon that cannot be parsed or intersected is reported as a failure. Every GEOS object created
+// here is released before returning; the previous version leaked all of them on each call.
+bool judge_failures(float* pred_box, float* gt_box) {
+  initGEOS(geos_message_handler, geos_message_handler);
+  const auto point = [](float x, float y) { return std::to_string(x) + ' ' + std::to_string(y); };
+  // WKT rings must be closed, so the first point is repeated at the end.
+  const std::string wkt_a = "POLYGON((" + point(pred_box[0], pred_box[1]) + ", " +
+                            point(pred_box[0], pred_box[3]) + ", " + point(pred_box[2], pred_box[3]) + ", " +
+                            point(pred_box[2], pred_box[1]) + ", " + point(pred_box[0], pred_box[1]) + "))";
+  const std::string wkt_b = "POLYGON((" + point(gt_box[6], gt_box[7]) + ", " + point(gt_box[0], gt_box[1]) +
+                            ", " + point(gt_box[2], gt_box[3]) + ", " + point(gt_box[4], gt_box[5]) + ", " +
+                            point(gt_box[6], gt_box[7]) + "))";
+
+  /* Read the WKT into geometry objects */
+  GEOSWKTReader* reader = GEOSWKTReader_create();
+  GEOSGeometry* geom_a = GEOSWKTReader_read(reader, wkt_a.c_str());
+  GEOSGeometry* geom_b = GEOSWKTReader_read(reader, wkt_b.c_str());
+
+  bool overlapped = false;
+  if (geom_a != nullptr && geom_b != nullptr) {
+    /* Calculate the intersection */
+    GEOSGeometry* inter = GEOSIntersection(geom_a, geom_b);
+    if (inter != nullptr) {
+      GEOSWKTWriter* writer = GEOSWKTWriter_create();
+      GEOSWKTWriter_setTrim(writer, 1);
+      char* wkt_inter = GEOSWKTWriter_write(writer, inter);
+      if (wkt_inter != nullptr) {
+        overlapped = std::string(wkt_inter) != "POLYGON EMPTY";
+        GEOSFree(wkt_inter);  // the WKT string is allocated by GEOS and must be released with GEOSFree
+      }
+      GEOSWKTWriter_destroy(writer);
+      GEOSGeom_destroy(inter);
+    }
+  }
+  if (geom_a != nullptr) {
+    GEOSGeom_destroy(geom_a);
+  }
+  if (geom_b != nullptr) {
+    GEOSGeom_destroy(geom_b);
+  }
+  GEOSWKTReader_destroy(reader);
+  finishGEOS();
+  return overlapped;
+}
+// Reads ground-truth polygons from path: one per line, eight floats separated by single characters.
+// Stores them in gt_bbox and returns the number of rows read. At most max_rows rows are stored so a
+// long file cannot overrun the caller's array; the default matches the 2500-row Config::gt_bbox.
+int read_gtBox(std::string path, float gt_bbox[][8], int max_rows = 2500) {
+  std::ifstream infile(path);
+  int k = 0;
+  char s;
+  while (k < max_rows &&
+         (infile >> gt_bbox[k][0] >> s >> gt_bbox[k][1] >> s >> gt_bbox[k][2] >> s >> gt_bbox[k][3] >> s >>
+          gt_bbox[k][4] >> s >> gt_bbox[k][5] >> s >> gt_bbox[k][6] >> s >> gt_bbox[k][7])) {
+    k++;
+  }
+  return k;
+}
+void copy_box(float* box, float* bbox, int num) {
+  for (int i = 0; i < num; 
i++) {
+    box[i] = bbox[i];
+  }
+}
+// Stores four values into box[0..3].
+void copy_box_four_value(float* box, float value1, float value2, float value3, float value4) {
+  box[0] = value1;
+  box[1] = value2;
+  box[2] = value3;
+  box[3] = value4;
+}
+// Stores two values into box[0..1].
+void copy_box_two_value(float* box, float value1, float value2) {
+  box[0] = value1;
+  box[1] = value2;
+}
+// Tracker hyper-parameters plus the per-sequence state shared between frames.
+struct Config {
+  int resize_template = 127;   // side length of the template input
+  int resize_detection = 255;  // side length of the detection input
+  // Anchor boxes and window weights are loaded from paths relative to the working directory.
+  cv::Mat anchors = readMatFromFile("../ascend_310_infer/src/anchors.bin", 1445, 4);
+  cv::Mat windows = readMatFromFile("../ascend_310_infer/src/windows.bin", 1445, 1);
+  float context_amount = 0.5;
+  float min_scale = 0.1;
+  float max_scale = 10;
+  float window_influence = 0.40;
+  float penalty_k = 0.22;
+  float lr_box = 0.3;
+  float gt_bbox[2500][8];  // ground-truth polygons, one row per frame
+  float pred_box[4];
+  float scale_x = 0.0;
+  float bbox[8];
+  float box_01[4];
+  bool flag = true;
+  float target_sz[2];
+  float pos[2];
+  int infer_cout_shape[2]={1445, 2};  // rows x cols of the classification output
+  int infer_rout_shape[2]={1445, 4};  // rows x cols of the regression output
+};
+
+// Turns the network outputs for frame i into a tracked box: scores every anchor, picks the best one,
+// updates config->pos / target_sz / pred_box / flag and writes {x1, y1, x2, y2} to resultbox.
+void deal_predict(const cv::Mat ccout, const cv::Mat rout, struct Config* config, float *shape, float *origin_target_sz,
+                  int i, float* resultbox) {
+  float cx, cy, w, h, terget[4];
+  double maxValue, minValue;
+  // cv::minMaxIdx writes one index per matrix dimension. pscore is a 2-D matrix, so each index
+  // buffer needs two elements; passing the address of a single int overruns the stack.
+  int minIdx[2] = {0, 0};
+  int maxIdx[2] = {0, 0};
+  cv::Mat ccout_sofmax, pscore;
+  ccout_sofmax = softmax(ccout);
+  std::vector<cv::Mat> box_pred = box_transform_inv(config->anchors, rout);
+  cv::Mat s_c = get_sc(box_pred, config->target_sz, config->scale_x);
+  cv::Mat r_c = get_rc(box_pred, config->target_sz, config->scale_x);
+  cv::Mat penalty = get_penalty(s_c, r_c, config->penalty_k);
+  cv::Mat score_pred = ccout_sofmax.colRange(0, 1).clone();
+  cv::multiply(score_pred, penalty, pscore);
+  // Blend the penalised score with the fixed window weights.
+  pscore = pscore * (1 - config->window_influence) + config->windows * config->window_influence;
+  cv::minMaxIdx(pscore, &minValue, &maxValue, minIdx, maxIdx);
+  const int maxId = maxIdx[0];  // row of the best-scoring anchor
+  cx = box_pred[0].at<float>(maxId, 0);
+  cy = box_pred[1].at<float>(maxId, 0);
+  w = box_pred[2].at<float>(maxId, 0);
+  h = box_pred[3].at<float>(maxId, 0);
+  copy_box_four_value(terget, 
cx / config->scale_x, cy / config->scale_x, w / config->scale_x, h / config->scale_x);  // network scale -> image scale
+  float lr = penalty.at<float>(maxId, 0) * score_pred.at<float>(maxId, 0) * config->lr_box;  // size smoothing rate
+  float res_x = getrange(terget[0] + config->pos[0], 0, shape[1]);  // shape = {rows, cols}
+  float res_y = getrange(terget[1] + config->pos[1], 0, shape[0]);
+  float res_w = getrange(config->target_sz[0] * (1 - lr) + terget[2] * lr,
+                         config->min_scale * origin_target_sz[0], config->max_scale * origin_target_sz[0]);
+  float res_h = getrange(config->target_sz[1] * (1 - lr) + terget[3] * lr,
+                         config->min_scale * origin_target_sz[1], config->max_scale * origin_target_sz[1]);
+  copy_box_two_value(config->pos, res_x, res_y);
+  copy_box_two_value(config->target_sz, res_w, res_h);
+  copy_box_four_value(config->pred_box, getrange(res_x, 0, shape[1]), getrange(res_y, 0, shape[0]),
+                      getrange(res_w, 0, shape[1]), getrange(res_h, 0, shape[0]));
+  resultbox[0] = config->pred_box[0] - config->pred_box[2] / 2 + 1 / 2;  // `1 / 2` is integer division (0)
+  resultbox[1] = config->pred_box[1] - config->pred_box[3] / 2 + 1 / 2;
+  resultbox[2] = config->pred_box[0] + config->pred_box[2] / 2 - 1 / 2;
+  resultbox[3] = config->pred_box[1] + config->pred_box[3] / 2 - 1 / 2;
+  copy_box_two_value(config->pred_box, resultbox[0], resultbox[1]);  // pred_box becomes {x1, y1, w, h}
+  config->flag = judge_failures(resultbox, config->gt_bbox[i]);
+}
+int process_infer(const std::vector<std::string>& dirs, const std::vector<ms::MSTensor>& model_inputs,
+                  const std::string& data_set, ms::Model* siamRPN, ms::Status ret, std::map<double, double>* costTime_map) {  // ret is taken by value; changes to it are not seen by the caller
+  Config config;
+  for (const auto &dir : dirs) {  // one tracking sequence per directory
+    std::vector<std::string> images = GetAllFiles(data_set + '/' + FLAGS_dataset_name + '/' + dir + "/color");
+    int k = read_gtBox(data_set + '/' + FLAGS_dataset_name + '/' + dir + "/groundtruth.txt", config.gt_bbox);  // k = ground-truth rows read
+    int template_idx = 0;  // frame at which the tracker is (re)initialised
+    float result_box[k][4], resultbox[4], shape[2], origin_target_sz[2];  // NOTE(review): result_box is a variable-length array (compiler extension) and starts uninitialised
+    std::vector<ms::MSTensor> inputs;
+    std::string image_template, image_detection;
+    for (int i = 0; 
i < static_cast<int>(images.size()) && i < k; i++) {
+      // result_box and config.gt_bbox hold k rows, so frames without a ground-truth row are skipped
+      // rather than indexing past the end of either array.
+      struct timeval start;
+      struct timeval end;
+      double startTime_ms;
+      double endTime_ms;
+      std::cout << "start infer:" << i << " " << template_idx << " " << std::endl;
+      if (i == template_idx) {
+        // (Re)initialise the tracker from this frame's ground truth.
+        cv::Mat imageMat = cv::imread(images[i], cv::IMREAD_COLOR);
+        shape[0] = imageMat.rows;
+        shape[1] = imageMat.cols;
+        copy_box(config.bbox, config.gt_bbox[i], 8);
+        trans_box(config.bbox, config.box_01);
+        config.pos[0] = config.box_01[0] + (config.box_01[2] + 1) / 2;
+        config.pos[1] = config.box_01[1] + (config.box_01[3] + 1) / 2;
+        copy_box_two_value(config.target_sz, config.box_01[2], config.box_01[3]);
+        copy_box_two_value(origin_target_sz, config.box_01[2], config.box_01[3]);
+        image_template = images[template_idx];
+        cv::Mat srcImageMat = get_template_Mat(image_template, config.box_01, config.resize_template,
+                                               config.context_amount);
+        // Byte size of the template: height * width * 3 channels * 4 bytes per float.
+        size_t size_buffer = srcImageMat.size().height * srcImageMat.size().width * 3 * 4;
+        mindspore::MSTensor buffer("template", mindspore::DataType::kNumberTypeFloat32,
+                                   {static_cast<int64_t>(3), static_cast<int64_t>(srcImageMat.size().height),
+                                    static_cast<int64_t>(srcImageMat.size().width)}, srcImageMat.data, size_buffer);
+        inputs.clear();
+        inputs.emplace_back(model_inputs[0].Name(), model_inputs[0].DataType(),
+                            model_inputs[0].Shape(), buffer.Data().get(), buffer.DataSize());
+        copy_box_four_value(result_box[i], 1.0, 0.0, 0.0, 0.0);  // leading 1 marks an initialisation frame
+      } else if (i < template_idx) {
+        copy_box_four_value(result_box[i], 0.0, 0.0, 0.0, 0.0);  // frames skipped after a failure
+      } else {
+        std::vector<ms::MSTensor> outputs;
+        image_detection = images[i];
+        cv::Mat decImageMat = get_detection_Mat(image_detection, config.box_01, config.resize_template,
+                                                config.resize_detection, config.context_amount, &config.scale_x);
+        size_t size_detection_buffer = decImageMat.size().height * decImageMat.size().width * 3 * 4;
+        mindspore::MSTensor buffer1("detection", mindspore::DataType::kNumberTypeFloat32, 
{static_cast<int64_t>(3), + static_cast<int64_t>(decImageMat.size().height), + static_cast<int64_t>(decImageMat.size().width)}, + decImageMat.data, size_detection_buffer); + inputs.emplace_back(model_inputs[1].Name(), model_inputs[1].DataType(), + model_inputs[1].Shape(), buffer1.Data().get(), buffer1.DataSize()); + gettimeofday(&start, NULL); + ret = siamRPN->Predict(inputs, &outputs); + gettimeofday(&end, NULL); + if (ret != ms::kSuccess) { + std::cout << "infer failed." << std::endl; + } + inputs.pop_back(); + cv::Mat ccout(config.infer_cout_shape[0], config.infer_cout_shape[1], CV_32FC1, outputs[0].MutableData()); + cv::Mat rout(config.infer_rout_shape[0], config.infer_rout_shape[1], CV_32FC1, outputs[1].MutableData()); + deal_predict(ccout, rout, &config, shape, origin_target_sz, i, resultbox); + copy_box_two_value(config.pred_box, resultbox[0], resultbox[1]); + config.flag = judge_failures(resultbox, config.gt_bbox[i]); + if (config.flag == 0) { + copy_box_four_value(result_box[i], 2.0, 0.0, 0.0, 0.0); + template_idx = std::min(i + 5, k - 1); + } else { + copy_box(result_box[i], resultbox, 4); + copy_box(config.box_01, config.pred_box, 4); + } + startTime_ms = (1.0 * start.tv_sec * 1000000 + start.tv_usec) / 1000; + endTime_ms = (1.0 * end.tv_sec * 1000000 + end.tv_usec) / 1000; + costTime_map->insert(std::pair<double, double>(startTime_ms, endTime_ms)); + } + } + WriteResult("prediction.txt", result_box, k, FLAGS_dataset_name, dir); + } + return 0; +} +int main(int argc, char** argv) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + auto context = std::make_shared<ms::Context>(); + auto ascend310_info = std::make_shared<ms::Ascend310DeviceInfo>(); + ascend310_info->SetDeviceID(0); + context->MutableDeviceInfo().push_back(ascend310_info); + ms::Graph graph; + std::cout << "siamRPN file is " << FLAGS_siamRPN_file << std::endl; + ms::Status ret = ms::Serialization::Load(FLAGS_siamRPN_file, ms::ModelType::kMindIR, &graph); + if (ret != ms::kSuccess) { + 
std::cout << "Load model failed." << std::endl; + return 1; + } + ms::Model siamRPN; + ret = siamRPN.Build(ms::GraphCell(graph), context); + if (ret != ms::kSuccess) { + std::cout << "Build model failed." << std::endl; + return 1; + } + std::vector<ms::MSTensor> model_inputs = siamRPN.GetInputs(); + if (model_inputs.empty()) { + std::cout << "Invalid model, inputs is empty." << std::endl; + return 1; + } + auto data_set = FLAGS_image_path; + std::map<double, double> costTime_map; + std::vector<std::string> dirs; + dirs = GetAlldir(data_set, FLAGS_dataset_name); + process_infer(dirs, model_inputs, data_set, &siamRPN, ret, &costTime_map); + if (ret != ms::kSuccess) { + std::cout << "process_infer failed." << std::endl; + return 1; + } + std::cout << "process_infer is ok" << std::endl; + double average = 0.0; + int inferCount = 0; + for (auto iter = costTime_map.begin(); iter != costTime_map.end(); iter++) { + double diff = 0.0; + diff = iter->second - iter->first; + average += diff; + inferCount++; + } + average = average / inferCount; + std::stringstream timeCost; + timeCost << "NN inference cost average time: " << average + << " ms of infer_count " << inferCount << std::endl; + std::cout << "NN inference cost average time: " << average + << "ms of infer_count " << inferCount << std::endl; + std::string fileName = + "./time_Result" + std::string("/test_perform_static.txt"); + std::ofstream fileStream(fileName.c_str(), std::ios::trunc); + fileStream << timeCost.str(); + fileStream.close(); + costTime_map.clear(); + return 0; +} diff --git a/research/cv/siamRPN/ascend_310_infer/src/utils.cc b/research/cv/siamRPN/ascend_310_infer/src/utils.cc new file mode 100644 index 0000000000000000000000000000000000000000..83ac98ff9cd877f1275c98be3db6098656b28631 --- /dev/null +++ b/research/cv/siamRPN/ascend_310_infer/src/utils.cc @@ -0,0 +1,149 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you 
may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "inc/utils.h" + +#include <algorithm> +#include <fstream> +#include <iostream> +#include <sstream> +using mindspore::DataType; +using mindspore::MSTensor; + +std::vector<std::string> GetAlldir(const std::string &dir_name, const std::string_view &data_name) { + DIR *dir = OpenDir(dir_name + '/' + data_name.data()); + if (dir == nullptr) { + return {}; + } + std::vector<std::string> res; + if (data_name == "vot2015" || data_name == "vot2016") { + struct dirent *filename; + while ((filename = readdir(dir)) != nullptr) { + std::string d_name = std::string(filename->d_name); + // get rid of "." and ".." + if (d_name == "." || d_name == ".." 
|| filename->d_type != DT_DIR)
+        continue;
+      std::cout << "dirs:" << d_name << std::endl;
+      res.emplace_back(d_name);
+    }
+  }
+
+  closedir(dir);  // release the handle returned by OpenDir
+  return res;
+}
+
+// Writes the first k rows of outputs to ./result_Files/<dataset_name>/<seq>/<imageFile>, one
+// comma-separated row per line. Returns 0 on success and 1 if the file cannot be opened.
+int WriteResult(const std::string &imageFile, float outputs[][4], int k,
+                const std::string &dataset_name, const std::string &seq) {
+  const std::string rootPath = "./result_Files";
+  const std::string datasetPath = rootPath + "/" + dataset_name;
+  const std::string homePath = datasetPath + "/" + seq;
+  // Create each level with mkdir(2) rather than a shell command assembled from the names, so the
+  // dataset and sequence names are never interpreted by a shell. Failures (for example a directory
+  // that already exists) are ignored here; a path that is really unusable is caught by fopen below.
+  mkdir(rootPath.c_str(), 0777);
+  mkdir(datasetPath.c_str(), 0777);
+  mkdir(homePath.c_str(), 0777);
+  std::cout << "homePath is " << homePath << std::endl;
+  std::string fileName = homePath + '/' + imageFile;
+  FILE *fp = fopen(fileName.c_str(), "wt");
+  if (fp == nullptr) {
+    std::cout << "Failed to open " << fileName << std::endl;
+    return 1;
+  }
+  for (int i = 0; i < k; i++) {
+    fprintf(fp, "%f, ", outputs[i][0]);
+    fprintf(fp, "%f, ", outputs[i][1]);
+    fprintf(fp, "%f, ", outputs[i][2]);
+    fprintf(fp, "%f\n", outputs[i][3]);
+  }
+  fclose(fp);
+  return 0;
+}
+
+// Returns the sorted full paths of the regular files directly inside dirName (empty on failure).
+std::vector<std::string> GetAllFiles(std::string_view dirName) {
+  struct dirent *filename;
+  DIR *dir = OpenDir(dirName);
+  if (dir == nullptr) {
+    return {};
+  }
+  std::vector<std::string> res;
+  while ((filename = readdir(dir)) != nullptr) {
+    std::string dName = std::string(filename->d_name);
+    if (dName == "." || dName == ".." 
|| filename->d_type != DT_REG) {
+      continue;
+    }
+    res.emplace_back(std::string(dirName) + "/" + filename->d_name);
+  }
+  closedir(dir);  // release the handle returned by OpenDir
+  std::sort(res.begin(), res.end());
+  return res;
+}
+
+// Loads the whole file into a one-dimensional uint8 tensor named after the file.
+// Returns a default-constructed tensor if the name is empty or the file cannot be opened.
+mindspore::MSTensor ReadFileToTensor(const std::string &file) {
+  if (file.empty()) {
+    std::cout << "Pointer file is nullptr" << std::endl;
+    return mindspore::MSTensor();
+  }
+
+  // Binary mode: the contents are raw bytes and must not go through text-mode translation.
+  std::ifstream ifs(file, std::ios::in | std::ios::binary);
+  if (!ifs.good()) {
+    std::cout << "File: " << file << " is not exist" << std::endl;
+    return mindspore::MSTensor();
+  }
+
+  if (!ifs.is_open()) {
+    std::cout << "File: " << file << "open failed" << std::endl;
+    return mindspore::MSTensor();
+  }
+
+  // Seek to the end to learn the size, then rewind and read everything.
+  ifs.seekg(0, std::ios::end);
+  size_t size = ifs.tellg();
+  mindspore::MSTensor buffer(file, mindspore::DataType::kNumberTypeUInt8,
+                             {static_cast<int64_t>(size)}, nullptr, size);
+
+  ifs.seekg(0, std::ios::beg);
+  ifs.read(reinterpret_cast<char *>(buffer.MutableData()), size);
+  ifs.close();
+
+  return buffer;
+}
+
+// Resolves dirName and opens it as a directory stream. Returns nullptr if the name is empty or the
+// path is not a directory. The caller must closedir() the result.
+DIR *OpenDir(std::string_view dirName) {
+  if (dirName.empty()) {
+    std::cout << " dirName is null ! " << std::endl;
+    return nullptr;
+  }
+  std::string realPath = RealPath(dirName);
+  struct stat s = {};
+  // When lstat fails (for example RealPath returned ""), s holds no valid data and must not be inspected.
+  if (lstat(realPath.c_str(), &s) != 0 || !S_ISDIR(s.st_mode)) {
+    std::cout << "dirName is not a valid directory !" 
<< std::endl; + return nullptr; + } + DIR *dir = opendir(realPath.c_str()); + if (dir == nullptr) { + std::cout << "Can not open dir " << dirName << std::endl; + return nullptr; + } + std::cout << "Successfully opened the dir " << dirName << std::endl; + return dir; +} + +std::string RealPath(std::string_view path) { + char realPathMem[PATH_MAX] = {0}; + char *realPathRet = nullptr; + realPathRet = realpath(path.data(), realPathMem); + if (realPathRet == nullptr) { + std::cout << "File: " << path << " is not exist."; + return ""; + } + + std::string realPath(realPathMem); + std::cout << path << " realpath is: " << realPath << std::endl; + return realPath; +} diff --git a/research/cv/siamRPN/export_mindir.py b/research/cv/siamRPN/export_mindir.py index b737c3115f33a4491b465ce94d5f057190cc376a..1706b521723105be03585a59c72b568b240f362d 100644 --- a/research/cv/siamRPN/export_mindir.py +++ b/research/cv/siamRPN/export_mindir.py @@ -14,7 +14,7 @@ # ============================================================================ """ export script """ - +import argparse import numpy as np import mindspore @@ -23,6 +23,7 @@ from mindspore.train.serialization import load_checkpoint from src.net import SiameseRPN + def siamrpn_export(): """ export function """ context.set_context( @@ -30,16 +31,17 @@ def siamrpn_export(): device_target="Ascend", save_graphs=False, device_id=args.device_id) - net = SiameseRPN(groups=1) + net = SiameseRPN(groups=1, is_310infer=True) load_checkpoint(args.ckpt_file, net=net) net.set_train(False) input_data1 = Tensor(np.zeros([1, 3, 127, 127]), mindspore.float32) input_data2 = Tensor(np.zeros([1, 3, 255, 255]), mindspore.float32) input_data = [input_data1, input_data2] - export(net, *input_data, file_name='siamrpn3', file_format="MINDIR") + export(net, *input_data, file_name='siamrpn', file_format="MINDIR") if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Mindspore infering') parser.add_argument("--device_id", type=int, 
default=0, help="Device id") parser.add_argument('--ckpt_file', type=str, required=True, help='siamRPN ckpt file.') args = parser.parse_args() diff --git a/research/cv/siamRPN/postprocess.py b/research/cv/siamRPN/postprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..4f2f608db8525116497d2a94c956606ef72ea4e4 --- /dev/null +++ b/research/cv/siamRPN/postprocess.py @@ -0,0 +1,271 @@ +# -*- coding: UTF-8 -*- +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""310eval vot""" + +import os +import copy +import argparse +import numpy as np +from tqdm import tqdm +from shapely.geometry import Polygon + +parser = argparse.ArgumentParser(description='Mindspore SiameseRPN 310 eval') +parser.add_argument( + '--dataset', + default=None, + help='dataset absolute, path or relative path') +parser.add_argument( + '--predict_dataset', + default=None, + help='predict_dataset, absolute path or relative path') + + +def eval_310infer(args): + """ execute inferring """ + dataset = args.dataset + predict_dataset = args.predict_dataset + direct_file = os.path.join(dataset, 'list.txt') + with open(direct_file, 'r') as f: + direct_lines = f.readlines() + video_names = np.sort([x.split('\n')[0] for x in direct_lines]) + video_paths = [os.path.join(dataset, x) for x in video_names] + results = {} + accuracy = 0 + all_overlaps = [] + all_failures = [] + gt_lenth = [] + + for video_path in tqdm(video_names, total=len(video_names)): + groundtruth_path = os.path.join(dataset, video_path, 'groundtruth.txt') + with open(groundtruth_path, 'r') as f: + boxes = f.readlines() + if ',' in boxes[0]: + boxes = [list(map(float, box.split(','))) for box in boxes] + else: + boxes = [list(map(int, box.split())) for box in boxes] + gt = copy.deepcopy(boxes) + predict_path = os.path.join( + predict_dataset, video_path, 'prediction.txt') + with open(predict_path, 'r') as f: + boxes = f.readlines() + if ',' in boxes[0]: + boxes = [list(map(float, box.split(','))) for box in boxes] + else: + boxes = [list(map(int, box.split())) for box in boxes] + res = copy.deepcopy(boxes) + acc, overlaps, failures, num_failures = calculate_accuracy_failures(res, gt, [10000, 10000]) + + accuracy += acc + result1 = {} + result1['acc'] = acc + result1['num_failures'] = num_failures + results[video_path.split('/')[-1]] = result1 + + all_overlaps.append(overlaps) + all_failures.append(failures) + 
gt_lenth.append(len(boxes)) + all_length = sum([len(x) for x in all_overlaps]) + + robustness = sum([len(x) for x in all_failures]) / all_length * 100 + eao = _calculate_eao("VOT2015", all_failures, all_overlaps, gt_lenth) + result1 = {} + result1['accuracy'] = accuracy / float(len(video_paths)) + result1['robustness'] = robustness + result1['eao'] = eao + results['all_videos'] = result1 + print('accuracy is ', accuracy / float(len(video_paths))) + print('robustness is ', robustness) + print('eao is ', eao) + + +def calculate_accuracy_failures(pred_trajectory, gt_trajectory, + bound=None): + ''' + args: + pred_trajectory:list of bbox + gt_trajectory: list of bbox ,shape == pred_trajectory + bound :w and h of img + return : + overlaps:list ,iou value in pred_trajectory + acc : mean iou value + failures: failures point in pred_trajectory + num_failures: number of failres + ''' + + overlaps = [] + failures = [] + for i in range(len(pred_trajectory)): + if len(pred_trajectory[i]) == 1: + + if pred_trajectory[i][0] == 2: + failures.append(i) + overlaps.append(float("nan")) + elif pred_trajectory[i][0] == 2 or pred_trajectory[i][0] == 1 or pred_trajectory[i][0] == 0: + if pred_trajectory[i][0] == 2: + failures.append(i) + overlaps.append(float("nan")) + else: + if bound is not None: + poly_img = Polygon(np.array([[0, 0], + [0, bound[1]], + [bound[0], bound[1]], + [bound[0], 0]])).convex_hull + if len(gt_trajectory[i]) == 8: + poly_pred = Polygon(np.array([[pred_trajectory[i][0], pred_trajectory[i][1]], \ + [pred_trajectory[i][2], pred_trajectory[i][1]], \ + [pred_trajectory[i][2], pred_trajectory[i][3]], \ + [pred_trajectory[i][0], pred_trajectory[i][3]] \ + ])).convex_hull + poly_gt = Polygon(np.array(gt_trajectory[i]).reshape(4, 2)).convex_hull + if bound is not None: + gt_inter_img = poly_gt.intersection(poly_img) + pred_inter_img = poly_pred.intersection(poly_img) + inter_area = gt_inter_img.intersection(pred_inter_img).area + overlap = inter_area / \ + 
(gt_inter_img.area + pred_inter_img.area - inter_area) + else: + inter_area = poly_gt.intersection(poly_pred).area + overlap = inter_area / \ + (poly_gt.area + poly_pred.area - inter_area) + elif len(gt_trajectory[i]) == 4: + overlap = iou(np.array( + pred_trajectory[i]).reshape(-1, 4), np.array(gt_trajectory[i]).reshape(-1, 4)) + overlaps.append(overlap) + acc = 0 + num_failures = len(failures) + if overlaps: + acc = np.nanmean(overlaps) + return acc, overlaps, failures, num_failures + + +def _calculate_eao(dataset_name, all_failures, all_overlaps, + gt_traj_length, skipping=5): + ''' + input:dataset name + all_failures: type is list , index of failure + all_overlaps: type is list , length of list is the length of all_failures + gt_traj_length: type is list , length of list is the length of all_failures + skipping:number of skipping per failing + ''' + if dataset_name == "VOT2016": + + low = 108 + high = 371 + + elif dataset_name == "VOT2015": + low = 108 + high = 371 + + fragment_num = sum([len(x) + 1 for x in all_failures]) + max_len = max([len(x) for x in all_overlaps]) + tags = [1] * max_len + seq_weight = 1 / (1 + 1e-10) # division by zero + + eao = {} + + # prepare segments + fweights = np.ones((fragment_num), dtype=np.float32) * np.nan + fragments = np.ones((fragment_num, max_len), dtype=np.float32) * np.nan + seg_counter = 0 + for traj_len, failures, overlaps in zip(gt_traj_length, all_failures, all_overlaps): + if failures: + points = [x + skipping for x in failures if + x + skipping <= len(overlaps)] + points.insert(0, 0) + for i in range(len(points)): + if i != len(points) - 1: + fragment = np.array( + overlaps[points[i]:points[i + 1] + 1], dtype=np.float32) + fragments[seg_counter, :] = 0 + else: + fragment = np.array(overlaps[points[i]:], dtype=np.float32) + fragment[np.isnan(fragment)] = 0 + fragments[seg_counter, :len(fragment)] = fragment + if i != len(points) - 1: + + tag_value = tags[points[i]:points[i + 1] + 1] + w = sum(tag_value) / (points[i + 
1] - points[i] + 1) + fweights[seg_counter] = seq_weight * w + else: + + tag_value = tags[points[i]:len(overlaps)] + w = sum(tag_value) / (traj_len - points[i] + 1e-16) + fweights[seg_counter] = seq_weight * w + seg_counter += 1 + else: + # no failure + max_idx = min(len(overlaps), max_len) + fragments[seg_counter, :max_idx] = overlaps[:max_idx] + tag_value = tags[0: max_idx] + w = sum(tag_value) / max_idx + fweights[seg_counter] = seq_weight * w + seg_counter += 1 + + expected_overlaps = calculate_expected_overlap(fragments, fweights) + print(len(expected_overlaps)) + # calculate eao + weight = np.zeros((len(expected_overlaps))) + weight[low - 1:high - 1 + 1] = 1 + expected_overlaps = np.array(expected_overlaps, dtype=np.float32) + is_valid = np.logical_not(np.isnan(expected_overlaps)) + eao_ = np.sum(expected_overlaps[is_valid] * + weight[is_valid]) / np.sum(weight[is_valid]) + eao = eao_ + return eao + + +def iou(box1, box2): + """ calculate iou """ + box1, box2 = copy.deepcopy(box1), copy.deepcopy(box2) + N = box1.shape[0] + K = box2.shape[0] + box1 = np.array(box1.reshape((N, 1, 4))) + \ + np.zeros((1, K, 4)) # box1=[N,K,4] + box2 = np.array(box2.reshape((1, K, 4))) + \ + np.zeros((N, 1, 4)) # box1=[N,K,4] + x_max = np.max(np.stack((box1[:, :, 0], box2[:, :, 0]), axis=-1), axis=2) + x_min = np.min(np.stack((box1[:, :, 2], box2[:, :, 2]), axis=-1), axis=2) + y_max = np.max(np.stack((box1[:, :, 1], box2[:, :, 1]), axis=-1), axis=2) + y_min = np.min(np.stack((box1[:, :, 3], box2[:, :, 3]), axis=-1), axis=2) + tb = x_min - x_max + lr = y_min - y_max + tb[np.where(tb < 0)] = 0 + lr[np.where(lr < 0)] = 0 + over_square = tb * lr + all_square = (box1[:, :, 2] - box1[:, :, 0]) * (box1[:, :, 3] - box1[:, :, 1]) + \ + (box2[:, :, 2] - box2[:, :, 0]) * (box2[:, :, 3] - box2[:, :, 1]) - over_square + return over_square / all_square + + +def calculate_expected_overlap(fragments, fweights): + """ compute expected iou """ + max_len = fragments.shape[1] + expected_overlaps = 
np.zeros((max_len), np.float32) + expected_overlaps[0] = 1 + # TODO Speed Up + for i in range(1, max_len): + mask = np.logical_not(np.isnan(fragments[:, i])) + if np.any(mask): + fragment = fragments[mask, 1:i + 1] + seq_mean = np.sum(fragment, 1) / fragment.shape[1] + expected_overlaps[i] = np.sum(seq_mean * + fweights[mask]) / np.sum(fweights[mask]) + return expected_overlaps + + +if __name__ == '__main__': + Args = parser.parse_args() + eval_310infer(Args) diff --git a/research/cv/siamRPN/preprocess.py b/research/cv/siamRPN/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..a9f10a01cdd3c7a41f3346676de7993a22bd5981 --- /dev/null +++ b/research/cv/siamRPN/preprocess.py @@ -0,0 +1,65 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""310eval preprocess""" +import os +import numpy as np +from src.config import config + +def generate_anchors(total_stride, base_size, scales, ratios, score_size): + """ generate anchors """ + anchor_num = len(ratios) * len(scales) + anchor = np.zeros((anchor_num, 4), dtype=np.float32) + size = base_size * base_size + count = 0 + for ratio in ratios: + ws = int(np.sqrt(size / ratio)) + hs = int(ws * ratio) + for scale in scales: + wws = ws * scale + hhs = hs * scale + anchor[count, 0] = 0 + anchor[count, 1] = 0 + anchor[count, 2] = wws + anchor[count, 3] = hhs + count += 1 + + anchor = np.tile(anchor, score_size * score_size).reshape((-1, 4)) + ori = - (score_size // 2) * total_stride + xx, yy = np.meshgrid([ori + total_stride * dx for dx in range(score_size)], + [ori + total_stride * dy for dy in range(score_size)]) + xx, yy = np.tile(xx.flatten(), (anchor_num, 1)).flatten(), \ + np.tile(yy.flatten(), (anchor_num, 1)).flatten() + anchor[:, 0], anchor[:, 1] = xx.astype(np.float32), yy.astype(np.float32) + return anchor + +def generateConfigBin(): + """ get anchors and hanning for eval """ + valid_scope = 2 * config.valid_scope + 1 + anchors = generate_anchors(config.total_stride, config.anchor_base_size, config.anchor_scales, + config.anchor_ratios, valid_scope) + + windows = np.tile(np.outer(np.hanning(config.score_size), np.hanning(config.score_size))[None, :], + [config.anchor_num, 1, 1]).flatten() + path1 = os.path.join(os.getcwd(), "ascend_310_infer", "src", "anchors.bin") + path2 = os.path.join(os.getcwd(), "ascend_310_infer", "src", "windows.bin") + if os.path.exists(path1): + os.remove(path1) + if os.path.exists(path2): + os.remove(path2) + anchors.tofile(path1) + windows.tofile(path2) + +if __name__ == '__main__': + generateConfigBin() diff --git a/research/cv/siamRPN/requirement.txt b/research/cv/siamRPN/requirement.txt index 
b211ecb1f4f46e9720699752bc40790a88ddcd70..2faf9a2c64d64e1de59c58f2d3844e393dc45bf6 100644 --- a/research/cv/siamRPN/requirement.txt +++ b/research/cv/siamRPN/requirement.txt @@ -2,4 +2,4 @@ lmdb fire opencv-python tqdm -Shaply \ No newline at end of file +Shapely \ No newline at end of file diff --git a/research/cv/siamRPN/scripts/run_infer_310.sh b/research/cv/siamRPN/scripts/run_infer_310.sh new file mode 100644 index 0000000000000000000000000000000000000000..d3771b129193c5ff88acee2b6ef938ce87b4f477 --- /dev/null +++ b/research/cv/siamRPN/scripts/run_infer_310.sh @@ -0,0 +1,114 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +if [[ $# -lt 3 || $# -gt 4 ]]; then + echo "Usage: sh run_infer_310.sh [MODEL_PATH] [DATA_PATH] [DATASET_NAME] [DEVICE_ID] + DEVICE_ID is optional, it can be set by environment variable device_id, otherwise the value is zero" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +model=$(get_real_path $1) +data_path=$(get_real_path $2) + +dataset_name=$3 + +device_id=0 +if [ $# == 4 ]; then + device_id=$4 +fi + +echo $model +echo $data_path +echo $dataset_name +echo $device_id + +export ASCEND_HOME=/usr/local/Ascend/ +if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then + export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/atc/bin:$PATH + export LD_LIBRARY_PATH=$ASCEND_HOME/fwkacllib/lib64:/usr/local/lib:$ASCEND_HOME/ascend-toolkit/latest/atc/lib64:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH + export TBE_IMPL_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe + export PYTHONPATH=$ASCEND_HOME/fwkacllib/python/site-packages:${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp +else + export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH + export LD_LIBRARY_PATH=$ASCEND_HOME/fwkacllib/lib64:/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH + export PYTHONPATH=$ASCEND_HOME/fwkacllib/python/site-packages:$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH + export ASCEND_OPP_PATH=$ASCEND_HOME/opp +fi + +function compile_app() +{ + cd 
../ascend_310_infer || exit + if [ -f "Makefile" ]; then + make clean + fi + sh build.sh &> build.log + + if [ $? -ne 0 ]; then + echo "compile app code failed" + exit 1 + fi + cd - || exit +} +function generate_config() +{ + python3.7 ../preprocess.py + if [ $? -ne 0 ]; then + echo "preprocess failed" + exit 1 + fi +} +function infer() +{ + if [ -d result_Files ]; then + rm -rf ./result_Files + fi + if [ -d time_Result ]; then + rm -rf ./time_Result + fi + mkdir result_Files + mkdir time_Result + cd result_Files + mkdir $dataset_name + cd .. + ../ascend_310_infer/main --siamRPN_file=$model --image_path=$data_path --dataset_name=$dataset_name --device_id=$device_id &> infer.log + + if [ $? -ne 0 ]; then + echo "execute inference failed" + exit 1 + fi +} + +function cal_acc() +{ + python3.7 ../postprocess.py --dataset=$data_path/$dataset_name --predict_dataset=./result_Files/$dataset_name &> acc.log + if [ $? -ne 0 ]; then + echo "calculate accuracy failed" + exit 1 + fi +} +compile_app +generate_config +infer +cal_acc + diff --git a/research/cv/siamRPN/src/net.py b/research/cv/siamRPN/src/net.py index fae1a46932a56a84f9931a4537f7272e6314137d..a6dbe02eb76791ba59d72db49255a64473afb71c 100644 --- a/research/cv/siamRPN/src/net.py +++ b/research/cv/siamRPN/src/net.py @@ -21,6 +21,7 @@ from mindspore.ops import composite as C from mindspore.parallel._utils import _get_device_num, _get_parallel_mode, _get_gradients_mean from mindspore.context import ParallelMode from mindspore.nn.wrap.grad_reducer import DistributedGradReducer +from config import config class SiameseRPN(nn.Cell): """ @@ -34,7 +35,7 @@ class SiameseRPN(nn.Cell): Returns: coutputs tensor, routputs tensor. 
""" - def __init__(self, groups=1, k=5, s=4, is_train=False, is_trackinit=False, is_track=False): + def __init__(self, groups=1, k=5, s=4, is_train=False, is_trackinit=False, is_track=False, is_310infer=False): super(SiameseRPN, self).__init__() self.groups = groups self.k = k @@ -42,6 +43,7 @@ class SiameseRPN(nn.Cell): self.is_train = is_train self.is_trackinit = is_trackinit self.is_track = is_track + self.is_310infer = is_310infer self.expand_dims = ops.ExpandDims() self.featureExtract = nn.SequentialCell( [nn.Conv2d(3, 96, kernel_size=11, stride=2, pad_mode='valid', has_bias=True), @@ -148,6 +150,37 @@ class SiameseRPN(nn.Cell): self.reshape(routputs, (-1, 4, 1445)), (0, 2, 1)) pred_score = self.softmax(pred_score)[0, :, 1] out1, out2 = pred_score, pred_regression + elif self.is_310infer is True: + template_feature = self.featureExtract(template) + detection_feature = self.featureExtract(detection) + + ckernal = self.conv1(template_feature) + ckernal = self.reshape(ckernal.view(self.groups, 2 * self.k, 256, 4, 4), (-1, 256, 4, 4)) + cinput = self.reshape(self.conv3(detection_feature), (1, -1, 20, 20)) + + rkernal = self.conv2(template_feature) + rkernal = self.reshape(rkernal.view(self.groups, 4 * self.k, 256, 4, 4), (-1, 256, 4, 4)) + rinput = self.reshape(self.conv4(detection_feature), (1, -1, 20, 20)) + c_features = self.op_split_input(cinput) + c_weights = self.op_split_krenal(ckernal) + r_features = self.op_split_input(rinput) + r_weights = self.op_split_krenal(rkernal) + coutputs = () + routputs = () + for i in range(self.groups): + coutputs = coutputs + (self.conv2d_cout(c_features[i], c_weights[i]),) + routputs = routputs + (self.conv2d_rout(r_features[i], r_weights[i]),) + coutputs = self.op_concat(coutputs) + routputs = self.op_concat(routputs) + coutputs = self.reshape(coutputs, (self.groups, 2*config.anchor_num, config.score_size, config.score_size)) + routputs = self.reshape(routputs, (self.groups, 4*config.anchor_num, config.score_size, 
config.score_size)) + routputs = self.regress_adjust(routputs) + coutputs = self.transpose( + self.reshape(coutputs, (-1, 2, config.anchor_num * config.score_size* config.score_size)), (0, 2, 1)) + routputs = self.transpose( + self.reshape(routputs, (-1, 4, config.anchor_num * config.score_size* config.score_size)), + (0, 2, 1)) + out1, out2 = coutputs, routputs else: out1, out2 = template, detection return out1, out2