diff --git a/research/cv/PatchCore/README_CN.md b/research/cv/PatchCore/README_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..954a70d0b2b640329ad1658a434a80f3abd4d92e
--- /dev/null
+++ b/research/cv/PatchCore/README_CN.md
@@ -0,0 +1,328 @@
+# Contents
+
+<!-- TOC -->
+
+- [Contents](#contents)
+- [PatchCore Description](#patchcore-description)
+- [Model Architecture](#model-architecture)
+- [Dataset](#dataset)
+- [Features](#features)
+    - [Mixed Precision](#mixed-precision)
+- [Environment Requirements](#environment-requirements)
+- [Quick Start](#quick-start)
+- [Script Description](#script-description)
+    - [Script and Sample Code](#script-and-sample-code)
+    - [Script Parameters](#script-parameters)
+    - [Training Process](#training-process)
+        - [Loading Pretrained Weights](#loading-pretrained-weights)
+        - [Training](#training)
+    - [Evaluation Process](#evaluation-process)
+        - [Evaluation](#evaluation)
+    - [Export Process](#export-process)
+        - [Export](#export)
+    - [Inference Process](#inference-process)
+        - [Inference](#inference)
+- [Model Description](#model-description)
+    - [Performance](#performance)
+        - [Training Performance](#training-performance)
+            - [Training PatchCore on MVTec-AD](#training-patchcore-on-mvtec-ad)
+        - [Evaluation Performance](#evaluation-performance)
+            - [Evaluating PatchCore on MVTec-AD](#evaluating-patchcore-on-mvtec-ad)
+        - [Inference Performance](#inference-performance)
+            - [Inferring PatchCore on MVTec-AD](#inferring-patchcore-on-mvtec-ad)
+- [Description of Random Situation](#description-of-random-situation)
+- [ModelZoo Homepage](#modelzoo-homepage)
+
+<!-- /TOC -->
+
+# PatchCore Description
+
+PatchCore is an industrial anomaly detection model proposed in 2021 that builds on a pretrained network; as of late 2021 it ranked first in accuracy on the MVTec-AD dataset. PatchCore is trained on normal samples only, and the network parameters are never updated during training (there is no back-propagation). Instead, the network output for each image (the feature representation of each image region) is stored as part of a "memory bank", which is finally subsampled to obtain the reduced memory bank. At inference time the memory bank is loaded, the network output of the test image is computed, and the image score defined in the paper is derived from it to produce an anomaly map.
+
+[Paper](https://arxiv.org/abs/2106.08265): Karsten Roth, Latha Pemula, Joaquin Zepeda, Bernhard Scholkopf, Thomas Brox, Peter Gehler. Towards Total Recall in Industrial Anomaly Detection. 2021.
+
+# Model Architecture
+
+PatchCore uses a pretrained WideResNet50 as the encoder and removes the layers after layer3.
+
+# Dataset
+
+Dataset used: [MVTec AD](<https://www.mvtec.com/company/research/datasets/mvtec-ad/>)
+
+- Dataset size: 4.9 GB, 5354 images in 15 classes (resolutions from 700x700 to 1024x1024)
+    - Training set: 3629 images
+    - Test set: 1725 images
+- Data format: binary files
+    - Note: the data is processed in src/dataset.py.
+- Directory structure:
+
+    ```text
+    data
+    ├── bottle
+    │   ├── bottle_test.json
+    │   ├── bottle_train.json
+    │   ├── ground_truth
+    │   │   ├── broken_large
+    │   │   │   ├── 000_mask.png
+    │   │   │   └── ......
+    │   │   ├── broken_small
+    │   │   │   ├── 000_mask.png
+    │   │   └── ......
+    │   ├── test
+    │   │   ├── broken_large
+    │   │   │   ├── 000.png
+    │   │   │   └── ......
+    │   │   └── good
+    │   │       ├── 000.png
+    │   │       └── ......
+    │   └── train
+    │       └── good
+    │           ├── 000.png
+    │           └── ......
+    ├── cable
+    │   ├── cable_test.json
+    │   ├── cable_train.json
+    │   ├── ground_truth
+    │   │   ├── bent_wire
+    │   │   │   ├── 000_mask.png
+    ......
+    ```
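+
+For reference, `src/dataset.py` consumes this tree roughly as follows — a trimmed-down sketch of its `load_dataset` logic (the defect-type list it also returns is omitted here; `img_path` is `<category>/train` or `<category>/test`, `gt_path` is `<category>/ground_truth`):
+
+```python
+import glob
+import os
+
+def load_dataset(img_path, gt_path):
+    img_paths, gt_paths, labels = [], [], []
+    for defect_type in os.listdir(img_path):
+        # every sample is a PNG grouped by defect type
+        imgs = sorted(glob.glob(os.path.join(img_path, defect_type) + "/*.png"))
+        img_paths.extend(imgs)
+        if defect_type == 'good':
+            # normal samples: no pixel mask, image-level label 0
+            gt_paths.extend([0] * len(imgs))
+            labels.extend([0] * len(imgs))
+        else:
+            # defective samples: paired pixel masks, image-level label 1
+            gt_paths.extend(sorted(glob.glob(os.path.join(gt_path, defect_type) + "/*.png")))
+            labels.extend([1] * len(imgs))
+    return img_paths, gt_paths, labels
+```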
+
+# Features
+
+## Mixed Precision
+
+[Mixed precision](https://www.mindspore.cn/docs/programming_guide/zh-CN/master/enable_mixed_precision.html) training uses both single-precision and half-precision data to speed up deep neural network training while preserving the accuracy of pure single-precision training. It increases computing speed and reduces memory usage, enabling larger models or larger batch sizes on specific hardware.
+Taking the FP16 operator as an example, if the input data type is FP32, the MindSpore backend automatically lowers the precision to process the data. Users can open the INFO log and search for "reduce precision" to see which operators ran at reduced precision.
+
+# Environment Requirements
+
+- Hardware (Ascend)
+    - Set up the hardware environment with Ascend processors.
+- Framework
+    - [MindSpore](https://www.mindspore.cn/install/en)
+- For more information, please check the resources below:
+    - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/zh-CN/master/index.html)
+    - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html)
+
+# Quick Start
+
+After installing MindSpore via the official website, you can start training and evaluation as follows:
+
+- Running on Ascend
+
+    ```bash
+    # run the training example
+    python train.py --dataset_path ../data/ --device_id 0 --pre_ckpt_path ../pretrain/PatchCore_pretrain.ckpt --category screw > train.log 2>&1 &
+    # or
+    bash run_train.sh [dataset_path] [device_id] [pre_ckpt_path] [category]
+
+    # run the evaluation example
+    python eval.py --dataset_path ../data/ --device_id 0 --pre_ckpt_path ../pretrain/PatchCore_pretrain.ckpt --category screw > eval.log 2>&1 &
+    # or
+    bash run_eval.sh [dataset_path] [device_id] [pre_ckpt_path] [category]
+
+    # run the inference example
+    bash run_310_infer.sh [MINDIR_PATH] [DATASET_PATH] [NEED_PREPROCESS] [DEVICE_ID] [CATEGORY]
+    ```
+
+# Script Description
+
+## Script and Sample Code
+
+```text
+├── PatchCore
+    ├── README_CN.md                  // PatchCore description
+    ├── ascend_310_infer              // Ascend 310 inference source code
+    ├── scripts
+    │   ├── run_310_infer.sh          // inference script
+    │   ├── run_eval.sh               // evaluation script
+    │   └── run_train.sh              // training script
+    ├── src
+    │   ├── config.py                 // configuration
+    │   ├── dataset.py                // dataset loading
+    │   ├── model.py                  // model loading
+    │   ├── oneStep.py                // adds padding and pooling on top of the model
+    │   ├── operator.py               // data operations
+    │   ├── pthtockpt.py              // pth-to-ckpt conversion
+    │   └── sampling_methods
+    │       ├── kcenter_greedy.py     // classification + sampling
+    │       └── sampling_def.py       // sampling base class
+    ├── eval.py                       // evaluation script
+    ├── export.py                     // inference model export script
+    ├── preprocess.py                 // Ascend 310 preprocessing script
+    ├── postprocess.py                // Ascend 310 postprocessing script
+    └── train.py                      // training script
+```
+
+## Script Parameters
+
+```yaml
+--dataset_path: dataset path
+--category: data category
+--device_id: device id
+--pre_ckpt_path: pretrained checkpoint path
+```
+
+## Training Process
+
+### Loading Pretrained Weights
+
+PyTorch WideResNet50 pretrained model, [click to download](https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth).
+
+```bash
+python src/pthtockpt.py --pth_path /path/wide_resnet50_2-95faca4d.pth
+```
+
+### Training
+
+- Running on Ascend
+
+    ```bash
+    python train.py --dataset_path ../data/ --device_id 0 --pre_ckpt_path ../pretrain/PatchCore_pretrain.ckpt --category screw > train.log 2>&1 &
+    # or
+    bash run_train.sh [dataset_path] [device_id] [pre_ckpt_path] [category]
+    ```
+
+    The python command above runs in the background; you can view the results through the train.log file.
+
+    After training, the index.faiss file can be found in the embeddings directory generated for the corresponding category. It is used during evaluation and inference and does not need to be moved. Anomaly maps can be found in the sample directory of the corresponding category.
+
+## Evaluation Process
+
+### Evaluation
+
+- Running evaluation on Ascend
+
+    ```shell
+    python eval.py --dataset_path ../data/ --device_id 0 --pre_ckpt_path ../pretrain/PatchCore_pretrain.ckpt --category screw > eval.log 2>&1 &
+    # or
+    bash run_eval.sh [dataset_path] [device_id] [pre_ckpt_path] [category]
+    ```
+
+    The python command above runs in the background; you can view the results through the eval.log file. The accuracy on the test dataset is as follows:
+
+    ```shell
+    # reference accuracy for the screw category
+    img_auc: 0.947732, pixel_auc: 0.97995
+    ```
+
+## Export Process
+
+### Export
+
+Export the checkpoint file into a MINDIR model:
+
+```shell
+python export.py --device_id 0 --ckpt_file ../pretrain/PatchCore_pretrain.ckpt
+```
+
+## Inference Process
+
+### Inference
+
+The model must be exported before running inference. AIR models can only be exported in an Ascend 910 environment; MINDIR models can be exported in any environment.
+
+- Inference on Ascend 310 with the MVTec AD dataset
+
+    The inference command is shown below, where `MINDIR_PATH` is the MINDIR file path;
+
+    `DATASET_PATH` is the inference dataset path, i.e. the parent directory of the data category (such as toothbrush);
+
+    `NEED_PREPROCESS` indicates whether the dataset needs preprocessing, usually 'y';
+
+    `DEVICE_ID` is optional and defaults to 0;
+
+    `CATEGORY` is the data category, one of: bottle, cable, capsule, carpet, grid, hazelnut, leather, metal_nut, pill, screw, tile, toothbrush, transistor, wood, zipper.
+
+    ```shell
+    # Ascend 310 inference
+    bash run_310_infer.sh [MINDIR_PATH] [DATASET_PATH] [NEED_PREPROCESS] [DEVICE_ID] [CATEGORY]
+    # e.g. bash run_310_infer.sh ./PathCore.mindir ../data/ y 0 toothbrush
+    ```
+
+    The inference accuracy results are saved in the acc_[CATEGORY].log file.
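+
+Both eval.py and postprocess.py turn the patch embeddings of one image into an image-level score and an anomaly map via a k-nearest-neighbor search against the memory bank. Below is a condensed sketch of that computation, wrapped into a helper function for readability (`index` is the loaded faiss index, `embedding_test` the N x C patch embeddings of one 224x224 image):
+
+```python
+import cv2
+import numpy as np
+from scipy.ndimage import gaussian_filter
+
+def score_image(index, embedding_test):
+    # distance of each of the 28*28 patches to its 9 nearest memory-bank entries
+    score_patches, _ = index.search(embedding_test, k=9)
+    # the per-patch anomaly map is the distance to the closest neighbor
+    anomaly_map = score_patches[:, 0].reshape((28, 28))
+    # softmax-style weight: how unambiguous the most anomalous patch is
+    n_b = score_patches[np.argmax(score_patches[:, 0])]
+    w = 1 - np.max(np.exp(n_b)) / np.sum(np.exp(n_b))
+    score = w * score_patches[:, 0].max()
+    # upsample to input resolution and smooth before computing pixel-level AUC
+    anomaly_map = gaussian_filter(cv2.resize(anomaly_map, (224, 224)), sigma=4)
+    return score, anomaly_map
+```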
+
+# Model Description
+
+## Performance
+
+### Training Performance
+
+#### Training PatchCore on MVTec-AD
+
+| Parameters          | Ascend                                                         |
+| ------------------- | -------------------------------------------------------------- |
+| Model version       | PatchCore                                                      |
+| Resource            | Ascend 910; CPU 2.60GHz, 192 cores; memory 755 GB; OS Euler2.8 |
+| Uploaded date       | 2021-12-17                                                     |
+| MindSpore version   | 1.5.0                                                          |
+| Dataset             | MVTec AD                                                       |
+| Training parameters | epoch=1, steps depend on the category, batch_size=32           |
+| Speed               | 95 ms/step                                                     |
+| Total time          | 5-15 min per category (including subsampling time)             |
+
+### Evaluation Performance
+
+#### Evaluating PatchCore on MVTec-AD
+
+| Parameters        | Ascend                             |
+| ----------------- | ---------------------------------- |
+| Model version     | PatchCore                          |
+| Resource          | Ascend 910; OS Euler2.8            |
+| Uploaded date     | 2021-12-17                         |
+| MindSpore version | 1.5.0                              |
+| Dataset           | MVTec AD                           |
+| batch_size        | 1                                  |
+| carpet_auc        | img_auc: 0.9896, pixel_auc: 0.9886 |
+| grid_auc          | img_auc: 0.9741, pixel_auc: 0.9698 |
+| leather_auc       | img_auc: 1.0, pixel_auc: 0.9904    |
+| tile_auc          | img_auc: 0.9859, pixel_auc: 0.9491 |
+| wood_auc          | img_auc: 0.9930, pixel_auc: 0.9379 |
+| bottle_auc        | img_auc: 1.0, pixel_auc: 0.9812    |
+| cable_auc         | img_auc: 0.9946, pixel_auc: 0.9833 |
+| capsule_auc       | img_auc: 0.9813, pixel_auc: 0.9887 |
+| hazelnut_auc      | img_auc: 1.0, pixel_auc: 0.9841    |
+| metal_nut_auc     | img_auc: 0.9990, pixel_auc: 0.9850 |
+| pill_auc          | img_auc: 0.9539, pixel_auc: 0.9781 |
+| screw_auc         | img_auc: 0.9477, pixel_auc: 0.9799 |
+| toothbrush_auc    | img_auc: 0.9917, pixel_auc: 0.9866 |
+| transistor_auc    | img_auc: 1.0, pixel_auc: 0.9728    |
+| zipper_auc        | img_auc: 0.9945, pixel_auc: 0.9836 |
+
+### Inference Performance
+
+#### Inferring PatchCore on MVTec-AD
+
+| Parameters        | Ascend                             |
+| ----------------- | ---------------------------------- |
+| Model version     | PatchCore                          |
+| Resource          | Ascend 310; OS Euler2.8            |
+| Uploaded date     | 2021-12-17                         |
+| MindSpore version | 1.5.0                              |
+| Dataset           | MVTec AD                           |
+| carpet_auc        | img_auc: 0.9928, pixel_auc: 0.9886 |
+| grid_auc          | img_auc: 0.9816, pixel_auc: 0.9670 |
+| leather_auc       | img_auc: 1.0, pixel_auc: 0.9904    |
+| tile_auc          | img_auc: 0.9917, pixel_auc: 0.9490 |
+| wood_auc          | img_auc: 0.9895, pixel_auc: 0.9376 |
+| bottle_auc        | img_auc: 1.0, pixel_auc: 0.9810    |
+| cable_auc         | img_auc: 0.9964, pixel_auc: 0.9827 |
+| capsule_auc       | img_auc: 0.9852, pixel_auc: 0.9886 |
+| hazelnut_auc      | img_auc: 1.0, pixel_auc: 0.9844    |
+| metal_nut_auc     | img_auc: 1.0, pixel_auc: 0.9812    |
+| pill_auc          | img_auc: 0.9414, pixel_auc: 0.9794 |
+| screw_auc         | img_auc: 0.9108, pixel_auc: 0.9826 |
+| toothbrush_auc    | img_auc: 1.0, pixel_auc: 0.9860    |
+| transistor_auc    | img_auc: 1.0, pixel_auc: 0.9718    |
+| zipper_auc        | img_auc: 0.9966, pixel_auc: 0.9842 |
+
+# Description of Random Situation
+
+In dataset.py, "shuffle=True" is set.
+In train.py, SparseRandomProjection is used.
+
+# ModelZoo Homepage
+
+Please check the official [homepage](https://gitee.com/mindspore/models).
diff --git a/research/cv/PatchCore/ascend_310_infer/CMakeLists.txt b/research/cv/PatchCore/ascend_310_infer/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d3fa58018b176831ab537342555b0552c5ceeb5d
--- /dev/null
+++ b/research/cv/PatchCore/ascend_310_infer/CMakeLists.txt
@@ -0,0 +1,15 @@
+cmake_minimum_required(VERSION 3.14.1)
+project(Ascend310Infer)
+add_compile_definitions(_GLIBCXX_USE_CXX11_ABI=0)
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O0 -g -std=c++17 -Werror -Wall -fPIE -Wl,--allow-shlib-undefined")
+set(PROJECT_SRC_ROOT ${CMAKE_CURRENT_LIST_DIR}/)
+option(MINDSPORE_PATH "mindspore
install path" "") +include_directories(${MINDSPORE_PATH}) +include_directories(${MINDSPORE_PATH}/include) +include_directories(${PROJECT_SRC_ROOT}) +find_library(MS_LIB libmindspore.so ${MINDSPORE_PATH}/lib) +file(GLOB_RECURSE MD_LIB ${MINDSPORE_PATH}/_c_dataengine*) +find_package(gflags REQUIRED) + +add_executable(main src/main.cc src/utils.cc) +target_link_libraries(main ${MS_LIB} ${MD_LIB} gflags) diff --git a/research/cv/PatchCore/ascend_310_infer/build.sh b/research/cv/PatchCore/ascend_310_infer/build.sh new file mode 100644 index 0000000000000000000000000000000000000000..ce42b350ee7c55402425380af96e824e2a8fa28b --- /dev/null +++ b/research/cv/PatchCore/ascend_310_infer/build.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ -d out ]; then + rm -rf out +fi + +mkdir out +cd out || exit + +if [ -f "Makefile" ]; then + make clean +fi + +cmake .. -DMINDSPORE_PATH="`pip show mindspore-ascend | grep Location | awk '{print $2"/mindspore"}' | xargs realpath`" +make diff --git a/research/cv/PatchCore/ascend_310_infer/inc/utils.h b/research/cv/PatchCore/ascend_310_infer/inc/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..0abbda6fceb7cc05d641835efe6c4428663a0859 --- /dev/null +++ b/research/cv/PatchCore/ascend_310_infer/inc/utils.h @@ -0,0 +1,33 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef MINDSPORE_INFERENCE_UTILS_H_ +#define MINDSPORE_INFERENCE_UTILS_H_ + +#include <sys/stat.h> +#include <dirent.h> +#include <vector> +#include <string> +#include <memory> +#include "include/api/types.h" + +std::vector<std::string> GetAllFiles(std::string_view dirName); +DIR *OpenDir(std::string_view dirName); +std::string RealPath(std::string_view path); +mindspore::MSTensor ReadFileToTensor(const std::string &file); +int WriteResult(const std::string& result_path, + const std::string& imageFile, const std::vector<mindspore::MSTensor> &outputs); +#endif diff --git a/research/cv/PatchCore/ascend_310_infer/src/main.cc b/research/cv/PatchCore/ascend_310_infer/src/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..1d00046e596d8fd6d6f2e7240c843f7dc55704b5 --- /dev/null +++ b/research/cv/PatchCore/ascend_310_infer/src/main.cc @@ -0,0 +1,134 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <sys/time.h> +#include <gflags/gflags.h> +#include <dirent.h> +#include <iostream> +#include <string> +#include <algorithm> +#include <iosfwd> +#include <vector> +#include <fstream> +#include <sstream> + +#include "include/api/model.h" +#include "include/api/context.h" +#include "include/api/types.h" +#include "include/api/serialization.h" +#include "include/dataset/execute.h" +#include "include/dataset/vision.h" +#include "inc/utils.h" + +using mindspore::Context; +using mindspore::Serialization; +using mindspore::Model; +using mindspore::Status; +using mindspore::MSTensor; +using mindspore::dataset::Execute; +using mindspore::ModelType; +using mindspore::GraphCell; +using mindspore::kSuccess; + +DEFINE_string(mindir_path, "", "mindir path"); +DEFINE_string(input_path, ".", "input path"); +DEFINE_string(result_path, ".", "result_path"); +DEFINE_string(time_path, ".", "time_path"); +DEFINE_int32(device_id, 0, "device id"); + +int main(int argc, char **argv) { + gflags::ParseCommandLineFlags(&argc, &argv, true); + if (RealPath(FLAGS_mindir_path).empty()) { + std::cout << "Invalid mindir" << std::endl; + return 1; + } + + auto context = std::make_shared<Context>(); + auto ascend310 = std::make_shared<mindspore::Ascend310DeviceInfo>(); + ascend310->SetDeviceID(FLAGS_device_id); + context->MutableDeviceInfo().push_back(ascend310); + mindspore::Graph graph; + Serialization::Load(FLAGS_mindir_path, ModelType::kMindIR, &graph); + + Model model; + Status ret = model.Build(GraphCell(graph), context); + if (ret != kSuccess) { + std::cout << "ERROR: Build failed." << std::endl; + return 1; + } + + std::vector<MSTensor> model_inputs = model.GetInputs(); + if (model_inputs.empty()) { + std::cout << "Invalid model, inputs is empty." << std::endl; + return 1; + } + + auto input_files = GetAllFiles(FLAGS_input_path); + + if (input_files.empty()) { + std::cout << "ERROR: input data empty." 
<< std::endl; + return 1; + } + + std::map<double, double> costTime_map; + size_t size = input_files.size(); + + for (size_t i = 0; i < size; ++i) { + struct timeval start = {0}; + struct timeval end = {0}; + double startTimeMs; + double endTimeMs; + std::vector<MSTensor> inputs; + std::vector<MSTensor> outputs; + std::cout << "Start predict " << std::endl; + std::cout << "input files:" << input_files[i] << std::endl; + + auto input = ReadFileToTensor(input_files[i]); + + inputs.emplace_back(model_inputs[0].Name(), model_inputs[0].DataType(), model_inputs[0].Shape(), + input.Data().get(), input.DataSize()); + + gettimeofday(&start, nullptr); + ret = model.Predict(inputs, &outputs); + gettimeofday(&end, nullptr); + if (ret != kSuccess) { + std::cout << "Predict file failed." << std::endl; + return 1; + } + startTimeMs = (1.0 * start.tv_sec * 1000000 + start.tv_usec) / 1000; + endTimeMs = (1.0 * end.tv_sec * 1000000 + end.tv_usec) / 1000; + costTime_map.insert(std::pair<double, double>(startTimeMs, endTimeMs)); + WriteResult(FLAGS_result_path, input_files[i], outputs); + } + double average = 0.0; + int inferCount = 0; + + for (auto iter = costTime_map.begin(); iter != costTime_map.end(); iter++) { + double diff = 0.0; + diff = iter->second - iter->first; + average += diff; + inferCount++; + } + average = average / inferCount; + std::stringstream timeCost; + timeCost << "NN inference cost average time: "<< average << " ms of infer_count " << inferCount << std::endl; + std::cout << "NN inference cost average time: "<< average << "ms of infer_count " << inferCount << std::endl; + std::string fileName = FLAGS_time_path + std::string("/test_perform_static.txt"); + std::ofstream fileStream(fileName.c_str(), std::ios::trunc); + fileStream << timeCost.str(); + fileStream.close(); + costTime_map.clear(); + return 0; +} diff --git a/research/cv/PatchCore/ascend_310_infer/src/utils.cc b/research/cv/PatchCore/ascend_310_infer/src/utils.cc new file mode 100644 index 0000000000000000000000000000000000000000..e739c503daca95d5afccdf6eeb1b52dd19ec04d2 --- /dev/null +++ b/research/cv/PatchCore/ascend_310_infer/src/utils.cc @@ -0,0 +1,129 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <fstream> +#include <algorithm> +#include <iostream> +#include "inc/utils.h" + +using mindspore::MSTensor; +using mindspore::DataType; + +std::vector<std::string> GetAllFiles(std::string_view dirName) { + struct dirent *filename; + DIR *dir = OpenDir(dirName); + if (dir == nullptr) { + return {}; + } + std::vector<std::string> res; + while ((filename = readdir(dir)) != nullptr) { + std::string dName = std::string(filename->d_name); + if (dName == "." || dName == ".." 
|| filename->d_type != DT_REG) { + continue; + } + res.emplace_back(std::string(dirName) + "/" + filename->d_name); + } + std::sort(res.begin(), res.end()); + for (auto &f : res) { + std::cout << "image file: " << f << std::endl; + } + return res; +} + +int WriteResult(const std::string& result_path, const std::string& imageFile, const std::vector<MSTensor> &outputs) { + std::string homePath = result_path; + for (size_t i = 0; i < outputs.size(); ++i) { + size_t outputSize; + std::shared_ptr<const void> netOutput; + netOutput = outputs[i].Data(); + outputSize = outputs[i].DataSize(); + int pos = imageFile.rfind('/'); + std::string fileName(imageFile, pos + 1); + fileName.replace(fileName.find('.'), fileName.size() - fileName.find('.'), '_' + std::to_string(i) + ".bin"); + std::string outFileName = homePath + "/" + fileName; + FILE * outputFile = fopen(outFileName.c_str(), "wb"); + fwrite(netOutput.get(), outputSize, sizeof(char), outputFile); + fclose(outputFile); + outputFile = nullptr; + } + return 0; +} + +mindspore::MSTensor ReadFileToTensor(const std::string &file) { + if (file.empty()) { + std::cout << "Pointer file is nullptr" << std::endl; + return mindspore::MSTensor(); + } + + std::ifstream ifs(file); + if (!ifs.good()) { + std::cout << "File: " << file << " is not exist" << std::endl; + return mindspore::MSTensor(); + } + + if (!ifs.is_open()) { + std::cout << "File: " << file << "open failed" << std::endl; + return mindspore::MSTensor(); + } + + ifs.seekg(0, std::ios::end); + size_t size = ifs.tellg(); + mindspore::MSTensor buffer(file, mindspore::DataType::kNumberTypeUInt8, {static_cast<int64_t>(size)}, nullptr, size); + + ifs.seekg(0, std::ios::beg); + ifs.read(reinterpret_cast<char *>(buffer.MutableData()), size); + ifs.close(); + + return buffer; +} + + +DIR *OpenDir(std::string_view dirName) { + if (dirName.empty()) { + std::cout << " dirName is null ! " << std::endl; + return nullptr; + } + std::string realPath = RealPath(dirName); + struct stat s; + lstat(realPath.c_str(), &s); + if (!S_ISDIR(s.st_mode)) { + std::cout << "dirName is not a valid directory !" << std::endl; + return nullptr; + } + DIR *dir; + dir = opendir(realPath.c_str()); + if (dir == nullptr) { + std::cout << "Can not open dir " << dirName << std::endl; + return nullptr; + } + std::cout << "Successfully opened the dir " << dirName << std::endl; + return dir; +} + +std::string RealPath(std::string_view path) { + char realPathMem[PATH_MAX] = {0}; + char *realPathRet = nullptr; + realPathRet = realpath(path.data(), realPathMem); + + if (realPathRet == nullptr) { + std::cout << "File: " << path << " is not exist."; + return ""; + } + + std::string realPath(realPathMem); + std::cout << path << " realpath is: " << realPath << std::endl; + return realPath; +} diff --git a/research/cv/PatchCore/eval.py b/research/cv/PatchCore/eval.py new file mode 100644 index 0000000000000000000000000000000000000000..f6588e46188ea18e1524ffccad88a50a4bfe5743 --- /dev/null +++ b/research/cv/PatchCore/eval.py @@ -0,0 +1,111 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""eval""" +import argparse +import json +import os +from pathlib import Path + +import cv2 +import faiss +import numpy as np +from mindspore import context +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from scipy.ndimage import gaussian_filter +from sklearn.metrics import roc_auc_score + +from src.config import _C as cfg +from src.dataset import createDataset +from src.model import wide_resnet50_2 +from src.oneStep import OneStepCell +from src.operator import (embedding_concat, normalize, prep_dirs, + reshape_embedding, save_anomaly_map) + +parser = argparse.ArgumentParser(description='eval') + +parser.add_argument('--category', type=str, default='screw') +parser.add_argument('--device_id', type=int, default=0, help='Device id') +parser.add_argument('--dataset_path', type=str, required=True, help='Dataset path') +parser.add_argument('--pre_ckpt_path', type=str, required=True, help='Pretrain checkpoint file path') + +args = parser.parse_args() + +if __name__ == '__main__': + context.set_context(mode=context.GRAPH_MODE, device_target='Ascend', device_id=args.device_id) + + # dataset + mean = cfg.mean + std = cfg.std + _, test_dataset, _, test_json_path = createDataset(args.dataset_path, args.category) + json_path = Path(test_json_path) + with json_path.open('r') as label_file: + label = json.load(label_file) + data_iter = test_dataset.create_dict_iterator() + step_size = test_dataset.get_dataset_size() + + embedding_dir_path, sample_path = prep_dirs('./', args.category) + index = faiss.read_index(os.path.join(embedding_dir_path, 'index.faiss')) + + # network + network = wide_resnet50_2() + param_dict = load_checkpoint(args.pre_ckpt_path) + load_param_into_net(network, param_dict) + + for p in network.trainable_params(): + p.requires_grad = False + + model = OneStepCell(network) + + print("***************start eval***************") + gt_list_px_lvl = [] + pred_list_px_lvl = [] + gt_list_img_lvl = [] + pred_list_img_lvl = [] + img_path_list = [] + for step, data in enumerate(data_iter): + step_label = label['{}'.format(data['idx'][0])] + file_name = step_label['name'] + x_type = step_label['img_type'] + + features = model(data['img']) + embedding = embedding_concat(features[0].asnumpy(), features[1].asnumpy()) + embedding_test = reshape_embedding(embedding) + + embedding_test = np.array(embedding_test, dtype=np.float32) + score_patches, _ = index.search(embedding_test, k=9) + + anomaly_map = score_patches[:, 0].reshape((28, 28)) + N_b = score_patches[np.argmax(score_patches[:, 0])] + w = (1 - (np.max(np.exp(N_b)) / np.sum(np.exp(N_b)))) + score = w * max(score_patches[:, 0]) + gt_np = data['gt'].asnumpy()[0, 0].astype(int) + anomaly_map_resized = cv2.resize(anomaly_map, (224, 224)) + anomaly_map_resized_blur = gaussian_filter(anomaly_map_resized, sigma=4) + + gt_list_px_lvl.extend(gt_np.ravel()) + pred_list_px_lvl.extend(anomaly_map_resized_blur.ravel()) + gt_list_img_lvl.append(data['label'].asnumpy()[0]) + pred_list_img_lvl.append(score) + img_path_list.extend(file_name) + img = normalize(data['img'], mean, std) + input_img = cv2.cvtColor(np.transpose(img, (0, 2, 3, 1))[0] * 255, cv2.COLOR_BGR2RGB) + save_anomaly_map(sample_path, anomaly_map_resized_blur, input_img, gt_np * 255, file_name, x_type) + + pixel_auc = roc_auc_score(gt_list_px_lvl, pred_list_px_lvl) + img_auc = 
roc_auc_score(gt_list_img_lvl, pred_list_img_lvl) + + print('\ntest_epoch_end') + print('category is {}'.format(args.category)) + print("img_auc: {}, pixel_auc: {}".format(img_auc, pixel_auc)) diff --git a/research/cv/PatchCore/export.py b/research/cv/PatchCore/export.py new file mode 100644 index 0000000000000000000000000000000000000000..0252fb43aad4fe52a83c5cffa283cec70df28b55 --- /dev/null +++ b/research/cv/PatchCore/export.py @@ -0,0 +1,55 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""export checkpoint file into air, onnx, mindir models""" +import argparse +import numpy as np + +import mindspore.common.dtype as ms +from mindspore import Tensor, load_checkpoint, load_param_into_net, export, context + +from src.oneStep import OneStepCell +from src.model import wide_resnet50_2 + +parser = argparse.ArgumentParser(description='export') + +parser.add_argument('--device_id', type=int, default=0, help='Device id') +parser.add_argument('--ckpt_file', type=str, required=True, help='Checkpoint file path') +parser.add_argument('--file_name', type=str, default='PathCore', help='output file name') +parser.add_argument('--file_format', type=str, choices=['AIR', 'ONNX', 'MINDIR'], default='MINDIR', help='file format') +parser.add_argument('--device_target', type=str, choices=['Ascend', 'GPU', 'CPU'], default='Ascend', + help='device target') + +args = parser.parse_args() + +if __name__ == '__main__': + context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target) + + if args.device_target == "Ascend": + context.set_context(device_id=args.device_id) + + assert args.ckpt_file is not None, "args.ckpt_file is None." + + # network + network = wide_resnet50_2() + param_dict = load_checkpoint(args.ckpt_file) + load_param_into_net(network, param_dict) + + for p in network.trainable_params(): + p.requires_grad = False + + model = OneStepCell(network) + + input_arr = Tensor(np.ones([1, 3, 224, 224]), ms.float32) + export(model, input_arr, file_name=args.file_name, file_format=args.file_format) diff --git a/research/cv/PatchCore/picture/PatchCore.png b/research/cv/PatchCore/picture/PatchCore.png new file mode 100644 index 0000000000000000000000000000000000000000..64750724b82c00fd09b080b7e8b61609b8cb5f9c Binary files /dev/null and b/research/cv/PatchCore/picture/PatchCore.png differ diff --git a/research/cv/PatchCore/postprocess.py b/research/cv/PatchCore/postprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..6ceb8c9ab4492b8cd3865756acf974fdaaaaa83e --- /dev/null +++ b/research/cv/PatchCore/postprocess.py @@ -0,0 +1,162 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""postprocess""" +import argparse +import json +import os +from pathlib import Path + +import cv2 +import faiss +import numpy as np +from mindspore.common import set_seed +from scipy.ndimage import gaussian_filter +from sklearn.metrics import roc_auc_score +from sklearn.random_projection import SparseRandomProjection + +from src.config import _C as cfg +from src.operator import (embedding_concat, prep_dirs, reshape_embedding, + save_anomaly_map) +from src.sampling_methods.kcenter_greedy import kCenterGreedy + +set_seed(1) + +parser = argparse.ArgumentParser(description='postprocess') + +parser.add_argument('--result_dir', type=str, default='') +parser.add_argument('--img_dir', type=str, default='') +parser.add_argument('--label_dir', type=str, default='') +parser.add_argument('--category', type=str, default='screw') +parser.add_argument('--coreset_sampling_ratio', type=float, default=0.01) + +args = parser.parse_args() + +def normalize(input_, mean_, std_): + mean_ = np.array(mean_).reshape((-1, 1, 1)) + std_ = np.array(std_).reshape((-1, 1, 1)) + + out = np.divide(np.subtract(input_, mean_), std_).astype(np.float32) + + return out + +if __name__ == '__main__': + train_label_path = Path(os.path.join(args.label_dir, "pre_label.json")) + test_label_path = Path(os.path.join(args.label_dir, "infer_label.json")) + train_result_path = os.path.join(args.result_dir, 'pre') + test_result_path = os.path.join(args.result_dir, 'infer') + + with train_label_path.open('r') as dst_file: + train_label = json.load(dst_file) + with test_label_path.open('r') as dst_file: + test_label = json.load(dst_file) + + test_json_path = test_label['infer_json_path'] + + # dataset + embedding_dir_path, sample_path = prep_dirs('./', args.category) + + mean = cfg.mean + std = cfg.std + + json_path = Path(test_json_path) + with json_path.open('r') as label_file: + test_label_string = json.load(label_file) + + # train + embedding_list = [] + for i in range(int(len(os.listdir(train_result_path)) / 2)): + features_one_path = os.path.join(train_result_path, "data_img_{}_0.bin".format(i)) + features_two_path = os.path.join(train_result_path, "data_img_{}_1.bin".format(i)) + + features_one = np.fromfile(features_one_path, dtype=np.float32).reshape(1, 512, 28, 28) + features_two = np.fromfile(features_two_path, dtype=np.float32).reshape(1, 1024, 14, 14) + + embedding = embedding_concat(features_one, features_two) + embedding_list.extend(reshape_embedding(embedding)) + + total_embeddings = np.array(embedding_list, dtype=np.float32) + + # Random projection + randomprojector = SparseRandomProjection(n_components='auto', eps=0.9) + randomprojector.fit(total_embeddings) + + # Coreset Subsampling + selector = kCenterGreedy(total_embeddings, 0, 0) + selected_idx = selector.select_batch(model=randomprojector, + already_selected=[], + N=int(total_embeddings.shape[0] * args.coreset_sampling_ratio)) + embedding_coreset = total_embeddings[selected_idx] + + print('initial embedding size : {}'.format(total_embeddings.shape)) + print('final embedding size 
: {}'.format(embedding_coreset.shape)) + + # faiss + index = faiss.IndexFlatL2(embedding_coreset.shape[1]) + index.add(embedding_coreset) + faiss.write_index(index, os.path.join(embedding_dir_path, 'index.faiss')) + + # eval + gt_list_px_lvl = [] + pred_list_px_lvl = [] + gt_list_img_lvl = [] + pred_list_img_lvl = [] + img_path_list = [] + index = faiss.read_index(os.path.join(embedding_dir_path, 'index.faiss')) + for i in range(int(len(os.listdir(test_result_path)) / 2)): + test_single_label = test_label['{}'.format(i)] + gt = test_single_label['gt'] + label = test_single_label['label'] + idx = test_single_label['idx'] + test_single_label_string = test_label_string['{}'.format(idx[0])] + file_name = test_single_label_string['name'] + x_type = test_single_label_string['img_type'] + + img_path = os.path.join(args.img_dir, "data_img_{}.bin".format(i)) + features_one_path = os.path.join(test_result_path, "data_img_{}_0.bin".format(i)) + features_two_path = os.path.join(test_result_path, "data_img_{}_1.bin".format(i)) + + img = np.fromfile(img_path, dtype=np.float32).reshape(1, 3, 224, 224) + features_one = np.fromfile(features_one_path, dtype=np.float32).reshape(1, 512, 28, 28) + features_two = np.fromfile(features_two_path, dtype=np.float32).reshape(1, 1024, 14, 14) + + embedding = embedding_concat(features_one, features_two) + embedding_test = reshape_embedding(embedding) + + embedding_test = np.array(embedding_test, dtype=np.float32) + score_patches, _ = index.search(embedding_test, k=9) + + anomaly_map = score_patches[:, 0].reshape((28, 28)) + N_b = score_patches[np.argmax(score_patches[:, 0])] + w = (1 - (np.max(np.exp(N_b)) / np.sum(np.exp(N_b)))) + score = w * max(score_patches[:, 0]) + gt_np = np.array(gt)[0, 0].astype(int) + anomaly_map_resized = cv2.resize(anomaly_map, (224, 224)) + anomaly_map_resized_blur = gaussian_filter(anomaly_map_resized, sigma=4) + + gt_list_px_lvl.extend(gt_np.ravel()) + pred_list_px_lvl.extend(anomaly_map_resized_blur.ravel()) + gt_list_img_lvl.append(label[0]) + pred_list_img_lvl.append(score) + img_path_list.extend(file_name) + img = normalize(img, mean, std) + input_img = cv2.cvtColor(np.transpose(img, (0, 2, 3, 1))[0] * 255, cv2.COLOR_BGR2RGB) + save_anomaly_map(sample_path, anomaly_map_resized_blur, input_img, gt_np * 255, file_name, x_type) + + pixel_acc = roc_auc_score(gt_list_px_lvl, pred_list_px_lvl) + img_acc = roc_auc_score(gt_list_img_lvl, pred_list_img_lvl) + + print('\n310 acc is') + print('category is {}'.format(args.category)) + print("img_acc: {}, pixel_acc: {}".format(img_acc, pixel_acc)) diff --git a/research/cv/PatchCore/preprocess.py b/research/cv/PatchCore/preprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..d537da68c2d17cb355f9468f92d51de5af82a75d --- /dev/null +++ b/research/cv/PatchCore/preprocess.py @@ -0,0 +1,181 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""preprocess""" +import argparse +import json +import os +from pathlib import Path + +import mindspore.common.dtype as mstype +import mindspore.dataset as ds +import mindspore.dataset.transforms.c_transforms as C2 +import mindspore.dataset.vision.py_transforms as py_vision +from mindspore.common import set_seed +from mindspore.dataset.transforms.py_transforms import Compose +from mindspore.dataset.vision import Inter + +from src.config import _C as cfg +from src.dataset import MVTecDataset + +set_seed(1) + +parser = argparse.ArgumentParser(description='preprocesss') + +parser.add_argument('--data_dir', type=str, default='') +parser.add_argument("--img_dir", type=str, help="") +parser.add_argument('--category', type=str, default='') + +args = parser.parse_args() + +def createDatasetJson(dataset_path, category, data_transforms, gt_transforms): + """createDatasetJson""" + path = os.path.join(dataset_path, 'json', category) + if not os.path.exists(path): + os.makedirs(path) + train_json_path = os.path.join(path, '{}_{}.json'.format(category, 'pre')) + test_json_path = os.path.join(path, '{}_{}.json'.format(category, 'infer')) + + if not os.path.isfile(train_json_path): + os.mknod(train_json_path) + train_data = MVTecDataset(root=os.path.join(dataset_path, category), + transform=data_transforms, gt_transform=gt_transforms, phase='train', is_json=True) + train_label = {} + train_data_length = train_data.__len__() + for i in range(train_data_length): + single_label = {} + name, img_type = train_data.__getitem__(i) + single_label['name'] = name + single_label['img_type'] = img_type + train_label['{}'.format(i)] = single_label + + json_path = Path(train_json_path) + with json_path.open('w') as json_file: + json.dump(train_label, json_file) + + if not os.path.isfile(test_json_path): + os.mknod(test_json_path) + test_data = MVTecDataset(root=os.path.join(dataset_path, category), + transform=data_transforms, gt_transform=gt_transforms, phase='test', is_json=True) + test_label = {} + test_data_length = test_data.__len__() + for i in range(test_data_length): + single_label = {} + name, img_type = test_data.__getitem__(i) + single_label['name'] = name + single_label['img_type'] = img_type + test_label['{}'.format(i)] = single_label + + json_path = Path(test_json_path) + with json_path.open('w') as json_file: + json.dump(test_label, json_file) + + return train_json_path, test_json_path + +def createDataset(dataset_path, category): + """createDataset""" + mean = cfg.mean_dft + std = cfg.std_dft + + data_transforms = Compose([ + py_vision.Resize((256, 256), interpolation=Inter.ANTIALIAS), + py_vision.CenterCrop(224), + py_vision.ToTensor(), + py_vision.Normalize(mean=mean, std=std) + ]) + gt_transforms = Compose([ + py_vision.Resize((256, 256)), + py_vision.CenterCrop(224), + py_vision.ToTensor() + ]) + + train_json_path, test_json_path = createDatasetJson(dataset_path, category, data_transforms, gt_transforms) + + train_data = MVTecDataset(root=os.path.join(dataset_path, category), + transform=data_transforms, gt_transform=gt_transforms, phase='train') + test_data = MVTecDataset(root=os.path.join(dataset_path, category), + transform=data_transforms, gt_transform=gt_transforms, phase='test') + + train_dataset = ds.GeneratorDataset(train_data, column_names=['img', 'gt', 'label', 'idx'], + shuffle=True) + test_dataset = ds.GeneratorDataset(test_data, column_names=['img', 'gt', 'label', 'idx'], + shuffle=False) + + 
type_cast_float32_op = C2.TypeCast(mstype.float32) + train_dataset = train_dataset.map(operations=type_cast_float32_op, input_columns="img") + test_dataset = test_dataset.map(operations=type_cast_float32_op, input_columns="img") + + train_dataset = train_dataset.batch(1, drop_remainder=False) + test_dataset = test_dataset.batch(1, drop_remainder=False) + + return train_dataset, test_dataset, train_json_path, test_json_path + +if __name__ == '__main__': + set_seed(1) + train_dataset_, test_dataset_, train_json_path_, test_json_path_ = createDataset(args.data_dir, args.category) + root_path = os.path.join(args.img_dir, args.category) + train_path = os.path.join(root_path, 'pre') + test_path = os.path.join(root_path, 'infer') + label_path = os.path.join(root_path, 'label') + + train_label_ = {} + for j, data in enumerate(train_dataset_.create_dict_iterator()): + train_single_lable = {} + + img = data['img'].asnumpy() + gt = data['gt'].asnumpy() + label = data['label'].asnumpy() + idx = data['idx'].asnumpy() + + # save img + file_name_img = "data_img" + "_" + str(j) + ".bin" + file_path = os.path.join(train_path, file_name_img) + img.tofile(file_path) + + train_single_lable['gt'] = gt.tolist() + train_single_lable['label'] = label.tolist() + train_single_lable['idx'] = idx.tolist() + + train_label_['{}'.format(j)] = train_single_lable + + test_label_ = {} + for j, data in enumerate(test_dataset_.create_dict_iterator()): + test_single_lable = {} + + img = data['img'].asnumpy() + gt = data['gt'].asnumpy() + label = data['label'].asnumpy() + idx = data['idx'].asnumpy() + + # save img + file_name_img = "data_img" + "_" + str(j) + ".bin" + file_path = os.path.join(test_path, file_name_img) + img.tofile(file_path) + + test_single_lable['gt'] = gt.tolist() + test_single_lable['label'] = label.tolist() + test_single_lable['idx'] = idx.tolist() + + test_label_['{}'.format(j)] = test_single_lable + + train_label_['pre_json_path'] = train_json_path_ + test_label_['infer_json_path'] = test_json_path_ + + train_label_json_path = Path(os.path.join(label_path, 'pre_label.json')) + with train_label_json_path.open('w') as json_path_: + json.dump(train_label_, json_path_) + + test_label_json_path = Path(os.path.join(label_path, 'infer_label.json')) + with test_label_json_path.open('w') as json_path_: + json.dump(test_label_, json_path_) diff --git a/research/cv/PatchCore/requirements.txt b/research/cv/PatchCore/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..605df3f5022aad6ea8621e8f9912e372fbae542c --- /dev/null +++ b/research/cv/PatchCore/requirements.txt @@ -0,0 +1,4 @@ +opencv-python +scikit-learn +faiss-cpu +yacs \ No newline at end of file diff --git a/research/cv/PatchCore/scripts/run_310_infer.sh b/research/cv/PatchCore/scripts/run_310_infer.sh new file mode 100644 index 0000000000000000000000000000000000000000..717955802fbf267f94eaed9413481276c5745590 --- /dev/null +++ b/research/cv/PatchCore/scripts/run_310_infer.sh @@ -0,0 +1,149 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [[ $# -lt 4 || $# -gt 5 ]]; then
+    echo "Usage: bash run_310_infer.sh [MINDIR_PATH] [DATASET_PATH] [NEED_PREPROCESS] [DEVICE_ID] [CATEGORY]
+    NEED_PREPROCESS means whether preprocess is needed or not, its value is 'y' or 'n'.
+    DEVICE_ID is optional, it can be set by environment variable device_id, otherwise the value is zero"
+exit 1
+fi
+
+get_real_path() {
+    if [ "${1:0:1}" == "/" ]; then
+        echo "$1"
+    else
+        echo "$(realpath -m $PWD/$1)"
+    fi
+}
+
+model=$(get_real_path $1)
+dataset_path=$(get_real_path $2)
+
+if [ "$3" == "y" ] || [ "$3" == "n" ]; then
+    need_preprocess=$3
+else
+    echo "whether preprocess is needed or not, its value must be in [y, n]"
+    exit 1
+fi
+
+device_id=$4
+category=$5
+
+echo "Mindir name: "$model
+echo "dataset path: "$dataset_path
+echo "need preprocess: "$need_preprocess
+echo "device id: "$device_id
+echo "category: "$category
+
+export ASCEND_HOME=/usr/local/Ascend/
+if [ -d ${ASCEND_HOME}/ascend-toolkit ]; then
+    export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/ascend-toolkit/latest/atc/bin:$PATH
+    export LD_LIBRARY_PATH=$ASCEND_HOME/fwkacllib/lib64:/usr/local/lib:$ASCEND_HOME/ascend-toolkit/latest/atc/lib64:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
+    export TBE_IMPL_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp/op_impl/built-in/ai_core/tbe
+    export PYTHONPATH=$ASCEND_HOME/fwkacllib/python/site-packages:${TBE_IMPL_PATH}:$ASCEND_HOME/ascend-toolkit/latest/fwkacllib/python/site-packages:$PYTHONPATH
+    export ASCEND_OPP_PATH=$ASCEND_HOME/ascend-toolkit/latest/opp
+else
+    export PATH=$ASCEND_HOME/fwkacllib/bin:$ASCEND_HOME/fwkacllib/ccec_compiler/bin:$ASCEND_HOME/atc/ccec_compiler/bin:$ASCEND_HOME/atc/bin:$PATH
+    export LD_LIBRARY_PATH=$ASCEND_HOME/fwkacllib/lib64:/usr/local/lib:$ASCEND_HOME/atc/lib64:$ASCEND_HOME/acllib/lib64:$ASCEND_HOME/driver/lib64:$ASCEND_HOME/add-ons:$LD_LIBRARY_PATH
+    export PYTHONPATH=$ASCEND_HOME/fwkacllib/python/site-packages:$ASCEND_HOME/atc/python/site-packages:$PYTHONPATH
+    export ASCEND_OPP_PATH=$ASCEND_HOME/opp
+fi
+
+
+function preprocess_data() {
+    if [ !
-d img ]; then + mkdir ./img + fi + if [ -d img/$category ]; then + rm -rf img/$category + fi + mkdir ./img/$category + mkdir ./img/$category/pre + mkdir ./img/$category/infer + mkdir ./img/$category/label + + python3.7 ../preprocess.py \ + --data_dir $dataset_path \ + --img_dir ./img \ + --category $category +} + +function compile_app() { + cd ../ascend_310_infer || exit + bash build.sh &> build.log +} + +function infer() { + cd - || exit + if [ -d img/$category/result ]; then + rm -rf img/$category/result + fi + mkdir img/$category/result + mkdir img/$category/result/pre + mkdir img/$category/result/infer + + if [ -d img/$category/time ]; then + rm -rf img/$category/time + fi + mkdir img/$category/time + mkdir img/$category/time/pre + mkdir img/$category/time/infer + + ../ascend_310_infer/out/main \ + --mindir_path=$model \ + --input_path=./img/$category/pre \ + --result_path=./img/$category/result/pre \ + --time_path=./img/$category/time/pre \ + --device_id=$device_id &> pre_$category.log + + ../ascend_310_infer/out/main \ + --mindir_path=$model \ + --input_path=./img/$category/infer \ + --result_path=./img/$category/result/infer \ + --time_path=./img/$category/time/infer \ + --device_id=$device_id &> infer_$category.log +} + +function cal_acc() { + python3 ../postprocess.py \ + --result_dir ./img/$category/result/ \ + --img_dir ./img/$category/infer/ \ + --label_dir ./img/$category/label/ \ + --category $category > acc_$category.log +} + +if [ $need_preprocess == "y" ]; then + preprocess_data + if [ $? -ne 0 ]; then + echo "preprocess dataset failed" + exit 1 + fi +fi +compile_app +if [ $? -ne 0 ]; then + echo "compile app code failed" + exit 1 +fi +infer +if [ $? -ne 0 ]; then + echo " execute inference failed" + exit 1 +fi +cal_acc +if [ $? -ne 0 ]; then + echo "calculate accuracy failed" + exit 1 +fi diff --git a/research/cv/PatchCore/scripts/run_eval.sh b/research/cv/PatchCore/scripts/run_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..8ffcadf2c5b5ea38929f2b6fededaeaa9dc58af4 --- /dev/null +++ b/research/cv/PatchCore/scripts/run_eval.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the script as: " +echo "bash run_eval.sh DATA_PATH DEVICE_ID PRETRAINED_PATH CATEGORY" +echo "For example: bash run_eval.sh /path/dataset 0 /path/pretrained_path category" +echo "It is better to use the absolute path." +echo "==============================================================================================================" +set -e +DATA_PATH=$1 + +export DATA_PATH=${DATA_PATH} + +export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + +cd ../ +env > env0.log +python3 eval.py --dataset_path $1 --device_id $2 --pre_ckpt_path $3 --category $4 > eval_$4.log 2>&1 + +if [ $? 
-eq 0 ];then + echo "training success" +else + echo "training failed" + exit 2 +fi +echo "finish" +cd ../ diff --git a/research/cv/PatchCore/scripts/run_train.sh b/research/cv/PatchCore/scripts/run_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..6cbc25babae597bf403058c156b8c7b0d6b78351 --- /dev/null +++ b/research/cv/PatchCore/scripts/run_train.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +echo "==============================================================================================================" +echo "Please run the script as: " +echo "bash run_train.sh DATA_PATH DEVICE_ID PRETRAINED_PATH CATEGORY" +echo "For example: bash run_train.sh /path/dataset 0 /path/pretrained_path category" +echo "It is better to use the absolute path." +echo "==============================================================================================================" +set -e +DATA_PATH=$1 + +export DATA_PATH=${DATA_PATH} + +export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python + +cd ../ +env > env0.log +python3 train.py --dataset_path $1 --isModelArts False --device_id $2 --pre_ckpt_path $3 --category $4 > train_$4.log 2>&1 + +if [ $? -eq 0 ];then + echo "training success" +else + echo "training failed" + exit 2 +fi +echo "finish" +cd ../ diff --git a/research/cv/PatchCore/src/config.py b/research/cv/PatchCore/src/config.py new file mode 100644 index 0000000000000000000000000000000000000000..bd725b71742df82843838f24fa48085df9c1e843 --- /dev/null +++ b/research/cv/PatchCore/src/config.py @@ -0,0 +1,23 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+"""config"""
+from yacs.config import CfgNode as CN

+_C = CN()
+
+# ImageNet mean/std used to normalize network inputs (std of the last channel is 0.225)
+_C.mean_dft = [0.485, 0.456, 0.406]
+_C.std_dft = [0.229, 0.224, 0.225]
+# constants for undoing that normalization before saving anomaly maps
+_C.mean = [-0.485/0.229, -0.456/0.224, -0.406/0.225]
+_C.std = [1/0.229, 1/0.224, 1/0.225]
diff --git a/research/cv/PatchCore/src/dataset.py b/research/cv/PatchCore/src/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca4bad80affca362c19966394a93c1586e0eedf7
--- /dev/null
+++ b/research/cv/PatchCore/src/dataset.py
@@ -0,0 +1,170 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""MVTecDataset"""
+import glob
+import json
+import os
+from pathlib import Path
+import numpy as np
+import mindspore.common.dtype as mstype
+import mindspore.dataset as ds
+import mindspore.dataset.transforms.c_transforms as C2
+import mindspore.dataset.vision.py_transforms as py_vision
+from mindspore.dataset.transforms.py_transforms import Compose
+from mindspore.dataset.vision import Inter
+from PIL import Image
+
+
+class MVTecDataset():
+    """MVTecDataset"""
+    def __init__(self, root, transform, gt_transform, phase, is_json=False):
+        if phase == 'train':
+            self.img_path = os.path.join(root, 'train')
+        else:
+            self.img_path = os.path.join(root, 'test')
+            self.gt_path = os.path.join(root, 'ground_truth')
+
+        self.is_json = is_json
+        self.transform = transform
+        self.gt_transform = gt_transform
+        self.img_paths, self.gt_paths, self.labels, self.types = self.load_dataset()
+
+    def load_dataset(self):
+        """load_dataset"""
+        img_tot_paths = []
+        gt_tot_paths = []
+        tot_labels = []
+        tot_types = []
+
+        defect_types = os.listdir(self.img_path)
+
+        for defect_type in defect_types:
+            if defect_type == 'good':
+                img_paths = glob.glob(os.path.join(self.img_path, defect_type) + "/*.png")
+                img_tot_paths.extend(img_paths)
+                gt_tot_paths.extend([0] * len(img_paths))
+                tot_labels.extend([0] * len(img_paths))
+                tot_types.extend(['good'] * len(img_paths))
+            else:
+                img_paths = glob.glob(os.path.join(self.img_path, defect_type) + "/*.png")
+                gt_paths = glob.glob(os.path.join(self.gt_path, defect_type) + "/*.png")
+                img_paths.sort()
+                gt_paths.sort()
+                img_tot_paths.extend(img_paths)
+                gt_tot_paths.extend(gt_paths)
+                tot_labels.extend([1] * len(img_paths))
+                tot_types.extend([defect_type] * len(img_paths))
+
+        assert len(img_tot_paths) == len(gt_tot_paths), "Something wrong with test and ground truth pair!"
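+        # the four lists stay index-aligned: image path, mask path (0 for 'good'), image label, defect type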
+ + return img_tot_paths, gt_tot_paths, tot_labels, tot_types + + def __len__(self): + return len(self.img_paths) + + def __getitem__(self, idx): + img_path, gt, label, img_type = self.img_paths[idx], self.gt_paths[idx], self.labels[idx], self.types[idx] + img = Image.open(img_path).convert('RGB') + img = self.transform(img)[0] + + if gt == 0: + gt = np.zeros((1, np.array(img).shape[-2], np.array(img).shape[-2])).tolist() + else: + gt = Image.open(gt) + gt = self.gt_transform(gt)[0] + + if self.is_json: + return os.path.basename(img_path[:-4]), img_type + return img, gt, label, idx + +def createDatasetJson(dataset_path, category, data_transforms, gt_transforms): + """createDatasetJson""" + train_json_path = os.path.join(dataset_path, category, '{}_{}.json'.format(category, 'train')) + test_json_path = os.path.join(dataset_path, category, '{}_{}.json'.format(category, 'test')) + + if not os.path.isfile(train_json_path): + print(train_json_path) + os.mknod(train_json_path) + train_data = MVTecDataset(root=os.path.join(dataset_path, category), + transform=data_transforms, gt_transform=gt_transforms, phase='train', is_json=True) + train_label = {} + train_data_length = train_data.__len__() + for i in range(train_data_length): + single_label = {} + name, img_type = train_data.__getitem__(i) + single_label['name'] = name + single_label['img_type'] = img_type + train_label['{}'.format(i)] = single_label + + json_path = Path(train_json_path) + with json_path.open('w') as json_file: + json.dump(train_label, json_file) + + if not os.path.isfile(test_json_path): + os.mknod(test_json_path) + test_data = MVTecDataset(root=os.path.join(dataset_path, category), + transform=data_transforms, gt_transform=gt_transforms, phase='test', is_json=True) + test_label = {} + test_data_length = test_data.__len__() + for i in range(test_data_length): + single_label = {} + name, img_type = test_data.__getitem__(i) + single_label['name'] = name + single_label['img_type'] = img_type + test_label['{}'.format(i)] = single_label + + json_path = Path(test_json_path) + with json_path.open('w') as json_file: + json.dump(test_label, json_file) + + return train_json_path, test_json_path + +def createDataset(dataset_path, category): + """createDataset""" + mean = [0.485, 0.456, 0.406] + std = [0.229, 0.224, 0.225] + + data_transforms = Compose([ + py_vision.Resize((256, 256), interpolation=Inter.ANTIALIAS), + py_vision.CenterCrop(224), + py_vision.ToTensor(), + py_vision.Normalize(mean=mean, std=std) + ]) + gt_transforms = Compose([ + py_vision.Resize((256, 256)), + py_vision.CenterCrop(224), + py_vision.ToTensor() + ]) + + train_json_path, test_json_path = createDatasetJson(dataset_path, category, data_transforms, gt_transforms) + + train_data = MVTecDataset(root=os.path.join(dataset_path, category), + transform=data_transforms, gt_transform=gt_transforms, phase='train') + test_data = MVTecDataset(root=os.path.join(dataset_path, category), + transform=data_transforms, gt_transform=gt_transforms, phase='test') + + train_dataset = ds.GeneratorDataset(train_data, column_names=['img', 'gt', 'label', 'idx'], + shuffle=True) + test_dataset = ds.GeneratorDataset(test_data, column_names=['img', 'gt', 'label', 'idx'], + shuffle=False) + + type_cast_float32_op = C2.TypeCast(mstype.float32) + train_dataset = train_dataset.map(operations=type_cast_float32_op, input_columns="img") + test_dataset = test_dataset.map(operations=type_cast_float32_op, input_columns="img") + + train_dataset = train_dataset.batch(32, drop_remainder=False) + 
test_dataset = test_dataset.batch(1, drop_remainder=False) + + return train_dataset, test_dataset, train_json_path, test_json_path diff --git a/research/cv/PatchCore/src/model.py b/research/cv/PatchCore/src/model.py new file mode 100644 index 0000000000000000000000000000000000000000..3fdffcbef88cf1cf6544332d64bc7b129c4f3ce3 --- /dev/null +++ b/research/cv/PatchCore/src/model.py @@ -0,0 +1,132 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""model""" +import mindspore.nn as nn +import mindspore.ops as ops + +class Bottleneck(nn.Cell): + """Bottleneck""" + expansion: int = 4 + + def __init__(self, + inplanes, + planes, + stride=1, + downsample=None, + groups=1, + base_width=64, + dilation=1): + super(Bottleneck, self).__init__() + + width = int(planes * (base_width / 64.)) * groups + + self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, stride=1, padding=0, pad_mode='pad') + self.bn1 = nn.BatchNorm2d(width) + self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=dilation, + pad_mode='pad', group=groups, dilation=dilation) + self.bn2 = nn.BatchNorm2d(width) + self.conv3 = nn.Conv2d(width, planes * self.expansion, kernel_size=1, stride=1, padding=0, pad_mode='pad') + self.bn3 = nn.BatchNorm2d(planes * self.expansion) + self.relu = nn.ReLU() + + self.downsample = downsample + self.stride = stride + + def construct(self, x): + """construct""" + identity = x + + out = self.conv1(x) + out = self.bn1(out) + out = self.relu(out) + + out = self.conv2(out) + out = self.bn2(out) + out = self.relu(out) + + out = self.conv3(out) + out = self.bn3(out) + + if self.downsample is not None: + identity = self.downsample(x) + + out += identity + out = self.relu(out) + + return out + +class ResNet(nn.Cell): + """ResNet""" + def __init__(self, block, layers, num_classes=1000, groups=1, width_per_group=64): + super(ResNet, self).__init__() + self.inplanes = 64 + self.dilation = 1 + self.groups = groups + self.base_width = width_per_group + self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, pad_mode='pad') + self.bn1 = nn.BatchNorm2d(self.inplanes) + self.relu = nn.ReLU() + self.pad = nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1))) + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="valid") + + self.layer1 = self._make_layer(block, 64, layers[0]) + self.layer2 = self._make_layer(block, 128, layers[1], stride=2) + self.layer3 = self._make_layer(block, 256, layers[2], stride=2) + self.layer4 = self._make_layer(block, 512, layers[3], stride=2) + + self.avgpool = nn.AvgPool2d(kernel_size=3, stride=1, pad_mode="valid") + self.avgpool_same = nn.AvgPool2d(kernel_size=3, stride=1, pad_mode="same") + + self.mean = ops.ReduceMean(keep_dims=True) + self.flatten = nn.Flatten() + self.fc = nn.Dense(512 * block.expansion, num_classes) + + def _make_layer(self, block, planes, blocks, stride=1): + """_make_layer""" + downsample = None + 
previous_dilation = self.dilation + + if stride != 1 or self.inplanes != planes * block.expansion: + downsample = nn.SequentialCell( + nn.Conv2d(self.inplanes, planes * block.expansion, kernel_size=1, stride=stride), + nn.BatchNorm2d(planes * block.expansion), + ) + + layers = [] + layers.append(block(self.inplanes, planes, stride, downsample, self.groups, + self.base_width, previous_dilation)) + self.inplanes = planes * block.expansion + for _ in range(1, blocks): + layers.append(block(self.inplanes, planes, groups=self.groups, + base_width=self.base_width, dilation=self.dilation)) + + return nn.SequentialCell(layers) + + def construct(self, x): + """construct""" + x = self.conv1(x) + x = self.bn1(x) + x = self.relu(x) + x = self.pad(x) + x = self.maxpool(x) + + c1 = self.layer1(x) + c2 = self.layer2(c1) + c3 = self.layer3(c2) + + return [c2, c3] + +def wide_resnet50_2(): + return ResNet(Bottleneck, [3, 4, 6, 3], width_per_group=64 * 2) diff --git a/research/cv/PatchCore/src/oneStep.py b/research/cv/PatchCore/src/oneStep.py new file mode 100644 index 0000000000000000000000000000000000000000..616c3e29e535e6af359ee8aa2b40e944abbf3549 --- /dev/null +++ b/research/cv/PatchCore/src/oneStep.py @@ -0,0 +1,33 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""OneStepCell""" +import mindspore.nn as nn + +class OneStepCell(nn.Cell): + """OneStepCell""" + def __init__(self, network): + super(OneStepCell, self).__init__() + self.network = network + + self.pad = nn.Pad(paddings=((0, 0), (0, 0), (1, 1), (1, 1)), mode="CONSTANT") + self.pool = nn.AvgPool2d(kernel_size=3, stride=1, pad_mode="valid") + + def construct(self, img): + output = self.network(img) + + output_one = self.pool(self.pad(output[0])) + output_two = self.pool(self.pad(output[1])) + + return [output_one, output_two] diff --git a/research/cv/PatchCore/src/operator.py b/research/cv/PatchCore/src/operator.py new file mode 100644 index 0000000000000000000000000000000000000000..8712aceda68915772dfb9a70d0477e0e0f6dc9cc --- /dev/null +++ b/research/cv/PatchCore/src/operator.py @@ -0,0 +1,170 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""operator""" +import os +import cv2 +import mindspore +import mindspore.ops as ops +import numpy as np + +def unfold(img, kernel_size, stride=1, pad=0, dilation=1): + """ + unfold function + """ + batch_num, channel, height, width = img.shape + out_h = (height + pad + pad - kernel_size - (kernel_size - 1) * (dilation - 1)) // stride + 1 + out_w = (width + pad + pad - kernel_size - (kernel_size - 1) * (dilation - 1)) // stride + 1 + + img = np.pad(img, [(0, 0), (0, 0), (pad, pad), (pad, pad)], 'constant') + col = np.zeros((batch_num, channel, kernel_size, kernel_size, out_h, out_w)).astype(img.dtype) + + for y in range(kernel_size): + y_max = y + stride * out_h + for x in range(kernel_size): + x_max = x + stride * out_w + col[:, :, y, x, :, :] = img[:, :, y:y_max:stride, x:x_max:stride] + + col = np.reshape(col, (batch_num, channel*kernel_size*kernel_size, out_h*out_w)) + + return col + +def fold(col, input_shape, kernel_size, stride=1, pad=0): + """ + fold function + """ + batch_num, channel, height, width = input_shape + out_h = (height + pad + pad - kernel_size) // stride + 1 + out_w = (width + pad + pad - kernel_size) // stride + 1 + + col = col.reshape(batch_num, channel, kernel_size, kernel_size, out_h, out_w) + img = np.zeros((batch_num, + channel, + height + pad + pad + stride - 1, + width + pad + pad + stride - 1)) \ + .astype(col.dtype) + for y in range(kernel_size): + y_max = y + stride * out_h + for x in range(kernel_size): + x_max = x + stride * out_w + img[:, :, y:y_max:stride, x:x_max:stride] += col[:, :, y, x, :, :] + + return img[:, :, pad:height + pad, pad:width + pad] + +def embedding_concat(x, y): + """ + embedding_concat function + """ + B, C1, H1, W1 = x.shape + _, C2, H2, W2 = y.shape + s = int(H1 / H2) + x = unfold(x, s, stride=s) + x = np.reshape(x, (B, C1, -1, H2, W2)) + z = np.zeros((B, C1 + C2, x.shape[2], H2, W2)) + + for i in range(x.shape[2]): + z[:, :, i, :, :] = np.concatenate((x[:, :, i, :, :], y), axis=1) + + z = np.reshape(z, (B, -1, H2*W2)) + z = fold(z, (z.shape[0], int(z.shape[1] / (s * s)), H1, W1), s, stride=s) + + return z + +def reshape_embedding(embedding): + """ + reshape_embedding function + """ + embedding_list = [] + for k in range(embedding.shape[0]): + for i in range(embedding.shape[2]): + for j in range(embedding.shape[3]): + embedding_list.append(embedding[k, :, i, j]) + + return embedding_list + +def prep_dirs(path, category): + """ + prep_dirs function + """ + root = os.path.join(path, category) + os.makedirs(root, exist_ok=True) + + # make embeddings dir + embeddings_path = os.path.join(root, 'embeddings') + os.makedirs(embeddings_path, exist_ok=True) + + # make sample dir + sample_path = os.path.join(root, 'sample') + os.makedirs(sample_path, exist_ok=True) + + return embeddings_path, sample_path + +def normalize(input_n, mean, std): + """ + normalize function + input: numpy + output: numpy + """ + mean = mindspore.Tensor(mean, dtype=mindspore.float32).view((-1, 1, 1)) + std = mindspore.Tensor(std, dtype=mindspore.float32).view((-1, 1, 1)) + + sub = ops.Sub() + div = ops.Div() + + out = div(sub(mindspore.Tensor(input_n), mean), std) + + return out.asnumpy() + +def cvt2heatmap(gray): + """ + cvt2heatmap function + """ + heatmap = cv2.applyColorMap(np.uint8(gray), cv2.COLORMAP_JET) + return heatmap + +def min_max_norm(image): + """ + min_max_norm function + """ + a_min, a_max = image.min(), image.max() + return (image - a_min) / (a_max - a_min) + +def 
heatmap_on_image(heatmap, image):
+    """
+    heatmap_on_image function
+    """
+    if heatmap.shape != image.shape:
+        # cv2.resize expects dsize as (width, height)
+        heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))
+    out = np.float32(heatmap) / 255 + np.float32(image) / 255
+    out = out / np.max(out)
+    return np.uint8(255 * out)
+
+def save_anomaly_map(sample_path, anomaly_map, input_img, gt_img, file_name, x_type):
+    """
+    save_anomaly_map function
+    """
+    if anomaly_map.shape != input_img.shape:
+        anomaly_map = cv2.resize(anomaly_map, (input_img.shape[1], input_img.shape[0]))
+    anomaly_map_norm = min_max_norm(anomaly_map)
+    anomaly_map_norm_hm = cvt2heatmap(anomaly_map_norm * 255)
+
+    # anomaly map on image
+    hm_on_img = heatmap_on_image(anomaly_map_norm_hm, input_img)
+
+    # save images
+    cv2.imwrite(os.path.join(sample_path, f'{x_type}_{file_name}.jpg'), input_img)
+    cv2.imwrite(os.path.join(sample_path, f'{x_type}_{file_name}_amap.jpg'), anomaly_map_norm_hm)
+    cv2.imwrite(os.path.join(sample_path, f'{x_type}_{file_name}_amap_on_img.jpg'), hm_on_img)
+    cv2.imwrite(os.path.join(sample_path, f'{x_type}_{file_name}_gt.jpg'), gt_img)
diff --git a/research/cv/PatchCore/src/pthtockpt.py b/research/cv/PatchCore/src/pthtockpt.py
new file mode 100644
index 0000000000000000000000000000000000000000..64ecb3bc0b183d48e904a69186063f1570ac5f6a
--- /dev/null
+++ b/research/cv/PatchCore/src/pthtockpt.py
@@ -0,0 +1,63 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ +"""pth --> ckpt""" +import argparse +import torch +import mindspore +from mindspore.train.serialization import save_checkpoint +from mindspore import Tensor + +def replace_self(name1, str1, str2): + return name1.replace(str1, str2) + +parser = argparse.ArgumentParser(description='') + +parser.add_argument('--pth_path', type=str, default='wide_resnet50_2-95faca4d.pth', + help='pth path') +parser.add_argument('--device_target', type=str, default='cpu', + help='device target') + +args = parser.parse_args() +print(args) + +if __name__ == '__main__': + par_dict = torch.load(args.pth_path, map_location=args.device_target) + new_params_list = [] + + for name in par_dict: + param_dict = {} + parameter = par_dict[name] + + if name.endswith('num_batches_tracked'): + continue + elif (name.endswith('running_mean') or name.endswith('running_var')): + name = replace_self(name, "running_mean", "moving_mean") + name = replace_self(name, "running_var", "moving_variance") + else: + name = replace_self(name, "bn1.weight", "bn1.gamma") + name = replace_self(name, "bn1.bias", "bn1.beta") + name = replace_self(name, "bn2.weight", "bn2.gamma") + name = replace_self(name, "bn2.bias", "bn2.beta") + name = replace_self(name, "bn3.weight", "bn3.gamma") + name = replace_self(name, "bn3.bias", "bn3.beta") + + name = replace_self(name, "downsample.1.weight", "downsample.1.gamma") + name = replace_self(name, "downsample.1.bias", "downsample.1.beta") + + param_dict['name'] = name + param_dict['data'] = Tensor(parameter.numpy(), mindspore.float32) + new_params_list.append(param_dict) + + save_checkpoint(new_params_list, 'PatchCore_pretrain.ckpt') diff --git a/research/cv/PatchCore/src/sampling_methods/kcenter_greedy.py b/research/cv/PatchCore/src/sampling_methods/kcenter_greedy.py new file mode 100644 index 0000000000000000000000000000000000000000..59a1846cf5ac471b7937cb25e8c7325f1c3d2e4c --- /dev/null +++ b/research/cv/PatchCore/src/sampling_methods/kcenter_greedy.py @@ -0,0 +1,97 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""kCenterGreedy""" +import numpy as np +from sklearn.metrics import pairwise_distances +from src.sampling_methods.sampling_def import SamplingMethod + +class kCenterGreedy(SamplingMethod): + """kCenterGreedy""" + def __init__(self, X, y, seed, metric='euclidean'): + self.X = X + self.y = y + self.flat_X = self.flatten_X() + self.name = 'kcenter' + self.features = self.flat_X + self.metric = metric + self.min_distances = None + self.n_obs = self.X.shape[0] + self.already_selected = [] + + def update_distances(self, cluster_centers, only_new=True, reset_dist=False): + """Update min distances given cluster centers. + + Args: + cluster_centers: indices of cluster centers + only_new: only calculate distance for newly selected points and update + min_distances. 
+            reset_dist: whether to reset min_distances.
+        """
+        if reset_dist:
+            self.min_distances = None
+        if only_new:
+            cluster_centers = [d for d in cluster_centers
+                               if d not in self.already_selected]
+        if cluster_centers:
+            # Update min_distances for all examples given new cluster center.
+            x = self.features[cluster_centers]
+            dist = pairwise_distances(self.features, x, metric=self.metric)
+
+            if self.min_distances is None:
+                self.min_distances = np.min(dist, axis=1).reshape(-1, 1)
+            else:
+                self.min_distances = np.minimum(self.min_distances, dist)
+
+    def select_batch_(self, model, already_selected, N, **kwargs):
+        """
+        Diversity promoting active learning method that greedily forms a batch
+        to minimize the maximum distance to a cluster center among all unlabeled
+        datapoints.
+
+        Args:
+            model: model with scikit-like API with decision_function implemented
+            already_selected: index of datapoints already selected
+            N: batch size
+
+        Returns:
+            indices of points selected to minimize distance to cluster centers
+        """
+
+        # Assumes that the transform function takes in original data and not
+        # flattened data.
+        print('Getting transformed features...')
+        self.features = model.transform(self.X)
+        print('Calculating distances...')
+        self.update_distances(already_selected, only_new=False, reset_dist=True)
+
+        new_batch = []
+
+        for _ in range(N):
+            if self.already_selected is None:
+                # Initialize centers with a randomly selected datapoint
+                ind = np.random.choice(np.arange(self.n_obs))
+            else:
+                ind = np.argmax(self.min_distances)
+            # New examples should not be in already selected since those points
+            # should have min_distance of zero to a cluster center.
+            assert ind not in already_selected
+
+            self.update_distances([ind], only_new=True, reset_dist=False)
+            new_batch.append(ind)
+        print('Maximum distance from cluster centers is %0.2f' % max(self.min_distances))
+
+        self.already_selected = already_selected
+
+        return new_batch
diff --git a/research/cv/PatchCore/src/sampling_methods/sampling_def.py b/research/cv/PatchCore/src/sampling_methods/sampling_def.py
new file mode 100644
index 0000000000000000000000000000000000000000..45548feaf30f3ef87e5fbd98257b739e355a6d23
--- /dev/null
+++ b/research/cv/PatchCore/src/sampling_methods/sampling_def.py
@@ -0,0 +1,45 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""sampling_def"""
+import abc
+import numpy as np

+class SamplingMethod(abc.ABC):
+    """SamplingMethod"""
+
+    @abc.abstractmethod
+    def __init__(self, X, y, seed, **kwargs):
+        self.X = X
+        self.y = y
+        self.seed = seed
+
+    def flatten_X(self):
+        shape = self.X.shape
+        flat_X = self.X
+        if len(shape) > 2:
+            flat_X = np.reshape(self.X, (shape[0], np.prod(shape[1:])))
+
+        return flat_X
+
+    @abc.abstractmethod
+    def select_batch_(self):
+        return
+
+    def select_batch(self, **kwargs):
+        return self.select_batch_(**kwargs)
+
+    def to_dict(self):
+        return None
diff --git a/research/cv/PatchCore/train.py b/research/cv/PatchCore/train.py
new file mode 100644
index 0000000000000000000000000000000000000000..fe052cbc00e8b482dd7e43588dce3000eb56f50d
--- /dev/null
+++ b/research/cv/PatchCore/train.py
@@ -0,0 +1,126 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""train"""
+import ast
+import datetime
+import os
+import time
+import argparse
+import faiss
+import numpy as np
+from mindspore import context
+from mindspore.common import set_seed
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+from sklearn.random_projection import SparseRandomProjection
+
+from src.dataset import createDataset
+from src.model import wide_resnet50_2
+from src.oneStep import OneStepCell
+from src.operator import embedding_concat, prep_dirs, reshape_embedding
+from src.sampling_methods.kcenter_greedy import kCenterGreedy
+
+set_seed(1)
+
+parser = argparse.ArgumentParser(description='train')
+
+parser.add_argument('--train_url', type=str)
+parser.add_argument('--data_url', type=str)
+parser.add_argument('--isModelArts', type=ast.literal_eval, default=False)
+
+parser.add_argument('--category', type=str, default='screw')
+parser.add_argument('--coreset_sampling_ratio', type=float, default=0.01)
+parser.add_argument('--num_epochs', type=int, default=1, help='Epoch size')
+parser.add_argument('--device_id', type=int, default=0, help='Device id')
+parser.add_argument('--dataset_path', type=str, help='Dataset path')
+parser.add_argument('--pre_ckpt_path', type=str, help='Pretrain checkpoint file path')
+
+args = parser.parse_args()
+
+if args.isModelArts:
+    import moxing as mox
+
+if __name__ == '__main__':
+    context.set_context(mode=context.GRAPH_MODE, device_target='Ascend', save_graphs=False)
+    if args.isModelArts:
+        device_id = int(os.getenv('DEVICE_ID'))
+        context.set_context(device_id=device_id)
+    else:
+        context.set_context(device_id=args.device_id)
+
+    # dataset
+    if args.isModelArts:
+        mox.file.copy_parallel(src_url=args.data_url, dst_url='/cache/dataset/device_' + os.getenv('DEVICE_ID'))
+        train_dataset_path = '/cache/dataset/device_' + os.getenv('DEVICE_ID')
+        prep_path = '/cache/train_output/device_' + os.getenv('DEVICE_ID')
+
+        train_dataset, _, _, _ = 
createDataset(train_dataset_path, args.category)
+        embedding_dir_path, _ = prep_dirs(prep_path, args.category)
+    else:
+        train_dataset, _, _, _ = createDataset(args.dataset_path, args.category)
+        embedding_dir_path, _ = prep_dirs('./', args.category)
+
+    # network
+    network = wide_resnet50_2()
+    param_dict = load_checkpoint(args.pre_ckpt_path)
+    load_param_into_net(network, param_dict)
+
+    for p in network.trainable_params():
+        p.requires_grad = False
+
+    model = OneStepCell(network)
+
+    # train
+    embedding_list = []
+    print("***************start train***************")
+    for epoch in range(args.num_epochs):
+        data_iter = train_dataset.create_dict_iterator()
+        step_size = train_dataset.get_dataset_size()
+
+        for step, data in enumerate(data_iter):
+            # elapsed wall-clock time per step, in milliseconds
+            start = datetime.datetime.fromtimestamp(time.time())
+            features = model(data['img'])
+            end = datetime.datetime.fromtimestamp(time.time())
+            step_time = (end - start).total_seconds() * 1000.0
+            print("step: {}/{}, time: {}ms".format(step, step_size, step_time))
+
+            embedding = embedding_concat(features[0].asnumpy(), features[1].asnumpy())
+            embedding_list.extend(reshape_embedding(embedding))
+
+    total_embeddings = np.array(embedding_list, dtype=np.float32)
+
+    # Random projection
+    randomprojector = SparseRandomProjection(n_components='auto', eps=0.9)
+    randomprojector.fit(total_embeddings)
+
+    # Coreset Subsampling
+    selector = kCenterGreedy(total_embeddings, 0, 0)
+    selected_idx = selector.select_batch(model=randomprojector,
+                                         already_selected=[],
+                                         N=int(total_embeddings.shape[0] * args.coreset_sampling_ratio))
+    embedding_coreset = total_embeddings[selected_idx]
+
+    print('initial embedding size : {}'.format(total_embeddings.shape))
+    print('final embedding size : {}'.format(embedding_coreset.shape))
+
+    # faiss
+    index = faiss.IndexFlatL2(embedding_coreset.shape[1])
+    index.add(embedding_coreset)
+    faiss.write_index(index, os.path.join(embedding_dir_path, 'index.faiss'))
+
+    if args.isModelArts:
+        mox.file.copy_parallel(src_url='/cache/train_output', dst_url=args.train_url)
+
+    print("***************train end***************")
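+
+# A minimal sketch (not executed here) of how the saved coreset index is
+# expected to be consumed at evaluation time. faiss.read_index and
+# Index.search are real faiss APIs; the neighbour count 9 follows the
+# reference PatchCore implementation and is an assumption, not something
+# fixed by this script:
+#
+#     index = faiss.read_index(os.path.join(embedding_dir_path, 'index.faiss'))
+#     patch_scores, _ = index.search(test_embedding, 9)  # L2 distances to coreset
+#     image_score = patch_scores[:, 0].max()             # image-level anomaly score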