From 0ba246190d4bc9c39b792b0bda6d75a842f7c98c Mon Sep 17 00:00:00 2001 From: wzk <1131623843@qq.com> Date: Thu, 9 Jun 2022 03:47:21 +0000 Subject: [PATCH] add infer and modelarts --- research/gnn/sgcn/infer/convert/convert.sh | 50 +++++ research/gnn/sgcn/infer/docker_start_infer.sh | 38 ++++ research/gnn/sgcn/infer/mxbase/CMakeLists.txt | 51 +++++ research/gnn/sgcn/infer/mxbase/build.sh | 46 ++++ research/gnn/sgcn/infer/mxbase/run.sh | 25 +++ research/gnn/sgcn/infer/mxbase/src/SGCN.cpp | 200 ++++++++++++++++++ research/gnn/sgcn/infer/mxbase/src/SGCN.h | 55 +++++ research/gnn/sgcn/infer/mxbase/src/main.cpp | 71 +++++++ research/gnn/sgcn/infer/mxbase/task_metric.py | 139 ++++++++++++ research/gnn/sgcn/infer/preprocess_infer.py | 132 ++++++++++++ .../gnn/sgcn/infer/sdk/pipeline/sgcn.pipeline | 44 ++++ .../gnn/sgcn/infer/sdk/python_SGCN/SdkApi.py | 124 +++++++++++ .../gnn/sgcn/infer/sdk/python_SGCN/main.py | 111 ++++++++++ .../sgcn/infer/sdk/python_SGCN/sgcn_run.sh | 27 +++ research/gnn/sgcn/infer/sdk/task_metric.py | 138 ++++++++++++ research/gnn/sgcn/modelarts/param_parser.py | 96 +++++++++ .../gnn/sgcn/modelarts/pip-requirements.txt | 3 + .../gnn/sgcn/modelarts/train_modelarts.py | 116 ++++++++++ research/gnn/sgcn/scripts/docker_start.sh | 38 ++++ 19 files changed, 1504 insertions(+) create mode 100644 research/gnn/sgcn/infer/convert/convert.sh create mode 100644 research/gnn/sgcn/infer/docker_start_infer.sh create mode 100644 research/gnn/sgcn/infer/mxbase/CMakeLists.txt create mode 100644 research/gnn/sgcn/infer/mxbase/build.sh create mode 100644 research/gnn/sgcn/infer/mxbase/run.sh create mode 100644 research/gnn/sgcn/infer/mxbase/src/SGCN.cpp create mode 100644 research/gnn/sgcn/infer/mxbase/src/SGCN.h create mode 100644 research/gnn/sgcn/infer/mxbase/src/main.cpp create mode 100644 research/gnn/sgcn/infer/mxbase/task_metric.py create mode 100644 research/gnn/sgcn/infer/preprocess_infer.py create mode 100644 research/gnn/sgcn/infer/sdk/pipeline/sgcn.pipeline create mode 100644 research/gnn/sgcn/infer/sdk/python_SGCN/SdkApi.py create mode 100644 research/gnn/sgcn/infer/sdk/python_SGCN/main.py create mode 100644 research/gnn/sgcn/infer/sdk/python_SGCN/sgcn_run.sh create mode 100644 research/gnn/sgcn/infer/sdk/task_metric.py create mode 100644 research/gnn/sgcn/modelarts/param_parser.py create mode 100644 research/gnn/sgcn/modelarts/pip-requirements.txt create mode 100644 research/gnn/sgcn/modelarts/train_modelarts.py create mode 100644 research/gnn/sgcn/scripts/docker_start.sh diff --git a/research/gnn/sgcn/infer/convert/convert.sh b/research/gnn/sgcn/infer/convert/convert.sh new file mode 100644 index 000000000..7e5fa2000 --- /dev/null +++ b/research/gnn/sgcn/infer/convert/convert.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +if [ $# -ne 2 ] +then + echo "Wrong parameter format." 
+ echo "Usage:" + echo " bash $0 [INPUT_AIR_PATH] [AIPP_PATH] [OUTPUT_OM_PATH_NAME]" + echo "Example: " + echo " bash convert_om.sh xxx.air xx_name(with no suffix)" + + exit 1 +fi + +input_air_path=$1 +output_om_path=$2 + +export install_path=/usr/local/Ascend/ +export ASCEND_ATC_PATH=${install_path}/atc +export LD_LIBRARY_PATH=${install_path}/atc/lib64:$LD_LIBRARY_PATH +export PATH=/usr/local/python3.7.5/bin:${install_path}/atc/ccec_compiler/bin:${install_path}/atc/bin:$PATH +export PYTHONPATH=${install_path}/atc/python/site-packages:${install_path}/latest/atc/python/site-packages/auto_tune.egg/auto_tune:${install_path}/atc/python/site-packages/schedule_search.egg:${PYTHONPATH} +export ASCEND_OPP_PATH=${install_path}/opp + +export ASCEND_SLOG_PRINT_TO_STDOUT=1 + +echo "Input AIR file path: ${input_air_path}" +echo "Output OM file path: ${output_om_path}" + +atc --input_format=NCHW \ +--framework=1 \ +--model=${input_air_path} \ +--output=${output_om_path} \ +--soc_version=Ascend310 \ +--disable_reuse_memory=0 \ +--precision_mode=allow_fp32_to_fp16 \ +--op_select_implmode=high_precision diff --git a/research/gnn/sgcn/infer/docker_start_infer.sh b/research/gnn/sgcn/infer/docker_start_infer.sh new file mode 100644 index 000000000..43a3c04de --- /dev/null +++ b/research/gnn/sgcn/infer/docker_start_infer.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +docker_image=$1 +model_dir=$2 + +if [ -z "${docker_image}" ]; then + echo "please input docker_image" + exit 1 +fi + +if [ ! 
-d "${model_dir}" ]; then + echo "please input model_dir" + exit 1 +fi + +docker run -it -u root\ + --device=/dev/davinci0 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm \ + --device=/dev/hisi_hdc \ + -v /usr/local/Ascend/driver:/usr/local/Ascend/driver \ + -v ${model_dir}:${model_dir} \ + ${docker_image} \ + /bin/bash diff --git a/research/gnn/sgcn/infer/mxbase/CMakeLists.txt b/research/gnn/sgcn/infer/mxbase/CMakeLists.txt new file mode 100644 index 000000000..675d52a1d --- /dev/null +++ b/research/gnn/sgcn/infer/mxbase/CMakeLists.txt @@ -0,0 +1,51 @@ +cmake_minimum_required(VERSION 3.10.0) +project(sgcn) + +set(TARGET sgcn) + +add_definitions(-DENABLE_DVPP_INTERFACE) +add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) +add_definitions(-Dgoogle=mindxsdk_private) +add_compile_options(-std=c++11 -fPIE -fstack-protector-all -fPIC -Wall) +add_link_options(-Wl,-z,relro,-z,now,-z,noexecstack -s -pie) + +# Check environment variable +if(NOT DEFINED ENV{ASCEND_HOME}) + message(FATAL_ERROR "please define environment variable:ASCEND_HOME") +endif() +if(NOT DEFINED ENV{ASCEND_VERSION}) + message(WARNING "please define environment variable:ASCEND_VERSION") +endif() +if(NOT DEFINED ENV{ARCH_PATTERN}) + message(WARNING "please define environment variable:ARCH_PATTERN") +endif() +set(ACL_INC_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/include) +set(ACL_LIB_DIR $ENV{ASCEND_HOME}/$ENV{ASCEND_VERSION}/$ENV{ARCH_PATTERN}/acllib/lib64) + +set(MXBASE_ROOT_DIR $ENV{MX_SDK_HOME}) +set(MXBASE_INC ${MXBASE_ROOT_DIR}/include) +set(MXBASE_LIB_DIR ${MXBASE_ROOT_DIR}/lib) +set(MXBASE_POST_LIB_DIR ${MXBASE_ROOT_DIR}/lib/modelpostprocessors) +set(MXBASE_POST_PROCESS_DIR ${MXBASE_ROOT_DIR}/include/MxBase/postprocess/include) +if(DEFINED ENV{MXSDK_OPENSOURCE_DIR}) + set(OPENSOURCE_DIR $ENV{MXSDK_OPENSOURCE_DIR}) +else() + set(OPENSOURCE_DIR ${MXBASE_ROOT_DIR}/opensource) +endif() + +include_directories(${ACL_INC_DIR}) +include_directories(${OPENSOURCE_DIR}/include) +include_directories(${OPENSOURCE_DIR}/include/opencv4) + +include_directories(${MXBASE_INC}) +include_directories(${MXBASE_POST_PROCESS_DIR}) + +link_directories(${ACL_LIB_DIR}) +link_directories(${OPENSOURCE_DIR}/lib) +link_directories(${MXBASE_LIB_DIR}) +link_directories(${MXBASE_POST_LIB_DIR}) + +add_executable(${TARGET} src/main.cpp src/SGCN.cpp) +target_link_libraries(${TARGET} glog cpprest mxbase opencv_world stdc++fs) + +install(TARGETS ${TARGET} RUNTIME DESTINATION ${PROJECT_SOURCE_DIR}/) diff --git a/research/gnn/sgcn/infer/mxbase/build.sh b/research/gnn/sgcn/infer/mxbase/build.sh new file mode 100644 index 000000000..1c895af45 --- /dev/null +++ b/research/gnn/sgcn/infer/mxbase/build.sh @@ -0,0 +1,46 @@ +#!/bin/bash + +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +path_cur=$(dirname $0) + +# set ASCEND_VERSION to ascend-toolkit/latest when it was not specified by user +if [ ! 
"${ASCEND_VERSION}" ]; then + export ASCEND_VERSION=ascend-toolkit/latest + echo "Set ASCEND_VERSION to the default value: ${ASCEND_VERSION}" +else + echo "ASCEND_VERSION is set to ${ASCEND_VERSION} by user" +fi + +if [ ! "${ARCH_PATTERN}" ]; then + # set ARCH_PATTERN to ./ when it was not specified by user + export ARCH_PATTERN=./ + echo "ARCH_PATTERN is set to the default value: ${ARCH_PATTERN}" +else + echo "ARCH_PATTERN is set to ${ARCH_PATTERN} by user" +fi + +cd $path_cur +rm -rf build +mkdir -p build +cd build +cmake .. +make +ret=$? +if [ ${ret} -ne 0 ]; then + echo "Failed to build bert." + exit ${ret} +fi +make install diff --git a/research/gnn/sgcn/infer/mxbase/run.sh b/research/gnn/sgcn/infer/mxbase/run.sh new file mode 100644 index 000000000..479e29d73 --- /dev/null +++ b/research/gnn/sgcn/infer/mxbase/run.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + + +# run +data_path=$1 +data_type=$2 + +rm res +./build/sgcn ${data_path} ${data_type} +exit 0 diff --git a/research/gnn/sgcn/infer/mxbase/src/SGCN.cpp b/research/gnn/sgcn/infer/mxbase/src/SGCN.cpp new file mode 100644 index 000000000..0b5ec4245 --- /dev/null +++ b/research/gnn/sgcn/infer/mxbase/src/SGCN.cpp @@ -0,0 +1,200 @@ +/* +* Copyright 2022 Huawei Technologies Co., Ltd. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*/ + +#include "SGCN.h" +#include <unistd.h> +#include <sys/stat.h> +#include <map> +#include <fstream> +#include <typeinfo> +#include <iomanip> +#include <iostream> +#include "MxBase/DeviceManager/DeviceManager.h" +#include "MxBase/Log/Log.h" + +APP_ERROR sgcn::Init(const InitParam& initParam) { + deviceId_ = initParam.deviceId; + APP_ERROR ret = MxBase::DeviceManager::GetInstance()->InitDevices(); + if (ret != APP_ERR_OK) { + LogError << "Init devices failed, ret=" << ret << "."; + return ret; + } + ret = MxBase::TensorContext::GetInstance()->SetContext(initParam.deviceId); + if (ret != APP_ERR_OK) { + LogError << "Set context failed, ret=" << ret << "."; + return ret; + } + model_ = std::make_shared<MxBase::ModelInferenceProcessor>(); + ret = model_->Init(initParam.modelPath, modelDesc_); + if (ret != APP_ERR_OK) { + LogError << "ModelInferenceProcessor init failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR sgcn::DeInit() { + model_->DeInit(); + MxBase::DeviceManager::GetInstance()->DestroyDevices(); + return APP_ERR_OK; +} + +APP_ERROR sgcn::ReadTensorFromFile(const std::string& file, int* data, uint32_t size) { + if (data == NULL) { + LogError << "input data is invalid."; + return APP_ERR_COMM_INVALID_POINTER; + } + + std::ifstream fp(file); + std::string line; + while (std::getline(fp, line)) { + std::string number; + std::istringstream readstr(line); + for (uint32_t j = 0; j < size; j++) { + std::getline(readstr, number, ' '); + data[j] = atoi(number.c_str()); + } + } + return APP_ERR_OK; +} + +APP_ERROR sgcn::ReadInputTensor(const std::string& fileName, uint32_t index, + std::vector<MxBase::TensorBase>* inputs, uint32_t size, + MxBase::TensorDataType type) { + int* data = new int[size]; + APP_ERROR ret = ReadTensorFromFile(fileName, data, size); + if (ret != APP_ERR_OK) { + LogError << "Read Tensor From File failed."; + return ret; + } + const uint32_t dataSize = modelDesc_.inputTensors[index].tensorSize; + LogInfo << "dataSize:" << dataSize; + MxBase::MemoryData memoryDataDst(dataSize, MxBase::MemoryData::MEMORY_DEVICE, deviceId_); + MxBase::MemoryData memoryDataSrc(reinterpret_cast<void*>(data), dataSize, MxBase::MemoryData::MEMORY_HOST_MALLOC); + ret = MxBase::MemoryHelper::MxbsMallocAndCopy(memoryDataDst, memoryDataSrc); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Memory malloc and copy failed."; + return ret; + } + std::vector<uint32_t> shape = { 1, size }; + inputs->push_back(MxBase::TensorBase(memoryDataDst, false, shape, type)); + return APP_ERR_OK; +} + +APP_ERROR sgcn::Inference(const std::vector<MxBase::TensorBase>& inputs, + std::vector<MxBase::TensorBase>* outputs) { + auto dtypes = model_->GetOutputDataType(); + for (size_t i = 0; i < modelDesc_.outputTensors.size(); ++i) { + std::vector<uint32_t> shape = {}; + for (size_t j = 0; j < modelDesc_.outputTensors[i].tensorDims.size(); ++j) { + shape.push_back((uint32_t)modelDesc_.outputTensors[i].tensorDims[j]); + } + MxBase::TensorBase tensor(shape, dtypes[i], MxBase::MemoryData::MemoryType::MEMORY_DEVICE, deviceId_); + APP_ERROR ret = MxBase::TensorBase::TensorBaseMalloc(tensor); + if (ret != APP_ERR_OK) { + LogError << "TensorBaseMalloc failed, ret=" << ret << "."; + return ret; + } + outputs->push_back(tensor); + } + MxBase::DynamicInfo dynamicInfo = {}; + dynamicInfo.dynamicType = MxBase::DynamicType::STATIC_BATCH; + auto startTime = std::chrono::high_resolution_clock::now(); + APP_ERROR ret = model_->ModelInference(inputs, *outputs, dynamicInfo); + auto endTime = 
std::chrono::high_resolution_clock::now(); + double costMs = std::chrono::duration<double, std::milli>(endTime - startTime).count(); + g_inferCost.push_back(costMs); + if (ret != APP_ERR_OK) { + LogError << "ModelInference failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} + +APP_ERROR sgcn::PostProcess(std::vector<MxBase::TensorBase>* outputs, std::vector<float>* result) { + LogInfo << "Outputs size:" << outputs->size(); + MxBase::TensorBase& tensor = outputs->at(0); + APP_ERROR ret = tensor.ToHost(); + if (ret != APP_ERR_OK) { + LogError << GetError(ret) << "Tensor deploy to host failed."; + return ret; + } + // check tensor is available + auto outputShape = tensor.GetShape(); + uint32_t length = outputShape[0]; + uint32_t classNum = outputShape[1]; + LogInfo << "output shape is: " << outputShape[0] << " " << outputShape[1] << std::endl; + + void* data = tensor.GetBuffer(); + for (uint32_t i = 0; i < length; i++) { + for (uint32_t j = 0; j < classNum; j++) { + // get real data by index, the variable 'data' is address + float value = *(reinterpret_cast<float*>(data) + i * classNum + j); + // LogInfo << "value " << value; + result->push_back(value); + } + } + return APP_ERR_OK; +} + +APP_ERROR sgcn::SaveResult(std::vector<float >* result) { + std::ofstream outfile("res", std::ofstream::app); + if (outfile.fail()) { + LogError << "Failed to open result file: "; + return APP_ERR_COMM_FAILURE; + } + for (uint32_t i = 0; i < result->size(); ++i) { + outfile << std::setiosflags(std::ios::fixed) << std::setprecision(6) << result->at(i) << " "; + } + outfile << std::endl; + outfile.close(); + return APP_ERR_OK; +} + +APP_ERROR sgcn::Process(const std::string& inferPath, const std::string& dataType) { + std::vector<MxBase::TensorBase> inputs = {}; + std::string inputReposFile = inferPath + "repos.txt"; + uint32_t size1 = (dataType == "otc") ? 29248 : 20430; + uint32_t size2 = (dataType == "otc") ? 5044 : 2098; + APP_ERROR ret = ReadInputTensor(inputReposFile, 0, &inputs, size1, MxBase::TENSOR_DTYPE_UINT32); + if (ret != APP_ERR_OK) { + LogError << "Read repos data failed, ret= " << ret << "."; + } + std::string inputRenegFile = inferPath + "reneg.txt"; + ret = ReadInputTensor(inputRenegFile, 1, &inputs, size2, MxBase::TENSOR_DTYPE_UINT32); + if (ret != APP_ERR_OK) { + LogError << "Read reneg data failed, ret= " << ret << "."; + } + + std::vector<MxBase::TensorBase> outputs = {}; + ret = Inference(inputs, &outputs); + if (ret != APP_ERR_OK) { + LogError << "Inference failed, ret=" << ret << "."; + return ret; + } + std::vector<float> result; + ret = PostProcess(&outputs, &result); + if (ret != APP_ERR_OK) { + LogError << "PostProcess failed, ret=" << ret << "."; + return ret; + } + ret = SaveResult(&result); + if (ret != APP_ERR_OK) { + LogError << "CalcF1Score read label failed, ret=" << ret << "."; + return ret; + } + return APP_ERR_OK; +} diff --git a/research/gnn/sgcn/infer/mxbase/src/SGCN.h b/research/gnn/sgcn/infer/mxbase/src/SGCN.h new file mode 100644 index 000000000..1e8aea162 --- /dev/null +++ b/research/gnn/sgcn/infer/mxbase/src/SGCN.h @@ -0,0 +1,55 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef MXBASE_SGCN_H +#define MXBASE_SGCN_H + +#include <memory> +#include <utility> +#include <vector> +#include <string> +#include <map> +#include <opencv2/opencv.hpp> +#include "MxBase/DvppWrapper/DvppWrapper.h" +#include "MxBase/ModelInfer/ModelInferenceProcessor.h" +#include "MxBase/Tensor/TensorContext/TensorContext.h" + +extern std::vector<double> g_inferCost; + +struct InitParam { + uint32_t deviceId; + std::string datasetPath; + std::string modelPath; +}; + +class sgcn { + public: + APP_ERROR Init(const InitParam& initParam); + APP_ERROR DeInit(); + APP_ERROR Inference(const std::vector<MxBase::TensorBase>& inputs, std::vector<MxBase::TensorBase>* outputs); + APP_ERROR Process(const std::string& inferPath, const std::string& dataType); + APP_ERROR PostProcess(std::vector<MxBase::TensorBase>* outputs, std::vector<float>* result); + protected: + APP_ERROR ReadTensorFromFile(const std::string& file, int* data, uint32_t size); + APP_ERROR ReadInputTensor(const std::string& fileName, uint32_t index, std::vector<MxBase::TensorBase>* inputs, + uint32_t size, MxBase::TensorDataType type); + APP_ERROR SaveResult(std::vector<float>* result); + private: + std::shared_ptr<MxBase::ModelInferenceProcessor> model_; + MxBase::ModelDesc modelDesc_ = {}; + uint32_t deviceId_ = 0; +}; +#endif diff --git a/research/gnn/sgcn/infer/mxbase/src/main.cpp b/research/gnn/sgcn/infer/mxbase/src/main.cpp new file mode 100644 index 000000000..56ac45c70 --- /dev/null +++ b/research/gnn/sgcn/infer/mxbase/src/main.cpp @@ -0,0 +1,71 @@ +/** + * Copyright 2022 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <dirent.h> +#include <unistd.h> +#include <algorithm> +#include <fstream> +#include <iostream> +#include <vector> +#include "SGCN.h" +#include "MxBase/Log/Log.h" + +std::vector<double> g_inferCost; + +void InitSgcnParam(InitParam* initParam) { + initParam->deviceId = 0; + initParam->modelPath = "../convert/sgcn.om"; +} + +int main(int argc, char* argv[]) { + if (argc <= 2) { + LogWarn << "Please input dataset path and dataset type"; + return APP_ERR_OK; + } + + InitParam initParam; + InitSgcnParam(&initParam); + auto sgcnBase = std::make_shared<sgcn>(); + APP_ERROR ret = sgcnBase->Init(initParam); + if (ret != APP_ERR_OK) { + LogError << "FFMbase init failed, ret=" << ret << "."; + return ret; + } + std::string inferPath = argv[1]; + std::string dataType = argv[2]; + std::vector<std::string> files; + files.push_back(argv[1]); + for (uint32_t i = 0; i < files.size(); i++) { + LogInfo << "read file name: " << files[i]; + ret = sgcnBase->Process(inferPath, dataType); + if (ret != APP_ERR_OK) { + LogError << "Gcnbase process failed, ret=" << ret << "."; + sgcnBase->DeInit(); + return ret; + } + LogInfo << "Finish " << i + 1 << " file"; + } + LogInfo << "======== Inference finished ========"; + sgcnBase->DeInit(); + double costSum = 0; + for (uint32_t i = 0; i < g_inferCost.size(); i++) { + costSum += g_inferCost[i]; + } + LogInfo << "Infer sum " << g_inferCost.size() << ", cost total time: " << costSum << " ms."; + LogInfo << "The throughput: " << g_inferCost.size() * 1000 / costSum << " bin/sec."; + return APP_ERR_OK; +} + diff --git a/research/gnn/sgcn/infer/mxbase/task_metric.py b/research/gnn/sgcn/infer/mxbase/task_metric.py new file mode 100644 index 000000000..0ada98c26 --- /dev/null +++ b/research/gnn/sgcn/infer/mxbase/task_metric.py @@ -0,0 +1,139 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import argparse +import numpy as np +import pandas as pd + +from mindspore import load_checkpoint +from sklearn.metrics import f1_score +from sklearn.metrics import roc_auc_score +from sklearn.model_selection import train_test_split + +def parse_args(): + """set and check parameters.""" + parser = argparse.ArgumentParser(description="bert process") + + parser.add_argument("--res_path", type=str, default="res", help="result numpy path") + parser.add_argument("--data_dir", type=str, default="../data/bitcoin_alpha.csv", + help="Dataset") + parser.add_argument("--ckpt_path", type=str, + default="../data/sgcn_ascend_v130_bitcoinalpha_research_gnn_bs64_AUC80.81.ckpt", + help="ckpt") + parser.add_argument("--dataset_type", type=str, default="alpha", help="result numpy path") + args_opt = parser.parse_args() + return args_opt + +# load static result +def read_restxt(res_path): + return np.loadtxt(res_path) + + +def read_graph(data_path): + """ + Method to read graph and create a target matrix with pooled adjacency matrix powers. + Args: + args(Arguments): Arguments object. + + Returns: + edge(dicts): Edges dictionary. 
+ """ + dataset = pd.read_csv(data_path).values.tolist() + edges = {} + edges["positive_edges"] = [edge[0:2] for edge in dataset if edge[2] == 1] + edges["negative_edges"] = [edge[0:2] for edge in dataset if edge[2] == -1] + edges["ecount"] = len(dataset) + edges["ncount"] = len(set([edge[0] for edge in dataset]+[edge[1] for edge in dataset])) + return edges + +def setup_dataset(data_path): + edges = read_graph(data_path) + positive_edges, test_positive_edges = train_test_split(edges["positive_edges"], test_size=0.2, random_state=1) + negative_edges, test_negative_edges = train_test_split(edges["negative_edges"], test_size=0.2, random_state=1) + ecount = len(positive_edges + negative_edges) + positive_edges = np.array(positive_edges, dtype=np.int32).T + negative_edges = np.array(negative_edges, dtype=np.int32).T + y = np.array([0 if i < int(ecount / 2) else 1 for i in range(ecount)] + [2] * (ecount * 2)) + y = np.array(y, np.int32) + print('self.positive_edges', positive_edges.shape, type(positive_edges)) + print('self.negative_edges', negative_edges.shape, type(negative_edges)) + print('self.y', y.shape, type(y)) + print(positive_edges.dtype, negative_edges.dtype, y.dtype) + return test_positive_edges, test_negative_edges + +def remove_self_loops(edge_index): + """ + remove self loops + Args: + edge_index (LongTensor): The edge indices. + + Returns: + Tensor(edge_index): removed self loops + """ + mask = edge_index[0] != edge_index[1] + edge_index = edge_index[:, mask] + return Tensor(edge_index) + +def calculate_auc(targets, predictions): + """ + Calculate performance measures on test dataset. + Args: + targets(Tensor): Ground truth. + predictions(Tensor): Model outputs. + + Returns: + auc(Float32): AUC result. + f1(Float32): F1-Score result. + """ + targets = [0 if target == 1 else 1 for target in targets] + auc = roc_auc_score(targets, predictions) + pred = [1 if p > 0.5 else 0 for p in predictions] + f1 = f1_score(targets, pred) + return auc, f1 + +def softmax(x): + """Softmax""" + t_max = np.max(x, axis=1, keepdims=True) # returns max of each row and keeps same dims + e_x = np.exp(x - t_max) # subtracts each row with its max value + t_sum = np.sum(e_x, axis=1, keepdims=True) # returns sum of each row and keeps same dims + f_x = e_x / t_sum + return f_x + +def test_result(arg): + pos_test, neg_test = setup_dataset(arg.data_dir) + preds = read_restxt(arg.res_path) + if arg.dataset_type == "otc": + preds = preds.reshape(5881, 64) + else: + preds = preds.reshape(3783, 64) + param_dict = load_checkpoint(arg.ckpt_path) + weights = np.array(param_dict['regression_weights'].asnumpy()) + bias = np.array(param_dict['regression_bias'].asnumpy()) + score_positive_edges = np.array(pos_test, dtype=np.int32).T + score_negative_edges = np.array(neg_test, dtype=np.int32).T + test_positive_z = np.concatenate((preds[score_positive_edges[0, :], :], + preds[score_positive_edges[1, :], :]), axis=1) + test_negative_z = np.concatenate((preds[score_negative_edges[0, :], :], + preds[score_negative_edges[1, :], :]), axis=1) + scores = np.dot(np.concatenate((test_positive_z, test_negative_z), axis=0), weights) + bias + probability_scores = np.exp(softmax(scores)) + predictions = probability_scores[:, 0] / probability_scores[:, 0: 2].sum(1) + targets = [0] * len(pos_test) + [1] * len(neg_test) + auc, f1 = calculate_auc(targets, predictions) + print("Test set results:", "auc=", "{:.5f}".format(auc), "f1=", "{:.5f}".format(f1)) + +if __name__ == '__main__': + args = parse_args() + test_result(args) diff --git 
a/research/gnn/sgcn/infer/preprocess_infer.py b/research/gnn/sgcn/infer/preprocess_infer.py new file mode 100644 index 000000000..a7dfdc8f8 --- /dev/null +++ b/research/gnn/sgcn/infer/preprocess_infer.py @@ -0,0 +1,132 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +Evaluation script +""" +import argparse +import os + +import pandas as pd +import numpy as np +from sklearn.model_selection import train_test_split + + +def read_graph(args): + """ + Method to read graph and create a target matrix with pooled adjacency matrix powers. + Args: + args(Arguments): Arguments object. + + Returns: + edge(dicts): Edges dictionary. + """ + dataset = pd.read_csv(args.features_path).values.tolist() + edges = {} + edges["positive_edges"] = [edge[0:2] for edge in dataset if edge[2] == 1] + edges["negative_edges"] = [edge[0:2] for edge in dataset if edge[2] == -1] + edges["ecount"] = len(dataset) + edges["ncount"] = len(set([edge[0] for edge in dataset]+[edge[1] for edge in dataset])) + return edges + + +def setup_dataset(args, edges): + """ + Returns: + X(Tensor): Dataset. + positive_edges(Tensor): Positive edges for training. + negative_edges(Tensor): Negative edges for training. + test_positive_edges(Tensor): Positive edges for testing. + test_negative_edges(Tensor): Negative edges for testing. + """ + positive_edges, test_positive_edges = train_test_split(edges["positive_edges"], + test_size=args.test_size, random_state=1) + + negative_edges, test_negative_edges = train_test_split(edges["negative_edges"], + test_size=args.test_size, random_state=1) + ecount = len(positive_edges + negative_edges) + + X = np.array(pd.read_csv(args.features_path)) + X = np.array(X.tolist()) + positive_edges = np.array(positive_edges, dtype=np.int32).T + negative_edges = np.array(negative_edges, dtype=np.int32).T + y = np.array([0 if i < int(ecount / 2) else 1 for i in range(ecount)] + [2] * (ecount * 2)) + y = np.array(y, np.int32) + X = np.array(X, np.float32) + return X, positive_edges, negative_edges, test_positive_edges, test_negative_edges + + +def remove_self_loops(edge_index): + """ + remove self loops + Args: + edge_index (LongTensor): The edge indices. 
+ + Returns: + Tensor(edge_index): removed self loops + """ + mask = edge_index[0] != edge_index[1] + edge_index = edge_index[:, mask] + return edge_index + + +def main(): + def w2txt(file, data): + s = "" + for i in range(len(data[0])): + s = s + str(data[0][i]) + s = s + " " + with open(file, "w") as f: + f.write(s) + + # Set DEVICE_ID + parser = argparse.ArgumentParser(description="SGCN eval") + parser.add_argument("--device_id", help="device_id", default=2, type=int) + parser.add_argument("--device_target", type=str, default="Ascend", + choices=["Ascend"], help="device target (default: Ascend)") + parser.add_argument("--edge_path", nargs="?", + default="./data/bitcoin_alpha.csv", help="Edge list csv.") + parser.add_argument("--result_path", nargs="?", + default="./data/", help="result path") + parser.add_argument("--features_path", nargs="?", + default="./data/bitcoin_alpha.csv", help="Edge list csv.") + parser.add_argument("--test-size", type=float, + default=0.2, help="Test dataset size. Default is 0.2.") + parser.add_argument("--dataset_type", type=str, default="alpha") + parser.add_argument("--seed", type=int, default=42, + help="Random seed for sklearn pre-training. Default is 42.") + parser.add_argument("--spectral-features", default=True, dest="spectral_features", action="store_true") + parser.add_argument("--reduction-iterations", type=int, + default=30, help="Number of SVD iterations. Default is 30.") + parser.add_argument("--reduction-dimensions", type=int, + default=64, help="Number of SVD feature extraction dimensions. Default is 64.") + + args = parser.parse_args() + edges = read_graph(args) + dataset = setup_dataset(args, edges) + pos_edg, neg_edg = dataset[1], dataset[2] + repos, reneg = remove_self_loops(pos_edg), remove_self_loops(neg_edg) + + if args.dataset_type == "alpha": + repos = np.array(repos, dtype=np.int32).reshape(1, 20430) + reneg = np.array(reneg, dtype=np.int32).reshape(1, 2098) + else: + repos = np.array(repos, dtype=np.int32).reshape(1, 29248) + reneg = np.array(reneg, dtype=np.int32).reshape(1, 5044) + + w2txt(os.path.join(args.result_path, "repos.txt"), repos) + w2txt(os.path.join(args.result_path, "reneg.txt"), reneg) + +if __name__ == "__main__": + main() diff --git a/research/gnn/sgcn/infer/sdk/pipeline/sgcn.pipeline b/research/gnn/sgcn/infer/sdk/pipeline/sgcn.pipeline new file mode 100644 index 000000000..64fcfae84 --- /dev/null +++ b/research/gnn/sgcn/infer/sdk/pipeline/sgcn.pipeline @@ -0,0 +1,44 @@ +{ + "sgcn": { + "stream_config": { + "deviceId": "0" + }, + "appsrc0": { + "props": { + "blocksize": "409600" + }, + "factory": "appsrc", + "next": "mxpi_tensorinfer0:0" + }, + "appsrc1": { + "props": { + "blocksize": "409600" + }, + "factory": "appsrc", + "next": "mxpi_tensorinfer0:1" + }, + + "mxpi_tensorinfer0": { + "props": { + "dataSource": "appsrc0,appsrc1", + "modelPath": "../../convert/sgcn.om", + "outputDeviceId": "0" + }, + "factory": "mxpi_tensorinfer", + "next": "mxpi_dataserialize0" + }, + "mxpi_dataserialize0": { + "props": { + "outputDataKeys": "mxpi_tensorinfer0" + }, + "factory": "mxpi_dataserialize", + "next": "appsink0" + }, + "appsink0": { + "props": { + "blocksize": "4096000" + }, + "factory": "appsink" + } + } +} diff --git a/research/gnn/sgcn/infer/sdk/python_SGCN/SdkApi.py b/research/gnn/sgcn/infer/sdk/python_SGCN/SdkApi.py new file mode 100644 index 000000000..db6e209e2 --- /dev/null +++ b/research/gnn/sgcn/infer/sdk/python_SGCN/SdkApi.py @@ -0,0 +1,124 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under 
the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" Model Infer """ +import json +import logging +import numpy as np +import MxpiDataType_pb2 as MxpiDataType +from StreamManagerApi import StreamManagerApi, MxDataInput, InProtobufVector, MxProtobufIn, StringVector + + +class SdkApi: + """ Class SdkApi """ + INFER_TIMEOUT = 100000 + STREAM_NAME = "sgcn" + + def __init__(self, pipeline_cfg): + self.pipeline_cfg = pipeline_cfg + self._stream_api = None + self._data_input = None + self._device_id = None + + def init(self): + """ Initialize Stream """ + with open(self.pipeline_cfg, 'r') as fp: + self._device_id = int( + json.loads(fp.read())[self.STREAM_NAME]["stream_config"] + ["deviceId"]) + print(f"The device id: {self._device_id}.") + + # create api + self._stream_api = StreamManagerApi() + + # init stream mgr + ret = self._stream_api.InitManager() + if ret != 0: + print(f"Failed to init stream manager, ret={ret}.") + return False + + # create streams + with open(self.pipeline_cfg, 'rb') as fp: + pipe_line = fp.read() + + ret = self._stream_api.CreateMultipleStreams(pipe_line) + if ret != 0: + print(f"Failed to create stream, ret={ret}.") + return False + + self._data_input = MxDataInput() + return True + + def __del__(self): + if not self._stream_api: + return + + self._stream_api.DestroyAllStreams() + + def _send_protobuf(self, stream_name, plugin_id, element_name, buf_type, + pkg_list): + """ Send Stream """ + protobuf = MxProtobufIn() + protobuf.key = element_name + protobuf.type = buf_type + protobuf.protobuf = pkg_list.SerializeToString() + protobuf_vec = InProtobufVector() + protobuf_vec.push_back(protobuf) + err_code = self._stream_api.SendProtobuf(stream_name, plugin_id, + protobuf_vec) + if err_code != 0: + logging.error( + "Failed to send data to stream, stream_name(%s), plugin_id(%s), element_name(%s), " + "buf_type(%s), err_code(%s).", stream_name, plugin_id, + element_name, buf_type, err_code) + return False + return True + + def send_tensor_input(self, stream_name, plugin_id, element_name, + input_data, input_shape, data_type): + """ Send Tensor """ + tensor_list = MxpiDataType.MxpiTensorPackageList() + for i in range(2): + data = np.expand_dims(input_data[i, :], 0) + tensor_pkg = tensor_list.tensorPackageVec.add() + # init tensor vector + tensor_vec = tensor_pkg.tensorVec.add() + tensor_vec.deviceId = self._device_id + tensor_vec.memType = 0 + tensor_vec.tensorShape.extend(data.shape) + tensor_vec.tensorDataType = data_type + tensor_vec.dataStr = data.tobytes() + tensor_vec.tensorDataSize = data.shape[0] + print(type(tensor_list)) + buf_type = b"MxTools.MxpiTensorPackageList" + return self._send_protobuf(stream_name, plugin_id, element_name, + buf_type, tensor_list) + + def get_result(self, stream_name, out_plugin_id=0): + """ Get Result """ + keys = [b"mxpi_tensorinfer0"] + keyVec = StringVector() + for key in keys: + keyVec.push_back(key) + infer_result = self._stream_api.GetProtobuf(stream_name, 0, keyVec) + if 
infer_result.size() == 0: + print("infer_result is null") + exit() + if infer_result[0].errorCode != 0: + print("GetProtobuf error. errorCode=%d" % ( + infer_result[0].errorCode)) + exit() + TensorList = MxpiDataType.MxpiTensorPackageList() + TensorList.ParseFromString(infer_result[0].messageBuf) + return TensorList diff --git a/research/gnn/sgcn/infer/sdk/python_SGCN/main.py b/research/gnn/sgcn/infer/sdk/python_SGCN/main.py new file mode 100644 index 000000000..ffb10f75f --- /dev/null +++ b/research/gnn/sgcn/infer/sdk/python_SGCN/main.py @@ -0,0 +1,111 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import time +import numpy as np +import pandas as pd +from SdkApi import SdkApi +from sklearn.model_selection import train_test_split + +STREAM_NAME = b'sgcn' +TENSOR_DTYPE_FLOAT16 = 1 +TENSOR_DTYPE_INT32 = 3 + + +def parse_args(): + """set and check parameters.""" + parser = argparse.ArgumentParser(description="bert process") + parser.add_argument("--pipeline_path", type=str, default="../pipeline/sgcn.pipeline", help="SDK infer pipeline") + parser.add_argument("--data_dir", type=str, + default="../../data/bitcoin_alpha.csv", help="Path where the dataset is saved") + args_opt = parser.parse_args() + return args_opt + + +def inference(): + args = parse_args() + + # init stream manager + sdk_api = SdkApi(args.pipeline_path) + if not sdk_api.init(): + exit(-1) + + start_time = time.time() + dataset = setup_dataset(args.data_dir) + pos_edg, neg_edg = dataset[1], dataset[2] + repos, reneg = remove_self_loops(pos_edg), remove_self_loops(neg_edg) + sdk_api.send_tensor_input(STREAM_NAME, 0, b'appsrc0', repos, repos.shape, TENSOR_DTYPE_INT32) + sdk_api.send_tensor_input(STREAM_NAME, 1, b'appsrc1', reneg, reneg.shape, TENSOR_DTYPE_INT32) + print("Getting result") + result = sdk_api.get_result(STREAM_NAME) + pred = np.array( + [np.frombuffer(result.tensorPackageVec[k].tensorVec[0].dataStr, dtype=np.float32) for k in range(2)]) + end_time = time.time() - start_time + print(f"The inference time is {end_time}") + np.savetxt('res', pred, fmt="%f") + +def read_graph(data_path): + """ + Method to read graph and create a target matrix with pooled adjacency matrix powers. + Args: + args(Arguments): Arguments object. + + Returns: + edge(dicts): Edges dictionary. + """ + dataset = pd.read_csv(data_path).values.tolist() + edges = {} + edges["positive_edges"] = [edge[0:2] for edge in dataset if edge[2] == 1] + edges["negative_edges"] = [edge[0:2] for edge in dataset if edge[2] == -1] + edges["ecount"] = len(dataset) + edges["ncount"] = len(set([edge[0] for edge in dataset]+[edge[1] for edge in dataset])) + return edges + +def setup_dataset(data_path): + """ + Returns: + X(Tensor): Dataset. + positive_edges(Tensor): Positive edges for training. + negative_edges(Tensor): Negative edges for training. + test_positive_edges(Tensor): Positive edges for testing. + test_negative_edges(Tensor): Negative edges for testing. 
+ """ + edges = read_graph(data_path) + positive_edges, test_positive_edges = train_test_split(edges["positive_edges"], test_size=0.2, random_state=1) + negative_edges, test_negative_edges = train_test_split(edges["negative_edges"], test_size=0.2, random_state=1) + ecount = len(positive_edges + negative_edges) + X = np.array(pd.read_csv(data_path)) + X = np.array(X.tolist()) + positive_edges = np.array(positive_edges, dtype=np.int32).T + negative_edges = np.array(negative_edges, dtype=np.int32).T + y = np.array([0 if i < int(ecount / 2) else 1 for i in range(ecount)] + [2] * (ecount * 2)) + y = np.array(y, np.int32) + X = np.array(X, np.float32) + return X, positive_edges, negative_edges, test_positive_edges, test_negative_edges +def remove_self_loops(edge_index): + """ + remove self loops + Args: + edge_index (LongTensor): The edge indices. + + Returns: + Tensor(edge_index): removed self loops + """ + mask = edge_index[0] != edge_index[1] + edge_index = edge_index[:, mask] + return edge_index + +if __name__ == '__main__': + inference() diff --git a/research/gnn/sgcn/infer/sdk/python_SGCN/sgcn_run.sh b/research/gnn/sgcn/infer/sdk/python_SGCN/sgcn_run.sh new file mode 100644 index 000000000..9fda82e10 --- /dev/null +++ b/research/gnn/sgcn/infer/sdk/python_SGCN/sgcn_run.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +pipeline_path=$1 +data_dir=$2 + +set -e + +# Simple log helper functions +info() { echo -e "\033[1;34m[INFO ][MxStream] $1\033[1;37m" ; } +warn() { echo >&2 -e "\033[1;31m[WARN ][MxStream] $1\033[1;37m" ; } + +python3 main.py --pipeline_path=$pipeline_path --data_dir=$data_dir +exit 0 diff --git a/research/gnn/sgcn/infer/sdk/task_metric.py b/research/gnn/sgcn/infer/sdk/task_metric.py new file mode 100644 index 000000000..eac5b7176 --- /dev/null +++ b/research/gnn/sgcn/infer/sdk/task_metric.py @@ -0,0 +1,138 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import argparse +import numpy as np +import pandas as pd +from mindspore import load_checkpoint +from sklearn.metrics import f1_score +from sklearn.metrics import roc_auc_score +from sklearn.model_selection import train_test_split + +def parse_args(): + """set and check parameters.""" + parser = argparse.ArgumentParser(description="bert process") + parser.add_argument("--res_path", type=str, default="./python_SGCN/res", help="result numpy path") + parser.add_argument("--res_path2", type=str, default="res", help="result numpy path") + parser.add_argument("--data_dir", type=str, default="../data/bitcoin_alpha.csv", + help="Dataset") + parser.add_argument("--ckpt_path", type=str, + default="../data/sgcn_ascend_v130_bitcoinalpha_research_gnn_bs64_AUC80.81.ckpt", + help="ckpt") + parser.add_argument("--dataset_type", type=str, default="alpha", help="result numpy path") + args_opt = parser.parse_args() + return args_opt + +# load static result +def read_restxt(res_path): + return np.loadtxt(res_path) + + +def read_graph(data_path): + """ + Method to read graph and create a target matrix with pooled adjacency matrix powers. + Args: + args(Arguments): Arguments object. + + Returns: + edge(dicts): Edges dictionary. + """ + dataset = pd.read_csv(data_path).values.tolist() + edges = {} + edges["positive_edges"] = [edge[0:2] for edge in dataset if edge[2] == 1] + edges["negative_edges"] = [edge[0:2] for edge in dataset if edge[2] == -1] + edges["ecount"] = len(dataset) + edges["ncount"] = len(set([edge[0] for edge in dataset]+[edge[1] for edge in dataset])) + return edges + +def setup_dataset(data_path): + edges = read_graph(data_path) + positive_edges, test_positive_edges = train_test_split(edges["positive_edges"], test_size=0.2, random_state=1) + negative_edges, test_negative_edges = train_test_split(edges["negative_edges"], test_size=0.2, random_state=1) + ecount = len(positive_edges + negative_edges) + positive_edges = np.array(positive_edges, dtype=np.int32).T + negative_edges = np.array(negative_edges, dtype=np.int32).T + y = np.array([0 if i < int(ecount / 2) else 1 for i in range(ecount)] + [2] * (ecount * 2)) + y = np.array(y, np.int32) + print('self.positive_edges', positive_edges.shape, type(positive_edges)) + print('self.negative_edges', negative_edges.shape, type(negative_edges)) + print('self.y', y.shape, type(y)) + print(positive_edges.dtype, negative_edges.dtype, y.dtype) + return test_positive_edges, test_negative_edges + +def remove_self_loops(edge_index): + """ + remove self loops + Args: + edge_index (LongTensor): The edge indices. + + Returns: + Tensor(edge_index): removed self loops + """ + mask = edge_index[0] != edge_index[1] + edge_index = edge_index[:, mask] + return Tensor(edge_index) + +def calculate_auc(targets, predictions): + """ + Calculate performance measures on test dataset. + Args: + targets(Tensor): Ground truth. + predictions(Tensor): Model outputs. + + Returns: + auc(Float32): AUC result. + f1(Float32): F1-Score result. 
+ """ + targets = [0 if target == 1 else 1 for target in targets] + auc = roc_auc_score(targets, predictions) + pred = [1 if p > 0.5 else 0 for p in predictions] + f1 = f1_score(targets, pred) + return auc, f1 + +def softmax(x): + """Softmax""" + t_max = np.max(x, axis=1, keepdims=True) # returns max of each row and keeps same dims + e_x = np.exp(x - t_max) # subtracts each row with its max value + t_sum = np.sum(e_x, axis=1, keepdims=True) # returns sum of each row and keeps same dims + f_x = e_x / t_sum + return f_x + +def test_result(arg): + pos_test, neg_test = setup_dataset(arg.data_dir) + preds = read_restxt(arg.res_path) + if arg.dataset_type == "otc": + preds = preds.reshape(5881, 64) + else: + preds = preds.reshape(3783, 64) + param_dict = load_checkpoint(args.ckpt_path) + weights = np.array(param_dict['regression_weights'].asnumpy()) + bias = np.array(param_dict['regression_bias'].asnumpy()) + score_positive_edges = np.array(pos_test, dtype=np.int32).T + score_negative_edges = np.array(neg_test, dtype=np.int32).T + test_positive_z = np.concatenate((preds[score_positive_edges[0, :], :], + preds[score_positive_edges[1, :], :]), axis=1) + test_negative_z = np.concatenate((preds[score_negative_edges[0, :], :], + preds[score_negative_edges[1, :], :]), axis=1) + scores = np.dot(np.concatenate((test_positive_z, test_negative_z), axis=0), weights) + bias + probability_scores = np.exp(softmax(scores)) + predictions = probability_scores[:, 0] / probability_scores[:, 0: 2].sum(1) + targets = [0] * len(pos_test) + [1] * len(neg_test) + auc, f1 = calculate_auc(targets, predictions) + print("Test set results:", "auc=", "{:.5f}".format(auc), "f1=", "{:.5f}".format(f1)) + +if __name__ == '__main__': + args = parse_args() + test_result(args) diff --git a/research/gnn/sgcn/modelarts/param_parser.py b/research/gnn/sgcn/modelarts/param_parser.py new file mode 100644 index 000000000..f285ee72d --- /dev/null +++ b/research/gnn/sgcn/modelarts/param_parser.py @@ -0,0 +1,96 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""SGCN parameter parser.""" +import argparse +import ast + + +def parameter_parser(): + """ + A method to parse up command line parameters. + By default it gives an embedding of the Bitcoin OTC dataset. + The default hyperparameters give a good quality representation without grid search. + Representations are sorted by node ID. 
+ """ + parser = argparse.ArgumentParser(description="Run SGCN.") + parser.add_argument("--distributed", type=ast.literal_eval, default=False, help="Distributed train") + parser.add_argument("--device_id", type=int, default=0, help="Device id") + parser.add_argument("--checkpoint_file", type=str, default='sgcn_alpha', help="Checkpoint file path.") + parser.add_argument("--device_target", type=str, default="Ascend", + choices=["Ascend", "GPU", "CPU"], help="device target (default: Ascend)") + parser.add_argument("--data_path", + nargs="?", + default="/input", help="Edge list csv.") + parser.add_argument("--data_type", + nargs="?", + default="alpha") + parser.add_argument('--save_ckpt', type=str, default='../output', + help='if is test, must provide path where the trained ckpt file') + + parser.add_argument("--edge-path", + nargs="?", + default="./input/bitcoin_alpha.csv", help="Edge list csv.") + + parser.add_argument("--features-path", + nargs="?", + default="./input/bitcoin_alpha.csv", help="Edge list csv.") + + parser.add_argument("--epochs", + type=int, + default=500, help="Number of training epochs. Default is 500.") + + parser.add_argument("--reduction-iterations", + type=int, + default=30, help="Number of SVD iterations. Default is 30.") + + parser.add_argument("--reduction-dimensions", + type=int, + default=64, help="Number of SVD feature extraction dimensions. Default is 64.") + + parser.add_argument("--seed", + type=int, + default=42, help="Random seed for sklearn pre-training. Default is 42.") + + parser.add_argument("--lamb", + type=float, + default=1.0, help="Embedding regularization parameter. Default is 1.0.") + + parser.add_argument("--test-size", + type=float, + default=0.2, help="Test dataset size. Default is 0.2.") + + parser.add_argument("--learning-rate", + type=float, + default=0.01, help="Learning rate. Default is 0.01.") + + parser.add_argument("--weight-decay", + type=float, + default=10**-5, help="Weight decay. Default is 10^-5.") + + parser.add_argument("--spectral-features", + dest="spectral_features", + action="store_true") + + parser.add_argument("--general-features", + dest="spectral_features", + action="store_false") + + parser.add_argument("--norm", type=ast.literal_eval, default=True, help="Normalize features or not.") + parser.add_argument("--norm-embed", type=ast.literal_eval, default=True, help="Normalize embedding or not.") + parser.add_argument("--bias", type=ast.literal_eval, default=True, help="Add bias or not.") + + parser.set_defaults(spectral_features=True) + + return parser.parse_args() diff --git a/research/gnn/sgcn/modelarts/pip-requirements.txt b/research/gnn/sgcn/modelarts/pip-requirements.txt new file mode 100644 index 000000000..039ea3cd5 --- /dev/null +++ b/research/gnn/sgcn/modelarts/pip-requirements.txt @@ -0,0 +1,3 @@ +pandas >= 0.23.4 +texttable >= 1.5.0 +scipy >= 1.1.0 diff --git a/research/gnn/sgcn/modelarts/train_modelarts.py b/research/gnn/sgcn/modelarts/train_modelarts.py new file mode 100644 index 000000000..8f14f6c80 --- /dev/null +++ b/research/gnn/sgcn/modelarts/train_modelarts.py @@ -0,0 +1,116 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""SGCN runner.""" +import os +from mindspore import Tensor +from mindspore import export +from mindspore import context +from mindspore import load_checkpoint +from mindspore import load_param_into_net +from mindspore.common import set_seed +from mindspore.communication import init +from mindspore.communication.management import get_rank +from mindspore.context import ParallelMode +import moxing as mox + +from src.ms_utils import read_graph +from src.ms_utils import score_printer +from src.ms_utils import tab_printer +from src.param_parser import parameter_parser +from src.sgcn import SignedGCNTrainer +from src.sgcn import SignedGraphConvolutionalNetwork + +def remove_self_loops(edge_index): + """ + remove self loops + Args: + edge_index (LongTensor): The edge indices. + + Returns: + Tensor(edge_index): removed self loops + """ + mask = edge_index[0] != edge_index[1] + edge_index = edge_index.asnumpy()[:, mask.asnumpy()] + return Tensor(edge_index) + +def main(): + """ + Parsing command line parameters. + Creating target matrix. + Fitting an SGCN. + Predicting edge signs and saving the embedding. + """ + args = parameter_parser() + set_seed(args.seed) + device_id = int(os.getenv('DEVICE_ID', args.device_id)) + + CKPT_OUTPUT_PATH = "../" + mox.file.copy_parallel(args.data_path, '/cache') + + args.edge_path = args.data_path + "/bitcoin_" + args.data_type + ".csv" + args.features_path = args.data_path + "/bitcoin_" + args.data_type + ".csv" + + context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, device_id=device_id) + args.rank_log_save_ckpt_flag = 1 + if args.distributed: + if args.device_target == 'Ascend': + init() + else: + init('nccl') + context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True) + args.rank = get_rank() + if args.rank != 0: + args.rank_log_save_ckpt_flag = 0 + edges = read_graph(args) + if args.rank_log_save_ckpt_flag: + tab_printer(args) + trainer = SignedGCNTrainer(args, edges) + print('******************** set up dataset... ********************') + dataset = trainer.setup_dataset() + print('******************** set up dataset! ********************') + print("\nTraining started.\n") + trainer.create_and_train_model() + print('******************** finish training! ********************') + if args.test_size > 0: + score_printer(trainer.logs) + else: + trainer = SignedGCNTrainer(args, edges) + dataset = trainer.setup_dataset() + trainer.create_and_train_model() + + print('******************** export! 
********************') + input_x, pos_edg, neg_edg = dataset[0], dataset[1], dataset[2] + repos, reneg = remove_self_loops(pos_edg), remove_self_loops(neg_edg) + net = SignedGraphConvolutionalNetwork(input_x, args.norm, args.norm_embed, args.bias) + # Load parameters from checkpoint into network + param_dict = load_checkpoint(args.checkpoint_file + '_auc.ckpt') + load_param_into_net(net, param_dict) + # export + export(net, repos, reneg, + file_name="sgcn_auc", file_format="AIR") + + param_dict = load_checkpoint(args.checkpoint_file + '_f1.ckpt') + load_param_into_net(net, param_dict) + # export + export(net, repos, reneg, + file_name="sgcn_f1", file_format="AIR") + + print("==========================================") + print("sgcn_auc.air and sgcn_f1.air exported successfully!") + print("==========================================") + mox.file.copy_parallel(CKPT_OUTPUT_PATH, args.save_ckpt) + +if __name__ == "__main__": + main() diff --git a/research/gnn/sgcn/scripts/docker_start.sh b/research/gnn/sgcn/scripts/docker_start.sh new file mode 100644 index 000000000..55b2815fb --- /dev/null +++ b/research/gnn/sgcn/scripts/docker_start.sh @@ -0,0 +1,38 @@ +#!/bin/bash + +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +docker_image=$1 +data_dir=$2 +model_dir=$3 + +docker run -it -u root --ipc=host \ + --device=/dev/davinci0 \ + --device=/dev/davinci1 \ + --device=/dev/davinci2 \ + --device=/dev/davinci3 \ + --device=/dev/davinci4 \ + --device=/dev/davinci5 \ + --device=/dev/davinci6 \ + --device=/dev/davinci7 \ + --device=/dev/davinci_manager \ + --device=/dev/devmm_svm \ + --device=/dev/hisi_hdc \ + --privileged \ + -v /usr/local/Ascend/add-ons/:/usr/local/Ascend/add-ons \ + -v ${data_dir}:${data_dir} \ + -v ${model_dir}:${model_dir} \ + -v /root/ascend/log:/root/ascend/log ${docker_image} \ + /bin/bash -- GitLab