diff --git a/community/cv/snn/README.md b/community/cv/snn/README.md new file mode 100644 index 0000000000000000000000000000000000000000..962b59398f5d2feffa23966e42fcc72dcf768a91 --- /dev/null +++ b/community/cv/snn/README.md @@ -0,0 +1,299 @@ +# Contents + +- [Contents](#contents) +- [SNN Description](#snn-description) + - [Description](#description) + - [Paper](#paper) +- [Model Architecture](#model-architecture) +- [Dataset](#dataset) +- [Environment Requirements](#environment-requirements) +- [Quick Start](#quick-start) +- [Script Description](#script-description) + - [Script and Sample Code](#script-and-sample-code) + - [Script Parameters](#script-parameters) + - [Training Process](#training-process) + - [Usage](#usage) + - [Running on Ascend](#running-on-ascend) + - [Running parameter server mode training](#running-parameter-server-mode-training) + - [Evaluation while training](#evaluation-while-training) + - [Result](#result) + - [Evaluation Process](#evaluation-process) + - [Usage](#usage-1) + - [Running on Ascend](#running-on-ascend-1) + - [Result](#result-1) +- [Model Description](#model-description) + - [Performance](#performance) + - [Evaluation Performance](#evaluation-performance) + - [LeNet on CIFAR-10](#lenet-on-cifar-10) +- [Description of Random Situation](#description-of-random-situation) +- [ModelZoo Homepage](#modelzoo-homepage) + +# [SNN Description](#contents) + +## Description + +SNN (Spiking neural networks) was proposed by Kaushik Roy and other two authors. + +These are examples of training LeNet/ResNet50 with CIFAR-10 dataset. + +## Paper + +1.[paper](https://www.nature.com/articles/s41586-019-1677-2):Kaushik Roy, Akhilesh Jaiswal, Priyadarshini Panda. 
"Towards spike-based machine intelligence with neuromorphic computing" + +# [Model Architecture](#contents) + +The overall network architecture of SNN is show below: +[Link](https://www.nature.com/articles/s41586-019-1677-2) + +# [Dataset](#contents) + +Dataset used: [CIFAR-10](<http://www.cs.toronto.edu/~kriz/cifar.html>) + +- Dataset size锛�60,000 32*32 colorful images in 10 classes + - Train锛�50,000 images + - Test锛� 10,000 images +- Data format锛歜inary files + - Note锛欴ata will be processed in dataset.py +- Download the dataset, the directory structure is as follows: + +```bash +鈹溾攢cifar-10-batches-bin +鈹� +鈹斺攢cifar-10-verify-bin +``` + +# [Environment Requirements](#contents) + +- Hardware锛圓scend锛� + - Prepare hardware environment with Ascend processor. +- Framework + - [MindSpore](https://www.mindspore.cn/install/en) +- For more information, please check the resources below锛� + - [MindSpore Tutorials](https://www.mindspore.cn/tutorials/en/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/docs/api/en/master/index.html) + +# [Quick Start](#contents) + +After installing MindSpore via the official website, you can start training and evaluation as follows: + +> - <font size=2>During training, if CIFAR-10 dataset is used, DATASET_PATH={CIFAR-10 directory}/cifar-10-batches-bin;</font> +> - <font size=2>During evaluating and inferring, if CIFAR-10 dataset is used, DATASET_PATH={CIFAR-10 directory}/cifar-10-verify-bin;</font> + +- Running on Ascend + +```bash +# distributed training +Usage: bash run_distribute_train_ascend.sh [RANK_TABLE_FILE] [DATASET_PATH] [CONFIG_PATH] [PRETRAINED_CKPT_PATH](optional) + +# standalone training +Usage: bash run_standalone_train_ascend.sh [DATASET_PATH] [CONFIG_PATH] [PRETRAINED_CKPT_PATH](optional) + +# run evaluation example +Usage: bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH] +``` + +```bash +# infer example +python eval.py --data_path=[DATASET_PATH] --ckpt_path=[CHECKPOINT_PATH] --config_path 
[CONFIG_PATH] +``` + +If you want to run in modelarts, please check the official documentation of [modelarts](https://support.huaweicloud.com/modelarts/), and you can start training and evaluation as follows: + +```text +# run distributed training on modelarts example +# (1) Add "config_path='/path_to_code/config/snn_lenet_cifar10_config.yaml'" on the website UI interface. +# (2) First, Perform a or b. +# a. Set "enable_modelarts=True" on yaml file. +# Set other parameters on yaml file you need. +# b. Add "enable_modelarts=True" on the website UI interface. +# Add other parameters on the website UI interface. +# (3) Set the code directory to "/path/snn" on the website UI interface. +# (4) Set the startup file to "train.py" on the website UI interface. +# (5) Set the "Dataset path" and "Output file path" and "Job log path" to your path on the website UI interface. +# (6) Create your job. + +# run evaluation on modelarts example +# (1) Add "config_path='/path_to_code/config/snn_lenet_cifar10_config.yaml'" on the website UI interface. +# (2) Copy or upload your trained model to S3 bucket. +# (3) Perform a or b. +# a. Set "enable_modelarts=True" on yaml file. +# Set "checkpoint_file_path='/cache/checkpoint_path/model.ckpt'" on yaml file. +# Set "checkpoint_url=/The path of checkpoint in S3/" on yaml file. +# b. Add "enable_modelarts=True" on the website UI interface. +# Add "checkpoint_file_path='/cache/checkpoint_path/model.ckpt'" on the website UI interface. +# Add "checkpoint_url=/The path of checkpoint in S3/" on the website UI interface. +# (4) Set the code directory to "/path/snn" on the website UI interface. +# (5) Set the startup file to "eval.py" on the website UI interface. +# (6) Set the "Dataset path" and "Output file path" and "Job log path" to your path on the website UI interface. +# (7) Create your job. +``` + +# [Script Description](#contents) + +## [Script and Sample Code](#contents) + +```text +. 
+└──snn + ├── README.md + ├── config # parameter configuration + ├── snn_lenet_cifar10_config.yaml + ├── snn_resnet50_cifar10_config.yaml + ├── scripts + ├── run_distribute_train_ascend.sh # launch ascend distributed training(8 pcs) + ├── run_eval.sh # launch ascend evaluation + ├── run_standalone_train_ascend.sh # launch ascend standalone training(1 pcs) + ├── src + ├── dataset.py # data preprocessing + ├── ifnode.py # ifnode cell for snn + ├── lr_generator.py # generate learning rate for each step + ├── snn_lenet.py # lenet_snn for ascend benchmark + ├── snn_resnet.py # resnet50_snn for ascend benchmark + ├── model_utils + ├──config.py # parameter configuration + ├──device_adapter.py # device adapter + ├──local_adapter.py # local adapter + ├──moxing_adapter.py # moxing adapter + ├── eval.py # eval net + └── train.py # train net +``` + +## [Script Parameters](#contents) + +Parameters for both training and evaluation can be set in config file. + +- Config for LeNet and ResNet50, CIFAR-10 dataset + +```text +"class_num": 10, # dataset class num +"batch_size": 32, # batch size of input tensor +"loss_scale": 1024, # loss scale +"momentum": 0.9, # momentum +"weight_decay": 1e-4, # weight decay +"epoch_size": 5, # only valid for training, which is always 1 for inference +"save_checkpoint": True, # whether save checkpoint or not +"save_checkpoint_epochs": 1, # the epoch interval between two checkpoints. 
By default, the last checkpoint will be saved after the last step +"keep_checkpoint_max": 5, # only keep the last keep_checkpoint_max checkpoint +"warmup_epochs": 5, # number of warmup epoch +"lr_init": 0.001, # initial learning rate +"save_graphs": False, # save graph results +``` + +## [Training Process](#contents) + +### Usage + +#### Running on Ascend + +```bash +# distributed training +Usage: bash run_distribute_train_ascend.sh [RANK_TABLE_FILE] [DATASET_PATH] [CONFIG_PATH] [PRETRAINED_CKPT_PATH](optional) + +# standalone training +Usage: bash run_standalone_train_ascend.sh [DATASET_PATH] [CONFIG_PATH] [PRETRAINED_CKPT_PATH](optional) + +# run evaluation example +Usage: bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH] +``` + +For distributed training, an HCCL configuration file in JSON format needs to be created in advance. + +Please follow the instructions in the link [hccl_tools](https://gitee.com/mindspore/models/tree/master/utils/hccl_tools). + +Training results will be stored in the example path, in a folder whose name begins with "train" or "train_parallel". In that folder you can find the checkpoint files together with a log containing results like the ones shown under Result below. + +If you want to change device_id for standalone training, you can set environment variable `export DEVICE_ID=x` or set `device_id=x` in context. 
+ +## [Resume Process](#contents) + +### Usage + +#### Running on Ascend + +```text +# distributed training +Usage: bash run_distribute_train_ascend.sh [RANK_TABLE_FILE] [DATASET_PATH] [CONFIG_PATH] [PRETRAINED_CKPT_PATH](optional) + +# standalone training +Usage: bash run_standalone_train_ascend.sh [DATASET_PATH] [CONFIG_PATH] [PRETRAINED_CKPT_PATH](optional) +``` + +### Result + +- Training LeNet with CIFAR-10 dataset + +```text +# training result on GRAPH mode(1 pcs) +epoch: 2, step: 250, loss is loss:0.090041, epoch time: 60457.911ms, per step time: 241.832ms +epoch: 3, step: 250, loss is loss:0.088589, epoch time: 60414.800ms, per step time: 241.659ms +epoch: 4, step: 250, loss is loss:0.078889, epoch time: 60488.454ms, per step time: 241.954ms +epoch: 5, step: 250, loss is loss:0.072030, epoch time: 60465.275ms, per step time: 241.861ms +``` + +- Training ResNet50 with CIFAR-10 dataset + +```text +# training result on GRAPH mode(1 pcs) +epoch: 2, step: 1562, loss is loss:2.575632, epoch time: 106556.557ms, per step time: 68.218ms +epoch: 3, step: 1562, loss is loss:2.307327, epoch time: 106474.296ms, per step time: 68.165ms +epoch: 4, step: 1562, loss is loss:2.308245, epoch time: 106434.503ms, per step time: 68.140ms +epoch: 5, step: 1562, loss is loss:2.309555, epoch time: 108063.124ms, per step time: 69.183ms +``` + +## [Evaluation Process](#contents) + +### Usage + +#### Running on Ascend + +```bash +# evaluation +Usage: bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH] +``` + +> checkpoint can be produced in training process. + +### Result + +Evaluation result will be stored in the example path, whose folder name is "eval". Under this, you can find result like the following in log. 
+ +- Evaluating LeNet with CIFAR-10 dataset + +```bash +# checkpoint: ~/snn/train/output/checkpoint/lenet-5_250.ckpt +Accuracy of the network is: 59.5400 % +``` + +# [Model Description](#contents) + +## [Performance](#contents) + +### Evaluation Performance + +#### LeNet on CIFAR-10 + +| Parameters | Ascend 910 | +| -------------------------- | ------------------------------------------------------------ | +| Model Version | LeNet | +| Resource | Ascend 910; CPU 2.60GHz, 192cores; Memory 755G; OS Euler2.8 | +| Uploaded Date | 06/30/2022 (month/day/year) | +| MindSpore Version | 1.8.0 | +| Dataset | CIFAR-10 | +| Training Parameters | epoch=5, steps per epoch=250, batch_size = 200 | +| Optimizer | Adam | +| Loss Function | MSE | +| outputs | probability | +| Loss | 0.072030 | +| Speed | 241 ms/step (1pcs) | +| Total time | 7 mins | +| Checkpoint for Fine tuning | 1.3M (.ckpt file) | +| Accuracy | 59.54% | +| config | [Link](https://gitee.com/mindspore/models/tree/master/community/cv/snn/config)| + +# [Description of Random Situation](#contents) + +In dataset.py, the "create_dataset_cifar10" function shuffles the training set and applies random crop and random horizontal flip; it does not set a seed itself. We also use random seed in train.py. + +# [ModelZoo Homepage](#contents) + + Please check the official [homepage](https://gitee.com/mindspore/models). 
+ diff --git a/community/cv/snn/config/snn_lenet_cifar10_config.yaml b/community/cv/snn/config/snn_lenet_cifar10_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..00412223bc8b8e385dee16ce053506d444ff5939 --- /dev/null +++ b/community/cv/snn/config/snn_lenet_cifar10_config.yaml @@ -0,0 +1,53 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +data_url: "" +train_url: "" +checkpoint_url: "" +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path" +device_target: Ascend +checkpoint_path: "./checkpoint/" +run_distribute: False +enable_profiling: False + +ckpt_path: '/cache/train/' +ckpt_file: '/cache/train/output/checkpoint/lenet-5-250.ckpt' + +# ============================================================================== +# Training options +optimizer: "Adam" +loss_function: "MSE" +class_num: 10 +batch_size: 200 +lr_init: 0.001 +momentum: 0.9 +weight_decay: 0.0001 +warmup_epochs: 5 +epoch_size: 5 +image_height: 32 +image_width: 32 +save_checkpoint_epochs: 1 +keep_checkpoint_max: 5 +loss_scale: 1024 + +net_name: "lenet" +device_num: 1 +device_id: 0 +pre_trained: "" +save_checkpoint: True +mode_name: "GRAPH" # can be PYNATIVE or GRAPH +all_reduce_fusion_config: [] +save_graphs: False + +--- +# Config description for each option +enable_modelarts: 'Whether training on modelarts, default: False' +data_url: 'Dataset url for obs' +train_url: 'Training output url for obs' +data_path: 'Dataset path for local' +output_path: 'Training output path for local' + +device_target: 'Target device type' +enable_profiling: 'Whether enable profiling while training, default: False' +--- diff --git a/community/cv/snn/config/snn_resnet50_cifar10_config.yaml b/community/cv/snn/config/snn_resnet50_cifar10_config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d2a3b3b27668011b45b9176fc963928cb3de2ab3 --- /dev/null +++ 
b/community/cv/snn/config/snn_resnet50_cifar10_config.yaml @@ -0,0 +1,68 @@ +# Builtin Configurations(DO NOT CHANGE THESE CONFIGURATIONS unless you know exactly what you are doing) +enable_modelarts: False +# Url for modelarts +data_url: "" +train_url: "" +checkpoint_url: "" +# Path for local +run_distribute: False +enable_profiling: False +data_path: "/cache/data" +output_path: "/cache/train" +load_path: "/cache/checkpoint_path/" +device_target: "Ascend" +checkpoint_path: "./checkpoint/" +ckpt_path: '/cache/train/' +ckpt_file: '/cache/train/output/checkpoint/resnet50-5_1562.ckpt' + +# ============================================================================== +# Training options +optimizer: "Momentum" +loss_function: "SoftmaxCrossEntropy" +class_num: 10 +batch_size: 32 +loss_scale: 1024 +momentum: 0.9 +weight_decay: 0.0001 +epoch_size: 10 +save_checkpoint: True +save_checkpoint_epochs: 5 +keep_checkpoint_max: 10 +warmup_epochs: 5 +lr_decay_mode: "poly" +lr_init: 0.01 +lr_end: 0.00001 +lr_max: 0.1 + +net_name: "resnet50" +device_num: 1 +device_id: 0 +pre_trained: "" +mode_name: "GRAPH" # can be PYNATIVE or GRAPH +conv_init: "XavierUniform" +dense_init: "TruncatedNormal" +all_reduce_fusion_config: + - 85 + - 160 +train_image_size: 224 +eval_image_size: 224 + +save_graphs: False + +--- +# Help description for each configuration +enable_modelarts: "Whether training on modelarts, default: False" +data_url: "Dataset url for obs" +checkpoint_url: "The location of checkpoint for obs" +data_path: "Dataset path for local" +output_path: "Training output path for local" +load_path: "The location of checkpoint for obs" +device_target: "Target device type, available: [Ascend, GPU, CPU]" +enable_profiling: "Whether enable profiling while training, default: False" +num_classes: "Class for dataset" +batch_size: "Batch size for training and evaluation" +epoch_size: "Total training epochs." +checkpoint_path: "The location of the checkpoint file." 
+checkpoint_file_path: "The location of the checkpoint file."
+save_graphs: "Whether save graphs during training, default: False."
+save_graphs_path: "Path to save graphs."
diff --git a/community/cv/snn/eval.py b/community/cv/snn/eval.py
new file mode 100644
index 0000000000000000000000000000000000000000..b343c10d65c456b8a708e7ead8cdeb74b6f6cdd6
--- /dev/null
+++ b/community/cv/snn/eval.py
@@ -0,0 +1,110 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+"""
+######################## eval net ########################
+"""
+
+from src.model_utils.config import config
+from src.model_utils.moxing_adapter import moxing_wrapper
+from src.dataset import create_dataset_cifar10
+
+import mindspore.ops as ops
+from mindspore import context
+from mindspore.train.serialization import load_checkpoint, load_param_into_net
+import mindspore as ms
+from mindspore.ops import operations as P
+
+
+def modelarts_process():
+    """Pre-process hook passed to moxing_wrapper: evaluate the checkpoint named by config.ckpt_file."""
+    config.ckpt_path = config.ckpt_file
+
+
+def snn_model_build():
+    """
+    Build the SNN selected by config.net_name ("resnet50" or "lenet").
+
+    The graph-mode implementation is imported when config.mode_name is 'GRAPH';
+    any other value imports the pynative implementation.
+
+    Returns:
+        The network constructed with config.class_num as its class count.
+
+    Raises:
+        ValueError: if config.net_name is neither "resnet50" nor "lenet".
+    """
+    if config.net_name == "resnet50":
+        if config.mode_name == 'GRAPH':
+            from src.snn_resnet import snn_resnet50_graph as snn_resnet50
+        else:
+            from src.snn_resnet import snn_resnet50_pynative as snn_resnet50
+        net = snn_resnet50(class_num=config.class_num)
+    elif config.net_name == "lenet":
+        if config.mode_name == 'GRAPH':
+            from src.snn_lenet import snn_lenet_graph as snn_lenet
+        else:
+            from src.snn_lenet import snn_lenet_pynative as snn_lenet
+        net = snn_lenet(num_class=config.class_num)
+    else:
+        # Report the option that was actually tested above (net_name).
+        raise ValueError(f'config.net_name: {config.net_name} is not supported')
+    return net
+
+
+@moxing_wrapper(pre_process=modelarts_process)
+def eval_net():
+    """
+    Evaluate the checkpoint at config.ckpt_path on the CIFAR-10 evaluation data and print the accuracy.
+
+    Raises:
+        ValueError: if the evaluation dataset contains no batches.
+    """
+    print('eval with config: ', config)
+    correct = 0.0
+    total = 0.0
+    graph_mode = config.mode_name == 'GRAPH'
+    if graph_mode:
+        context.set_context(mode=context.GRAPH_MODE, device_target=config.device_target)
+    else:
+        context.set_context(mode=context.PYNATIVE_MODE, device_target=config.device_target)
+    context.set_context(device_id=config.device_id)
+    ds_eval = create_dataset_cifar10(data_path=config.data_path, do_train=False, batch_size=config.batch_size)
+    if ds_eval.get_dataset_size() == 0:
+        raise ValueError("Please check dataset size > 0 and batch_size <= dataset size")
+    network_eval = snn_model_build()
+    param_dict = load_checkpoint(config.ckpt_path)
+    load_param_into_net(network_eval, param_dict)
+    network_eval.set_train(False)
+    # Create the operators once; they do not depend on the batch.
+    argmax = ops.Argmax(output_type=ms.int32)
+    cast = P.Cast()
+    print("============== Starting Testing ==============", flush=True)
+    for data in ds_eval.create_dict_iterator():
+        image = data['image']
+        label = data['label']
+        outspikes = network_eval(image)
+        predicted = argmax(outspikes)
+        total += label.shape[0]
+        correct += cast((predicted == label), ms.float32).sum().asnumpy().item()
+        if not graph_mode:
+            # Every non-GRAPH mode builds the pynative network, so mirror that condition here.
+            network_eval.reset_net()
+
+    accuracy = 100 * correct / total
+    print('Accuracy of the network is: %.4f %%' % accuracy, flush=True)
+
+
+if __name__ == "__main__":
+    eval_net()
diff --git a/community/cv/snn/scripts/run_distribute_train_ascend.sh b/community/cv/snn/scripts/run_distribute_train_ascend.sh
new file mode 100644
index 0000000000000000000000000000000000000000..b7abef89494cd282423354f2b1529057353307df
--- /dev/null
+++ b/community/cv/snn/scripts/run_distribute_train_ascend.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 3 ] && [ $# != 4 ]
+then
+    echo "Usage: bash run_distribute_train_ascend.sh [RANK_TABLE_FILE] [DATASET_PATH] [CONFIG_PATH] [PRETRAINED_CKPT_PATH](optional)"
+    exit 1
+fi
+# get_real_path PATH: print PATH unchanged when it is absolute, otherwise resolved against $PWD.
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    realpath -m "$PWD/$1"
+  fi
+}
+
+PATH1=$(get_real_path "$1")
+PATH2=$(get_real_path "$2")
+CONFIG_FILE=$(get_real_path "$3")
+
+if [ $# == 4 ]
+then
+    PATH3=$(get_real_path "$4")
+fi
+
+if [ ! -f "$PATH1" ]
+then
+    echo "error: RANK_TABLE_FILE=$PATH1 is not a file"
+exit 1
+fi
+
+if [ ! -d "$PATH2" ]
+then
+    echo "error: DATASET_PATH=$PATH2 is not a directory"
+exit 1
+fi
+
+if [ $# == 4 ] && [ ! -f "$PATH3" ]
+then
+    echo "error: PRETRAINED_CKPT_PATH=$PATH3 is not a file"
+exit 1
+fi
+
+ulimit -u unlimited
+export DEVICE_NUM=8
+export RANK_SIZE=8
+export RANK_TABLE_FILE="$PATH1"
+
+# Start one background train.py per device, each inside its own ./train_parallel<i> copy of the code.
+for((i=0; i<${DEVICE_NUM}; i++))
+do
+    export DEVICE_ID=${i}
+    export RANK_ID=$i
+    rm -rf "./train_parallel$i"
+    mkdir "./train_parallel$i"
+    cp ../*.py "./train_parallel$i"
+    cp *.sh "./train_parallel$i"
+    cp -r ../config/*.yaml "./train_parallel$i"
+    cp -r ../src "./train_parallel$i"
+    cd "./train_parallel$i" || exit
+    echo "start training for rank $RANK_ID, device $DEVICE_ID"
+    env > env.log
+    if [ $# == 3 ]
+    then
+        python train.py --run_distribute=True --device_num=$RANK_SIZE --data_path="$PATH2" \
+        --config_path="$CONFIG_FILE" --output_path './output' &> log &
+    fi
+
+    if [ $# == 4 ]
+    then
+        python train.py --run_distribute=True --device_num=$RANK_SIZE --data_path="$PATH2" --pre_trained="$PATH3" \
+        --config_path="$CONFIG_FILE" --output_path './output' &> log &
+    fi
+    cd ..
+done
+
diff --git a/community/cv/snn/scripts/run_eval.sh b/community/cv/snn/scripts/run_eval.sh
new file mode 100644
index 0000000000000000000000000000000000000000..6b61612762930dda611cdf3bef63d18b8935acf2
--- /dev/null
+++ b/community/cv/snn/scripts/run_eval.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 3 ]
+then
+    echo "Usage: bash run_eval.sh [DATASET_PATH] [CHECKPOINT_PATH] [CONFIG_PATH]"
+exit 1
+fi
+# get_real_path PATH: print PATH unchanged when it is absolute, otherwise resolved against $PWD.
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    realpath -m "$PWD/$1"
+  fi
+}
+
+PATH1=$(get_real_path "$1")
+PATH2=$(get_real_path "$2")
+CONFIG_FILE=$(get_real_path "$3")
+
+
+if [ ! -d "$PATH1" ]
+then
+    echo "error: DATASET_PATH=$PATH1 is not a directory"
+exit 1
+fi
+
+if [ ! -f "$PATH2" ]
+then
+    echo "error: CHECKPOINT_PATH=$PATH2 is not a file"
+exit 1
+fi
+
+export DEVICE_NUM=1
+export RANK_SIZE=$DEVICE_NUM
+export RANK_ID=0
+# Run eval.py in the background from a fresh ./eval copy of the code; output goes to ./eval/log.txt.
+if [ -d "eval" ];
+then
+    rm -rf ./eval
+fi
+mkdir ./eval
+cp ../*.py ./eval
+cp *.sh ./eval
+cp -r ../config/*.yaml ./eval
+cp -r ../src ./eval
+cd ./eval || exit
+env > env.log
+echo "start evaluation"
+python eval.py --data_path="$PATH1" --ckpt_path="$PATH2" --config_path="$CONFIG_FILE" &> log.txt &
+cd ..
+
diff --git a/community/cv/snn/scripts/run_standalone_train_ascend.sh b/community/cv/snn/scripts/run_standalone_train_ascend.sh
new file mode 100644
index 0000000000000000000000000000000000000000..5f6547c525fccf087905e348077572dab36b2034
--- /dev/null
+++ b/community/cv/snn/scripts/run_standalone_train_ascend.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# != 2 ] && [ $# != 3 ]
+then
+    echo "Usage: bash run_standalone_train_ascend.sh [DATASET_PATH] [CONFIG_PATH] [PRETRAINED_CKPT_PATH](optional)"
+exit 1
+fi
+# get_real_path PATH: print PATH unchanged when it is absolute, otherwise resolved against $PWD.
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    realpath -m "$PWD/$1"
+  fi
+}
+
+PATH1=$(get_real_path "$1")
+CONFIG_FILE=$(get_real_path "$2")
+if [ $# == 3 ]
+then
+    PATH2=$(get_real_path "$3")
+fi
+
+if [ ! -d "$PATH1" ]
+then
+    echo "error: DATASET_PATH=$PATH1 is not a directory"
+exit 1
+fi
+
+if [ $# == 3 ] && [ ! -f "$PATH2" ]
+then
+    echo "error: PRETRAINED_CKPT_PATH=$PATH2 is not a file"
+exit 1
+fi
+
+export DEVICE_NUM=1
+export RANK_ID=0
+export RANK_SIZE=1
+# Run train.py in the background from a fresh ./train copy of the code; output goes to ./train/log.txt.
+if [ -d "train" ];
+then
+    rm -rf ./train
+fi
+mkdir ./train
+cp ../config/*.yaml ./train
+cp ../*.py ./train
+cp *.sh ./train
+cp -r ../src ./train
+cd ./train || exit
+echo "start training"
+env > env.log
+
+if [ $# == 2 ]
+then
+    python train.py --data_path="$PATH1" --config_path="$CONFIG_FILE" --output_path './output' &> log.txt &
+fi
+
+if [ $# == 3 ]
+then
+    python train.py --data_path="$PATH1" --pre_trained="$PATH2" --config_path="$CONFIG_FILE" --output_path './output' &> log.txt &
+fi
+cd ..
diff --git a/community/cv/snn/src/dataset.py b/community/cv/snn/src/dataset.py
new file mode 100644
index 0000000000000000000000000000000000000000..1fcd2d7ede7b8dabbd4b05dd03ad2409429e08f5
--- /dev/null
+++ b/community/cv/snn/src/dataset.py
@@ -0,0 +1,56 @@
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================ +""" +Produce the dataset +""" + +import os + +import mindspore.dataset as ds +import mindspore.dataset.vision as CV +import mindspore.dataset.transforms as C +from mindspore.dataset.vision import Inter +from mindspore.common import dtype as mstype + +def create_dataset_cifar10(data_path, batch_size=32, num_parallel_workers=8, do_train=True): + """ + create cifar10 dataset for train or test + """ + # define dataset + data_path = os.path.join(data_path, "cifar-10-batches-bin" if do_train else "cifar-10-verify-bin") + + cifar_ds = ds.Cifar10Dataset(data_path, num_parallel_workers=num_parallel_workers, shuffle=do_train) + + # define map operations + resize_height, resize_width = 32, 32 + rescale = 1.0 / 255.0 + shift = 0.0 + random_crop_op = CV.RandomCrop([32, 32], [4, 4, 4, 4]) + random_horizontal_op = CV.RandomHorizontalFlip(prob=0.5) + resize_op = CV.Resize((resize_height, resize_width), interpolation=Inter.LINEAR) + rescale_op = CV.Rescale(rescale, shift) + normalize_op = CV.Normalize(mean=[0.4914, 0.4822, 0.4465], std=[0.2023, 0.1994, 0.2010]) + hwc2chw_op = CV.HWC2CHW() + type_cast_op = C.TypeCast(mstype.int32) + + if do_train: + compose_op = [random_crop_op, random_horizontal_op, resize_op, rescale_op, normalize_op, hwc2chw_op] + else: + compose_op = [resize_op, rescale_op, normalize_op, hwc2chw_op] + cifar_ds = cifar_ds.map(input_columns="image", operations=compose_op, num_parallel_workers=num_parallel_workers) + cifar_ds = cifar_ds.map(input_columns="label", operations=type_cast_op, num_parallel_workers=num_parallel_workers) + cifar_ds = cifar_ds.batch(batch_size, drop_remainder=True) + + return cifar_ds diff --git a/community/cv/snn/src/ifnode.py b/community/cv/snn/src/ifnode.py new file mode 100644 index 0000000000000000000000000000000000000000..7d763360bd40dd3754844a64584dd89deeda2c9a --- /dev/null +++ b/community/cv/snn/src/ifnode.py @@ -0,0 +1,90 @@ +# Copyright 2022 Huawei 
Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""IFNode.""" +import mindspore.nn as nn +from mindspore import ops +import mindspore + + +class relusigmoid(nn.Cell): + """ + custom surrogate function for integrate and fire cell + """ + def __init__(self): + super().__init__() + self.sigmoid = ops.Sigmoid() + self.greater = ops.Greater() + + def construct(self, x): + spike = self.greater(x, 0) + return spike.astype(mindspore.float32) + + def bprop(self, x, out, dout): + sgax = self.sigmoid(x * 5.0) + grad_x = dout * (1 - sgax) * sgax * 5.0 + # must be a tuple + return (grad_x,) + +class IFNode_GRAPH(nn.Cell): + """ + integrate and fire cell for GRAPH mode, it will output spike value + """ + def __init__(self, v_threshold=1.0, fire=True, surrogate_function=relusigmoid()): + super().__init__() + self.v_threshold = v_threshold + self.fire = fire + self.surrogate_function = surrogate_function + + def construct(self, x, v): + """ neuronal_charge: v need to do add""" + v = v + x + if self.fire: + spike = self.surrogate_function(v - self.v_threshold) * self.v_threshold + v -= spike + return spike, v + return v, v + + +class IFNode_PYNATIVE(nn.Cell): + """ + integrate and fire cell for PYNATIVE mode, it will output spike value + """ + def __init__(self, v_threshold=1.0, v_reset=0.0, fire=True, surrogate_function=relusigmoid()): + super().__init__() + self.v_threshold = 
v_threshold + if v_reset is None: + self.v_reset = 0.0 + else: + self.v_reset = v_reset + self.v = self.v_reset + self.fire = fire + self.surrogate_function = surrogate_function + + def construct(self, x): + """ neuronal_charge: self.v need to do add""" + self.v = self.v + x + # neuronal_fire + if self.fire: + spike = self.surrogate_function(self.v - self.v_threshold) * self.v_threshold + self.v -= spike + return spike + return self.v + + def reset(self): + """each batch should reset the accumulated value of the net such as self.v""" + if self.v_reset is None: + self.v = 0.0 + else: + self.v = self.v_reset diff --git a/community/cv/snn/src/lr_generator.py b/community/cv/snn/src/lr_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..99660784a2dc6df80fe7139ca797dc1478badc25 --- /dev/null +++ b/community/cv/snn/src/lr_generator.py @@ -0,0 +1,273 @@ +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""learning rate generator""" +import math +import numpy as np + + +def _generate_steps_lr(lr_init, lr_max, total_steps, warmup_steps): + """ + Applies three steps decay to generate learning rate array. + + Args: + lr_init(float): init learning rate. + lr_max(float): max learning rate. + total_steps(int): all steps in training. + warmup_steps(int): all steps in warmup epochs. + + Returns: + np.array, learning rate array. 
def _generate_steps_lr(lr_init, lr_max, total_steps, warmup_steps):
    """
    Build a per-step learning rate list: linear warmup followed by a staged decay.

    During warmup the rate moves linearly from lr_init towards lr_max. Afterwards it is
    lr_max before 30% of total_steps, lr_max * 0.1 before 60%, lr_max * 0.01 before 80%
    and lr_max * 0.001 for the remaining steps.

    Args:
       lr_init(float): init learning rate.
       lr_max(float): max learning rate.
       total_steps(int): all steps in training.
       warmup_steps(int): all steps in warmup epochs.

    Returns:
       list, learning rate of every step.
    """
    first_drop = 0.3 * total_steps
    second_drop = 0.6 * total_steps
    third_drop = 0.8 * total_steps

    def rate_at(step):
        if step < warmup_steps:
            return lr_init + (lr_max - lr_init) * step / warmup_steps
        if step < first_drop:
            return lr_max
        if step < second_drop:
            return lr_max * 0.1
        if step < third_drop:
            return lr_max * 0.01
        return lr_max * 0.001

    return [rate_at(step) for step in range(total_steps)]


def _generate_step_lr(lr_init, lr_max, total_steps, warmup_steps):
    """
    Build a per-step learning rate list with a staged decay and no warmup.

    The rate is lr_max before 20% of total_steps, lr_max * 0.1 before 50%,
    lr_max * 0.01 before 70%, lr_max * 0.001 before 90% and the fixed value 0.00005
    for the remaining steps.

    Args:
       lr_init(float): init learning rate, not used by this schedule.
       lr_max(float): max learning rate.
       total_steps(int): all steps in training.
       warmup_steps(int): all steps in warmup epochs, not used by this schedule.

    Returns:
       list, learning rate of every step.
    """
    drops = (0.2 * total_steps, 0.5 * total_steps, 0.7 * total_steps, 0.9 * total_steps)

    def rate_at(step):
        if step < drops[0]:
            return lr_max
        if step < drops[1]:
            return lr_max * 0.1
        if step < drops[2]:
            return lr_max * 0.01
        if step < drops[3]:
            return lr_max * 0.001
        return 0.00005

    return [rate_at(step) for step in range(total_steps)]
def _generate_poly_lr(lr_init, lr_end, lr_max, total_steps, warmup_steps):
    """
    Build a per-step learning rate list: linear warmup followed by a squared decay.

    After warmup the rate is lr_max multiplied by the square of the remaining
    fraction of the post-warmup steps; negative results are replaced by 0.0.

    Args:
       lr_init(float): init learning rate.
       lr_end(float): end learning rate, not used by this schedule.
       lr_max(float): max learning rate.
       total_steps(int): all steps in training.
       warmup_steps(int): all steps in warmup epochs.

    Returns:
       list, learning rate of every step.
    """
    if warmup_steps != 0:
        warmup_slope = (float(lr_max) - float(lr_init)) / float(warmup_steps)
    else:
        warmup_slope = 0

    schedule = []
    for step in range(total_steps):
        if step < warmup_steps:
            schedule.append(float(lr_init) + warmup_slope * float(step))
            continue
        remaining = (1.0 - (float(step) - float(warmup_steps)) / (float(total_steps) - float(warmup_steps)))
        rate = float(lr_max) * remaining * remaining
        schedule.append(0.0 if rate < 0.0 else rate)
    return schedule


def _generate_cosine_lr(lr_init, lr_end, lr_max, total_steps, warmup_steps):
    """
    Build a per-step learning rate list: linear warmup followed by a cosine-shaped decay.

    Warmup step i uses lr_init plus (i + 1) warmup increments. After warmup the rate is
    lr_max times (linear decay * cosine decay + 0.00001).

    Args:
       lr_init(float): init learning rate.
       lr_end(float): end learning rate, not used by this schedule.
       lr_max(float): max learning rate.
       total_steps(int): all steps in training.
       warmup_steps(int): all steps in warmup epochs.

    Returns:
       list, learning rate of every step.
    """
    decay_span = total_steps - warmup_steps
    schedule = []
    for step in range(total_steps):
        if step >= warmup_steps:
            linear_part = (total_steps - step) / decay_span
            cosine_part = 0.5 * (1 + math.cos(math.pi * 2 * 0.47 * step / decay_span))
            schedule.append(lr_max * (linear_part * cosine_part + 0.00001))
        else:
            warmup_slope = (float(lr_max) - float(lr_init)) / float(warmup_steps)
            schedule.append(float(lr_init) + warmup_slope * (step + 1))
    return schedule
def _generate_liner_lr(lr_init, lr_end, lr_max, total_steps, warmup_steps):
    """
    Build a per-step learning rate list: linear warmup followed by a linear decay.

    Args:
       lr_init(float): init learning rate.
       lr_end(float): end learning rate
       lr_max(float): max learning rate.
       total_steps(int): all steps in training.
       warmup_steps(int): all steps in warmup epochs.

    Returns:
       list, learning rate of every step.
    """
    def rate_at(step):
        if step < warmup_steps:
            return lr_init + (lr_max - lr_init) * step / warmup_steps
        return lr_max - (lr_max - lr_end) * (step - warmup_steps) / (total_steps - warmup_steps)

    return [rate_at(step) for step in range(total_steps)]



def get_lr(lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch, lr_decay_mode):
    """
    generate learning rate array

    Args:
       lr_init(float): init learning rate
       lr_end(float): end learning rate
       lr_max(float): max learning rate
       warmup_epochs(int): number of warmup epochs
       total_epochs(int): total epoch of training
       steps_per_epoch(int): steps of one epoch
       lr_decay_mode(string): learning rate decay mode, one of steps, step, poly, cosine;
           any other value selects liner

    Returns:
       np.array, float32 learning rate of every step
    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs

    if lr_decay_mode == 'steps':
        schedule = _generate_steps_lr(lr_init, lr_max, total_steps, warmup_steps)
    elif lr_decay_mode == 'step':
        # this mode passes the warmup epoch count in the warmup_steps position
        schedule = _generate_step_lr(lr_init, lr_max, total_steps, warmup_epochs)
    elif lr_decay_mode == 'poly':
        schedule = _generate_poly_lr(lr_init, lr_end, lr_max, total_steps, warmup_steps)
    elif lr_decay_mode == 'cosine':
        schedule = _generate_cosine_lr(lr_init, lr_end, lr_max, total_steps, warmup_steps)
    else:
        schedule = _generate_liner_lr(lr_init, lr_end, lr_max, total_steps, warmup_steps)

    return np.array(schedule).astype(np.float32)


def linear_warmup_lr(current_step, warmup_steps, base_lr, init_lr):
    """Return init_lr plus current_step increments of (base_lr - init_lr) / warmup_steps."""
    start = float(init_lr)
    slope = (float(base_lr) - start) / float(warmup_steps)
    return start + slope * current_step
def warmup_cosine_annealing_lr(lr, steps_per_epoch, warmup_epochs, max_epoch=120, global_step=0):
    """
    generate learning rate array with cosine

    Warmup rises linearly from 0 to lr; afterwards the rate is lr times
    (linear decay * cosine decay + 0.00001).

    Args:
       lr(float): base learning rate
       steps_per_epoch(int): steps size of one epoch
       warmup_epochs(int): number of warmup epochs
       max_epoch(int): total epochs of training
       global_step(int): the current start index of lr array
    Returns:
       np.array, float32 learning rates from global_step to the last step
    """
    peak_lr = lr
    total_steps = int(max_epoch * steps_per_epoch)
    warmup_steps = int(warmup_epochs * steps_per_epoch)
    decay_span = total_steps - warmup_steps

    schedule = []
    for step in range(total_steps):
        if step < warmup_steps:
            schedule.append(linear_warmup_lr(step + 1, warmup_steps, peak_lr, 0))
            continue
        linear_part = (total_steps - step) / decay_span
        cosine_part = 0.5 * (1 + math.cos(math.pi * 2 * 0.47 * step / decay_span))
        schedule.append(peak_lr * (linear_part * cosine_part + 0.00001))

    return np.array(schedule).astype(np.float32)[global_step:]


def get_thor_lr(global_step, lr_init, decay, total_epochs, steps_per_epoch, decay_epochs=100):
    """
    get_model_lr

    Every step uses lr_init * (1 - epoch / total_epochs) ** decay, halved once the
    epoch reaches decay_epochs and halved again once it reaches decay_epochs + 1.

    Returns:
       np.array, float32 learning rates from global_step to the last step
    """
    schedule = []
    for step in range(steps_per_epoch * total_epochs):
        epoch = (step + 1) / steps_per_epoch
        rate = lr_init * (1.0 - float(epoch) / total_epochs) ** decay
        if epoch >= decay_epochs:
            rate = rate * 0.5
        if epoch >= decay_epochs + 1:
            rate = rate * 0.5
        schedule.append(rate)
    return np.array(schedule).astype(np.float32)[global_step:]


def get_thor_damping(global_step, damping_init, decay_rate, total_epochs, steps_per_epoch):
    """
    get_model_damping

    Every step uses damping_init * decay_rate ** (epoch / 10).

    Returns:
       np.array, float32 damping values from global_step to the last step
    """
    schedule = [
        damping_init * (decay_rate ** (((step + 1) / steps_per_epoch) / 10))
        for step in range(steps_per_epoch * total_epochs)
    ]
    return np.array(schedule).astype(np.float32)[global_step:]
_config_path = "./config/snn_lenet_cifar10_config.yaml"


class Config:
    """
    Configuration namespace that exposes dictionary entries as attributes.

    Dictionary values become nested Config objects. Lists and tuples become lists
    whose dictionary elements are converted the same way; other elements are kept.
    """
    def __init__(self, cfg_dict):
        def wrap(node):
            return Config(node) if isinstance(node, dict) else node

        for key, value in cfg_dict.items():
            if isinstance(value, (list, tuple)):
                converted = [wrap(element) for element in value]
            else:
                converted = wrap(value)
            setattr(self, key, converted)

    def __str__(self):
        return pformat(self.__dict__)

    def __repr__(self):
        return self.__str__()
def parse_cli_to_yaml(parser, cfg, helper=None, choices=None, cfg_path="snn_lenet_cifar10_config.yaml"):
    """
    Parse command line arguments to the configuration according to the default yaml.

    One ``--<name>`` option is registered for every entry of ``cfg`` whose value is
    neither a list nor a dict; the yaml value is the option's default.

    Args:
        parser: Parent parser.
        cfg: Base configuration.
        helper: Helper description.
        choices: Allowed values per option name.
        cfg_path: Path to the default yaml config.

    Returns:
        argparse.Namespace, the parsed command line.
    """
    parser = argparse.ArgumentParser(description="[REPLACE THIS at config.py]",
                                     parents=[parser])
    helper = {} if helper is None else helper
    choices = {} if choices is None else choices
    for item in cfg:
        default = cfg[item]
        if isinstance(default, (list, dict)):
            continue
        help_description = helper[item] if item in helper else "Please reference to {}".format(cfg_path)
        choice = choices[item] if item in choices else None
        # bool("False") is True, so booleans are parsed as Python literals instead
        arg_type = ast.literal_eval if isinstance(default, bool) else type(default)
        parser.add_argument("--" + item, type=arg_type, default=default, choices=choice,
                            help=help_description)
    return parser.parse_args()


def parse_yaml(yaml_path):
    """
    Parse the yaml config file.

    The file may hold one to three yaml documents: the configuration, then optionally
    the help texts, then optionally the allowed choices.

    Args:
        yaml_path: Path to the yaml config.

    Returns:
        tuple, ``(cfg, cfg_helper, cfg_choices)``; missing documents are empty dicts.

    Raises:
        ValueError: If the yaml cannot be parsed, holds no document, or holds more
            than three documents.
    """
    with open(yaml_path, 'r', encoding='utf-8') as fin:
        try:
            cfgs = list(yaml.load_all(fin.read(), Loader=yaml.FullLoader))
        except yaml.YAMLError as err:
            # keep the historical message and exception type, but keep the cause too
            raise ValueError("Failed to parse yaml") from err

    if not cfgs:
        raise ValueError("Failed to parse yaml")
    if len(cfgs) > 3:
        raise ValueError("At most 3 docs (config description for help, choices) are supported in config yaml")

    cfg = cfgs[0]
    cfg_helper = cfgs[1] if len(cfgs) > 1 else {}
    cfg_choices = cfgs[2] if len(cfgs) > 2 else {}
    print(cfg_helper)
    return cfg, cfg_helper, cfg_choices


def merge(args, cfg):
    """
    Merge the base config from yaml file and command line arguments.

    Command line values overwrite entries of ``cfg``; ``cfg`` is modified in place.

    Args:
        args: Command line arguments.
        cfg: Base configuration.

    Returns:
        dict, the same ``cfg`` object after the update.
    """
    cfg.update(vars(args))
    return cfg
def get_config():
    """
    Build the final Config from the yaml file and the command line.

    The yaml path comes from ``--config_path`` (default: the snn_lenet_cifar10 config
    two directories above this module). The merged configuration is printed before it
    is returned.
    """
    base_parser = argparse.ArgumentParser(description="default name", add_help=False)
    module_dir = os.path.dirname(os.path.abspath(__file__))
    default_yaml = os.path.join(module_dir, "../../config/snn_lenet_cifar10_config.yaml")
    base_parser.add_argument("--config_path", type=str, default=default_yaml, help="Config file path")

    # first pass: only --config_path is known, everything else is left for the second pass
    path_args, _ = base_parser.parse_known_args()
    yaml_path = path_args.config_path
    default, helper, choices = parse_yaml(yaml_path)
    args = parse_cli_to_yaml(parser=base_parser, cfg=default, helper=helper, choices=choices, cfg_path=yaml_path)

    final_config = merge(args, default)
    pprint(final_config)
    print("Please check the above information for the configurations", flush=True)
    return Config(final_config)


# built once at import time; other modules import this object
config = get_config()
# Select the adapter implementation once, at import time, from the config flag.
# Both modules provide the same four getters, so importers of this module do not
# need to know which one is active.
if config.enable_modelarts:
    from src.model_utils.moxing_adapter import get_device_id, get_device_num, get_rank_id, get_job_id
else:
    from src.model_utils.local_adapter import get_device_id, get_device_num, get_rank_id, get_job_id

# Names re-exported by this module.
__all__ = [
    "get_device_id", "get_device_num", "get_rank_id", "get_job_id"
]
def get_device_id():
    """Return the DEVICE_ID environment variable as an int, 0 when it is not set."""
    return int(os.environ.get('DEVICE_ID', '0'))


def get_device_num():
    """Return the RANK_SIZE environment variable as an int, 1 when it is not set."""
    return int(os.environ.get('RANK_SIZE', '1'))


def get_rank_id():
    """Return the RANK_ID environment variable as an int, 0 when it is not set."""
    return int(os.environ.get('RANK_ID', '0'))


def get_job_id():
    """Return the fixed job name used for local runs."""
    return "Local Job"
# Number of sync_data calls made so far in this process; gives every call its own lock file.
_global_sync_count = 0

def get_device_id():
    """Return the DEVICE_ID environment variable as an int, 0 when it is not set."""
    device_id = os.getenv('DEVICE_ID', '0')
    return int(device_id)


def get_device_num():
    """Return the RANK_SIZE environment variable as an int, 1 when it is not set."""
    device_num = os.getenv('RANK_SIZE', '1')
    return int(device_num)


def get_rank_id():
    """Return the RANK_ID environment variable as an int, 0 when it is not set."""
    global_rank_id = os.getenv('RANK_ID', '0')
    return int(global_rank_id)


def get_job_id():
    """Return the JOB_ID environment variable, or "default" when it is unset or empty."""
    # os.getenv returns None for an unset variable; `or` covers both None and ""
    return os.getenv('JOB_ID') or "default"

def sync_data(from_path, to_path):
    """
    Download data from remote obs to local directory if the first url is remote url and the second one is local path
    Upload data from local directory to remote obs in contrast.

    Only a process whose device id is a multiple of min(device count, 8) performs the
    copy; it then creates a lock file and every process waits until that file exists.
    """
    import moxing as mox
    import time
    global _global_sync_count
    sync_lock = "/tmp/copy_sync.lock" + str(_global_sync_count)
    _global_sync_count += 1

    # Each server contains 8 devices as most.
    if get_device_id() % min(get_device_num(), 8) == 0 and not os.path.exists(sync_lock):
        print("from path: ", from_path)
        print("to path: ", to_path)
        mox.file.copy_parallel(from_path, to_path)
        print("===finish data synchronization===")
        try:
            os.mknod(sync_lock)
        except IOError:
            # best effort: a failure to create the flag is deliberately not fatal
            pass
        print("===save flag===")

    # poll once per second until the lock file is visible
    while True:
        if os.path.exists(sync_lock):
            break
        time.sleep(1)

    print("Finish sync data from {} to {}.".format(from_path, to_path))
def moxing_wrapper(pre_process=None, post_process=None):
    """
    Moxing wrapper to download dataset and upload outputs.

    When ``config.enable_modelarts`` is set, the decorated function is preceded by the
    downloads configured through ``data_url``, ``checkpoint_url`` and ``train_url`` and
    by ``pre_process``, and followed by ``post_process`` and the upload of
    ``config.output_path`` to ``train_url``. Otherwise the function is simply called.

    Args:
        pre_process: Optional callable without arguments, run before the function.
        post_process: Optional callable without arguments, run after the function.

    Returns:
        A decorator. The decorated function returns whatever the original returns.
    """
    def wrapper(run_func):
        @functools.wraps(run_func)
        def wrapped_func(*args, **kwargs):
            # Download data from data_url
            if config.enable_modelarts:
                if config.data_url:
                    sync_data(config.data_url, config.data_path)
                    print("Dataset downloaded: ", os.listdir(config.data_path))
                if config.checkpoint_url:
                    sync_data(config.checkpoint_url, config.load_path)
                    print("Preload downloaded: ", os.listdir(config.load_path))
                if config.train_url:
                    sync_data(config.train_url, config.output_path)
                    print("Workspace downloaded: ", os.listdir(config.output_path))

                ms.set_context(save_graphs_path=os.path.join(config.output_path, str(get_rank_id())))
                config.device_num = get_device_num()
                config.device_id = get_device_id()
                # exist_ok avoids a failure when another process creates the directory first
                os.makedirs(config.output_path, exist_ok=True)

                if pre_process:
                    pre_process()

            # keep the result so decorating a function does not discard its return value
            result = run_func(*args, **kwargs)

            # Upload data to train_url
            if config.enable_modelarts:
                if post_process:
                    post_process()

                if config.train_url:
                    print("Start to copy output directory")
                    sync_data(config.output_path, config.train_url)
            return result
        return wrapped_func
    return wrapper
def init_weight(inC, outC, kernel):
    """Return an (outC, inC, kernel, kernel) float32 Tensor drawn uniformly from +/- sqrt(1 / (inC * kernel * kernel))."""
    bound = (1 / (inC * kernel * kernel)) ** 0.5
    values = np.random.uniform(-bound, bound, (outC, inC, kernel, kernel))
    return Tensor(values.astype(np.float32))


def init_bias(inC, outC, kernel):
    """Return a length-outC float32 Tensor drawn uniformly from +/- sqrt(1 / (inC * kernel * kernel))."""
    bound = (1 / (inC * kernel * kernel)) ** 0.5
    values = np.random.uniform(-bound, bound, outC)
    return Tensor(values.astype(np.float32))


def init_dense_weight(inC, outC):
    """Return an (outC, inC) float32 Tensor drawn uniformly from +/- sqrt(1 / inC)."""
    bound = (1 / inC) ** 0.5
    values = np.random.uniform(-bound, bound, (outC, inC))
    return Tensor(values.astype(np.float32))


def init_dense_bias(inC, outC):
    """Return a length-outC float32 Tensor drawn uniformly from +/- sqrt(1 / inC)."""
    bound = (1 / inC) ** 0.5
    values = np.random.uniform(-bound, bound, outC)
    return Tensor(values.astype(np.float32))
class snn_lenet_graph(nn.Cell):
    """
    snn backbone for lenet with graph mode

    Six 3x3 convolutions and two dense layers, each followed by an integrate and fire
    cell. The same input is presented for ``time_steps`` steps; the last cell does not
    fire, it accumulates, and the accumulated output divided by the step count is returned.

    Args:
        num_class (int): Size of the output layer. Default: 10.
        num_channel (int): Channels of the input image. Default: 3.
        time_steps (int): Number of simulation steps, stored as ``self.T``. Default: 100.

    Raises:
        ValueError: If ``time_steps`` is not positive.
    """
    def __init__(self, num_class=10, num_channel=3, time_steps=100):
        super(snn_lenet_graph, self).__init__()
        if time_steps <= 0:
            # construct divides by self.T
            raise ValueError("time_steps must be positive, but got {}".format(time_steps))
        self.T = time_steps
        self.conv1 = nn.Conv2d(num_channel, 16, 3, stride=1, pad_mode='pad', padding=1, has_bias=True,
                               weight_init=init_weight(num_channel, 16, 3), bias_init=init_bias(num_channel, 16, 3))
        self.ifnode1 = IFNode_GRAPH()
        self.conv2 = nn.Conv2d(16, 16, 3, stride=2, pad_mode='pad', padding=1, has_bias=True,
                               weight_init=init_weight(16, 16, 3), bias_init=init_bias(16, 16, 3))
        self.ifnode2 = IFNode_GRAPH()
        self.conv3 = nn.Conv2d(16, 32, 3, stride=1, pad_mode='pad', padding=1, has_bias=True,
                               weight_init=init_weight(16, 32, 3), bias_init=init_bias(16, 32, 3))
        self.ifnode3 = IFNode_GRAPH()
        self.conv4 = nn.Conv2d(32, 32, 3, stride=2, pad_mode='pad', padding=1, has_bias=True,
                               weight_init=init_weight(32, 32, 3), bias_init=init_bias(32, 32, 3))
        self.ifnode4 = IFNode_GRAPH()
        self.conv5 = nn.Conv2d(32, 64, 3, stride=1, pad_mode='pad', padding=1, has_bias=True,
                               weight_init=init_weight(32, 64, 3), bias_init=init_bias(32, 64, 3))
        self.ifnode5 = IFNode_GRAPH()
        self.conv6 = nn.Conv2d(64, 64, 3, stride=2, pad_mode='pad', padding=1, has_bias=True,
                               weight_init=init_weight(64, 64, 3), bias_init=init_bias(64, 64, 3))
        self.ifnode6 = IFNode_GRAPH()
        # created once here instead of once per time step inside construct
        self.reshape = P.Reshape()
        self.fc1 = nn.Dense(64 * 4 * 4, 32, weight_init=init_dense_weight(64 * 4 * 4, 32),
                            bias_init=init_dense_bias(64 * 4 * 4, 32))
        self.ifnode7 = IFNode_GRAPH()
        self.fc2 = nn.Dense(32, num_class, weight_init=init_dense_weight(32, num_class),
                            bias_init=init_dense_bias(32, num_class))
        self.ifnode8 = IFNode_GRAPH(fire=False)

    def construct(self, x_in):
        """forward the snn-lenet block"""
        x = x_in
        # one accumulated value per integrate and fire cell, carried across time steps
        v1 = v2 = v3 = v4 = v5 = v6 = v7 = v8 = 0.0
        for _ in range(self.T):
            x = self.conv1(x_in)
            x, v1 = self.ifnode1(x, v1)
            x = self.conv2(x)
            x, v2 = self.ifnode2(x, v2)
            x = self.conv3(x)
            x, v3 = self.ifnode3(x, v3)
            x = self.conv4(x)
            x, v4 = self.ifnode4(x, v4)
            x = self.conv5(x)
            x, v5 = self.ifnode5(x, v5)
            x = self.conv6(x)
            x, v6 = self.ifnode6(x, v6)
            x = self.reshape(x, (-1, 64 * 4 * 4))
            x = self.fc1(x)
            x, v7 = self.ifnode7(x, v7)
            x = self.fc2(x)
            x, v8 = self.ifnode8(x, v8)
        # ifnode8 does not fire, so x is the value accumulated over all steps
        return x / self.T
class snn_lenet_pynative(nn.Cell):
    """
    snn backbone for lenet with pynative mode

    Same layers as the graph mode network, but every integrate and fire cell keeps its
    accumulated value on itself, so ``reset_net`` has to be called between batches.

    Args:
        num_class (int): Size of the output layer. Default: 10.
        num_channel (int): Channels of the input image. Default: 3.
        time_steps (int): Number of simulation steps, stored as ``self.T``. Default: 100.

    Raises:
        ValueError: If ``time_steps`` is not positive.
    """
    def __init__(self, num_class=10, num_channel=3, time_steps=100):
        super(snn_lenet_pynative, self).__init__()
        if time_steps <= 0:
            # construct divides by self.T
            raise ValueError("time_steps must be positive, but got {}".format(time_steps))
        self.T = time_steps
        self.conv1 = nn.SequentialCell([nn.Conv2d(num_channel, 16, 3, stride=1, pad_mode='pad', padding=1,
                                                  has_bias=True, weight_init=init_weight(num_channel, 16, 3),
                                                  bias_init=init_bias(num_channel, 16, 3)),
                                        IFNode_PYNATIVE(v_threshold=1.0, v_reset=None)])

        self.conv2 = nn.SequentialCell([nn.Conv2d(16, 16, 3, stride=2, pad_mode='pad', padding=1, has_bias=True,
                                                  weight_init=init_weight(16, 16, 3), bias_init=init_bias(16, 16, 3)),
                                        IFNode_PYNATIVE(v_threshold=1.0, v_reset=None)])

        self.conv3 = nn.SequentialCell([nn.Conv2d(16, 32, 3, stride=1, pad_mode='pad', padding=1, has_bias=True,
                                                  weight_init=init_weight(16, 32, 3), bias_init=init_bias(16, 32, 3)),
                                        IFNode_PYNATIVE(v_threshold=1.0, v_reset=None)])

        self.conv4 = nn.SequentialCell([nn.Conv2d(32, 32, 3, stride=2, pad_mode='pad', padding=1, has_bias=True,
                                                  weight_init=init_weight(32, 32, 3), bias_init=init_bias(32, 32, 3)),
                                        IFNode_PYNATIVE(v_threshold=1.0, v_reset=None)])

        self.conv5 = nn.SequentialCell([nn.Conv2d(32, 64, 3, stride=1, pad_mode='pad', padding=1, has_bias=True,
                                                  weight_init=init_weight(32, 64, 3), bias_init=init_bias(32, 64, 3)),
                                        IFNode_PYNATIVE(v_threshold=1.0, v_reset=None)])

        self.conv6 = nn.SequentialCell([nn.Conv2d(64, 64, 3, stride=2, pad_mode='pad', padding=1, has_bias=True,
                                                  weight_init=init_weight(64, 64, 3), bias_init=init_bias(64, 64, 3)),
                                        IFNode_PYNATIVE(v_threshold=1.0, v_reset=None)])

        # created once here instead of once per time step inside construct
        self.reshape = P.Reshape()

        self.fc1 = nn.SequentialCell([nn.Dense(64 * 4 * 4, 32,
                                               weight_init=init_dense_weight(64 * 4 * 4, 32),
                                               bias_init=init_dense_bias(64 * 4 * 4, 32)),
                                      IFNode_PYNATIVE(v_threshold=1.0, v_reset=None)])

        self.fc2 = nn.Dense(32, num_class, weight_init=init_dense_weight(32, num_class),
                            bias_init=init_dense_bias(32, num_class))

        self.outlayer = IFNode_PYNATIVE(v_threshold=1.0, v_reset=None, fire=False)

    def construct(self, x_in):
        """forward the snn-lenet block"""
        x = x_in
        for _ in range(self.T):
            x = self.conv1(x_in)
            x = self.conv2(x)
            x = self.conv3(x)
            x = self.conv4(x)
            x = self.conv5(x)
            x = self.conv6(x)
            x = self.reshape(x, (-1, 64 * 4 * 4))
            x = self.fc1(x)
            x = self.fc2(x)
            x = self.outlayer(x)
        # outlayer does not fire, so x is the value accumulated over all steps
        return x / self.T

    def reset_net(self):
        """each batch should reset the accumulated value of the net such as self.v"""
        for item in self.cells():
            # The integrate and fire cell is the last element of each SequentialCell.
            # The check must be on the instance: testing type(item) never matches, which
            # left every wrapped cell carrying its value into the next batch.
            if isinstance(item, nn.SequentialCell):
                if len(item) == 0:
                    continue
                target = item[-1]
            else:
                target = item
            if hasattr(target, 'reset'):
                target.reset()
def _same_conv(in_channel, out_channel, kernel_size, stride):
    """Build a 'same'-padded Conv2d with HeNormal(fan_out, relu) weight initialisation."""
    weight_init = HeNormal(mode='fan_out', nonlinearity='relu')
    return nn.Conv2d(in_channel, out_channel, kernel_size=kernel_size, stride=stride,
                     padding=0, pad_mode='same', weight_init=weight_init)


def _conv3x3(in_channel, out_channel, stride=1):
    """Return a 3x3 'same'-padded convolution."""
    return _same_conv(in_channel, out_channel, 3, stride)


def _conv1x1(in_channel, out_channel, stride=1):
    """Return a 1x1 'same'-padded convolution."""
    return _same_conv(in_channel, out_channel, 1, stride)


def _conv7x7(in_channel, out_channel, stride=1):
    """Return a 7x7 'same'-padded convolution."""
    return _same_conv(in_channel, out_channel, 7, stride)


def _bn(channel):
    """Return a BatchNorm2d with eps 1e-4, momentum 0.9, gamma 1, beta 0, mean 0 and variance 1."""
    settings = dict(eps=1e-4, momentum=0.9,
                    gamma_init=1, beta_init=0, moving_mean_init=0, moving_var_init=1)
    return nn.BatchNorm2d(channel, **settings)
def _fc(in_channel, out_channel):
    """Return a biased Dense layer with HeUniform(fan_in, leaky_relu) weights and zero bias."""
    weight_init = HeUniform(negative_slope=math.sqrt(5), mode='fan_in', nonlinearity='leaky_relu')
    return nn.Dense(in_channel, out_channel, has_bias=True, weight_init=weight_init, bias_init=0)


class ResidualBlock_GRAPH(nn.Cell):
    """
    ResNet V1 residual block definition.

    The block is 1x1 conv, 3x3 conv (strided), 1x1 conv, each followed by batch norm;
    integrate and fire cells follow the first two and the residual sum. The accumulated
    values of the three cells travel with the activation as one tuple.

    Args:
        in_channel (int): Input channel.
        out_channel (int): Output channel.
        stride (int): Stride of the 3x3 convolution and of the shortcut. Default: 1.

    Returns:
        tuple, ``(output, v1, v2, v3)``.

    Examples:
        >>> ResidualBlock_GRAPH(3, 256, stride=2)
    """
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1):
        super(ResidualBlock_GRAPH, self).__init__()
        self.stride = stride
        mid_channel = out_channel // self.expansion

        self.conv1 = _conv1x1(in_channel, mid_channel, stride=1)
        self.bn1 = _bn(mid_channel)
        self.ifnode1 = IFNode_GRAPH()

        self.conv2 = _conv3x3(mid_channel, mid_channel, stride=stride)
        self.bn2 = _bn(mid_channel)
        self.ifnode2 = IFNode_GRAPH()

        self.conv3 = _conv1x1(mid_channel, out_channel, stride=1)
        self.bn3 = _bn(out_channel)

        # the shortcut needs a projection whenever the shape of the input changes
        self.down_sample = bool(stride != 1 or in_channel != out_channel)
        self.down_sample_layer = None
        if self.down_sample:
            shortcut = [_conv1x1(in_channel, out_channel, stride), _bn(out_channel)]
            self.down_sample_layer = nn.SequentialCell(shortcut)

        self.ifnode3 = IFNode_GRAPH()

    def construct(self, x_in):
        """ResidualBlock with graph mode"""
        x, v1, v2, v3 = x_in
        shortcut = x

        y = self.bn1(self.conv1(x))
        y, v1 = self.ifnode1(y, v1)

        y = self.bn2(self.conv2(y))
        y, v2 = self.ifnode2(y, v2)

        y = self.bn3(self.conv3(y))

        if self.down_sample:
            shortcut = self.down_sample_layer(shortcut)

        y = y + shortcut
        y, v3 = self.ifnode3(y, v3)
        return (y, v1, v2, v3)
+ layer_nums (list): Numbers of block in different layers. + in_channels (list): Input channel in each layer. + out_channels (list): Output channel in each layer. + strides (list): Stride size in each layer. + num_classes (int): The number of classes that the training images are belonging to. + + Returns: + Tensor, output tensor. + + Examples: + >>> ResNet_SNN_GRAPH(ResidualBlock, + >>> [3, 4, 6, 3], + >>> [64, 256, 512, 1024], + >>> [256, 512, 1024, 2048], + >>> [1, 2, 2, 2], + >>> 10) + """ + + def __init__(self, block, layer_nums, in_channels, out_channels, strides, num_classes): + super(ResNet_SNN_GRAPH, self).__init__() + + if not len(layer_nums) == len(in_channels) == len(out_channels) == 4: + raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!") + + self.T = 5 + self.conv1 = _conv7x7(3, 64, stride=2) + self.bn1 = _bn(64) + self.ifnode1 = IFNode_GRAPH() + + self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same") + # layer_nums:[3, 4, 6, 3] + self.layer1_1 = self._make_layer_test1(block, in_channel=in_channels[0], + out_channel=out_channels[0], stride=strides[0]) + self.layer1_2 = self._make_layer_test2(block, out_channel=out_channels[0],) + self.layer1_3 = self._make_layer_test2(block, out_channel=out_channels[0],) + self.layer2_1 = self._make_layer_test1(block, in_channel=in_channels[1], + out_channel=out_channels[1], stride=strides[1]) + self.layer2_2 = self._make_layer_test2(block, out_channel=out_channels[1]) + self.layer2_3 = self._make_layer_test2(block, out_channel=out_channels[1]) + self.layer2_4 = self._make_layer_test2(block, out_channel=out_channels[1]) + self.layer3_1 = self._make_layer_test1(block, in_channel=in_channels[2], + out_channel=out_channels[2], stride=strides[2]) + self.layer3_2 = self._make_layer_test2(block, out_channel=out_channels[2]) + self.layer3_3 = self._make_layer_test2(block, out_channel=out_channels[2]) + self.layer3_4 = self._make_layer_test2(block, out_channel=out_channels[2]) 
+ self.layer3_5 = self._make_layer_test2(block, out_channel=out_channels[2]) + self.layer3_6 = self._make_layer_test2(block, out_channel=out_channels[2]) + self.layer4_1 = self._make_layer_test1(block, in_channel=in_channels[3], + out_channel=out_channels[3], stride=strides[3]) + self.layer4_2 = self._make_layer_test2(block, out_channel=out_channels[3]) + self.layer4_3 = self._make_layer_test2(block, out_channel=out_channels[3]) + + self.mean = ops.ReduceMean(keep_dims=True) + self.flatten = nn.Flatten() + self.end_point = _fc(out_channels[3], num_classes) + self.end_ifnode = IFNode_GRAPH(fire=False) + + + def _make_layer_test1(self, block, in_channel, out_channel, stride): + """ + Make stage network of ResNet. + + Args: + block (Cell): Resnet block. + in_channel (int): Input channel. + out_channel (int): Output channel. + stride (int): Stride size for the first convolutional layer. + Returns: + SequentialCell, the output layer. + """ + layers = [] + resnet_block = block(in_channel, out_channel, stride=stride) + layers.append(resnet_block) + return nn.SequentialCell(layers) + + def _make_layer_test2(self, block, out_channel): + """ + Make stage network of ResNet. + + Args: + block (Cell): Resnet block. + out_channel (int): Output channel. + Returns: + SequentialCell, the output layer. 
+ """ + layers = [] + resnet_block = block(out_channel, out_channel, stride=1) + layers.append(resnet_block) + return nn.SequentialCell(layers) + + def construct(self, x_in): + """ResNet SNN block with graph mode""" + out = x_in + v1 = v_end = 0.0 + # layer_nums:[3, 4, 6, 3] + v1_1_1 = v1_1_2 = v1_1_3 = v1_2_1 = v1_2_2 = v1_2_3 = v1_3_1 = v1_3_2 = v1_3_3 = 0.0 + v2_1_1 = v2_1_2 = v2_1_3 = v2_2_1 = v2_2_2 = v2_2_3 = v2_3_1 = v2_3_2 = v2_3_3 = v2_4_1 = v2_4_2 = v2_4_3 = 0.0 + v3_1_1 = v3_1_2 = v3_1_3 = v3_2_1 = v3_2_2 = v3_2_3 = v3_3_1 = v3_3_2 = v3_3_3 = 0.0 + v3_4_1 = v3_4_2 = v3_4_3 = v3_5_1 = v3_5_2 = v3_5_3 = v3_6_1 = v3_6_2 = v3_6_3 = 0.0 + v4_1_1 = v4_1_2 = v4_1_3 = v4_2_1 = v4_2_2 = v4_2_3 = v4_3_1 = v4_3_2 = v4_3_3 = 0.0 + + for _ in range(self.T): + x = self.conv1(x_in) + x = self.bn1(x) + x, v1 = self.ifnode1(x, v1) + + c1 = self.maxpool(x) + + c1_1, v1_1_1, v1_1_2, v1_1_3 = self.layer1_1((c1, v1_1_1, v1_1_2, v1_1_3)) + c1_2, v1_2_1, v1_2_2, v1_2_3 = self.layer1_2((c1_1, v1_2_1, v1_2_2, v1_2_3)) + c1_3, v1_3_1, v1_3_2, v1_3_3 = self.layer1_3((c1_2, v1_3_1, v1_3_2, v1_3_3)) + c2_1, v2_1_1, v2_1_2, v2_1_3 = self.layer2_1((c1_3, v2_1_1, v2_1_2, v2_1_3)) + c2_2, v2_2_1, v2_2_2, v2_2_3 = self.layer2_2((c2_1, v2_2_1, v2_2_2, v2_2_3)) + c2_3, v2_3_1, v2_3_2, v2_3_3 = self.layer2_3((c2_2, v2_3_1, v2_3_2, v2_3_3)) + c2_4, v2_4_1, v2_4_2, v2_4_3 = self.layer2_4((c2_3, v2_4_1, v2_4_2, v2_4_3)) + c3_1, v3_1_1, v3_1_2, v3_1_3 = self.layer3_1((c2_4, v3_1_1, v3_1_2, v3_1_3)) + c3_2, v3_2_1, v3_2_2, v3_2_3 = self.layer3_2((c3_1, v3_2_1, v3_2_2, v3_2_3)) + c3_3, v3_3_1, v3_3_2, v3_3_3 = self.layer3_3((c3_2, v3_3_1, v3_3_2, v3_3_3)) + c3_4, v3_4_1, v3_4_2, v3_4_3 = self.layer3_4((c3_3, v3_4_1, v3_4_2, v3_4_3)) + c3_5, v3_5_1, v3_5_2, v3_5_3 = self.layer3_5((c3_4, v3_5_1, v3_5_2, v3_5_3)) + c3_6, v3_6_1, v3_6_2, v3_6_3 = self.layer3_6((c3_5, v3_6_1, v3_6_2, v3_6_3)) + c4_1, v4_1_1, v4_1_2, v4_1_3 = self.layer4_1((c3_6, v4_1_1, v4_1_2, v4_1_3)) + c4_2, v4_2_1, v4_2_2, v4_2_3 
class ResidualBlock_PYNATIVE(nn.Cell):
    """
    Bottleneck residual block used by the PyNative-mode spiking ResNet.

    Main path: conv1x1 -> BN -> IFNode, conv3x3 -> BN -> IFNode, conv1x1 -> BN.
    The (optionally projected) input is then added and the sum goes through a
    final IFNode.

    Args:
        in_channel (int): Input channel.
        out_channel (int): Output channel.
        stride (int): Stride of the 3x3 convolution (and of the projection
            shortcut, when one is needed). Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ResidualBlock_PYNATIVE(3, 256, stride=2)
    """
    # Ratio between the block's output width and its bottleneck width.
    expansion = 4

    def __init__(self,
                 in_channel,
                 out_channel,
                 stride=1):
        super(ResidualBlock_PYNATIVE, self).__init__()
        self.stride = stride
        channel = out_channel // self.expansion

        self.conv1 = _conv1x1(in_channel, channel, stride=1)
        self.bn1 = _bn(channel)
        self.ifnode1 = IFNode_PYNATIVE()

        self.conv2 = _conv3x3(channel, channel, stride=stride)
        self.bn2 = _bn(channel)
        self.ifnode2 = IFNode_PYNATIVE()

        self.conv3 = _conv1x1(channel, out_channel, stride=1)
        self.bn3 = _bn(out_channel)

        # A projection shortcut is required whenever the main path changes the
        # spatial size (stride) or the channel count of the tensor.
        self.down_sample = stride != 1 or in_channel != out_channel
        self.down_sample_layer = None
        if self.down_sample:
            self.down_sample_layer = nn.SequentialCell([_conv1x1(in_channel, out_channel, stride),
                                                        _bn(out_channel)])

        self.ifnode3 = IFNode_PYNATIVE()

    def construct(self, x):
        """ResidualBlock with pynative mode"""
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.ifnode1(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.ifnode2(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.down_sample:
            identity = self.down_sample_layer(identity)
        out = out + identity
        out = self.ifnode3(out)

        return out


class ResNet_SNN_PYNATIVE(nn.Cell):
    """
    Spiking ResNet for PyNative mode.

    The same input is pushed through the network ``self.T`` times; the output
    of the last pass through ``end_ifnode`` divided by ``self.T`` is returned.

    Args:
        block (Cell): Block for network.
        layer_nums (list): Numbers of block in different layers.
        in_channels (list): Input channel in each layer.
        out_channels (list): Output channel in each layer.
        strides (list): Stride size in each layer.
        num_classes (int): The number of classes that the training images are belonging to.

    Returns:
        Tensor, output tensor.

    Raises:
        ValueError: If ``layer_nums``, ``in_channels`` or ``out_channels`` does
            not have exactly 4 entries.

    Examples:
        >>> ResNet_SNN_PYNATIVE(ResidualBlock_PYNATIVE,
        >>>                     [3, 4, 6, 3],
        >>>                     [64, 256, 512, 1024],
        >>>                     [256, 512, 1024, 2048],
        >>>                     [1, 2, 2, 2],
        >>>                     10)
    """

    def __init__(self,
                 block,
                 layer_nums,
                 in_channels,
                 out_channels,
                 strides,
                 num_classes):
        super(ResNet_SNN_PYNATIVE, self).__init__()

        if not len(layer_nums) == len(in_channels) == len(out_channels) == 4:
            raise ValueError("the length of layer_num, in_channels, out_channels list must be 4!")

        # Number of simulation time steps executed per forward call.
        self.T = 5
        self.conv1 = _conv7x7(3, 64, stride=2)
        self.bn1 = _bn(64)
        self.ifnode1 = IFNode_PYNATIVE()

        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, pad_mode="same")

        self.layer1 = self._make_layer(block, layer_nums[0], in_channel=in_channels[0],
                                       out_channel=out_channels[0], stride=strides[0])
        self.layer2 = self._make_layer(block, layer_nums[1], in_channel=in_channels[1],
                                       out_channel=out_channels[1], stride=strides[1])
        self.layer3 = self._make_layer(block, layer_nums[2], in_channel=in_channels[2],
                                       out_channel=out_channels[2], stride=strides[2])
        self.layer4 = self._make_layer(block, layer_nums[3], in_channel=in_channels[3],
                                       out_channel=out_channels[3], stride=strides[3])

        self.mean = ops.ReduceMean(keep_dims=True)
        self.flatten = nn.Flatten()
        self.end_point = _fc(out_channels[3], num_classes)
        self.end_ifnode = IFNode_PYNATIVE(fire=False)

    def construct(self, x_in):
        """ResNet SNN block with pynative mode"""
        out = x_in
        for _ in range(self.T):
            x = self.conv1(x_in)
            x = self.bn1(x)
            x = self.ifnode1(x)

            c1 = self.maxpool(x)

            c2 = self.layer1(c1)
            c3 = self.layer2(c2)
            c4 = self.layer3(c3)
            c5 = self.layer4(c4)

            out = self.mean(c5, (2, 3))
            out = self.flatten(out)
            out = self.end_point(out)
            out = self.end_ifnode(out)

        return out / self.T

    def reset_net(self):
        """
        Call ``reset()`` on every sub-cell of the network that provides one.

        The previous implementation only looked at direct children and tested
        ``isinstance(type(item), type(nn.SequentialCell()))``, which compares a
        class object against ``SequentialCell`` and is therefore always False.
        As a result only ``ifnode1`` and ``end_ifnode`` could ever be reset,
        while the IFNode cells nested inside ``layer1`` .. ``layer4`` were
        skipped. Walking ``cells_and_names()`` visits the whole cell tree.

        NOTE(review): ``reset`` is expected to come from ``IFNode_PYNATIVE``,
        which is defined outside this section - confirm it clears the state.
        """
        for _, cell in self.cells_and_names():
            # cells_and_names() also yields the network itself; skip it so a
            # subclass that defines ``reset`` cannot recurse into this method.
            if cell is not self and hasattr(cell, 'reset'):
                cell.reset()

    def _make_layer(self, block, layer_num, in_channel, out_channel, stride):
        """
        Make stage network of ResNet.

        Args:
            block (Cell): Resnet block.
            layer_num (int): Layer number.
            in_channel (int): Input channel.
            out_channel (int): Output channel.
            stride (int): Stride size for the first convolutional layer.
        Returns:
            SequentialCell, the output layer.

        Examples:
            >>> _make_layer(ResidualBlock_PYNATIVE, 3, 128, 256, 2)
        """
        # Only the first block of a stage changes channels / resolution; the
        # remaining ``layer_num - 1`` blocks keep the shape unchanged.
        layers = [block(in_channel, out_channel, stride=stride)]
        for _ in range(1, layer_num):
            layers.append(block(out_channel, out_channel, stride=1))

        return nn.SequentialCell(layers)


def snn_resnet50_graph(class_num=10):
    """Build the graph-mode spiking ResNet-50 with ``class_num`` output classes."""
    return ResNet_SNN_GRAPH(ResidualBlock_GRAPH,
                            [3, 4, 6, 3],
                            [64, 256, 512, 1024],
                            [256, 512, 1024, 2048],
                            [1, 2, 2, 2],
                            class_num)


def snn_resnet50_pynative(class_num=10):
    """Build the PyNative-mode spiking ResNet-50 with ``class_num`` output classes."""
    return ResNet_SNN_PYNATIVE(ResidualBlock_PYNATIVE,
                               [3, 4, 6, 3],
                               [64, 256, 512, 1024],
                               [256, 512, 1024, 2048],
                               [1, 2, 2, 2],
                               class_num)


# diff --git a/community/cv/snn/train.py b/community/cv/snn/train.py (new file mode 100644)
# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""train resnet."""
import os
import time

import mindspore as ms
import mindspore.nn as nn
import mindspore.ops as ops
from mindspore import Tensor
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, RunContext
from mindspore.communication.management import init, get_rank

from src.lr_generator import get_lr
from src.model_utils.config import config
from src.model_utils.moxing_adapter import moxing_wrapper
from src.model_utils.device_adapter import get_rank_id
from src.dataset import create_dataset_cifar10

ms.set_seed(1)


def _required_env_int(name):
    """Return environment variable ``name`` as an int; fail clearly if it is unset."""
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(f"environment variable {name} must be set when run_distribute is enabled")
    return int(value)


def set_parameter():
    """Configure the MindSpore execution context from ``config``.

    Selects graph or PyNative mode, and either binds the single configured
    device or sets up data-parallel training from the ``DEVICE_ID`` and
    ``RANK_SIZE`` environment variables.

    Raises:
        RuntimeError: If distributed training is requested but ``DEVICE_ID``
            or ``RANK_SIZE`` is not set in the environment.
    """
    mode = ms.GRAPH_MODE if config.mode_name == 'GRAPH' else ms.PYNATIVE_MODE
    ms.set_context(mode=mode, device_target=config.device_target, save_graphs=config.save_graphs)

    if not config.run_distribute:
        ms.set_context(device_id=config.device_id)
        return

    device_id = _required_env_int('DEVICE_ID')
    device_num = _required_env_int('RANK_SIZE')
    ms.set_context(device_id=device_id)
    ms.set_auto_parallel_context(device_num=device_num, parallel_mode=ms.ParallelMode.DATA_PARALLEL,
                                 gradients_mean=True)
    if config.all_reduce_fusion_config:
        ms.set_auto_parallel_context(all_reduce_fusion_config=config.all_reduce_fusion_config)

    init()


def init_weight(net):
    """Re-initialise conv / dense weights of ``net`` as selected in ``config``.

    Conv2d weights are reset only when ``config.conv_init`` is
    "XavierUniform"; Dense weights only when ``config.dense_init`` is
    "TruncatedNormal". Any other setting leaves the weights untouched.
    """
    for _, cell in net.cells_and_names():
        if isinstance(cell, nn.Conv2d):
            if config.conv_init == "XavierUniform":
                cell.weight.set_data(ms.common.initializer.initializer(ms.common.initializer.XavierUniform(),
                                                                       cell.weight.shape,
                                                                       cell.weight.dtype))
        if isinstance(cell, nn.Dense):
            if config.dense_init == "TruncatedNormal":
                cell.weight.set_data(ms.common.initializer.initializer(ms.common.initializer.TruncatedNormal(),
                                                                       cell.weight.shape,
                                                                       cell.weight.dtype))


def init_loss_scale():
    """Build the loss cell selected by ``config.loss_function``.

    Returns:
        Cell, ``SoftmaxCrossEntropyWithLogits`` or ``MSELoss``.

    Raises:
        ValueError: If ``config.loss_function`` is neither
            "SoftmaxCrossEntropy" nor "MSE" (previously this surfaced as an
            ``UnboundLocalError`` on the return statement).
    """
    if config.loss_function == "SoftmaxCrossEntropy":
        return nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    if config.loss_function == "MSE":
        return nn.MSELoss(reduction='mean')
    raise ValueError(f'config.loss_function: {config.loss_function} is not supported')


def init_group_params(net):
    """Split trainable parameters into weight-decayed and non-decayed groups.

    Parameters whose name contains 'beta', 'gamma' or 'bias' are excluded from
    weight decay. The trailing ``order_params`` entry lists the parameters in
    the network's own order.
    """
    decayed_params = []
    no_decayed_params = []
    for param in net.trainable_params():
        if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
            decayed_params.append(param)
        else:
            no_decayed_params.append(param)

    group_params = [{'params': decayed_params, 'weight_decay': config.weight_decay},
                    {'params': no_decayed_params},
                    {'order_params': net.trainable_params()}]
    return group_params


def set_save_ckpt_dir():
    """Return the checkpoint directory, with a per-rank sub-directory when distributed.

    The rank sub-directory is appended with ``os.path.join`` so that a
    ``config.checkpoint_path`` without a trailing slash no longer yields a
    fused name such as ``checkpointckpt_0/``. The trailing separator of the
    original result is preserved.
    """
    ckpt_save_dir = os.path.join(config.output_path, config.checkpoint_path)
    if config.run_distribute:
        rank = get_rank_id() if config.enable_modelarts else get_rank()
        # The empty last component makes os.path.join end the path with a separator.
        ckpt_save_dir = os.path.join(ckpt_save_dir, "ckpt_" + str(rank), "")
    return ckpt_save_dir


class InternalCallbackParam(dict):
    """Internal callback object's parameters.

    A dict whose items can also be read and written as attributes.
    """

    def __getattr__(self, key):
        return self[key]

    def __setattr__(self, key, value):
        self[key] = value


class AverageMeter:
    """Computes and stores the average and current value"""

    def __init__(self, name, fmt=':f', tb_writer=None):
        self.name = name
        self.fmt = fmt
        self.tb_writer = tb_writer
        self.cur_step = 1
        self.reset()

    def reset(self):
        """Clear the running statistics (``cur_step`` is deliberately kept)."""
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        """Record ``val`` observed ``n`` times and refresh the running average."""
        self.val = val
        self.sum += val * n
        self.count += n
        # Guard against update(..., n=0) on a freshly reset meter.
        self.avg = self.sum / self.count if self.count else 0
        if self.tb_writer is not None:
            self.tb_writer.add_scalar(self.name, self.val, self.cur_step)
        self.cur_step += 1

    def __str__(self):
        fmtstr = '{name}:{avg' + self.fmt + '}'
        return fmtstr.format(**self.__dict__)


def snn_model_build():
    """Build the SNN selected by ``config.net_name`` for the configured mode.

    Returns:
        Cell, the spiking ResNet-50 (with weights re-initialised by
        ``init_weight``) or the spiking LeNet.

    Raises:
        ValueError: If ``config.net_name`` is neither "resnet50" nor "lenet".
    """
    if config.net_name == "resnet50":
        if config.mode_name == 'GRAPH':
            from src.snn_resnet import snn_resnet50_graph as snn_resnet50
        else:
            from src.snn_resnet import snn_resnet50_pynative as snn_resnet50
        net = snn_resnet50(class_num=config.class_num)
        init_weight(net=net)
    elif config.net_name == "lenet":
        if config.mode_name == 'GRAPH':
            from src.snn_lenet import snn_lenet_graph as snn_lenet
        else:
            from src.snn_lenet import snn_lenet_pynative as snn_lenet
        net = snn_lenet(num_class=config.class_num)
    else:
        # The branch tests config.net_name, so report that same field.
        raise ValueError(f'config.net_name: {config.net_name} is not supported')
    return net


def _build_optimizer(net, step_size):
    """Create the optimizer selected by ``config.optimizer`` for ``net``."""
    if config.optimizer == "Momentum":
        lr = get_lr(lr_init=config.lr_init, lr_end=config.lr_end, lr_max=config.lr_max,
                    warmup_epochs=config.warmup_epochs, total_epochs=config.epoch_size, steps_per_epoch=step_size,
                    lr_decay_mode=config.lr_decay_mode)
        lr = ms.Tensor(lr)
        group_params = init_group_params(net)
        return nn.Momentum(group_params, lr, config.momentum, loss_scale=config.loss_scale)
    if config.optimizer == "Adam":
        return nn.Adam(net.trainable_params(), config.lr_init, loss_scale=config.loss_scale)
    raise ValueError(f'config.optimizer: {config.optimizer} is not supported')


@moxing_wrapper()
def train_net():
    """train net: resnet50_snn or lenet_snn

    Raises:
        ValueError: If ``config.optimizer``, ``config.loss_function`` or
            ``config.net_name`` holds an unsupported value.
    """
    set_parameter()
    dataset = create_dataset_cifar10(data_path=config.data_path, do_train=True, batch_size=config.batch_size)
    step_size = dataset.get_dataset_size()
    net = snn_model_build()

    if config.pre_trained:
        ckpt_param_dict = ms.load_checkpoint(config.pre_trained)
        ms.load_param_into_net(net, ckpt_param_dict)

    # define opt
    opt = _build_optimizer(net, step_size)

    loss_fn = init_loss_scale()
    network_with_loss = nn.WithLossCell(net, loss_fn)
    network_train = nn.TrainOneStepCell(network_with_loss, opt, sens=config.loss_scale)
    network_train.set_train(True)
    loss_meter = AverageMeter('loss')

    # define callbacks
    ckpt_save_dir = set_save_ckpt_dir()

    if config.save_checkpoint:
        # ModelCheckpoint is driven by hand because training uses a custom loop.
        cb_params = InternalCallbackParam()
        cb_params.train_network = network_train
        cb_params.epoch_num = config.epoch_size
        cb_params.cur_epoch_num = 1
        run_context = RunContext(cb_params)
        config_ck = CheckpointConfig(save_checkpoint_steps=config.save_checkpoint_epochs * step_size,
                                     keep_checkpoint_max=config.keep_checkpoint_max)
        ckpt_cb = ModelCheckpoint(prefix=config.net_name, directory=ckpt_save_dir, config=config_ck)
        ckpt_cb.begin(run_context)

    # MSE needs one-hot targets; build the op and its constants once, not per step.
    use_one_hot = config.loss_function == "MSE"
    if use_one_hot:
        onehot = ops.OneHot()
        on_value = Tensor(1.0, ms.float32)
        off_value = Tensor(0.0, ms.float32)

    print("Start train resnet, the first epoch will be slower because of the graph compilation.", flush=True)
    t_end = time.time()
    for epoch_idx in range(config.epoch_size):
        for step_idx, data in enumerate(dataset.create_dict_iterator()):
            images = data["image"]
            label = data["label"]
            if use_one_hot:
                label = onehot(label, config.class_num, on_value, off_value)
            step_loss = network_train(images, label)
            loss_meter.update(step_loss.asnumpy())
            if config.mode_name == 'PYNATIVE':
                # The PyNative networks expose reset_net(); call it after every batch.
                net.reset_net()

            if config.save_checkpoint:
                cb_params.cur_epoch_num = epoch_idx + 1
                cb_params.cur_step_num = step_idx + 1 + epoch_idx * step_size
                cb_params.batch_num = step_size
                ckpt_cb.step_end(run_context)

        time_used = (time.time() - t_end) * 1000
        # Always report a true per-step figure; the first epoch used to print
        # the whole epoch time under the "per step time" label.
        per_step_time = time_used / step_size
        print('epoch: {}, step: {}, loss is {}, epoch time: {:.3f}ms, per step time: {:.3f}ms'.format(
            epoch_idx + 1, step_size, loss_meter, time_used, per_step_time), flush=True)
        t_end = time.time()
        loss_meter.reset()


if __name__ == '__main__':
    train_net()