diff --git a/research/cv/swin_transformer/README_CN.md b/research/cv/swin_transformer/README_CN.md
index a207496ab248c4d7c78bf4e0761c5fa178303670..a772baa83412fb7c7d39b7c4ea8443e71907bcf4 100644
--- a/research/cv/swin_transformer/README_CN.md
+++ b/research/cv/swin_transformer/README_CN.md
@@ -43,7 +43,7 @@ SwinTransformer is a novel vision Transformer that can serve as a general-purpose backbone for computer vision
 - Note: the data is processed in dataset.py.
 - Download the dataset; the directory structure is as follows:
 
- ```text
+```text
 └─dataset
     ├─train                 # training dataset
     └─val                   # evaluation dataset
@@ -77,7 +77,9 @@ SwinTransformer is a novel vision Transformer that can serve as a general-purpose backbone for computer vision
     ├── scripts
         ├──run_standalone_train_ascend.sh   // shell script for single-device training on Ascend 910
         ├──run_distribute_train_ascend.sh   // shell script for multi-device training on Ascend 910
-        ├──run_eval_ascend.sh               // evaluation script
+        ├──run_distribute_train_gpu.sh      // shell script for multi-device training on GPU
+        ├──run_eval_ascend.sh               // evaluation script for Ascend
+        ├──run_eval_gpu.sh                  // evaluation script for GPU
         ├──run_infer_310.sh                 // shell script for Ascend 310 inference
     ├── src
         ├──configs                          // configuration files for SwinTransformer
@@ -163,7 +165,7 @@ SwinTransformer is a novel vision Transformer that can serve as a general-purpose backbone for computer vision
   ```bash
   # launch single-device training with python
-  python train.py --device_id 0 --device_target Ascend --swin_config ./src/configs/swin_tiny_patch4_window7_224.yaml > train.log 2>&1 &
+  python train.py --device_id 0 --device_target Ascend --swin_config ./src/configs/ascend_swin_tiny_patch4_window7_224.yaml > train.log 2>&1 &
 
   # launch single-device training with the shell script
   bash ./scripts/run_standalone_train_ascend.sh [DEVICE_ID] [CONFIG_PATH]
@@ -172,7 +174,7 @@ SwinTransformer is a novel vision Transformer that can serve as a general-purpose backbone for computer vision
   bash ./scripts/run_distribute_train_ascend.sh [RANK_TABLE_FILE] [CONFIG_PATH]
 
   # launch the single-device evaluation example with python
-  python eval.py --device_id 0 --device_target Ascend --swin_config ./src/configs/swin_tiny_patch4_window7_224.yaml --pretrained ./ckpt_0/swin_tiny_patch4_window7_224.ckpt > ./eval.log 2>&1 &
+  python eval.py --device_id 0 --device_target Ascend --swin_config ./src/configs/ascend_swin_tiny_patch4_window7_224.yaml --pretrained ./ckpt_0/swin_tiny_patch4_window7_224.ckpt > ./eval.log 2>&1 &
 
   # launch the single-device evaluation example with the shell script
   bash ./scripts/run_eval_ascend.sh [RANK_TABLE_FILE] [CONFIG_PATH]
@@ -187,6 +189,19 @@ SwinTransformer is a novel vision Transformer that can serve as a general-purpose backbone for computer vision
 
 [hccl tools](https://gitee.com/mindspore/models/tree/master/utils/hccl_tools)
 
+- Running on GPU
+
+  ```bash
+  # launch multi-device training with the shell script
+  bash ./scripts/run_distribute_train_gpu.sh [CONFIG_PATH] [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)]
+
+  # launch single-device training with the shell script
+  bash ./scripts/run_standalone_train_gpu.sh [CONFIG_PATH] [DEVICE_ID]
+
+  # launch single-device evaluation with the shell script
+  bash ./scripts/run_eval_gpu.sh [DEVICE_ID] [CONFIG_PATH] [CHECKPOINT_PATH]
+  ```
+
 ## Export Process
 
 ### Export
@@ -221,22 +236,22 @@ SwinTransformer is a novel vision Transformer that can serve as a general-purpose backbone for computer vision
 
 #### SwinTransformer on ImageNet-1k
 
-| Parameters | Ascend |
-| -------------------------- | ----------------------------------------------------------- |
-| Model | SwinTransformer |
-| Model version | swin_tiny_patch4_window7_224 |
-| Resource | Ascend 910 |
-| Upload date | 2021-10-25 |
-| MindSpore version | 1.3.0 |
-| Dataset | ImageNet-1k Train, 1,281,167 images in total |
-| Training parameters | epoch=300, batch_size=128 |
-| Optimizer | AdamWeightDecay |
-| Loss function | SoftTargetCrossEntropy |
-| Loss | 0.8279 |
-| Output | probability |
-| Classification accuracy | 8 cards: top1: 81.07%, top5: 95.31% |
-| Speed | 8 cards: 624.124 ms/step |
-| Total training time | 79h55min08s (run on ModelArts) |
+| Parameters | Ascend | GPU |
+| -------------------------- | ----------------------------------------------------------- | ----------------------------------------------------------- |
+| Model | SwinTransformer | SwinTransformer |
+| Model version | swin_tiny_patch4_window7_224 | swin_tiny_patch4_window7_224 |
+| Resource | Ascend 910 | GeForce RTX 3090 * 8 |
+| Upload date | 2021-10-25 | 2022-05-28 |
+| MindSpore version | 1.3.0 | 1.6.1 |
+| Dataset | ImageNet-1k Train, 1,281,167 images in total | ImageNet-1k Train, 1,281,167 images in total |
+| Training parameters | epoch=300, batch_size=128 | epoch=300, batch_size=128 |
+| Optimizer | AdamWeightDecay | AdamWeightDecay |
+| Loss function | SoftTargetCrossEntropy | SoftTargetCrossEntropy |
+| Loss | 0.8279 | |
+| Output | probability | probability |
+| Classification accuracy | 8 cards: top1: 81.07%, top5: 95.31% | 8 cards: top1: 80.65%, top5: 95.38% |
+| Speed | 8 cards: 624.124 ms/step | 8 cards: 4323 ms/step |
+| Total training time | 79h55min08s (run on ModelArts) | |
 
 ### Inference Performance
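For reference, the new GPU entry points documented above can be exercised with the config file added in this PR; the device numbers and the checkpoint path below are illustrative, not mandated by the patch:

```bash
# distributed training on 8 GPUs with the new GPU config
bash ./scripts/run_distribute_train_gpu.sh ./src/configs/gpu_swin_tiny_patch4_window7_224.yaml 8 0,1,2,3,4,5,6,7

# single-device training on GPU 0
bash ./scripts/run_standalone_train_gpu.sh ./src/configs/gpu_swin_tiny_patch4_window7_224.yaml 0

# single-device evaluation of a trained checkpoint (path is hypothetical)
bash ./scripts/run_eval_gpu.sh 0 ./src/configs/gpu_swin_tiny_patch4_window7_224.yaml ./ckpt_0/swin_tiny_patch4_window7_224.ckpt
```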
diff --git a/research/cv/swin_transformer/scripts/run_distribute_train_gpu.sh b/research/cv/swin_transformer/scripts/run_distribute_train_gpu.sh
new file mode 100644
index 0000000000000000000000000000000000000000..0a70441d148bd0f1542e6883d722b0ace7f1df04
--- /dev/null
+++ b/research/cv/swin_transformer/scripts/run_distribute_train_gpu.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+if [ $# -lt 3 ]
+then
+    echo "Usage: bash ./scripts/run_distribute_train_gpu.sh [CONFIG_PATH] [DEVICE_NUM] [VISIBLE_DEVICES(0,1,2,3,4,5,6,7)]"
+exit 1
+fi
+BASEPATH=$(cd "`dirname $0`" || exit; pwd)
+export CONFIG_PATH=$1
+export CUDA_VISIBLE_DEVICES="$3"
+export RANK_SIZE=$2
+export DEVICE_NUM=$2
+export DEPLOY_MODE=0
+# export LD_LIBRARY_PATH="/usr/local/cuda-11.1/extras/CUPTI/lib64"
+export GE_USE_STATIC_MEMORY=1
+rm -rf train_gpu
+mkdir ./train_gpu
+cd ./train_gpu || exit
+env > env.log
+# pip show mindspore_gpu
+# python -c "import mindspore;mindspore.run_check()"
+mpirun --allow-run-as-root -n $2 \
+    python ${BASEPATH}/../train.py --device_target="GPU" \
+    --swin_config $CONFIG_PATH \
+    --start_epoch 0 \
+    --epochs 350 > log.txt 2>&1 &
+cd ../
+
+
diff --git a/research/cv/swin_transformer/scripts/run_eval_gpu.sh b/research/cv/swin_transformer/scripts/run_eval_gpu.sh
new file mode 100644
index 0000000000000000000000000000000000000000..26717f43916359aca44f8aa44ee29971dcedda0c
--- /dev/null
+++ b/research/cv/swin_transformer/scripts/run_eval_gpu.sh
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+if [ $# -lt 3 ]
+then
+    echo "Usage: bash ./scripts/run_eval_gpu.sh [DEVICE_ID] [CONFIG_PATH] [CHECKPOINT_PATH]"
+exit 1
+fi
+
+export DEVICE_ID=$1
+CONFIG_PATH=$2
+CHECKPOINT_PATH=$3
+export RANK_SIZE=1
+export DEVICE_NUM=1
+
+rm -rf evaluation_gpu
+mkdir ./evaluation_gpu
+cd ./evaluation_gpu || exit
+echo "start evaluating for device id $DEVICE_ID"
+env > env.log
+python ../eval.py --device_target=GPU --swin_config=$CONFIG_PATH --pretrained=$CHECKPOINT_PATH > eval.log 2>&1 &
+cd ../
+
diff --git a/research/cv/swin_transformer/scripts/run_infer_310.sh b/research/cv/swin_transformer/scripts/run_infer_310.sh
index 6669913e60cdbec79e5467bdbf7f1dfa28f37226..fad0057cc20ea8e0a6311fa38ecaf59cfbdbf5e2 100644
--- a/research/cv/swin_transformer/scripts/run_infer_310.sh
+++ b/research/cv/swin_transformer/scripts/run_infer_310.sh
@@ -88,8 +88,7 @@ function infer()
     mkdir result_Files
     mkdir time_Result
 
-    ../ascend310_infer/out/main --mindir_path=$model --dataset_name=$dataset_name --input0_path=$dataset_path .\
-    --device_id=$device_id &> infer.log
+    ../ascend310_infer/src/main --mindir_path=$model --dataset_name=$dataset_name --input0_path=$dataset_path --device_id=$device_id &> infer.log
 }
 
 function cal_acc()
diff --git a/research/cv/swin_transformer/scripts/run_standalone_train_gpu.sh b/research/cv/swin_transformer/scripts/run_standalone_train_gpu.sh
new file mode 100644
index 0000000000000000000000000000000000000000..52395828d1886509dea19bb82b3505ec7551d51c
--- /dev/null
+++ b/research/cv/swin_transformer/scripts/run_standalone_train_gpu.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+if [ $# -lt 2 ]
+then
+    echo "Usage: bash ./scripts/run_standalone_train_gpu.sh [CONFIG_PATH] [DEVICE_ID]"
+exit 1
+fi
+BASEPATH=$(cd "`dirname $0`" || exit; pwd)
+export CONFIG_PATH=$1
+export CUDA_VISIBLE_DEVICES="$2"
+export RANK_SIZE=1
+export DEVICE_NUM=1
+export DEPLOY_MODE=0
+# export LD_LIBRARY_PATH="/usr/local/cuda-11.1/extras/CUPTI/lib64"
+export GE_USE_STATIC_MEMORY=1
+rm -rf train_gpu_alone
+mkdir ./train_gpu_alone
+cd ./train_gpu_alone || exit
+env > env.log
+# pip show mindspore_gpu
+# python -c "import mindspore;mindspore.run_check()"
+nohup python ${BASEPATH}/../train.py --device_target="GPU" \
+    --swin_config $CONFIG_PATH \
+    --start_epoch 0 \
+    --epochs 350 > log.txt 2>&1 &
+cd ../
+
+
diff --git a/research/cv/swin_transformer/src/configs/swin_tiny_patch4_window7_224.yaml b/research/cv/swin_transformer/src/configs/ascend_swin_tiny_patch4_window7_224.yaml
similarity index 100%
rename from research/cv/swin_transformer/src/configs/swin_tiny_patch4_window7_224.yaml
rename to research/cv/swin_transformer/src/configs/ascend_swin_tiny_patch4_window7_224.yaml
diff --git a/research/cv/swin_transformer/src/configs/gpu_swin_tiny_patch4_window7_224.yaml b/research/cv/swin_transformer/src/configs/gpu_swin_tiny_patch4_window7_224.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..aebcb031602ab447a1cdc10755180afe4bab5299
--- /dev/null
+++ b/research/cv/swin_transformer/src/configs/gpu_swin_tiny_patch4_window7_224.yaml
@@ -0,0 +1,53 @@
+# Architecture
+arch: swin_tiny_patch4_window7_224
+
+# ===== Dataset ===== #
+data_url: ./data/imagenet
+set: ImageNet
+num_classes: 1000
+mix_up: 0.8
+cutmix: 1.0
+auto_augment: rand-m9-mstd0.5-inc1
+interpolation: bicubic
+re_prob: 0.25
+re_mode: pixel
+re_count: 1
+mixup_prob: 1.
+switch_prob: 0.5
+mixup_mode: batch
+
+
+# ===== Learning Rate Policy ======== #
+optimizer: adamw
+base_lr: 0.0005
+warmup_lr: 0.000007
+min_lr: 0.00006
+lr_scheduler: cosine_lr
+warmup_length: 20
+nonlinearity: GELU
+
+
+# ===== Network training config ===== #
+amp_level: O1
+keep_bn_fp32: True
+beta: [ 0.9, 0.999 ]
+clip_global_norm_value: 5.
+is_dynamic_loss_scale: True
+epochs: 350
+label_smoothing: 0.1
+loss_scale: 1024
+weight_decay: 0.05
+momentum: 0.9
+batch_size: 128
+
+# ===== Hardware setup ===== #
+num_parallel_workers: 4
+device_target: GPU
+
+# ===== Model config ===== #
+drop_path_rate: 0.2
+embed_dim: 96
+depths: [ 2, 2, 6, 2 ]
+num_heads: [ 3, 6, 12, 24 ]
+window_size: 7
+image_size: 224
\ No newline at end of file
diff --git a/research/cv/swin_transformer/src/data/imagenet.py b/research/cv/swin_transformer/src/data/imagenet.py
index f1883aef7f9f9ef819e68ef6b960a3e5e99e919a..babc060c8ef564fe7b64874fc93f6938e3d176eb 100644
--- a/research/cv/swin_transformer/src/data/imagenet.py
+++ b/research/cv/swin_transformer/src/data/imagenet.py
@@ -1,4 +1,4 @@
-# Copyright 2021-2022 Huawei Technologies Co., Ltd
+# Copyright 2022 Huawei Technologies Co., Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -19,8 +19,9 @@ import os
 
 import mindspore.common.dtype as mstype
 import mindspore.dataset as ds
-import mindspore.dataset.transforms as C
-import mindspore.dataset.vision as vision
+import mindspore.dataset.transforms.c_transforms as C
+import mindspore.dataset.vision.c_transforms as vision
+import mindspore.dataset.vision.py_transforms as py_vision
 from mindspore.dataset.vision.utils import Inter
 
 from src.data.augment.auto_augment import _pil_interp, rand_augment_transform
@@ -62,16 +63,14 @@ def create_dataset_imagenet(dataset_dir, args, repeat_num=1, training=True):
     Returns:
         dataset
     """
-
     device_num, rank_id = _get_rank_info()
     shuffle = bool(training)
     if device_num == 1 or not training:
         data_set = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=args.num_parallel_workers, shuffle=shuffle)
     else:
-        data_set = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=args.num_parallel_workers, shuffle=shuffle,
+        data_set = ds.ImageFolderDataset(dataset_dir, shuffle=shuffle, num_parallel_workers=args.num_parallel_workers,
                                          num_shards=device_num, shard_id=rank_id)
-
     image_size = args.image_size
 
     # define map operations
@@ -93,12 +92,12 @@ def create_dataset_imagenet(dataset_dir, args, repeat_num=1, training=True):
             vision.RandomCropDecodeResize(image_size, scale=(0.08, 1.0), ratio=(3 / 4, 4 / 3),
                                           interpolation=Inter.BICUBIC),
             vision.RandomHorizontalFlip(prob=0.5),
-            vision.ToPIL()
+            py_vision.ToPIL()
         ]
         transform_img += [rand_augment_transform(auto_augment, aa_params)]
         transform_img += [
-            vision.ToTensor(),
-            vision.Normalize(mean=mean, std=std, is_hwc=False),
+            py_vision.ToTensor(),
+            py_vision.Normalize(mean=mean, std=std),
             RandomErasing(args.re_prob, mode=args.re_mode, max_count=args.re_count)
         ]
     else:
@@ -110,24 +109,23 @@ def create_dataset_imagenet(dataset_dir, args, repeat_num=1, training=True):
                 vision.Decode(),
                 vision.Resize(int(256 / 224 * image_size), interpolation=Inter.BICUBIC),
                 vision.CenterCrop(image_size),
-                vision.Normalize(mean=mean, std=std, is_hwc=True),
+                vision.Normalize(mean=mean, std=std),
                 vision.HWC2CHW()
             ]
         else:
             transform_img = [
                 vision.Decode(),
                 vision.Resize(int(image_size), interpolation=Inter.BICUBIC),
-                vision.Normalize(mean=mean, std=std, is_hwc=True),
+                vision.Normalize(mean=mean, std=std),
                 vision.HWC2CHW()
             ]
 
     transform_label = C.TypeCast(mstype.int32)
-
     data_set = data_set.map(input_columns="image", num_parallel_workers=args.num_parallel_workers,
                             operations=transform_img)
     data_set = data_set.map(input_columns="label", num_parallel_workers=args.num_parallel_workers,
                             operations=transform_label)
-    if (args.mix_up > 0. or args.cutmix > 0.) and not training:
+    if (args.mix_up > 0. or args.cutmix > 0.) and not training:  # if mixup/cutmix is used, one-hot the val labels too
         one_hot = C.OneHot(num_classes=args.num_classes)
         data_set = data_set.map(input_columns="label", num_parallel_workers=args.num_parallel_workers,
@@ -135,7 +133,6 @@ def create_dataset_imagenet(dataset_dir, args, repeat_num=1, training=True):
 
     # apply batch operations
     data_set = data_set.batch(args.batch_size, drop_remainder=True, num_parallel_workers=args.num_parallel_workers)
-
     if (args.mix_up > 0. or args.cutmix > 0.) and training:
         mixup_fn = Mixup(
             mixup_alpha=args.mix_up, cutmix_alpha=args.cutmix, cutmix_minmax=None,
@@ -144,10 +141,12 @@ def create_dataset_imagenet(dataset_dir, args, repeat_num=1, training=True):
         data_set = data_set.map(operations=mixup_fn, input_columns=["image", "label"],
                                 num_parallel_workers=args.num_parallel_workers)
 
-    # apply dataset repeat operation
     data_set = data_set.repeat(repeat_num)
+    # ds.config.set_auto_num_workers(True)
+    # ds.config.set_prefetch_size(512)
+    # ds.config.set_enable_shared_mem(True)
 
     return data_set
diff --git a/research/cv/swin_transformer/src/tools/get_misc.py b/research/cv/swin_transformer/src/tools/get_misc.py
index cd074eeae3178a725644c4c04aba06a81de7fb7c..73ae63120028bca3a5f0acf10182af5861b5b849 100644
--- a/research/cv/swin_transformer/src/tools/get_misc.py
+++ b/research/cv/swin_transformer/src/tools/get_misc.py
@@ -114,7 +114,7 @@ def get_train_one_step(args, net_with_loss, optimizer):
                                                     scale_window=2000)
     else:
         print(f"=> Using FixedLossScaleUpdateCell, loss_scale_value:{args.loss_scale}")
-        scale_sense = nn.wrap.FixedLossScaleUpdateCell(loss_scale_value=args.loss_cale)
+        scale_sense = nn.wrap.FixedLossScaleUpdateCell(loss_scale_value=args.loss_scale)
         net_with_loss = TrainClipGrad(net_with_loss, optimizer, scale_sense=scale_sense,
                                       clip_global_norm_value=args.clip_global_norm_value,
                                       use_global_norm=True)
diff --git a/research/cv/swin_transformer/train.py b/research/cv/swin_transformer/train.py
index 595c750b17f72d6451eb30a04d0e92a53c7fba40..d77f7a247cd586225b7b323373758ad796e56b5f 100644
--- a/research/cv/swin_transformer/train.py
+++ b/research/cv/swin_transformer/train.py
@@ -37,7 +37,7 @@ def main():
         1: context.PYNATIVE_MODE
     }
     context.set_context(mode=mode[args.graph_mode], device_target=args.device_target)
-    context.set_context(enable_graph_kernel=False)
+    context.set_context(enable_graph_kernel=True)
     if args.device_target == "Ascend":
         context.set_context(enable_auto_mixed_precision=True)
     rank = set_device(args)
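The new training scripts carry commented-out sanity checks (`pip show mindspore_gpu`, `mindspore.run_check()`). A minimal pre-launch check, assuming the MindSpore 1.6.1 GPU wheel listed in the performance table, might be:

```bash
# confirm the GPU package is installed and the backend initializes on this machine
pip show mindspore-gpu
python -c "import mindspore; mindspore.run_check()"
```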