diff --git a/official/cv/resnet/README_CN.md b/official/cv/resnet/README_CN.md
index d84ed6e0067fc3e4afac45439284c755f444c957..538968dcdce434ce1f6418638d6a410b26d483ce 100644
--- a/official/cv/resnet/README_CN.md
+++ b/official/cv/resnet/README_CN.md
@@ -775,12 +775,12 @@ Total data: 50000, top1 accuracy: 0.76844, top5 accuracy: 0.93522.
 
 ## 训练过程
 
-| **算法** | SimQAT | SLB | SCOP |
+| **算法** | SimQAT | SCOP | SLB |
 | --------- | ------ | --- | ---- |
 | **支持的后端** | GPU | GPU、Ascend | GPU |
 | **是否支持预训练** | 支持加载预训练ckpt | 必须提供预训练ckpt | 算法原理上无法复用原ckpt，无法加载预训练ckpt |
 | **是否支持续训练** | 支持 | 支持 | 支持 |
-| **是否支持多卡训练** | 支持 | 不支持 | 支持 |
+| **是否支持多卡训练** | 支持 | 支持 | 不支持 |
 
 - 预训练是指先不应用算法，先训练收敛一个全精度的网络。预训练获得的checkpoint文件被用于后续应用算法后的训练。
 - 续训练是指应用算法后训练网络，在训练过程中中断训练，后续从中断处的ckpt继续进行训练。
@@ -830,6 +830,26 @@ cd ./golden_stick/scripts/
 bash run_distribute_train_gpu.sh ../pruner/scop/ ../pruner/scop/resnet50_cifar10_config.yaml /path/to/dataset
 ```
 
+### Ascend处理器环境运行
+
+```text
+# 分布式训练
+cd ./golden_stick/scripts/
+# PYTHON_PATH 表示需要应用的算法的'train.py'脚本所在的目录。
+bash run_distribute_train.sh [RANK_TABLE_FILE] [PYTHON_PATH] [CONFIG_PATH] [DATASET_PATH] [CKPT_TYPE](optional) [CKPT_PATH](optional)
+
+# 分布式训练示例(SCOP算法使用多卡训练)
+bash run_distribute_train.sh /path/to/rank_table_file ../pruner/scop/ ../pruner/scop/resnet50_cifar10_config.yaml /path/to/dataset
+
+# 单机训练
+cd ./golden_stick/scripts/
+# PYTHON_PATH 表示需要应用的算法的'train.py'脚本所在的目录。
+bash run_standalone_train.sh [PYTHON_PATH] [CONFIG_FILE] [DATASET_PATH] [CKPT_TYPE](optional) [CKPT_PATH](optional)
+
+# 单机训练示例(SCOP算法使用单卡训练)
+bash run_standalone_train.sh ../pruner/scop/ ../pruner/scop/resnet50_cifar10_config.yaml /path/to/dataset
+```
+
 ## 评估过程
 
 ### GPU处理器环境运行
@@ -850,10 +870,27 @@ bash run_eval_gpu.sh ../quantization/simqat/ ../quantization/simqat/resnet50_cif
 bash run_eval_gpu.sh ../quantization/slb/ ../quantization/slb/resnet18_cifar10_config.yaml /path/to/dataset /path/to/ckpt
 ```
 
+### Ascend处理器环境运行
+
+```text
+# 评估
+cd ./golden_stick/scripts/
+# PYTHON_PATH 表示需要应用的算法的'eval.py'脚本所在的目录。
+bash run_eval_ascend.sh [PYTHON_PATH] [CONFIG_FILE] [DATASET_PATH] [CHECKPOINT_PATH]
+```
+
+```text
+# 评估示例
+cd ./golden_stick/scripts/
+bash run_eval_ascend.sh ../pruner/scop/ ../pruner/scop/resnet50_cifar10_config.yaml /path/to/dataset /path/to/ckpt
+```
+
 ### 结果
 
 评估结果保存在示例路径中，文件夹名为“eval”。您可在此路径下的日志找到如下结果：
 
+#### GPU结果
+
 - 使用SimQAT算法量化ResNet50，并使用CIFAR-10数据集评估：
 
 ```text
@@ -884,6 +921,14 @@ result:{'top_1_accuracy': 0.9503205128205128, 'top_5_accuracy': 0.99669471153846
 result:{'top_1_accuracy': 0.9485176282051282, 'top_5_accuracy': 0.9965945512820513} ckpt=~/resnet18_cifar10/train_parallel/resnet-100_1562.ckpt
 ```
 
+#### Ascend结果
+
+- 使用SCOP算法剪枝ResNet50，并使用CIFAR-10数据集评估：
+
+```text
+result:{'top_1_accuracy': 0.928385416666666} prune_rate=0.45 ckpt=~/resnet50_cifar10/train_parallel0/resnet-400_195.ckpt
+```
+
 # 模型描述
 
 ## 性能
diff --git a/official/cv/resnet/golden_stick/pruner/scop/resnet50_cifar10_config.yaml b/official/cv/resnet/golden_stick/pruner/scop/resnet50_cifar10_config.yaml
index 8c50785bf7c7e73d5e6190f16770f3208484867e..22ae3efe9efd9441ae2b903529f2ac51406f34f0 100644
--- a/official/cv/resnet/golden_stick/pruner/scop/resnet50_cifar10_config.yaml
+++ b/official/cv/resnet/golden_stick/pruner/scop/resnet50_cifar10_config.yaml
@@ -23,8 +23,8 @@ batch_size: 32
 loss_scale: 1024
 momentum: 0.9
 weight_decay: 0.0001
-epoch_size: 90
-epochs_ft: 400
+epoch_kf: 90
+epoch_ft: 400
 pretrain_epoch_size: 0
 save_checkpoint: True
 save_checkpoint_epochs: 5
@@ -32,12 +32,10 @@ keep_checkpoint_max: 10
 warmup_epochs: 5
 lr_decay_mode: "poly"
 lr_init: 0.01
-lr_end: 0.00001
-lr_max: 0.1
-lars_epsilon: 0.0
-lars_coefficient: 0.001
-lr_ft_end: 0.0001
-lr_ft_max: 0.4
+lr_end_kf: 0.0001
+lr_max_kf: 0.1
+lr_end_ft: 0.0001
+lr_max_ft: 0.04
 net_name: "resnet50"
 dataset: "cifar10"
 
diff --git
a/official/cv/resnet/golden_stick/pruner/scop/train.py b/official/cv/resnet/golden_stick/pruner/scop/train.py index 6299c079b36e62398e7f0b3fe5204544990058e0..a2a64a9d8e1c8896e79d9234119938dc6f7f9cc9 100644 --- a/official/cv/resnet/golden_stick/pruner/scop/train.py +++ b/official/cv/resnet/golden_stick/pruner/scop/train.py @@ -239,9 +239,9 @@ def train_net(): lr = get_lr(lr_init=config.lr_init, lr_end=0.0, - lr_max=config.lr_max, + lr_max=config.lr_max_kf, warmup_epochs=config.warmup_epochs, - total_epochs=config.epoch_size, + total_epochs=config.epoch_kf, steps_per_epoch=step_size, lr_decay_mode='cosine') @@ -256,12 +256,28 @@ def train_net(): net_with_loss = NetWithLossCell(model, kf_loss_fn, 1) net_train_step = nn.TrainOneStepCell(net_with_loss, optimizer) - train_kf(dataset, net_train_step, model, kfconv_list, kfscale_list) + if config.pre_trained: + for _, (_, module) in enumerate(model.cells_and_names()): + if isinstance(module, KfConv2d): + module.score = module.bn.gamma.data.abs() * ops.Squeeze()( + module.kfscale.data - (1 - module.kfscale.data)) + module.prune_rate = config.prune_rate + for _, (_, module) in enumerate(model.cells_and_names()): + if isinstance(module, KfConv2d): + _, index = ops.Sort()(module.score) + num_pruned_channel = int(module.prune_rate * module.score.shape[0]) + module.out_index = index[num_pruned_channel:] + for param in model.get_parameters(): + param.requires_grad = True + train_ft(model, dataset) + else: + model = train_kf(dataset, net_train_step, model, kfconv_list, kfscale_list) + train_ft(model, dataset) def train_kf(dataset, net_train_step, model, kfconv_list, kfscale_list): """train konckoff.""" - for _ in range(0, config.epoch_size): + for _ in range(0, config.epoch_kf): from copy import deepcopy for _, (kf_data, kf_target) in enumerate(dataset.create_tuple_iterator()): kf = deepcopy(kf_data) @@ -304,18 +320,21 @@ def train_kf(dataset, net_train_step, model, kfconv_list, kfscale_list): _, index = ops.Sort()(module.score) 
num_pruned_channel = int(module.prune_rate * module.score.shape[0]) module.out_index = index[num_pruned_channel:] - train_ft(model, dataset) + return model def train_ft(model, dataset): """train finetune.""" algo_ft = PrunerFtCompressAlgo({}) model = algo_ft.apply(model) + if config.pre_trained: + pre_ckpt = ms.load_checkpoint(config.pre_trained) + ms.load_param_into_net(model, pre_ckpt) lr_ft_new = ms.Tensor(get_lr(lr_init=config.lr_init, - lr_end=config.lr_ft_end, - lr_max=config.lr_ft_max, + lr_end=config.lr_end_ft, + lr_max=config.lr_max_ft, warmup_epochs=config.warmup_epochs, - total_epochs=config.epochs_ft, + total_epochs=config.epoch_ft, steps_per_epoch=dataset.get_dataset_size(), lr_decay_mode='poly')) @@ -337,13 +356,14 @@ def train_ft(model, dataset): time_cb = TimeMonitor(data_size=step_size) loss_cb = LossMonitor() + ckpt_save_dir = set_save_ckpt_dir() config_ck = CheckpointConfig(save_checkpoint_steps=5 * step_size, keep_checkpoint_max=10) - ckpt_cb = ModelCheckpoint(prefix="resnet", directory=config.output_path, + ckpt_cb = ModelCheckpoint(prefix="resnet", directory=ckpt_save_dir, config=config_ck) ft_cb = [time_cb, loss_cb, ckpt_cb] - model_ft.train(config.epochs_ft, dataset, callbacks=ft_cb, + model_ft.train(config.epoch_ft, dataset, callbacks=ft_cb, sink_size=dataset.get_dataset_size(), dataset_sink_mode=True) masked_conv_list = [] diff --git a/official/cv/resnet/golden_stick/scripts/run_distribute_train.sh b/official/cv/resnet/golden_stick/scripts/run_distribute_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..5eef1db308eaaed1b231d2fe766b62e3521bf661 --- /dev/null +++ b/official/cv/resnet/golden_stick/scripts/run_distribute_train.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# Copyright 2022 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+CURPATH="$(dirname "$0")"
+
+if [ $# != 4 ] && [ $# != 6 ]
+then
+  echo "Usage: bash run_distribute_train.sh [RANK_TABLE_FILE] [PYTHON_PATH] [CONFIG_PATH] [DATASET_PATH] [CKPT_TYPE](optional) [CKPT_PATH](optional)"
+  exit 1
+fi
+
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+
+PATH1=$(get_real_path $1)
+PYTHON_PATH=$(get_real_path $2)
+CONFIG_FILE=$(get_real_path $3)
+PATH2=$(get_real_path $4)
+
+if [ ! -d $PYTHON_PATH ]
+then
+  echo "error: PYTHON_PATH=$PYTHON_PATH is not a directory"
+  exit 1
+fi
+
+if [ ! -f $CONFIG_FILE ]
+then
+  echo "error: CONFIG_FILE=$CONFIG_FILE is not a file"
+  exit 1
+fi
+
+if [ ! -f $PATH1 ]
+then
+  echo "error: RANK_TABLE_FILE=$PATH1 is not a file"
+  exit 1
+fi
+
+if [ ! -d $PATH2 ]
+then
+  echo "error: DATASET_PATH=$PATH2 is not a directory"
+  exit 1
+fi
+
+# The optional checkpoint is given as two trailing arguments: CKPT_TYPE
+# (FP32 or PRETRAINED) selects the train.py flag, CKPT_PATH is the file.
+if [ $# == 6 ]; then
+  CKPT_TYPE=$5
+  CKPT_FILE=$(get_real_path $6)
+
+  if [ "x$CKPT_TYPE" != "xFP32" ] && [ "x$CKPT_TYPE" != "xPRETRAINED" ]; then
+    echo "error: CKPT_TYPE=$CKPT_TYPE is not valid, should be FP32 or PRETRAINED"
+    exit 1
+  fi
+  if [ ! -f $CKPT_FILE ]; then
+    echo "error: CKPT_FILE=$CKPT_FILE is not a file"
+    exit 1
+  fi
+fi
+
+if [ "x${RUN_EVAL}" == "xTrue" ] && [ ! -d $EVAL_DATASET_PATH ]
+then
+  echo "error: EVAL_DATASET_PATH=$EVAL_DATASET_PATH is not a directory"
+  exit 1
+fi
+
+# NOTE(review): bootup_cache_server is neither defined nor sourced in this script - confirm where it comes from.
+if [ "x${RUN_EVAL}" == "xTrue" ]
+then
+  bootup_cache_server
+fi
+
+ulimit -u unlimited
+export DEVICE_NUM=8
+export RANK_SIZE=8
+export RANK_TABLE_FILE=$PATH1
+
+export SERVER_ID=0
+rank_start=$((DEVICE_NUM * SERVER_ID))
+
+cpus=`cat /proc/cpuinfo| grep "processor"| wc -l`
+avg=`expr $cpus \/ $DEVICE_NUM`
+gap=`expr $avg \- 1`
+
+# Start one background train.py per device, each pinned with taskset to its own CPU range.
+for((i=0; i<${DEVICE_NUM}; i++))
+do
+  start=`expr $i \* $avg`
+  end=`expr $start \+ $gap`
+  cmdopt=$start"-"$end
+  export DEVICE_ID=${i}
+  export RANK_ID=$((rank_start + i))
+  rm -rf ./train_parallel$i
+  mkdir ./train_parallel$i
+  cp ${PYTHON_PATH}/*.py ./train_parallel$i
+  cp ${CURPATH}/*.sh ./train_parallel$i
+  cp -r ${CURPATH}/../../src ./train_parallel$i
+  cd ./train_parallel$i || exit
+  echo "start training for rank $RANK_ID, device $DEVICE_ID"
+  env > env.log
+
+  if [ "x$CKPT_TYPE" == "xFP32" ]; then
+    taskset -c $cmdopt python3 train.py --run_distribute=True --device_num=$RANK_SIZE --data_path=$PATH2 \
+      --config_path=$CONFIG_FILE --fp32_ckpt=$CKPT_FILE --output_path './output' &> log &
+  elif [ "x$CKPT_TYPE" == "xPRETRAINED" ]; then
+    taskset -c $cmdopt python3 train.py --run_distribute=True --device_num=$RANK_SIZE --data_path=$PATH2 \
+      --config_path=$CONFIG_FILE --pre_trained=$CKPT_FILE --output_path './output' &> log &
+  else
+    taskset -c $cmdopt python3 train.py --run_distribute=True --device_num=$RANK_SIZE --data_path=$PATH2 \
+      --config_path=$CONFIG_FILE --output_path './output' &> log &
+  fi
+  cd ..
+done
diff --git a/official/cv/resnet/golden_stick/scripts/run_standalone_train.sh b/official/cv/resnet/golden_stick/scripts/run_standalone_train.sh
new file mode 100644
index 0000000000000000000000000000000000000000..bae9f5de91cca8626990aef7022ca5f0f560ba4c
--- /dev/null
+++ b/official/cv/resnet/golden_stick/scripts/run_standalone_train.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# Copyright 2022 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+# Runs train.py from PYTHON_PATH on a single device. The *.py files and ../../src
+# are copied into ./train and training is started there in the background;
+# its output is redirected to ./train/log.
+CURPATH="$(dirname "$0")"
+
+if [ $# != 3 ] && [ $# != 5 ]; then
+  echo "Usage: bash run_standalone_train.sh [PYTHON_PATH] [CONFIG_FILE] [DATASET_PATH] [CKPT_TYPE](optional) [CKPT_PATH](optional)"
+  echo "Examples:"
+  echo "  Train from the beginning:"
+  echo "    bash run_standalone_train.sh /path/to/algo_dir resnet50_config.yaml /path/to/dataset"
+  echo "  Train from full precision checkpoint:"
+  echo "    bash run_standalone_train.sh /path/to/algo_dir resnet50_config.yaml /path/to/dataset FP32 /path/to/fp32_ckpt"
+  echo "  Train from pretrained checkpoint:"
+  echo "    bash run_standalone_train.sh /path/to/algo_dir resnet50_config.yaml /path/to/dataset PRETRAINED /path/to/pretrained_ckpt"
+  exit 1
+fi
+
+# Print $1 unchanged when it is absolute, otherwise resolve it against $PWD.
+get_real_path(){
+  if [ "${1:0:1}" == "/" ]; then
+    echo "$1"
+  else
+    echo "$(realpath -m $PWD/$1)"
+  fi
+}
+
+PYTHON_PATH=$(get_real_path $1)
+CONFIG_FILE=$(get_real_path $2)
+DATASET_PATH=$(get_real_path $3)
+
+if [ ! -d $PYTHON_PATH ]
+then
+  echo "error: PYTHON_PATH=$PYTHON_PATH is not a directory"
+  exit 1
+fi
+
+if [ ! -f $CONFIG_FILE ]
+then
+  echo "error: CONFIG_FILE=$CONFIG_FILE is not a file"
+  exit 1
+fi
+
+if [ ! -d $DATASET_PATH ]
+then
+  echo "error: DATASET_PATH=$DATASET_PATH is not a directory"
+  exit 1
+fi
+
+# CKPT_TYPE decides which train.py flag (--fp32_ckpt or --pre_trained) receives CKPT_FILE.
+if [ $# == 5 ]; then
+  CKPT_TYPE=$4
+  CKPT_FILE=$(get_real_path $5)
+
+  if [ "x$CKPT_TYPE" != "xFP32" ] && [ "x$CKPT_TYPE" != "xPRETRAINED" ]; then
+    echo "error: CKPT_TYPE=$CKPT_TYPE is not valid, should be FP32 or PRETRAINED"
+    exit 1
+  fi
+  if [ ! -f $CKPT_FILE ]; then
+    echo "error: CKPT_FILE=$CKPT_FILE is not a file"
+    exit 1
+  fi
+fi
+
+ulimit -u unlimited
+export DEVICE_NUM=1
+export RANK_ID=0
+export RANK_SIZE=1
+
+# Recreate ./train so every run starts from an empty working directory.
+if [ -d "train" ];
+then
+  rm -rf ./train
+fi
+mkdir ./train
+cp ${PYTHON_PATH}/*.py ./train
+cp -r ${CURPATH}/../../src ./train
+cd ./train || exit
+
+if [ "x$CKPT_TYPE" == "xFP32" ]; then
+  python train.py --config_path=$CONFIG_FILE --run_distribute=False \
+    --data_path=$DATASET_PATH --fp32_ckpt=$CKPT_FILE --output_path './output' &> log &
+elif [ "x$CKPT_TYPE" == "xPRETRAINED" ]; then
+  python train.py --config_path=$CONFIG_FILE --run_distribute=False \
+    --data_path=$DATASET_PATH --pre_trained=$CKPT_FILE --output_path './output' &> log &
+else
+  python train.py --config_path=$CONFIG_FILE --run_distribute=False \
+    --data_path=$DATASET_PATH --output_path './output' &> log &
+fi