diff --git a/research/cv/mobilenetv3_large/README_CN.md b/research/cv/mobilenetv3_large/README_CN.md new file mode 100644 index 0000000000000000000000000000000000000000..d9705935ea6f7e8ceb62f7f90524cde8d3cac3d2 --- /dev/null +++ b/research/cv/mobilenetv3_large/README_CN.md @@ -0,0 +1,185 @@ +# 鐩綍 + +- [鐩綍](#鐩綍) +- [MobileNetV3鎻忚堪](#mobilenetv3鎻忚堪) +- [妯″瀷鏋舵瀯](#妯″瀷鏋舵瀯) +- [鏁版嵁闆哴(#鏁版嵁闆�) +- [鐜瑕佹眰](#鐜瑕佹眰) +- [鑴氭湰璇存槑](#鑴氭湰璇存槑) + - [鑴氭湰鍜岀ず渚嬩唬鐮乚(#鑴氭湰鍜岀ず渚嬩唬鐮�) + - [鑴氭湰鍙傛暟](#鑴氭湰鍙傛暟) + - [璁粌杩囩▼](#璁粌杩囩▼) + - [鍚姩](#鍚姩) + - [缁撴灉](#缁撴灉) + - [璇勪及杩囩▼](#璇勪及杩囩▼) + - [鍚姩](#鍚姩-1) + - [缁撴灉](#缁撴灉-1) +- [妯″瀷璇存槑](#妯″瀷璇存槑) + - [璁粌鎬ц兘](#璁粌鎬ц兘) +- [闅忔満鎯呭喌鐨勬弿杩癩(#闅忔満鎯呭喌鐨勬弿杩�) +- [ModelZoo 涓婚〉](#modelzoo-涓婚〉) + +<!-- /TOC --> + +# MobileNetV3鎻忚堪 + +MobileNetV3缁撳悎纭欢鎰熺煡绁炵粡缃戠粶鏋舵瀯鎼滅储锛圢AS锛夊拰NetAdapt绠楁硶锛屽凡缁忓彲浠ョЩ妞嶅埌鎵嬫満CPU涓婅繍琛岋紝鍚庣画闅忔柊鏋舵瀯杩涗竴姝ヤ紭鍖栨敼杩涖€傦紙2019骞�11鏈�20鏃ワ級 + +[璁烘枃](https://arxiv.org/pdf/1905.02244)锛欻oward, Andrew, Mark Sandler, Grace Chu, Liang-Chieh Chen, Bo Chen, Mingxing Tan, Weijun Wang et al."Searching for mobilenetv3."In Proceedings of the IEEE International Conference on Computer Vision, pp. 1314-1324.2019. 
+ +# 妯″瀷鏋舵瀯 + +MobileNetV3鎬讳綋缃戠粶鏋舵瀯濡備笅锛� + +[閾炬帴](https://arxiv.org/pdf/1905.02244) + +# 鏁版嵁闆� + +浣跨敤鐨勬暟鎹泦锛歔ImageNet(ILSVRC2012)](http://www.image-net.org/) + +- 鏁版嵁闆嗗ぇ灏�: 146G, 1330k 1000绫诲僵鑹插浘鍍� + - 璁粌: 140G, 1280k寮犲浘鐗� + - 娴嬭瘯: 6G, 50k寮犲浘鐗� +- 鏁版嵁鏍煎紡锛歊GB + - 娉細鏁版嵁鍦╯rc/dataset.py涓鐞嗐€� + +# 鐜瑕佹眰 + +- 纭欢锛圓scend锛� + - 浣跨敤Ascend鏉ユ惌寤虹‖浠剁幆澧冦€� +- 妗嗘灦 + - [MindSpore](https://www.mindspore.cn/install) +- 濡傞渶鏌ョ湅璇︽儏锛岃鍙傝濡備笅璧勬簮锛� + - [MindSpore 鏁欑▼](https://www.mindspore.cn/tutorials/zh-CN/master/index.html) + - [MindSpore Python API](https://www.mindspore.cn/docs/api/zh-CN/master/index.html) + +# 鑴氭湰璇存槑 + +## 鑴氭湰鍜岀ず渚嬩唬鐮� + +```bash +鈹溾攢鈹€ mobileNetv3_large + 鈹溾攢鈹€ README_CN.md # MobileNetV3鐩稿叧鎻忚堪 + 鈹溾攢鈹€ scripts + 鈹� 鈹溾攢鈹€run_standalone_train.sh # 鐢ㄤ簬鍗曞崱璁粌鐨剆hell鑴氭湰 + 鈹� 鈹溾攢鈹€run_distribute_train.sh # 鐢ㄤ簬鍏崱璁粌鐨剆hell鑴氭湰 + 鈹� 鈹斺攢鈹€run_eval.sh # 鐢ㄤ簬璇勪及鐨剆hell鑴氭湰 + 鈹溾攢鈹€ src + 鈹� 鈹溾攢鈹€config.py # 鍙傛暟閰嶇疆 + 鈹� 鈹溾攢鈹€dataset.py # 鍒涘缓鏁版嵁闆� + 鈹� 鈹溾攢鈹€lr_generator.py # 閰嶇疆瀛︿範鐜� + 鈹� 鈹溾攢鈹€mobilenetV3.py # MobileNetV3鏋舵瀯 + 鈹溾攢鈹€ eval.py # 璇勪及鑴氭湰 + 鈹溾攢鈹€ export.py # 妯″瀷鏍煎紡杞崲鑴氭湰 + 鈹斺攢鈹€ train.py # 璁粌鑴氭湰 +``` + +## 鑴氭湰鍙傛暟 + +妯″瀷璁粌鍜岃瘎浼拌繃绋嬩腑浣跨敤鐨勫弬鏁板彲浠ュ湪config.py涓缃�: + +```python + "num_classes": 1000, + "image_height": 224, + "image_width": 224, + "batch_size": 150, + "epoch_size": 370, + "warmup_epochs": 4, + "lr_init": 0.0, + "lr_end": 0.00, + "lr": 1.54, + "momentum": 0.9, + "weight_decay": 4e-5, + "label_smooth": 0.1, + "weight_init": "he_uniform", + "loss_scale": 1024, + "save_checkpoint": True, + "save_checkpoint_epochs": 1, + "keep_checkpoint_max": 50, + "save_checkpoint_path": "./", + "export_format": "MINDIR", + "export_file": "mobilenetv3_large", +``` + +## 璁粌杩囩▼ + +### 鍚姩 + +鎮ㄥ彲浠ヤ娇鐢╬ython鎴杝hell鑴氭湰杩涜璁粌銆� + +```shell +# 璁粌绀轰緥 + python: + Ascend鍗曞崱璁粌绀轰緥: python train.py --train_dataset_path [TRAIN_DATA_DIR] --eval_dataset_path [EVAL_DATA_DIR] --device_id [DEVICE_ID] --run_distribute False + + shell: + Ascend鍗曞崱璁粌绀轰緥: bash ./scripts/run_standalone_train.sh [DEVICE_ID] [TRAIN_DATA_DIR] 
[EVAL_DATA_DIR] + Ascend鍏崱骞惰璁粌: + bash ./run_distribute_train.sh [RANK_TABLE_FILE] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [TRAIN_DATA_DIR] [EVAL_DATA_DIR] +``` + +### 缁撴灉 + +ckpt鏂囦欢灏嗗瓨鍌ㄥ湪 `./ckpts_rank_0` 璺緞涓嬶紝璁粌鏃ュ織灏嗚璁板綍鍒� `log.txt` 涓€傝缁冩棩蹇楅儴鍒嗙ず渚嬪涓嬶細 + +```shell +epoch 1: epoch time: 553262.126, per step time: 518.521, avg loss: 5.270 +epoch 2: epoch time: 151033.049, per step time: 141.549, avg loss: 4.529 +epoch 3: epoch time: 150605.300, per step time: 141.148, avg loss: 4.101 +epoch 4: epoch time: 150638.805, per step time: 141.180, avg loss: 4.014 +epoch 5: epoch time: 150594.088, per step time: 141.138, avg loss: 3.607 +``` + +## 璇勪及杩囩▼ + +### 鍚姩 + +鎮ㄥ彲浠ヤ娇鐢╬ython鎴杝hell鑴氭湰杩涜璇勪及銆� + +```shell +# 璇勪及绀轰緥 + python: + python eval.py --device_id [DEVICE_ID] --checkpoint_path [PATH_CHECKPOINT] --dataset_path [DATA_DIR] + shell: + bash ./scripts/run_eval.sh [DEVICE_ID] [PATH_CHECKPOINT] [EVAL_DATA_DIR] +``` + +> 璁粌杩囩▼涓彲浠ョ敓鎴恈kpt鏂囦欢銆� + +### 缁撴灉 + +鍙互鍦� `evaluation_ascend/eval.log` 鏌ョ湅璇勪及缁撴灉銆� + +```shell +metric: {'Loss': 6.346325377444248, 'Top1-Acc': 0.744024024024024, 'Top5-Acc': 0.916956956956957} +``` + +# 妯″瀷璇存槑 + +## 璁粌鎬ц兘 + +| 鍙傛暟 | Ascend | +| -------------------------- | ------------------------------------- | +| 妯″瀷鍚嶇О | mobilenetv3_large | +| 妯″瀷鐗堟湰 | v1.0 | +| 杩愯鐜 | HUAWEI CLOUD Modelarts | +| Mindspore鐗堟湰 | 1.3.0 | +| 涓婁紶鏃堕棿 | 2021-9-17 | +| 鏁版嵁闆� | imagenet | +| 璁粌鍙傛暟 | src/config.py | +| 浼樺寲鍣� | Momentum | +| 鎹熷け鍑芥暟 | CrossEntropyWithLabelSmooth | +| 鏈€缁堟崯澶� | 2.01 | +| 绮剧‘搴� (8p) | Top1[74.4%], Top5[91.7%] | +| 璁粌鎬绘椂闂� (8p) | 40h | +| 璇勪及鎬绘椂闂� | 2min | +| 鍙傛暟閲� (M) | 43.4M | +| 鑴氭湰 | [閾炬帴](https://gitee.com/mindspore/models/tree/master/research/cv/mobilenetv3_large) | + +# 闅忔満鎯呭喌鐨勬弿杩� + +鎴戜滑鍦� `dataset.py` 鍜� `train.py` 鑴氭湰涓缃簡闅忔満绉嶅瓙銆� + +# ModelZoo + +璇锋牳瀵瑰畼鏂� [涓婚〉](https://gitee.com/mindspore/models)銆� \ No newline at end of file diff --git a/research/cv/mobilenetv3_large/eval.py b/research/cv/mobilenetv3_large/eval.py new file mode 100644 index 
0000000000000000000000000000000000000000..bfee4dcdb603562255dc6b003f8f166d676705cb --- /dev/null +++ b/research/cv/mobilenetv3_large/eval.py @@ -0,0 +1,61 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""evaluate_imagenet""" +import argparse + +import mindspore.nn as nn +from mindspore import context +from mindspore.train.model import Model +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits + +from src.dataset import create_dataset +from src.mobilenetV3 import mobilenet_v3_large +from src.config import config_ascend as config + +def parse_args(): + '''parse_args''' + parser = argparse.ArgumentParser(description='image classification evaluation') + parser.add_argument('--dataset_path', type=str, default='', help='Dataset path') + parser.add_argument('--checkpoint_path', type=str, default='', help='checkpoint of mobilenetv3_large') + parser.add_argument('--device_id', type=int, default=0, help='device id') + args_opt = parser.parse_args() + return args_opt + +if __name__ == '__main__': + args = parse_args() + + # set device id + context.set_context(device_id=args.device_id) + context.set_context(mode=context.GRAPH_MODE, device_target='Ascend') + + # define network + net = mobilenet_v3_large(num_classes=config.num_classes, activation="Softmax") + ckpt = 
load_checkpoint(args.checkpoint_path) + load_param_into_net(net, ckpt) + net.set_train(False) + + # define dataset + dataset = create_dataset(dataset_path=args.dataset_path, do_train=False, config=config, repeat_num=1, + batch_size=config.batch_size) + loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean") + eval_metrics = {'Loss': nn.Loss(), + 'Top1-Acc': nn.Top1CategoricalAccuracy(), + 'Top5-Acc': nn.Top5CategoricalAccuracy()} + + model = Model(net, loss, optimizer=None, metrics=eval_metrics) + print('='*20, 'Evalute start', '='*20) + metrics = model.eval(dataset) + print("metric: ", metrics) diff --git a/research/cv/mobilenetv3_large/export.py b/research/cv/mobilenetv3_large/export.py new file mode 100644 index 0000000000000000000000000000000000000000..ad96960b8c5e0fb9f0dae00a59ab8bbc3fee420b --- /dev/null +++ b/research/cv/mobilenetv3_large/export.py @@ -0,0 +1,38 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +""" +mobilenetv3_large export. 
+""" +import argparse +import numpy as np +from mindspore import context, Tensor, load_checkpoint, load_param_into_net, export +from src.config import config_ascend +from src.mobilenetV3 import mobilenet_v3_large + + +parser = argparse.ArgumentParser(description='Image classification') +parser.add_argument('--checkpoint_path', type=str, required=True, help='Checkpoint file path') +args_opt = parser.parse_args() + +if __name__ == '__main__': + context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") + + net = mobilenet_v3_large(num_classes=config_ascend.num_classes, multiplier=1.) + + param_dict = load_checkpoint(args_opt.checkpoint_path) + load_param_into_net(net, param_dict) + input_shp = [1, 3, config_ascend.image_height, config_ascend.image_width] + input_array = Tensor(np.random.uniform(-1.0, 1.0, size=input_shp).astype(np.float32)) + export(net, input_array, file_name=config_ascend.export_file, file_format=config_ascend.export_format) diff --git a/research/cv/mobilenetv3_large/scripts/run_distribute_train.sh b/research/cv/mobilenetv3_large/scripts/run_distribute_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..6be6c086e9b2583f97c42a8add8c74bc92e14fea --- /dev/null +++ b/research/cv/mobilenetv3_large/scripts/run_distribute_train.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +if [ $# != 4 ]; then + echo "Usage: bash ./run_distribute_train.sh [RANK_TABLE_FILE] [VISIABLE_DEVICES(0,1,2,3,4,5,6,7)] [TRAIN_DATA_DIR] [EVAL_DATA_DIR]" + exit 1 +fi + +get_real_path() { + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +RANK_TABLE_FILE=$(get_real_path $1) +echo $RANK_TABLE_FILE + +if [ ! -f $RANK_TABLE_FILE ] +then + echo "error: RANK_TABLE_FILE=$RANK_TABLE_FILE is not a file." +exit 1 +fi + +VISIABLE_DEVICES=$2 +IFS="," read -r -a CANDIDATE_DEVICE <<< "$VISIABLE_DEVICES" +export RANK_SIZE=${#CANDIDATE_DEVICE[@]} + +TRAIN_DATA_DIR=$(get_real_path $3) +echo $TRAIN_DATA_DIR + +if [ ! -d $TRAIN_DATA_DIR ] +then + echo "error: TRAIN_DATA_DIR=$TRAIN_DATA_DIR is not a directory." +exit 1 +fi + +EVAL_DATA_DIR=$(get_real_path $4) +echo $EVAL_DATA_DIR + +if [ ! -d $EVAL_DATA_DIR ] +then + echo "error: EVAL_DATA_DIR=$EVAL_DATA_DIR is not a directory." +exit 1 +fi + +export RANK_TABLE_FILE=$RANK_TABLE_FILE + +cores=`cat /proc/cpuinfo|grep "processor" |wc -l` +echo "the number of logical core" $cores +avg_core_per_rank=`expr $cores \/ $RANK_SIZE` +core_gap=`expr $avg_core_per_rank \- 1` +echo "avg_core_per_rank" $avg_core_per_rank +echo "core_gap" $core_gap +for((i=0;i<RANK_SIZE;i++)) +do + start=`expr $i \* $avg_core_per_rank` + export DEVICE_ID=${CANDIDATE_DEVICE[i]} + export RANK_ID=$i + export DEPLOY_MODE=0 + export GE_USE_STATIC_MEMORY=1 + end=`expr $start \+ $core_gap` + cmdopt=$start"-"$end + + rm -rf train_parallel$i + mkdir ./train_parallel$i + cp *.py ./train_parallel$i + cd ./train_parallel$i || exit + echo "start training for rank $i, device $DEVICE_ID rank_id $RANK_ID" + + env > env.log + taskset -c $cmdopt python -u ../train.py \ + --train_dataset_path=$TRAIN_DATA_DIR --eval_dataset_path=$EVAL_DATA_DIR --device_id=${CANDIDATE_DEVICE[i]} > log.txt 2>&1 & + cd ../ +done diff --git 
a/research/cv/mobilenetv3_large/scripts/run_eval.sh b/research/cv/mobilenetv3_large/scripts/run_eval.sh new file mode 100644 index 0000000000000000000000000000000000000000..51bfcbd92c5f3af3aaba7210749bda9b675e2ecd --- /dev/null +++ b/research/cv/mobilenetv3_large/scripts/run_eval.sh @@ -0,0 +1,58 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 3 ]; then + echo "Usage: bash ./scripts/run_eval.sh [DEVICE_ID] [PATH_CHECKPOINT] [EVAL_DATA_DIR]" + exit 1 +fi + +get_real_path() { + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +DEVICE_ID=$1 + +CHECKPOINT_PATH=$(get_real_path $2) +echo $CHECKPOINT_PATH + +if [ ! -f $CHECKPOINT_PATH ] +then + echo "error: CHECKPOINT_PATH=$CHECKPOINT_PATH is not a file." +exit 1 +fi + +DATA_DIR=$(get_real_path $3) +echo $DATA_DIR + +if [ ! -d $DATA_DIR ] +then + echo "error: DATA_DIR=$DATA_DIR is not a directory." 
+exit 1 +fi + +export RANK_SIZE=1 + +rm -rf evaluation_ascend +mkdir ./evaluation_ascend +cd ./evaluation_ascend || exit +echo "start evaluating for device id $DEVICE_ID" +env > env.log +python ../eval.py --dataset_path=$DATA_DIR --checkpoint_path=$CHECKPOINT_PATH --device_id=$DEVICE_ID > eval.log 2>&1 & +cd ../ diff --git a/research/cv/mobilenetv3_large/scripts/run_standalone_train.sh b/research/cv/mobilenetv3_large/scripts/run_standalone_train.sh new file mode 100644 index 0000000000000000000000000000000000000000..8d5f58695d505f271c8943c4ef18c4cf44e2d225 --- /dev/null +++ b/research/cv/mobilenetv3_large/scripts/run_standalone_train.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ + +if [ $# != 3 ]; then + echo "Usage: bash ./scripts/run_standalone_train.sh [DEVICE_ID] [TRAIN_DATA_DIR] [EVAL_DATA_DIR]" + exit 1 +fi + +get_real_path() { + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +export DEVICE_ID=$1 +export RANK_SIZE=1 + +TRAIN_DATA_DIR=$(get_real_path $2) +echo $TRAIN_DATA_DIR + +if [ ! -d $TRAIN_DATA_DIR ] +then + echo "error: TRAIN_DATA_DIR=$TRAIN_DATA_DIR is not a directory." +exit 1 +fi + +EVAL_DATA_DIR=$(get_real_path $3) +echo $EVAL_DATA_DIR + +if [ ! -d $EVAL_DATA_DIR ] +then + echo "error: EVAL_DATA_DIR=$EVAL_DATA_DIR is not a directory." 
+exit 1 +fi + +python ./train.py \ + --train_dataset_path=$TRAIN_DATA_DIR \ + --eval_dataset_path=$EVAL_DATA_DIR \ + --run_distribute=False > log.txt 2>&1 & \ No newline at end of file diff --git a/research/cv/mobilenetv3_large/src/config.py b/research/cv/mobilenetv3_large/src/config.py new file mode 100644 index 0000000000000000000000000000000000000000..d2d63725fe28a5589d644b7fe8f133148e364f96 --- /dev/null +++ b/research/cv/mobilenetv3_large/src/config.py @@ -0,0 +1,41 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +""" +network config setting, will be used in main.py +""" +from easydict import EasyDict as ed + +config_ascend = ed({ + "num_classes": 1000, + "image_height": 224, + "image_width": 224, + "batch_size": 150, + "epoch_size": 370, + "warmup_epochs": 4, + "lr_init": 0.0, + "lr_end": 0.00, + "lr": 1.54, + "momentum": 0.9, + "weight_decay": 4e-5, + "label_smooth": 0.1, + "weight_init": "he_uniform", + "loss_scale": 1024, + "save_checkpoint": True, + "save_checkpoint_epochs": 1, + "keep_checkpoint_max": 50, + "save_checkpoint_path": "./", + "export_format": "MINDIR", + "export_file": "mobilenetv3_large", +}) diff --git a/research/cv/mobilenetv3_large/src/dataset.py b/research/cv/mobilenetv3_large/src/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..49ebdacc5adce53891d73e564aacfea6254947a1 --- /dev/null +++ b/research/cv/mobilenetv3_large/src/dataset.py @@ -0,0 +1,82 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""Create train or eval dataset.""" +import mindspore.common.dtype as mstype +import mindspore.dataset as de +import mindspore.dataset.vision.c_transforms as C +import mindspore.dataset.transforms.c_transforms as C2 +from mindspore.communication.management import get_rank, get_group_size + +def create_dataset(dataset_path, do_train, config, repeat_num=1, batch_size=32, run_distribute=True): + """ + create a train or eval dataset + + Args: + dataset_path(string): the path of dataset. + do_train(bool): whether dataset is used for train or eval. + repeat_num(int): the repeat times of dataset. Default: 1 + batch_size(int): the batch size of dataset. Default: 32 + + Returns: + dataset + """ + + if not do_train: + data_set = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True) + else: + if run_distribute: + rank_id = get_rank() + rank_size = get_group_size() + data_set = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True, + num_shards=rank_size, shard_id=rank_id) + else: + data_set = de.ImageFolderDataset(dataset_path, num_parallel_workers=8, shuffle=True) + + resize_height = config.image_height + resize_width = config.image_width + buffer_size = 1000 + + # define map operations + decode_op = C.Decode() + resize_crop_op = C.RandomCropDecodeResize(resize_height, scale=(0.08, 1.0), ratio=(0.75, 1.333)) + horizontal_flip_op = C.RandomHorizontalFlip(prob=0.5) + + resize_op = C.Resize(256) + center_crop = C.CenterCrop(resize_width) + rescale_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4) + normalize_op = C.Normalize(mean=[0.485 * 255, 0.456 * 255, 0.406 * 255], + std=[0.229 * 255, 0.224 * 255, 0.225 * 255]) + change_swap_op = C.HWC2CHW() + + if do_train: + trans = [resize_crop_op, horizontal_flip_op, rescale_op, normalize_op, change_swap_op] + else: + trans = [decode_op, resize_op, center_crop, normalize_op, change_swap_op] + + 
type_cast_op = C2.TypeCast(mstype.int32) + + data_set = data_set.map(operations=trans, input_columns="image", num_parallel_workers=8) + data_set = data_set.map(operations=type_cast_op, input_columns="label", num_parallel_workers=8) + + # apply shuffle operations + data_set = data_set.shuffle(buffer_size=buffer_size) + + # apply batch operations + data_set = data_set.batch(batch_size, drop_remainder=True) + + # apply dataset repeat operation + data_set = data_set.repeat(repeat_num) + + return data_set diff --git a/research/cv/mobilenetv3_large/src/lr_generator.py b/research/cv/mobilenetv3_large/src/lr_generator.py new file mode 100644 index 0000000000000000000000000000000000000000..f6f9dcda1bdbd6b14153f557f56507492d680b55 --- /dev/null +++ b/research/cv/mobilenetv3_large/src/lr_generator.py @@ -0,0 +1,54 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ +"""learning rate generator""" +import math +import numpy as np + + +def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch): + """ + generate learning rate array + + Args: + global_step(int): total steps of the training + lr_init(float): init learning rate + lr_end(float): end learning rate + lr_max(float): max learning rate + warmup_epochs(int): number of warmup epochs + total_epochs(int): total epoch of training + steps_per_epoch(int): steps of one epoch + + Returns: + np.array, learning rate array + """ + lr_each_step = [] + total_steps = steps_per_epoch * total_epochs + warmup_steps = steps_per_epoch * warmup_epochs + for i in range(total_steps): + if i < warmup_steps: + lr = lr_init + (lr_max - lr_init) * i / warmup_steps + else: + lr = lr_end + \ + (lr_max - lr_end) * \ + (1. + math.cos(math.pi * (i - warmup_steps) / (total_steps - warmup_steps))) / 2. + if lr < 0.0: + lr = 0.0 + lr_each_step.append(lr) + + current_step = global_step + lr_each_step = np.array(lr_each_step).astype(np.float32) + learning_rate = lr_each_step[current_step:] + + return learning_rate diff --git a/research/cv/mobilenetv3_large/src/mobilenetV3.py b/research/cv/mobilenetv3_large/src/mobilenetV3.py new file mode 100644 index 0000000000000000000000000000000000000000..2171f28243d1f4b0d5d3fca6e30c99732f64c16c --- /dev/null +++ b/research/cv/mobilenetv3_large/src/mobilenetV3.py @@ -0,0 +1,422 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""MobileNetV3 model define""" +from functools import partial +import numpy as np +import mindspore.nn as nn +from mindspore.ops import operations as P +from mindspore import Tensor + + +__all__ = ['mobilenet_v3_large', + 'mobilenet_v3_small'] + +class hswish(nn.Cell): + """hswish""" + def construct(self, x): + out = x * nn.ReLU6()(x + 3) / 6 + return out + + +class hsigmoid(nn.Cell): + """hsigmoid""" + def construct(self, x): + out = nn.ReLU6()(x + 3) / 6 + return out + +def _make_divisible(x, divisor=8): + """_make_divisible""" + return int(np.ceil(x * 1. / divisor) * divisor) + + +class Activation(nn.Cell): + """ + Activation definition. + + Args: + act_func(string): activation name. + + Returns: + Tensor, output tensor. + """ + + def __init__(self, act_func): + super(Activation, self).__init__() + if act_func == 'relu': + self.act = nn.ReLU() + elif act_func == 'relu6': + self.act = nn.ReLU6() + elif act_func in ('hsigmoid', 'hard_sigmoid'): + self.act = hsigmoid() + elif act_func in ('hswish', 'hard_swish'): + self.act = hswish() + else: + raise NotImplementedError + + def construct(self, x): + return self.act(x) + + +class GlobalAvgPooling(nn.Cell): + """ + Global avg pooling definition. + + Args: + + Returns: + Tensor, output tensor. 
+ + Examples: + >>> GlobalAvgPooling() + """ + + def __init__(self, keep_dims=False): + super(GlobalAvgPooling, self).__init__() + self.mean = P.ReduceMean(keep_dims=keep_dims) + + def construct(self, x): + x = self.mean(x, (2, 3)) + return x + + +class SE(nn.Cell): + """ + SE warpper definition. + + Args: + num_out (int): Numbers of output channels. + ratio (int): middle output ratio. + + Returns: + Tensor, output tensor. + + Examples: + >>> SE(4) + """ + + def __init__(self, num_out, ratio=4): + super(SE, self).__init__() + num_mid = _make_divisible(num_out // ratio) + self.pool = GlobalAvgPooling(keep_dims=True) + self.conv1 = nn.Conv2d(in_channels=num_out, out_channels=num_mid, + kernel_size=1, has_bias=True, pad_mode='pad') + self.act1 = Activation('relu') + self.conv2 = nn.Conv2d(in_channels=num_mid, out_channels=num_out, + kernel_size=1, has_bias=True, pad_mode='pad') + self.act2 = Activation('hsigmoid') + self.mul = P.Mul() + + def construct(self, x): + out = self.pool(x) + out = self.conv1(out) + out = self.act1(out) + out = self.conv2(out) + out = self.act2(out) + out = self.mul(x, out) + return out + + +class Unit(nn.Cell): + """ + Unit warpper definition. + + Args: + num_in (int): Input channel. + num_out (int): Output channel. + kernel_size (int): Input kernel size. + stride (int): Stride size. + padding (int): Padding number. + num_groups (int): Output num group. + use_act (bool): Used activation or not. + act_type (string): Activation type. + + Returns: + Tensor, output tensor. 
+ + Examples: + >>> Unit(3, 3) + """ + + def __init__(self, num_in, num_out, kernel_size=1, stride=1, padding=0, num_groups=1, + use_act=True, act_type='relu'): + super(Unit, self).__init__() + self.conv = nn.Conv2d(in_channels=num_in, + out_channels=num_out, + kernel_size=kernel_size, + stride=stride, + padding=padding, + group=num_groups, + has_bias=False, + pad_mode='pad') + self.bn = nn.BatchNorm2d(num_out) + self.use_act = use_act + self.act = Activation(act_type) if use_act else None + + def construct(self, x): + out = self.conv(x) + out = self.bn(out) + if self.use_act: + out = self.act(out) + return out + + +class ResUnit(nn.Cell): + """ + ResUnit warpper definition. + + Args: + num_in (int): Input channel. + num_mid (int): Middle channel. + num_out (int): Output channel. + kernel_size (int): Input kernel size. + stride (int): Stride size. + act_type (str): Activation type. + use_se (bool): Use SE warpper or not. + + Returns: + Tensor, output tensor. + + Examples: + >>> ResUnit(16, 3, 1, 1) + """ + def __init__(self, num_in, num_mid, num_out, kernel_size, stride=1, act_type='relu', use_se=False): + super(ResUnit, self).__init__() + self.use_se = use_se + self.first_conv = (num_out != num_mid) + self.use_short_cut_conv = True + + if self.first_conv: + self.expand = Unit(num_in, num_mid, kernel_size=1, + stride=1, padding=0, act_type=act_type) + else: + self.expand = None + self.conv1 = Unit(num_mid, num_mid, kernel_size=kernel_size, stride=stride, + padding=self._get_pad(kernel_size), act_type=act_type, num_groups=num_mid) + if use_se: + self.se = SE(num_mid) + self.conv2 = Unit(num_mid, num_out, kernel_size=1, stride=1, + padding=0, act_type=act_type, use_act=False) + if num_in != num_out or stride != 1: + self.use_short_cut_conv = False + self.add = P.Add() if self.use_short_cut_conv else None + + def construct(self, x): + """construct""" + if self.first_conv: + out = self.expand(x) + else: + out = x + out = self.conv1(out) + if self.use_se: + out = 
self.se(out) + out = self.conv2(out) + if self.use_short_cut_conv: + out = self.add(x, out) + return out + + def _get_pad(self, kernel_size): + """set the padding number""" + pad = 0 + if kernel_size == 1: + pad = 0 + elif kernel_size == 3: + pad = 1 + elif kernel_size == 5: + pad = 2 + elif kernel_size == 7: + pad = 3 + else: + raise NotImplementedError + return pad + + +class MobileNetV3(nn.Cell): + """ + MobileNetV3 architecture. + + Args: + model_cfgs (Cell): number of classes. + num_classes (int): Output number classes. + multiplier (int): Channels multiplier for round to 8/16 and others. Default is 1. + final_drop (float): Dropout number. + round_nearest (list): Channel round to . Default is 8. + Returns: + Tensor, output tensor. + + Examples: + >>> MobileNetV3(num_classes=1000) + """ + + def __init__(self, model_cfgs, num_classes=1000, multiplier=1., final_drop=0., + round_nearest=8, include_top=True, activation="None"): + super(MobileNetV3, self).__init__() + print(num_classes) + self.cfgs = model_cfgs['cfg'] + self.inplanes = 16 + self.features = [] + first_conv_in_channel = 3 + first_conv_out_channel = _make_divisible(multiplier * self.inplanes) + + self.features.append(nn.Conv2d(in_channels=first_conv_in_channel, + out_channels=first_conv_out_channel, + kernel_size=3, padding=1, stride=2, + has_bias=False, pad_mode='pad')) + self.features.append(nn.BatchNorm2d(first_conv_out_channel)) + self.features.append(Activation('hswish')) + for layer_cfg in self.cfgs: + self.features.append(self._make_layer(kernel_size=layer_cfg[0], + exp_ch=_make_divisible(multiplier * layer_cfg[1]), + out_channel=_make_divisible(multiplier * layer_cfg[2]), + use_se=layer_cfg[3], + act_func=layer_cfg[4], + stride=layer_cfg[5])) + output_channel = _make_divisible(multiplier * model_cfgs["cls_ch_squeeze"]) + self.features.append(nn.Conv2d(in_channels=_make_divisible(multiplier * self.cfgs[-1][2]), + out_channels=output_channel, + kernel_size=1, padding=0, stride=1, + 
has_bias=False, pad_mode='pad')) + self.features.append(nn.BatchNorm2d(output_channel)) + self.features.append(Activation('hswish')) + self.features.append(GlobalAvgPooling(keep_dims=True)) + self.features.append(nn.Conv2d(in_channels=output_channel, + out_channels=model_cfgs['cls_ch_expand'], + kernel_size=1, padding=0, stride=1, + has_bias=False, pad_mode='pad')) + self.features.append(Activation('hswish')) + if final_drop > 0: + self.features.append((nn.Dropout(final_drop))) + + # make it nn.CellList + self.features = nn.SequentialCell(self.features) + self.include_top = include_top + self.need_activation = False + if self.include_top: + self.output = nn.Conv2d(in_channels=model_cfgs['cls_ch_expand'], + out_channels=num_classes, + kernel_size=1, has_bias=True, pad_mode='pad') + self.squeeze = P.Squeeze(axis=(2, 3)) + if activation != "None": + self.need_activation = True + if activation == "Sigmoid": + self.activation = P.Sigmoid() + elif activation == "Softmax": + self.activation = P.Softmax() + else: + raise NotImplementedError(f"The activation {activation} not in [Sigmoid, Softmax].") + + self._initialize_weights() + + def construct(self, x): + x = self.features(x) + if self.include_top: + x = self.output(x) + x = self.squeeze(x) + if self.need_activation: + x = self.activation(x) + return x + + + def _make_layer(self, kernel_size, exp_ch, out_channel, use_se, act_func, stride=1): + mid_planes = exp_ch + out_planes = out_channel + + layer = ResUnit(self.inplanes, mid_planes, out_planes, + kernel_size, stride=stride, act_type=act_func, use_se=use_se) + self.inplanes = out_planes + return layer + + def _initialize_weights(self): + """ + Initialize weights. + + Args: + + Returns: + None. + + Examples: + >>> _initialize_weights() + """ + self.init_parameters_data() + for _, m in self.cells_and_names(): + if isinstance(m, (nn.Conv2d)): + n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels + m.weight.set_data(Tensor(np.random.normal(0, np.sqrt(2. 
/ n), + m.weight.data.shape).astype("float32"))) + if m.bias is not None: + m.bias.set_data( + Tensor(np.zeros(m.bias.data.shape, dtype="float32"))) + elif isinstance(m, nn.BatchNorm2d): + m.gamma.set_data( + Tensor(np.ones(m.gamma.data.shape, dtype="float32"))) + m.beta.set_data( + Tensor(np.zeros(m.beta.data.shape, dtype="float32"))) + elif isinstance(m, nn.Dense): + m.weight.set_data(Tensor(np.random.normal( + 0, 0.01, m.weight.data.shape).astype("float32"))) + if m.bias is not None: + m.bias.set_data( + Tensor(np.zeros(m.bias.data.shape, dtype="float32"))) + + +def mobilenet_v3(model_name, **kwargs): + """ + Constructs a MobileNet V2 model + """ + model_cfgs = { + "large": { + "cfg": [ + # k, exp, c, se, nl, s, + [3, 16, 16, False, 'relu', 1], + [3, 64, 24, False, 'relu', 2], + [3, 72, 24, False, 'relu', 1], + [5, 72, 40, True, 'relu', 2], + [5, 120, 40, True, 'relu', 1], + [5, 120, 40, True, 'relu', 1], + [3, 240, 80, False, 'hswish', 2], + [3, 200, 80, False, 'hswish', 1], + [3, 184, 80, False, 'hswish', 1], + [3, 184, 80, False, 'hswish', 1], + [3, 480, 112, True, 'hswish', 1], + [3, 672, 112, True, 'hswish', 1], + [5, 672, 160, True, 'hswish', 2], + [5, 960, 160, True, 'hswish', 1], + [5, 960, 160, True, 'hswish', 1]], + "cls_ch_squeeze": 960, + "cls_ch_expand": 1280, + }, + "small": { + "cfg": [ + # k, exp, c, se, nl, s, + [3, 16, 16, True, 'relu', 2], + [3, 72, 24, False, 'relu', 2], + [3, 88, 24, False, 'relu', 1], + [5, 96, 40, True, 'hswish', 2], + [5, 240, 40, True, 'hswish', 1], + [5, 240, 40, True, 'hswish', 1], + [5, 120, 48, True, 'hswish', 1], + [5, 144, 48, True, 'hswish', 1], + [5, 288, 96, True, 'hswish', 2], + [5, 576, 96, True, 'hswish', 1], + [5, 576, 96, True, 'hswish', 1]], + "cls_ch_squeeze": 576, + "cls_ch_expand": 1280, + } + } + return MobileNetV3(model_cfgs[model_name], **kwargs) + + +mobilenet_v3_large = partial(mobilenet_v3, model_name="large") +mobilenet_v3_small = partial(mobilenet_v3, model_name="small") diff --git 
a/research/cv/mobilenetv3_large/train.py b/research/cv/mobilenetv3_large/train.py new file mode 100644 index 0000000000000000000000000000000000000000..75fbd94374d79316bd0b03df2c6bdd1130eec8b4 --- /dev/null +++ b/research/cv/mobilenetv3_large/train.py @@ -0,0 +1,292 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================ +"""train_imagenet.""" + +import os +import time +import argparse +import ast +import numpy as np + +from mindspore import context +from mindspore import Tensor +from mindspore import nn +from mindspore.nn.optim.momentum import Momentum +from mindspore.nn.loss import SoftmaxCrossEntropyWithLogits +from mindspore.nn.loss.loss import _Loss +from mindspore.ops import operations as P +from mindspore.ops import functional as F +from mindspore.common import dtype as mstype +from mindspore.train.model import Model +from mindspore.context import ParallelMode +from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, Callback +from mindspore.train.loss_scale_manager import FixedLossScaleManager +from mindspore.train.serialization import load_checkpoint, load_param_into_net +from mindspore.common import set_seed +from mindspore.communication.management import init +from mindspore import save_checkpoint +from src.dataset import create_dataset +from src.lr_generator import get_lr +from src.config import config_ascend +from 
from src.mobilenetV3 import mobilenet_v3_large


set_seed(1)

parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--data_url', type=str)
parser.add_argument('--train_url', type=str)
parser.add_argument('--train_dataset_path', type=str, default=None, help='Dataset path')
parser.add_argument('--eval_dataset_path', type=str, default=None, help='Dataset path')
parser.add_argument('--device_id', type=int, default=0, help='device id of Ascend. (Default: 0)')
parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path')
parser.add_argument('--is_modelarts', type=ast.literal_eval, default=False, help='modelarts')
parser.add_argument('--run_distribute', type=ast.literal_eval, default=True, help='Run distribute')
args_opt = parser.parse_args()


class SaveCallback(Callback):
    """
    Callback that evaluates the model and keeps the best checkpoint.

    Args:
        model_save (Model): the model wrapper used for evaluation.
        eval_dataset_save (dataset): dataset used for evaluation.
        save_file_path (string): path prefix for saved checkpoints.

    Returns:
        None.

    Examples:
        >>> SaveCallback(model, dataset, './save_ckpt')
    """

    def __init__(self, model_save, eval_dataset_save, save_file_path):
        super(SaveCallback, self).__init__()
        self.model = model_save
        self.eval_dataset = eval_dataset_save
        # Only checkpoints whose Top-1 accuracy beats this threshold are kept.
        self.acc = 0.75
        self.save_path = save_file_path

    def step_end(self, run_context):
        """Evaluate and save a checkpoint whenever Top-1 accuracy improves."""
        # NOTE(review): this runs a full eval pass every time the callback
        # fires; with dataset_sink_mode=True that is once per sink cycle,
        # but per optimizer step otherwise -- confirm before disabling sink mode.
        cb_params = run_context.original_args()

        result = self.model.eval(self.eval_dataset)
        print(result)
        if result['Top1-Acc'] > self.acc:
            self.acc = result['Top1-Acc']
            file_name = self.save_path + str(self.acc) + ".ckpt"
            save_checkpoint(save_obj=cb_params.train_network, ckpt_file_name=file_name)
            print("Save the maximum accuracy checkpoint,the accuracy is", self.acc)


class CrossEntropyWithLabelSmooth(_Loss):
    """
    Cross-entropy loss with label smoothing.

    Args:
        smooth_factor (float): smooth factor for label smoothing. Default is 0.
        num_classes (int): number of classes. Default is 1000.

    Returns:
        None.

    Examples:
        >>> CrossEntropyWithLabelSmooth(smooth_factor=0., num_classes=1000)
    """

    def __init__(self, smooth_factor=0., num_classes=1000):
        super(CrossEntropyWithLabelSmooth, self).__init__()
        self.onehot = P.OneHot()
        # True class gets 1 - smooth_factor; the rest share smooth_factor evenly.
        self.on_value = Tensor(1.0 - smooth_factor, mstype.float32)
        self.off_value = Tensor(1.0 * smooth_factor /
                                (num_classes - 1), mstype.float32)
        self.ce = nn.SoftmaxCrossEntropyWithLogits()
        self.mean = P.ReduceMean(False)
        self.cast = P.Cast()

    def construct(self, logit, label):
        """Return the mean smoothed cross-entropy of logits against labels."""
        one_hot_label = self.onehot(self.cast(label, mstype.int32), F.shape(logit)[1],
                                    self.on_value, self.off_value)
        out_loss = self.ce(logit, one_hot_label)
        out_loss = self.mean(out_loss, 0)
        return out_loss


class Monitor(Callback):
    """
    Monitor loss and time per step/epoch.

    Args:
        lr_init (numpy array): the per-step learning-rate schedule.

    Returns:
        None

    Examples:
        >>> Monitor(lr_init=Tensor([0.05]*100).asnumpy())
    """

    def __init__(self, lr_init=None):
        super(Monitor, self).__init__()
        self.lr_init = lr_init
        self.lr_init_len = len(lr_init)

    def epoch_begin(self, run_context):
        """Reset the per-epoch loss buffer and timer."""
        self.losses = []
        self.epoch_time = time.time()

    def epoch_end(self, run_context):
        """Print epoch time, per-step time, and average loss."""
        cb_params = run_context.original_args()

        epoch_mseconds = (time.time() - self.epoch_time) * 1000
        per_step_mseconds = epoch_mseconds / cb_params.batch_num
        print("epoch time: {:5.3f}, per step time: {:5.3f}, avg loss: {:5.3f}".format(epoch_mseconds,
                                                                                      per_step_mseconds,
                                                                                      np.mean(self.losses)))

    def step_begin(self, run_context):
        """Start the per-step timer."""
        self.step_time = time.time()

    def step_end(self, run_context):
        """step_end"""
        cb_params = run_context.original_args()
        step_mseconds = (time.time() - self.step_time) * 1000
        step_loss = cb_params.net_outputs

        # net_outputs may be (loss, overflow, loss_scale); keep the loss only.
        if isinstance(step_loss, (tuple, list)) and isinstance(step_loss[0], Tensor):
            step_loss = step_loss[0]
        if isinstance(step_loss, Tensor):
            step_loss = np.mean(step_loss.asnumpy())

        self.losses.append(step_loss)
        cur_step_in_epoch = (cb_params.cur_step_num - 1) % cb_params.batch_num

        print("epoch: [{:3d}/{:3d}], step:[{:5d}/{:5d}], loss:[{:5.3f}/{:5.3f}], time:[{:5.3f}], lr:[{:5.3f}]".format(
            cb_params.cur_epoch_num -
            1, cb_params.epoch_num, cur_step_in_epoch, cb_params.batch_num, step_loss,
            np.mean(self.losses), step_mseconds, self.lr_init[cb_params.cur_step_num - 1]))


if __name__ == '__main__':

    config = config_ascend

    # print configuration
    print("train args: ", args_opt)
    print("cfg: ", config)

    device_id = args_opt.device_id

    # set context and device init
    if args_opt.run_distribute:
        context.set_context(mode=context.GRAPH_MODE, device_target='Ascend', device_id=device_id)
        init()
        context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True)
    else:
        context.set_context(device_id=device_id)

    # define net
    net = mobilenet_v3_large(num_classes=config.num_classes)

    # define loss
    if config.label_smooth > 0:
        loss = CrossEntropyWithLabelSmooth(
            smooth_factor=config.label_smooth, num_classes=config.num_classes)
    else:
        loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')

    # define dataset
    epoch_size = config.epoch_size

    if args_opt.is_modelarts:
        import moxing as mox

        # Copy the dataset from OBS onto the local cache of this device.
        mox.file.copy_parallel(src_url=args_opt.data_url, dst_url='/cache/dataset/device_' + os.getenv('DEVICE_ID'))
        train_dataset_path = '/cache/dataset/device_' + str(device_id) + '/train'
        eval_dataset_path = '/cache/dataset/device_' + str(device_id) + '/val'
        # BUG FIX: the parser defines no --batch_size, so args_opt.batch_size
        # raised AttributeError on this path; use config.batch_size, matching
        # the non-modelarts branch below.
        dataset = create_dataset(dataset_path=train_dataset_path,
                                 do_train=True,
                                 config=config,
                                 repeat_num=1,
                                 batch_size=config.batch_size)
        eval_dataset = create_dataset(dataset_path=eval_dataset_path,
                                      do_train=False,
                                      config=config,
                                      repeat_num=1,
                                      batch_size=config.batch_size)

    else:
        dataset = create_dataset(dataset_path=args_opt.train_dataset_path,
                                 do_train=True,
                                 config=config,
                                 repeat_num=1,
                                 batch_size=config.batch_size,
                                 run_distribute=args_opt.run_distribute)

        eval_dataset = create_dataset(
            dataset_path=args_opt.eval_dataset_path,
            do_train=False,
            config=config,
            repeat_num=1,
            batch_size=config.batch_size)

    step_size = dataset.get_dataset_size()

    # resume from a pretrained checkpoint if one was supplied
    if args_opt.pre_trained:
        param_dict = load_checkpoint(args_opt.pre_trained)
        load_param_into_net(net, param_dict)

    # define loss scale and learning-rate schedule
    loss_scale = FixedLossScaleManager(
        config.loss_scale, drop_overflow_update=False)
    lr = Tensor(get_lr(global_step=0,
                       lr_init=config.lr_init,
                       lr_end=config.lr_end,
                       lr_max=config.lr,
                       warmup_epochs=config.warmup_epochs,
                       total_epochs=epoch_size,
                       steps_per_epoch=step_size))

    # define optimizer
    opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, config.momentum,
                   config.weight_decay,
                   config.loss_scale)

    # define evaluation metrics
    eval_metrics = {'Loss': nn.Loss(),
                    'Top1-Acc': nn.Top1CategoricalAccuracy(),
                    'Top5-Acc': nn.Top5CategoricalAccuracy()}
    # define model
    model = Model(net, loss_fn=loss, optimizer=opt,
                  loss_scale_manager=loss_scale, metrics=eval_metrics)

    cb = [Monitor(lr_init=lr.asnumpy())]

    if args_opt.is_modelarts:
        save_checkpoint_path = '/cache/train_output/device_' + str(device_id) + '/'
    else:
        rank = 0
        save_checkpoint_path = 'ckpts_rank_' + str(rank)
    ckp_save_step = config.save_checkpoint_epochs * step_size
    config_ck = CheckpointConfig(save_checkpoint_steps=ckp_save_step, keep_checkpoint_max=config.keep_checkpoint_max)
    # f-string had no placeholders; a plain literal is equivalent.
    ckpoint_cb = ModelCheckpoint(prefix="mobilenetv3",
                                 directory=save_checkpoint_path, config=config_ck)
    save_cb = SaveCallback(model, eval_dataset, save_checkpoint_path)
    cb += [ckpoint_cb, save_cb]

    # begin train
    model.train(epoch_size, dataset, callbacks=cb, dataset_sink_mode=True)
    if args_opt.is_modelarts:
        # Push training outputs back to OBS.
        mox.file.copy_parallel(src_url='/cache/train_output', dst_url=args_opt.train_url)