diff --git a/official/cv/tinydarknet/README.md b/official/cv/tinydarknet/README.md index 2bdca4270d4a37f3296d26f095eabd55b2fc9a1b..c5aba2f4e2b6cc5c0e401b0d15ea5e3d4813685c 100644 --- a/official/cv/tinydarknet/README.md +++ b/official/cv/tinydarknet/README.md @@ -75,16 +75,17 @@ After installing MindSpore via the official website, you can start training and - running on Ascend: ```python - # run training example - bash ./scripts/run_standalone_train.sh 0 + # run in standalone environment + cd scripts/ + bash run_standalone_train.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet] - # run distributed training example - bash ./scripts/run_distribute_train.sh /{path}/*.json + # run in distributed environment + cd scripts/ + bash run_distribute_train.sh [RANK_TABLE_FILE] [cifar10|imagenet] [TRAIN_DATA_DIR] - # run evaluation example - python eval.py > eval.log 2>&1 & - OR - bash ./script/run_eval.sh + # evaluation + cd scripts/ + bash run_eval.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] ``` For distributed training, a hccl configuration file [RANK_TABLE_FILE] with JSON format needs to be created in advance. @@ -98,11 +99,11 @@ After installing MindSpore via the official website, you can start training and ```python # GPU standalone training example python train.py \ - --config_path=./imagenet_config_gpu.yaml \ + --config_path=./config/imagenet_config_gpu.yaml \ --dataset_name=imagenet --train_data_dir=../dataset/imagenet_original/train --device_target=GPU - OR - cd scripts - bash run_distribute_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10 | imagenet] + # OR + cd scripts/ + bash run_standalone_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet] # GPU distribute training example export RANK_SIZE=8 @@ -112,14 +113,15 @@ After installing MindSpore via the official website, you can start training and --dataset_name=imagenet \ --train_data_dir=../dataset/imagenet_original/train \ --device_target=GPU - OR - bash scripts/run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10 | imagenet] + # OR + cd scripts/ + bash run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10|imagenet] # GPU evaluation example python eval.py -device_target=GPU --val_data_dir=../dataset/imagenet_original/val --dataset_name=imagenet --config_path=./config/imagenet_config_gpu.yaml \ --checkpoint_path=$PATH2 - OR - bash scripts/run_train_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] + # OR + bash run_eval_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] ``` - Running on ModelArts @@ -265,7 +267,8 @@ For more configuration details, please refer the script `imagenet_config.yaml`. - running on Ascend: ```python - bash ./scripts/run_standalone_train.sh [DEVICE_ID] + cd scripts/ + bash run_standalone_train.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet] ``` The command above will run in the background, you can view the results through the file train.log. @@ -290,7 +293,7 @@ For more configuration details, please refer the script `imagenet_config.yaml`. - running on GPU: ```python - cd scripts + cd scripts/ bash run_standalone_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet] ``` @@ -313,7 +316,8 @@ For more configuration details, please refer the script `imagenet_config.yaml`.
- running on CPU ```python - bash scripts/run_train_cpu.sh [TRAIN_DATA_DIR] [cifar10|imagenet] + cd scripts/ + bash run_train_cpu.sh [TRAIN_DATA_DIR] [cifar10|imagenet] ``` ### [Distributed Training](#contents) @@ -321,7 +325,8 @@ For more configuration details, please refer the script `imagenet_config.yaml`. - running on Ascend: ```python - bash ./scripts/run_distribute_train.sh [RANK_TABLE_FILE] + cd scripts/ + bash run_distribute_train.sh [RANK_TABLE_FILE] [cifar10|imagenet] [TRAIN_DATA_DIR] ``` The above shell script will run distribute training in the background. You can view the results through the file train_parallel[X]/log. The loss value will be achieved as follows: @@ -340,7 +345,8 @@ For more configuration details, please refer the script `imagenet_config.yaml`. - running on GPU: ```python - bash scripts/run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10|imagenet] + cd scripts/ + bash run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10|imagenet] ``` The above shell script will run distribute training in the background. You can view the results through the file train_parallel[X]/log. The loss value will be achieved as follows: @@ -365,9 +371,9 @@ For more configuration details, please refer the script `imagenet_config.yaml`. Before running the command below, please check the checkpoint path used for evaluation. Please set the checkpoint path to be the absolute full path, e.g., "/username/tinydarknet/train_tinydarknet.ckpt". ```python - python eval.py > eval.log 2>&1 & - OR - bash scripts/run_eval.sh + python eval.py --val_data_dir=VAL_DATA_PATH --dataset_name=cifar10|imagenet --config_path=CONFIG_FILE --checkpoint_path=CHECKPOINT_PATH + # OR + bash run_eval.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] ``` The above python command will run in the background. You can view the results through the file "eval.log". The accuracy of the test dataset will be as follows: @@ -389,7 +395,7 @@ For more configuration details, please refer the script `imagenet_config.yaml`. Before running the command below, please check the checkpoint path used for evaluation. Please set the checkpoint path to be the absolute full path, e.g., "/username/tinydarknet/train_tinydarknet.ckpt". ```python - bash scripts/run_train_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] + bash run_eval_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] ``` The above python command will run in the background. You can view the results through the file "eval.log". The accuracy of the test dataset will be as follows: @@ -411,7 +417,7 @@ For more configuration details, please refer the script `imagenet_config.yaml`. Before running the command below, please check the checkpoint path used for evaluation. Please set the checkpoint path to be the absolute full path, e.g., "/username/tinydarknet/train_tinydarknet.ckpt". ```python - bash scripts/run_eval.sh [VAL_DATA_DIR] [imagenet|cifar10] [CHECKPOINT_PATH] + bash run_eval_cpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] ``` You can view the results through the file "eval.log". The accuracy of the test dataset will be as follows: @@ -429,7 +435,7 @@ For more configuration details, please refer the script `imagenet_config.yaml`.
```shell # Ascend310 inference -python export.py --dataset [DATASET] --file_name [FILE_NAME] --file_format [EXPORT_FORMAT] +python export.py --dataset_name [DATASET] --file_name [FILE_NAME] --file_format [EXPORT_FORMAT] ``` - Export on ModelArts (If you want to run in modelarts, please check the official documentation of [modelarts](https://support.huaweicloud.com/modelarts/), and you can start as follows) @@ -488,33 +494,33 @@ Inference result is saved in current path, you can find result like this in acc. ### [Training Performance](#contents) -| Parameters | Ascend | GPU | -| -------------------------- | ------------------------------------------------------------| ----------------------------------------------------| -| Model Version | V1 | V1 | -| Resource | Ascend 910;CPU 2.60GHz,56cores;内存 314G;系统 Euler2.8 | PCIE V100-32G | -| Uploaded Date | 2020/12/22 | 2021/07/15 | -| MindSpore Version | 1.1.0 | 1.3.0 | -| Dataset | 1200k images | 1200k images | -| Training Parameters | epoch=500, steps=1251, batch_size=128, lr=0.1 | epoch=500, steps=1251, batch_size = 128, lr=0.005 | -| Optimizer | Momentum | Momentum | -| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy | -| Speed | 8pc: 104 ms/step | 8pc: 255 ms/step | -| Parameters(M) | 4.0; | 4.0; | +| Parameters | Ascend | GPU | CPU | +| -------------------------- | ------------------------------------------------------------| ----------------------------------------------------|------------------------------------------------| +| Model Version | V1 | V1 | V1 | +| Resource | Ascend 910; CPU 2.60GHz, 56 cores; Memory 314G; OS Euler2.8 | PCIE V100-32G | CPU 72 cores, Memory 503G | +| Uploaded Date | 2020/12/22 | 2021/07/15 | 2021/12/22 | +| MindSpore Version | 1.1.0 | 1.3.0 | 1.5.0 | +| Dataset | 1200k images | 1200k images | 1200k images | +| Training Parameters | epoch=500, steps=1251, batch_size=128, lr=0.1 | epoch=500, steps=1251, batch_size = 128, lr=0.005 | epoch=500, steps=10009, batch_size=128, lr=0.1 | +| Optimizer | Momentum | Momentum | Momentum | +| Loss Function | Softmax Cross Entropy | Softmax Cross Entropy | Softmax Cross Entropy | +| Speed | 8pc: 104 ms/step | 8pc: 255 ms/step | 1p: 11081 ms/step | +| Parameters(M) | 4.0; | 4.0; | 4.0; | | Scripts | [Tiny-Darknet scripts](https://gitee.com/mindspore/models/tree/master/official/cv/tinydarknet) ### [Evaluation Performance](#contents) -| Parameters | Ascend | GPU | -| ------------------- | ----------------------------------| ----------------------------------| -| Model Version | V1 | V1 | -| Resource | Ascend 910;Euler2.8 | PCIE V100-32G | -| Uploaded Date | 2020/12/22 | 2021/7/15 | -| MindSpore Version | 1.1.0 | 1.3.0 | -| Dataset | 200k images | 200k images | -| batch_size | 128 | 128 | -| Outputs | probability | probability | -| Accuracy | 8pcs Top-1: 58.7%; Top-5: 81.7% | 8pcs Top-1: 58.9%; Top-5: 81.7% | -| Model for inference | 11.6M (.ckpt file) | 10.06M (.ckpt file) | +| Parameters | Ascend | GPU | CPU | +| ------------------- | ----------------------------------| ----------------------------------|------------------------------| +| Model Version | V1 | V1 | V1 | +| Resource | Ascend 910; Euler2.8 | PCIE V100-32G | CPU 72 cores, Memory 503G | +| Uploaded Date | 2020/12/22 | 2021/7/15 | 2020/12/22 | +| MindSpore Version | 1.1.0 | 1.3.0 | 1.5.0 | +| Dataset | 200k images | 200k images | 200k images | +| batch_size | 128 | 128 | 128 | +| Outputs | probability | probability | probability | +| Accuracy | 8pcs Top-1: 58.7%; Top-5: 81.7% | 8pcs Top-1: 58.9%; Top-5: 81.7% | 1p: Top-1:
58.7%; Top-5: 81.5% | +| Model for inference | 11.6M (.ckpt file) | 10.06M (.ckpt file) | 11.6M (.ckpt file) | ### [Inference Performance](#contents) diff --git a/official/cv/tinydarknet/README_CN.md b/official/cv/tinydarknet/README_CN.md index 2942d0c59328c412e0626517990f9f121de28649..655033c76240580e1409557464e415596a131571 100644 --- a/official/cv/tinydarknet/README_CN.md +++ b/official/cv/tinydarknet/README_CN.md @@ -84,15 +84,16 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 ```python # 单卡训练 - bash ./scripts/run_standalone_train.sh 0 + cd scripts/ + bash run_standalone_train.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet] # 分布式训练 - bash ./scripts/run_distribute_train.sh /{path}/*.json + cd scripts/ + bash run_distribute_train.sh [RANK_TABLE_FILE] [cifar10|imagenet] [TRAIN_DATA_DIR] # 评估 - python eval.py > eval.log 2>&1 & - OR - bash ./script/run_eval.sh + cd scripts/ + bash run_eval.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] ``` 进行并行训练时, 需要提前创建JSON格式的hccl配置文件 [RANK_TABLE_FILE]。 @@ -108,9 +109,9 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 python train.py \ --config_path=./config/imagenet_config_gpu.yaml \ --dataset_name=imagenet --train_data_dir=../dataset/imagenet_original/train --device_target=GPU - OR - cd scripts - bash run_distribute_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10 | imagenet] + # OR + cd scripts/ + bash run_standalone_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet] # GPU多卡训练示例 export RANK_SIZE=8 @@ -120,14 +121,15 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 --dataset_name=imagenet \ --train_data_dir=../dataset/imagenet_original/train \ --device_target=GPU - OR - bash scripts/run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10 | imagenet] + # OR + cd scripts/ + bash run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10|imagenet] # GPU评估示例 python eval.py -device_target=GPU --val_data_dir=../dataset/imagenet_original/val --dataset_name=imagenet --config_path=./config/imagenet_config_gpu.yaml \ --checkpoint_path=$PATH2 - OR - bash scripts/run_train_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] + # OR + bash run_eval_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] ``` - 在ModelArts上运行 @@ -272,7 +274,8 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 - 在Ascend资源上运行: ```python - bash ./scripts/run_standalone_train.sh [DEVICE_ID] + cd scripts/ + bash run_standalone_train.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet] ``` 上述的命令将运行在后台中,可以通过 `train.log` 文件查看运行结果. @@ -297,7 +300,7 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 - 在GPU资源上运行: ```python - cd scripts + cd scripts/ bash run_standalone_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet] ``` @@ -320,7 +323,8 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 - 在CPU资源上运行: ```python - bash scripts/run_train_cpu.sh [TRAIN_DATA_DIR] [cifar10|imagenet] + cd scripts/ + bash run_train_cpu.sh [TRAIN_DATA_DIR] [cifar10|imagenet] ``` ### [分布式训练](#目录) @@ -328,7 +332,8 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 - 在Ascend资源上运行: ```python - bash scripts/run_distribute_train.sh [RANK_TABLE_FILE] + cd scripts/ + bash run_distribute_train.sh [RANK_TABLE_FILE] [cifar10|imagenet] [TRAIN_DATA_DIR] ``` 上述的脚本命令将在后台中进行分布式训练,可以通过`distribute_train/nohup.out`文件查看运行结果.
训练的损失值将以如下的形式展示: @@ -347,7 +352,8 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 - 在GPU资源上运行: ```python - bash scripts/run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10|imagenet] + cd scripts/ + bash run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10|imagenet] ``` 上述的脚本命令将在后台中进行分布式训练,可以通过`distribute_train_gpu/nohup.out`文件查看运行结果. 训练的损失值将以如下的形式展示: @@ -372,9 +378,9 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 在运行如下命令前,请确认用于评估的checkpoint文件的路径.checkpoint文件须包含在tinydarknet文件夹内.请将checkpoint路径设置为相对于 eval.py文件 的路径,例如:"./ckpts/train_tinydarknet.ckpt"(ckpts 与 eval.py 同级). ```python - python eval.py > eval.log 2>&1 & - OR - bash scripts/run_eval.sh + python eval.py --val_data_dir=VAL_DATA_PATH --dataset_name=cifar10|imagenet --config_path=CONFIG_FILE --checkpoint_path=CHECKPOINT_PATH + # OR + bash run_eval.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] ``` 上述的python命令将运行在后台中,可以通过"eval.log"文件查看结果. 测试数据集的准确率将如下面所列: @@ -396,7 +402,7 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 在运行如下命令前,请确认用于评估的checkpoint文件的路径.checkpoint文件须包含在tinydarknet文件夹内.请将checkpoint路径设置为相对于 eval.py文件 的路径,例如:"./ckpts/train_tinydarknet.ckpt"(ckpts 与 eval.py 同级). ```python - bash scripts/run_train_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] + bash run_eval_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] ``` 上述的python命令将运行在后台中,可以通过"eval.log"文件查看结果. 测试数据集的准确率将如下面所列: @@ -418,7 +424,7 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 在运行如下命令前,请确认用于评估的checkpoint文件的路径.checkpoint文件须包含在tinydarknet文件夹内.请将checkpoint路径设置为相对于 eval.py文件 的路径,例如:"./ckpts/train_tinydarknet.ckpt"(ckpts 与 eval.py 同级). ```python - bash scripts/run_eval_cpu.sh [VAL_DATA_DIR] [imagenet|cifar10] [CHECKPOINT_PATH] + bash run_eval_cpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path] ``` 可以通过"eval.log"文件查看结果.
测试数据集的准确率将如下面所列: @@ -435,7 +441,7 @@ Tiny-DarkNet是Joseph Chet Redmon等人提出的一个16层的针对于经典的 - 在本地导出 ```shell -python export.py --dataset [DATASET] --file_name [FILE_NAME] --file_format [EXPORT_FORMAT] +python export.py --dataset_name [DATASET] --file_name [FILE_NAME] --file_format [EXPORT_FORMAT] ``` - 在ModelArts上导出 @@ -496,34 +502,34 @@ bash run_infer_310.sh [MINDIR_PATH] [DATA_PATH] [LABEL_PATH] [DVPP] [DEVICE_ID] #### Tinydarknet on ImageNet 2012 -| 参数 | Ascend | GPU | -| -------------------------- | ------------------------------------------------------------| ----------------------------------------------------| -| 模型版本 | V1 | V1 | -| 资源 | Ascend 910;CPU 2.60GHz,56cores;内存 314G;系统 Euler2.8 | PCIE V100-32G | -| 上传日期 | 2020/12/22 | 2021/07/15 | -| MindSpore版本 | 1.1.0 | 1.3.0 | -| 数据集 | 1200k张图片 | 1200k张图片 | -| 训练参数 | epoch=500, steps=1251, batch_size=128, lr=0.1 | epoch=500, steps=1251, batch_size = 128, lr=0.005 | -| 优化器 | Momentum | Momentum | -| 损失函数 | Softmax Cross Entropy | Softmax Cross Entropy | -| 速度 | 8卡: 104 ms/step | 8卡: 255 ms/step | -| 总时间 | 8卡: 17.8小时 | 8卡: 46.9小时 | -| 参数(M) | 4.0; | 4.0; | +| 参数 | Ascend | GPU | CPU | +| -------------------------- | ------------------------------------------------------------| ----------------------------------------------------|------------------------------------------------| +| 模型版本 | V1 | V1 | V1 | +| 资源 | Ascend 910;CPU 2.60GHz,56cores;内存 314G;系统 Euler2.8 | PCIE V100-32G | CPU 72cores, 内存 503G | +| 上传日期 | 2020/12/22 | 2021/07/15 | 2021/12/22 | +| MindSpore版本 | 1.1.0 | 1.3.0 | 1.5.0 | +| 数据集 | 1200k张图片 | 1200k张图片 | 1200k张图片 | +| 训练参数 | epoch=500, steps=1251, batch_size=128, lr=0.1 | epoch=500, steps=1251, batch_size = 128, lr=0.005 | epoch=500, steps=10009, batch_size=128, lr=0.1 | +| 优化器 | Momentum | Momentum | Momentum | +| 损失函数 | Softmax Cross Entropy | Softmax Cross Entropy | Softmax Cross Entropy | +| 速度 | 8卡: 104 ms/step | 8卡: 255 ms/step | 单卡:11081 ms/step | +| 总时间 | 8卡: 17.8小时 | 8卡: 46.9小时 | > 200小时 | +| 参数(M) | 4.0; | 4.0; | 4.0; | | 脚本 | [Tiny-Darknet脚本](https://gitee.com/mindspore/models/tree/master/official/cv/tinydarknet) ### [评估性能](#目录) -| 参数 | Ascend | GPU | -| ------------------- | ----------------------------------| ----------------------------------| -| 模型版本 | V1 | V1 | -| 资源 | Ascend 910;系统 Euler2.8 | NV SMX2 V100-32G | -| 上传日期 | 2020/12/22 | 2021/7/15 | -| MindSpore版本 | 1.1.0 | 1.3.0 | -| 数据集 | 200k张图片 | 200k张图片 | -| batch_size | 128 | 128 | -| 输出 | 分类概率 | 分类概率 | -| 准确率 | 8卡 Top-1: 58.7%; Top-5: 81.7% | 8卡 Top-1: 58.9%; Top-5: 81.7% | -| 推理模型 | 11.6M (.ckpt文件) | 10.06M (.ckpt文件) | +| 参数 | Ascend | GPU | CPU | +| ------------------- | ----------------------------------| ----------------------------------|--------------------------------| +| 模型版本 | V1 | V1 | V1 | +| 资源 | Ascend 910;系统 Euler2.8 | NV SMX2 V100-32G | CPU 72cores, 内存 503G | +| 上传日期 | 2020/12/22 | 2021/7/15 | 2020/12/22 | +| MindSpore版本 | 1.1.0 | 1.3.0 | 1.5.0 | +| 数据集 | 200k张图片 | 200k张图片 | 200k张图片 | +| batch_size | 128 | 128 | 128 | +| 输出 | 分类概率 | 分类概率 | 分类概率 | +| 准确率 | 8卡 Top-1: 58.7%; Top-5: 81.7% | 8卡 Top-1: 58.9%; Top-5: 81.7% | 单卡 Top-1: 58.7%; Top-5: 81.5% | +| 推理模型 | 11.6M (.ckpt文件) | 10.06M (.ckpt文件) | 11.6M (.ckpt文件) | ### [推理性能](#目录) diff --git a/official/cv/tinydarknet/scripts/run_distribute_train.sh b/official/cv/tinydarknet/scripts/run_distribute_train.sh index 28ffe26789d8457382c93a2b79ffcf2df08b9322..5c9ed77243bbb04713518b3a949edaf12ecc9dd2 100644 --- a/official/cv/tinydarknet/scripts/run_distribute_train.sh +++ 
b/official/cv/tinydarknet/scripts/run_distribute_train.sh @@ -14,31 +14,38 @@ # limitations under the License. # ============================================================================ -echo "$1 $2" +echo "$1 $2 $3" -if [ $# != 1 ] && [ $# != 2 ] +if [ $# != 3 ] then - echo "Usage: bash run_distribute_train.sh [RANK_TABLE_FILE] [cifar10|imagenet]" + echo "Usage: bash run_distribute_train.sh [RANK_TABLE_FILE] [cifar10|imagenet] [TRAIN_DATA_DIR]" exit 1 fi if [ ! -f $1 ] then - echo "error:RANK_TABLE_FILE=$1 is not a file" -exit 1 + echo "error: RANK_TABLE_FILE=$1 is not a file" + exit 1 fi -dataset_type='imagenet' -if [ $# == 2 ] -then - if [ $2 != "cifar10" ] && [ $2 != "imagenet" ] - then - echo "error: the selected dataset is neither cifar10 nor imagenet" +PROJECT_DIR=$(cd ./"`dirname $0`" || exit; pwd) +if [ $2 == 'imagenet' ]; then + CONFIG_FILE="$PROJECT_DIR/../config/imagenet_config.yaml" + dataset_type='imagenet' +elif [ $2 == 'cifar10' ]; then + CONFIG_FILE="$PROJECT_DIR/../config/cifar10_config.yaml" + dataset_type='cifar10' +else + echo "error: the selected dataset is neither cifar10 nor imagenet" exit 1 - fi - dataset_type=$2 fi +if [ ! -d $3 ] +then + echo "error: TRAIN_DATA_DIR=$3 is not a dir" + exit 1 +fi +data_path=$3 ulimit -u unlimited export DEVICE_NUM=8 @@ -61,6 +68,7 @@ do echo "start training for rank $RANK_ID, device $DEVICE_ID, $dataset_type" cd ./train_parallel$i || exit env > env.log - python train.py --dataset_name=$dataset_type > log 2>&1 & + python train.py --dataset_name=$dataset_type --train_data_dir=$data_path \ + --config_path=$CONFIG_FILE > log 2>&1 & cd .. -done \ No newline at end of file +done diff --git a/official/cv/tinydarknet/scripts/run_distribute_train_gpu.sh b/official/cv/tinydarknet/scripts/run_distribute_train_gpu.sh index bb31d5bbd8071cb06b1421f94c954107134f6dfe..41887c1221fdf686cf21feb823179362052b63a7 100644 --- a/official/cv/tinydarknet/scripts/run_distribute_train_gpu.sh +++ b/official/cv/tinydarknet/scripts/run_distribute_train_gpu.sh @@ -15,7 +15,7 @@ # ============================================================================ if [ $# != 3 ]; then - echo "Usage: sh run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10|imagenet]" + echo "Usage: bash run_distribute_train_gpu.sh [RANK_SIZE] [TRAIN_DATA_DIR] [cifar10|imagenet]" exit 1 fi @@ -52,9 +52,9 @@ if [ -d "distribute_train_gpu" ]; then fi mkdir ./distribute_train_gpu -cp ./*.py ./distribute_train_gpu -cp -r ./config ./distribute_train_gpu -cp -r ./src ./distribute_train_gpu +cp ../*.py ./distribute_train_gpu +cp -r ../config ./distribute_train_gpu +cp -r ../src ./distribute_train_gpu cd ./distribute_train_gpu || exit if [ $3 == 'imagenet' ]; then @@ -68,8 +68,8 @@ fi mpirun --allow-run-as-root -n $RANK_SIZE --output-filename log_output --merge-stderr-to-stdout \ nohup python train.py \ - --config_path=$CONFIG_FILE \ - --dataset_name=$dataset_type \ - --train_data_dir=$TRAIN_DATA_DIR \ - --device_target=GPU > log.txt 2>&1 & + --config_path=$CONFIG_FILE \ + --dataset_name=$dataset_type \ + --train_data_dir=$TRAIN_DATA_DIR \ + --device_target=GPU > log.txt 2>&1 & cd .. \ No newline at end of file diff --git a/official/cv/tinydarknet/scripts/run_eval.sh b/official/cv/tinydarknet/scripts/run_eval.sh index 1c5b4fde06ce33ff1b21301c56c748c72d9e337f..365a7446b959341b8d9bb4f73ea56d73b5758491 100644 --- a/official/cv/tinydarknet/scripts/run_eval.sh +++ b/official/cv/tinydarknet/scripts/run_eval.sh @@ -14,9 +14,43 @@ # limitations under the License. 
# ============================================================================ -abs_path=$(readlink -f "$0") -cur_path=$(dirname $abs_path) -cd $cur_path +if [ $# != 3 ] +then + echo "Usage: bash run_eval.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path]" +exit 1 +fi + +get_real_path(){ + if [ "${1:0:1}" == "/" ]; then + echo "$1" + else + echo "$(realpath -m $PWD/$1)" + fi +} + +PATH1=$(get_real_path $1) +if [ ! -d $PATH1 ] +then + echo "error: VAL_DATA_DIR=$PATH1 is not a directory" +exit 1 +fi + +PATH2=$(get_real_path $3) +if [ ! -f $PATH2 ] +then + echo "error: CHECKPOINT_PATH=$PATH2 is not a file" +exit 1 +fi + +BASE_PATH=$(dirname "$(dirname "$(readlink -f $0)")") +if [ $2 == 'imagenet' ]; then + CONFIG_FILE="${BASE_PATH}/config/imagenet_config.yaml" +elif [ $2 == 'cifar10' ]; then + CONFIG_FILE="${BASE_PATH}/config/cifar10_config.yaml" +else + echo "error: the selected dataset is neither cifar10 nor imagenet" +exit 1 +fi rm -rf ./eval mkdir ./eval @@ -24,6 +58,7 @@ cp -r ../src ./eval cp ../eval.py ./eval cp -r ../config ./eval cd ./eval || exit -env >env.log -python ./eval.py > ./eval.log 2>&1 & +env > env.log +python ./eval.py --val_data_dir=$PATH1 --dataset_name=$2 --config_path=$CONFIG_FILE \ + --checkpoint_path=$PATH2 > ./eval.log 2>&1 & cd .. diff --git a/official/cv/tinydarknet/scripts/run_eval_cpu.sh b/official/cv/tinydarknet/scripts/run_eval_cpu.sh index a4aef5fffb6b79ae8965c71c2dc1f17e7dafaebb..b6982f9417ff34edd3cfdeca64ef89d219d25a48 100644 --- a/official/cv/tinydarknet/scripts/run_eval_cpu.sh +++ b/official/cv/tinydarknet/scripts/run_eval_cpu.sh @@ -15,7 +15,7 @@ # ============================================================================ if [ $# != 1 ] && [ $# != 2 ] && [ $# != 3 ] then - echo "Usage bash scripts/run_eval_cpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path]" + echo "Usage: bash run_eval_cpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path]" exit 1 fi @@ -53,12 +53,12 @@ fi rm -rf ./eval mkdir ./eval -cp -r ./src ./eval -cp ./eval.py ./eval -cp -r ./config ./eval -env >env.log +cp -r ../src ./eval +cp ../eval.py ./eval +cp -r ../config ./eval +env > env.log echo "start evaluation for device CPU" cd ./eval || exit python ./eval.py --device_target=CPU --val_data_dir=$PATH1 --dataset_name=$2 --config_path=$CONFIG_FILE \ ---checkpoint_path=$PATH2 > ./eval.log 2>&1 & + --checkpoint_path=$PATH2 > ./eval.log 2>&1 & cd .. diff --git a/official/cv/tinydarknet/scripts/run_eval_gpu.sh b/official/cv/tinydarknet/scripts/run_eval_gpu.sh index 1075aa1e31e7d04648c7a7eca782067ce43155f9..27eefd850aca419658258cfc318b436b731f29d4 100644 --- a/official/cv/tinydarknet/scripts/run_eval_gpu.sh +++ b/official/cv/tinydarknet/scripts/run_eval_gpu.sh @@ -13,9 +13,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# ============================================================================ -if [ $# != 1 ] && [ $# != 2 ] && [ $# != 3 ] +if [ $# != 3 ] then - echo "Usage bash scripts/run_train_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path]" + echo "Usage: bash run_eval_gpu.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path]" exit 1 fi @@ -53,12 +53,12 @@ fi rm -rf ./eval mkdir ./eval -cp -r ./src ./eval -cp ./eval.py ./eval -cp -r ./config ./eval -env >env.log +cp -r ../src ./eval +cp ../eval.py ./eval +cp -r ../config ./eval +env > env.log echo "start evaluation for device GPU" cd ./eval || exit python ./eval.py --device_target=GPU --val_data_dir=$PATH1 --dataset_name=$2 --config_path=$CONFIG_FILE \ ---checkpoint_path=$PATH2 > ./eval.log 2>&1 & + --checkpoint_path=$PATH2 > ./eval.log 2>&1 & cd .. diff --git a/official/cv/tinydarknet/scripts/run_standalone_train.sh b/official/cv/tinydarknet/scripts/run_standalone_train.sh index 91b2e04c47ba81f9486820afb80628f28c55743b..b8db4a8a57beb5ce8c22f8c9fbb00cfb9e38926d 100644 --- a/official/cv/tinydarknet/scripts/run_standalone_train.sh +++ b/official/cv/tinydarknet/scripts/run_standalone_train.sh @@ -18,7 +18,7 @@ echo "$1 $2 $3" if [ $# != 2 ] && [ $# != 3 ] then - echo "Usage: bash run_distribute_train.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet]" + echo "Usage: bash run_standalone_train.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet]" exit 1 fi @@ -36,15 +36,16 @@ exit 1 fi train_data_dir=$2 -dataset_type='imagenet' -if [ $# == 3 ] -then - if [ $3 != "cifar10" ] && [ $3 != "imagenet" ] - then - echo "error: the selected dataset is neither cifar10 nor imagenet" +PROJECT_DIR=$(cd ./"`dirname $0`" || exit; pwd) +if [ $3 == 'imagenet' ]; then + CONFIG_FILE="$PROJECT_DIR/../config/imagenet_config.yaml" + dataset_type='imagenet' +elif [ $3 == 'cifar10' ]; then + CONFIG_FILE="$PROJECT_DIR/../config/cifar10_config.yaml" + dataset_type='cifar10' +else + echo "error: the selected dataset is neither cifar10 nor imagenet" exit 1 - fi - dataset_type=$3 fi export DEVICE_ID=$1 @@ -58,4 +59,5 @@ cp ../train.py ./train_single cp -r ../config ./train_single echo "start training for rank $RANK_ID, device $DEVICE_ID, $dataset_type" cd ./train_single || exit -python ./train.py --dataset_name=$dataset_type --train_data_dir=$train_data_dir> ./train.log 2>&1 & +python ./train.py --dataset_name=$dataset_type --train_data_dir=$train_data_dir \ + --config_path=$CONFIG_FILE > ./train.log 2>&1 & diff --git a/official/cv/tinydarknet/scripts/run_standalone_train_gpu.sh b/official/cv/tinydarknet/scripts/run_standalone_train_gpu.sh index 513ce25ab6a87cd17611c345614b335d178d2e2d..40bbfbbaa0ace41a2346553968fc8311e16e81d2 100644 --- a/official/cv/tinydarknet/scripts/run_standalone_train_gpu.sh +++ b/official/cv/tinydarknet/scripts/run_standalone_train_gpu.sh @@ -18,7 +18,7 @@ echo "$1 $2 $3" if [ $# != 2 ] && [ $# != 3 ] then - echo "Usage: bash run_distribute_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet]" + echo "Usage: bash run_standalone_train_gpu.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet]" exit 1 fi @@ -49,12 +49,12 @@ then fi if [ $3 == 'imagenet' ]; then - CONFIG_FILE="$PROJECT_DIR/../config/imagenet_config_gpu.yaml" + CONFIG_FILE="$PROJECT_DIR/../config/imagenet_config_gpu.yaml" elif [ $3 == 'cifar10' ]; then - CONFIG_FILE="$PROJECT_DIR/../config/cifar10_config_gpu.yaml" + CONFIG_FILE="$PROJECT_DIR/../config/cifar10_config_gpu.yaml" else - echo "error: the selected dataset is neither cifar10 nor imagenet" -exit 1 + echo "error: the 
selected dataset is neither cifar10 nor imagenet" + exit 1 fi export DEVICE_ID=$1 @@ -69,5 +69,5 @@ cp -r ../config ./train_single_gpu echo "start training for rank $RANK_ID, device $DEVICE_ID, $dataset_type" cd ./train_single_gpu || exit python ./train.py --config_path=$CONFIG_FILE \ ---dataset_name=$dataset_type --train_data_dir=$train_data_dir --device_target=GPU> ./train.log 2>&1 & + --dataset_name=$dataset_type --train_data_dir=$train_data_dir --device_target=GPU> ./train.log 2>&1 & diff --git a/official/cv/tinydarknet/scripts/run_train_cpu.sh b/official/cv/tinydarknet/scripts/run_train_cpu.sh index cb01a8b7a9c62118bc2c9a476606f091146cda5b..5fa428a4f32332c759abeab521f4ca00b2db26dc 100644 --- a/official/cv/tinydarknet/scripts/run_train_cpu.sh +++ b/official/cv/tinydarknet/scripts/run_train_cpu.sh @@ -16,7 +16,7 @@ if [ $# != 1 ] && [ $# != 2 ] then - echo "Usage bash scripts/run_train_cpu.sh [TRAIN_DATA_DIR] [cifar10|imagenet]" + echo "Usage bash run_train_cpu.sh [TRAIN_DATA_DIR] [cifar10|imagenet]" exit 1 fi @@ -47,11 +47,12 @@ fi rm -rf ./train_cpu mkdir ./train_cpu -cp ./train.py ./train_cpu -cp -r ./src ./train_cpu -cp -r ./config ./train_cpu +cp ../train.py ./train_cpu +cp -r ../src ./train_cpu +cp -r ../config ./train_cpu echo "start training for device CPU" cd ./train_cpu || exit env > env.log -python train.py --device_target=CPU --train_data_dir=$PATH1 --dataset_name=$2 --config_path=$CONFIG_FILE> ./train.log 2>&1 & +python train.py --device_target=CPU --train_data_dir=$PATH1 --dataset_name=$2 \ + --config_path=$CONFIG_FILE> ./train.log 2>&1 & cd ..
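For reference, the Ascend workflow introduced by this patch can be exercised end to end as sketched below. This is an illustrative sketch only: the dataset and checkpoint paths are placeholders, device 0 and an ImageNet-format dataset are assumed, and both scripts resolve the matching config file (imagenet_config.yaml or cifar10_config.yaml) from the dataset argument, so --config_path does not need to be passed by hand.

```shell
# Illustrative usage of the revised script interfaces (placeholder paths; single Ascend device assumed).
cd scripts/

# Standalone training: run_standalone_train.sh [DEVICE_ID] [TRAIN_DATA_DIR] [cifar10|imagenet]
bash run_standalone_train.sh 0 /path/to/imagenet/train imagenet

# Evaluation: run_eval.sh [VAL_DATA_DIR] [cifar10|imagenet] [checkpoint_path]
# The README recommends an absolute checkpoint path; run_eval.sh also resolves relative paths via get_real_path.
bash run_eval.sh /path/to/imagenet/val imagenet /absolute/path/to/train_tinydarknet.ckpt
```

Both scripts copy the sources into a working directory (train_single/ or eval/) and run in the background, so progress appears in train_single/train.log and eval/eval.log respectively.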