diff --git a/research/cv/PDarts/src/call_backs.py b/research/cv/PDarts/src/call_backs.py index 4cac04de37decff05c4eeefa3f1b183c64922e32..93d4df358b3ea788a457e461ac5f3850149142b7 100644 --- a/research/cv/PDarts/src/call_backs.py +++ b/research/cv/PDarts/src/call_backs.py @@ -13,12 +13,6 @@ # limitations under the License. # ============================================================================ """train callbacks""" -try: - from moxing.framework import file - print("import moxing success") -except ModuleNotFoundError as e: - print(f'not modelarts env, error={e}') - import os import time @@ -132,6 +126,7 @@ class Val_Callback(Callback): ckpt_file = os.path.join(ckpt_path, 'model_checkpoint.ckpt') save_checkpoint(cb_params.train_network, ckpt_file) if self.checkpoint_path.startswith('s3://') or self.checkpoint_path.startswith('obs://'): + from moxing.framework import file file.copy_parallel(save_path, os.path.join( self.checkpoint_path, model_info)) print('==============save checkpoint finished===================') diff --git a/research/cv/PDarts/train.py b/research/cv/PDarts/train.py index 5b101c5522cf5702250f658fdbdb628f1fe6cd12..d59beceb979ae24d714e763cfdb35f33c4fe3fd2 100644 --- a/research/cv/PDarts/train.py +++ b/research/cv/PDarts/train.py @@ -13,12 +13,6 @@ # limitations under the License. # ============================================================================ """train the PDarts model""" -try: - from moxing.framework import file - print("import moxing success") -except ModuleNotFoundError as e: - print(f'not modelarts env, error={e}') - import os import time import logging @@ -136,6 +130,7 @@ def main(): load_param_into_net(network, param_dict) if args.data_url.startswith('s3://') or args.data_url.startswith('obs://'): + from moxing.framework import file data_url_cache = os.path.join(args.local_data_root, 'data') file.copy_parallel(args.data_url, data_url_cache) args.data_url = data_url_cache diff --git a/research/cv/yolov3_tiny/scripts/run_standalone_train.sh b/research/cv/yolov3_tiny/scripts/run_standalone_train.sh index 29adf4b1e9ece4cffdccaea59a118c1b64067e5c..7c7fc6facb7cc02acb2dda73934fe1c825f16caa 100644 --- a/research/cv/yolov3_tiny/scripts/run_standalone_train.sh +++ b/research/cv/yolov3_tiny/scripts/run_standalone_train.sh @@ -65,7 +65,6 @@ python train.py \ --max_epoch=300 \ --warmup_epochs=4 \ --training_shape=640 \ - --per_batch_size=32 \ --weight_decay=0.016 \ --lr_scheduler=cosine_annealing > log.txt 2>&1 & cd .. diff --git a/research/cv/yolov3_tiny/src/yolo_dataset.py b/research/cv/yolov3_tiny/src/yolo_dataset.py index 1678f9d7e317692281b91599bbcf9f1b1f10db2d..78c18779ec73d0ff4e5bb89253d4ea1974c17662 100644 --- a/research/cv/yolov3_tiny/src/yolo_dataset.py +++ b/research/cv/yolov3_tiny/src/yolo_dataset.py @@ -298,7 +298,7 @@ def create_yolo_dataset( input_columns=dataset_column_names, output_columns=map1_out_column_names, column_order=map1_out_column_names, - num_parallel_workers=num_parallel_workers, + num_parallel_workers=16, python_multiprocessing=True ) ds = ds.map( @@ -306,7 +306,7 @@ def create_yolo_dataset( input_columns=map2_in_column_names, output_columns=map2_out_column_names, column_order=output_column_names, - num_parallel_workers=num_parallel_workers, + num_parallel_workers=2, python_multiprocessing=False ) mean = [m * 255 for m in [0.485, 0.456, 0.406]] @@ -316,9 +316,9 @@ def create_yolo_dataset( CV.Normalize(mean, std), hwc_to_chw ], - num_parallel_workers=num_parallel_workers + num_parallel_workers=2 ) - ds = ds.batch(batch_size, num_parallel_workers=num_parallel_workers, drop_remainder=True) + ds = ds.batch(batch_size, num_parallel_workers=1, drop_remainder=True) else: ds = de.GeneratorDataset( yolo_dataset,