diff --git a/official/cv/cspdarknet53/src/utils/var_init.py b/official/cv/cspdarknet53/src/utils/var_init.py index fd89909f53ad85d0bdfba89cf43c7a187fba4225..fcc326257b241707cd0f706b00f83783899be336 100644 --- a/official/cv/cspdarknet53/src/utils/var_init.py +++ b/official/cv/cspdarknet53/src/utils/var_init.py @@ -121,14 +121,14 @@ def default_recurisive_init(custom_cell): for _, cell in custom_cell.cells_and_names(): if isinstance(cell, nn.Conv2d): cell.weight.set_data(init.initializer(KaimingUniform(a=math.sqrt(5.0)), cell.weight.data.shape, - cell.weight.data.dtype).to_tensor()) + cell.weight.data.dtype).init_data()) if cell.bias is not None: fan_in, _ = _calculate_in_and_out(cell.weight.data.asnumpy()) bound = 1 / math.sqrt(fan_in) cell.bias.set_data(Tensor(np.random.uniform(-bound, bound, cell.bias.data.shape), cell.bias.data.dtype)) elif isinstance(cell, nn.Dense): cell.weight.set_data(init.initializer(KaimingUniform(a=math.sqrt(5)), cell.weight.data.shape, - cell.weight.data.dtype).to_tensor()) + cell.weight.data.dtype).init_data()) if cell.bias is not None: fan_in, _ = _calculate_in_and_out(cell.weight.data.asnumpy()) bound = 1 / math.sqrt(fan_in) diff --git a/official/nlp/fasttext/README.md b/official/nlp/fasttext/README.md index 026d6715eacfcc4a9d8c13fc8f158d773a75f2ad..77038ef44171ad235b0291973be744690c78f7ab 100644 --- a/official/nlp/fasttext/README.md +++ b/official/nlp/fasttext/README.md @@ -68,9 +68,6 @@ After dataset preparation, you can start training and evaluation as follows: cd ./scripts bash run_standalone_train.sh [TRAIN_DATASET] [DEVICEID] [DATANAME] - # run distributed training example - bash run_distribute_train.sh [TRAIN_DATASET] [RANK_TABLE_PATH] [DATANAME] - # run evaluation example bash run_eval.sh [EVAL_DATASET_PATH] [DATASET_NAME] [MODEL_CKPT] [DEVICEID] ``` @@ -220,13 +217,6 @@ Parameters for both training and evaluation can be set in config.py. All the dat bash run_standalone_train.sh [DATASET_PATH] [DEVICE_ID] [DATANAME] ``` - - Running scripts for distributed training of FastText. Task training on multiple device and run the following command in bash to be executed in `scripts/`: - - ```bash - cd ./scripts - bash run_distributed_train.sh [DATASET_PATH] [RANK_TABLE_PATH] [DATANAME] - ``` - - Running on GPU - Start task training on a single device and run the shell script diff --git a/official/nlp/fasttext/scripts/run_distribute_train_8p.sh b/official/nlp/fasttext/scripts/run_distribute_train_8p.sh deleted file mode 100644 index ea0d2183db70bbf74a4dc9b05f453c1cb30d30b8..0000000000000000000000000000000000000000 --- a/official/nlp/fasttext/scripts/run_distribute_train_8p.sh +++ /dev/null @@ -1,88 +0,0 @@ -#!/bin/bash -# Copyright 2020 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -echo "==============================================================================================================" -echo "Please run the script as: " -echo "sh run_distributed_train.sh DATASET_PATH RANK_TABLE_PATH" -echo "for example: sh run_distributed_train.sh /home/workspace/ag /home/workspace/rank_table_file.json ag" -echo "It is better to use absolute path." -echo "Please pay attention that the dataset should corresponds to dataset_name" -echo "==============================================================================================================" -get_real_path(){ - if [ "${1:0:1}" == "/" ]; then - echo "$1" - else - echo "$(realpath -m $PWD/$1)" - fi -} - -if [ $3 != "ag" ] && [ $3 != "dbpedia" ] && [ $3 != "yelp_p" ] -then - echo "Unrecognized dataset name, the name can choose from [ag, dbpedia, yelp_p]" -exit 1 -fi - -DATASET=$(get_real_path $1) -echo $DATASET -RANK_TABLE_PATH=$(get_real_path $2) -if [ ! -d $DATASET ] -then - echo "Error: DATA_PATH=$DATASET is not a file" -exit 1 -fi -current_exec_path=$(pwd) -echo ${current_exec_path} - -export RANK_TABLE_FILE=$RANK_TABLE_PATH - - -echo $RANK_TABLE_FILE -export RANK_SIZE=8 -export DEVICE_NUM=8 - -if [ $# -ge 1 ]; then - if [ $3 == 'ag' ]; then - DATANAME='ag' - elif [ $3 == 'dbpedia' ]; then - DATANAME='dbpedia' - elif [ $3 == 'yelp_p' ]; then - DATANAME='yelp_p' - else - echo "Unrecognized dataset name,he name can choose from [ag, dbpedia, yelp_p]" - exit 1 - fi -fi - -config_path="./${DATANAME}_config.yaml" -echo "config path is : ${config_path}" - -for((i=0;i<=7;i++)); -do - rm -rf ${current_exec_path}/device$i - mkdir ${current_exec_path}/device$i - cd ${current_exec_path}/device$i || exit - cp ../../*.py ./ - cp ../../*.yaml ./ - cp -r ../../src ./ - cp -r ../../model_utils ./ - cp -r ../*.sh ./ - export RANK_ID=$i - export DEVICE_ID=$i - echo "start training for rank $i, device $DEVICE_ID" - python ../../train.py --config_path $config_path --dataset_path $DATASET --data_name $DATANAME > log_fasttext.log 2>&1 & - cd ${current_exec_path} || exit -done -cd ${current_exec_path} || exit diff --git a/official/recommend/wide_and_deep/train_and_eval.py b/official/recommend/wide_and_deep/train_and_eval.py index f5c0c25a4cb561ab204f5205062e64ee54e130fd..c462062478e5533afe4e88c00000819359e6bd43 100644 --- a/official/recommend/wide_and_deep/train_and_eval.py +++ b/official/recommend/wide_and_deep/train_and_eval.py @@ -113,7 +113,6 @@ def train_wide_and_deep(): enable_graph_kernel=_enable_graph_kernel, device_target=cfg.device_target) if _enable_graph_kernel: context.set_context(graph_kernel_flags="--enable_cluster_ops=MatMul") - context.set_context(enable_sparse=cfg.sparse) test_train_eval(cfg) if __name__ == "__main__": diff --git a/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py b/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py index cc45783b3715935cc82963bb00ea1a576d3a0d35..9f8b333b717e7547336a2139251ef8489326a707 100644 --- a/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py +++ b/official/recommend/wide_and_deep/train_and_eval_auto_parallel.py @@ -154,7 +154,6 @@ def train_wide_and_deep(): if cfg.device_target == "GPU": context.set_context(enable_graph_kernel=True) context.set_context(variable_memory_max_size="24GB") - context.set_context(enable_sparse=True) init() context.set_context(save_graphs_path='./graphs_of_device_id_' + str(get_rank()), save_graphs=True) if cfg.sparse: diff --git a/official/recommend/wide_and_deep/train_and_eval_distribute.py b/official/recommend/wide_and_deep/train_and_eval_distribute.py index 50450e668820a6e0364916c4f9f581aeb049e000..786e67ecd1021c698b45dcc26a9b91b5041d5657 100644 --- a/official/recommend/wide_and_deep/train_and_eval_distribute.py +++ b/official/recommend/wide_and_deep/train_and_eval_distribute.py @@ -130,7 +130,6 @@ def train_wide_and_deep(): context.set_context(enable_graph_kernel=True) context.set_context(graph_kernel_flags="--enable_cluster_ops=MatMul") - context.set_context(enable_sparse=cfg.sparse) init() context.set_context(save_graphs_path='./graphs_of_device_id_'+str(get_rank())) context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL, gradients_mean=True, diff --git a/official/recommend/wide_and_deep/train_and_eval_parameter_server_distribute.py b/official/recommend/wide_and_deep/train_and_eval_parameter_server_distribute.py index 227bf48a3526a8b3bdd5abc69be6b8245e8659fb..0cffc8d1ae1f60a57473725b1d1d94ea29c7f809 100644 --- a/official/recommend/wide_and_deep/train_and_eval_parameter_server_distribute.py +++ b/official/recommend/wide_and_deep/train_and_eval_parameter_server_distribute.py @@ -167,8 +167,6 @@ def train_wide_and_deep(): device_num=get_group_size()) cfg.sparse = True - if cfg.sparse: - context.set_context(enable_sparse=True) if cfg.device_target == "GPU": context.set_context(enable_graph_kernel=True) context.set_context(graph_kernel_flags="--enable_cluster_ops=MatMul") diff --git a/official/recommend/wide_and_deep/train_and_eval_parameter_server_standalone.py b/official/recommend/wide_and_deep/train_and_eval_parameter_server_standalone.py index 8d59149ade47826e9fdc806e9165d08ec33a3b71..f30263f7e522cb63ad46ed69d17d5c97ae58bfb5 100644 --- a/official/recommend/wide_and_deep/train_and_eval_parameter_server_standalone.py +++ b/official/recommend/wide_and_deep/train_and_eval_parameter_server_standalone.py @@ -127,8 +127,6 @@ def train_wide_and_deep(): """ train_wide_and_deep """ if not cache_enable: cfg.sparse = True - if cfg.sparse: - context.set_context(enable_sparse=True) if cfg.device_target == "GPU": context.set_context(enable_graph_kernel=True) context.set_context(graph_kernel_flags="--enable_cluster_ops=MatMul")