Unverified commit c901738c, authored by i-robot and committed by Gitee

!2006 Fix densenet121 failing to reach the standard accuracy with distributed training on GPU

Merge pull request !2006 from zhanghuiyao/fix_densenet121_gpu_bug
parents 9fd344f3 e7f6f1ef
Tags: v1.6.0
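Context for the change: on GPU the per-process rank has to come from the communication backend rather than from environment variables. The src.model_utils.device_adapter.get_rank_id helper used before this fix appears to derive the rank from the RANK_ID environment variable, which Ascend launch scripts export but an mpirun-based GPU launch does not, so every worker reported rank 0 and the per-rank data shards, logs, and checkpoints collided, which is what degraded the distributed-training accuracy. mindspore.communication.management.get_rank() instead queries the NCCL communication group that init() creates. A minimal sketch of the corrected setup (plain variables here stand in for the config fields used in the scripts below):

    # Minimal sketch of the GPU-safe distributed setup applied in this commit.
    # Assumes the job is launched with an MPI launcher, e.g. `mpirun -n 8 python train.py`.
    from mindspore import context
    from mindspore.communication.management import init, get_rank, get_group_size

    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    init()                         # sets up NCCL on GPU; ranks come from the launcher
    rank = get_rank()              # unique per process
    group_size = get_group_size()  # total number of workers
    print(f"worker {rank} of {group_size}")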
@@ -25,7 +25,7 @@ import numpy as np
 from mindspore import context
 import mindspore.nn as nn
 from mindspore import Tensor
-from mindspore.communication.management import init, get_group_size, release
+from mindspore.communication.management import init, get_group_size, get_rank, release
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
 from mindspore.ops import operations as P
 from mindspore.ops import functional as F
@@ -33,7 +33,7 @@ from mindspore.common import dtype as mstype
 from src.utils.logging import get_logger
 from src.model_utils.moxing_adapter import moxing_wrapper
 from src.model_utils.config import config
-from src.model_utils.device_adapter import get_device_id, get_rank_id
+from src.model_utils.device_adapter import get_device_id


 class ParameterReduce(nn.Cell):
@@ -115,7 +115,7 @@ def test():
     # init distributed
     if config.is_distributed:
         init()
-        config.rank = get_rank_id()
+        config.rank = get_rank()
         config.group_size = get_group_size()

     config.outputs_dir = os.path.join(config.log_path, datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))
......
@@ -21,7 +21,7 @@ import moxing as mox
 import mindspore.nn as nn
 from mindspore import Tensor
 from mindspore.nn.optim import Momentum
-from mindspore.communication.management import init, get_group_size
+from mindspore.communication.management import init, get_group_size, get_rank
 from mindspore.train.callback import ModelCheckpoint
 from mindspore.train.callback import CheckpointConfig, Callback
 from mindspore.train.serialization import export, load_checkpoint, load_param_into_net
@@ -36,7 +36,7 @@ from src.lr_scheduler import MultiStepLR, CosineAnnealingLR
 from src.utils.logging import get_logger
 from src.model_utils.moxing_adapter import moxing_wrapper
 from src.model_utils.config import config
-from src.model_utils.device_adapter import get_device_id, get_rank_id
+from src.model_utils.device_adapter import get_device_id


 set_seed(1)
@@ -174,7 +174,7 @@ def train():
     # init distributed
     if config.is_distributed:
         init()
-        config.rank = get_rank_id()
+        config.rank = get_rank()
         config.group_size = get_group_size()

     if config.is_dynamic_loss_scale == 1:
......
@@ -19,7 +19,7 @@ import datetime
 import mindspore.nn as nn
 from mindspore import Tensor
 from mindspore.nn.optim import Momentum
-from mindspore.communication.management import init, get_group_size
+from mindspore.communication.management import init, get_group_size, get_rank
 from mindspore.train.callback import ModelCheckpoint
 from mindspore.train.callback import CheckpointConfig, Callback
 from mindspore.train.serialization import load_checkpoint, load_param_into_net
@@ -34,7 +34,7 @@ from src.lr_scheduler import MultiStepLR, CosineAnnealingLR
 from src.utils.logging import get_logger
 from src.model_utils.moxing_adapter import moxing_wrapper
 from src.model_utils.config import config
-from src.model_utils.device_adapter import get_device_id, get_rank_id
+from src.model_utils.device_adapter import get_device_id


 set_seed(1)
@@ -142,7 +142,7 @@ def train():
     # init distributed
     if config.is_distributed:
         init()
-        config.rank = get_rank_id()
+        config.rank = get_rank()
         config.group_size = get_group_size()

     if config.is_dynamic_loss_scale == 1:
......
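For comparison, the pre-fix failure mode can be illustrated with an environment-based helper; the implementation below is an assumption mirroring the usual model_utils convention, not the repository's actual code:

    import os

    # Hypothetical stand-in for device_adapter.get_rank_id: it falls back to 0
    # when RANK_ID is not exported. Ascend launch scripts set RANK_ID for each
    # process, but an mpirun GPU launch typically does not, so every worker
    # would see rank 0 here.
    def get_rank_id_from_env():
        return int(os.getenv("RANK_ID", "0"))

Once init() has run, get_rank() reads the rank from the initialized communication group instead, so the same scripts behave correctly on both Ascend (HCCL) and GPU (NCCL).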