Move optimizer creation after device placement for ddp backends. (#2904)
parent 56396abe98
commit e3528afae3
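Rationale: with the old ordering, trainer.init_optimizers(model) ran before the model was moved to its GPU (and, for the DDP and DDP-spawn backends, before the optional SyncBatchNorm conversion). Stock torch.optim optimizers usually tolerated this, since nn.Module.cuda() moves parameters in place, but optimizers and wrappers that capture device state or allocate buffers at construction need to see the parameters on their final device. The three diffs below apply the same reordering to DDP2Backend, DDPBackend, and DDPSpawnBackend.

A minimal sketch of the ordering this commit enforces, in plain PyTorch rather than Lightning's internals (the setup helper is illustrative, not part of either API):

    import torch
    from torch import nn

    def setup(model: nn.Module, on_gpu: bool, root_gpu: int = 0):
        # Device placement first, mirroring torch.cuda.set_device(...)
        # and model.cuda(...) in the hunks below.
        if on_gpu and torch.cuda.is_available():
            torch.cuda.set_device(root_gpu)
            model = model.cuda(root_gpu)
        # Only now build the optimizer, so it references the
        # device-placed parameters.
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        return model, optimizer

    model, optimizer = setup(nn.Linear(10, 2), on_gpu=True)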
@@ -106,13 +106,6 @@ class DDP2Backend(object):
             log.info(f'All DDP processes registered. Starting ddp with {self.trainer.world_size} processes')
             log.info('-' * 100)
 
-        # CHOOSE OPTIMIZER
-        # allow for lr schedulers as well
-        optimizers, lr_schedulers, optimizer_frequencies = self.trainer.init_optimizers(model)
-        self.trainer.optimizers = optimizers
-        self.trainer.lr_schedulers = lr_schedulers
-        self.trainer.optimizer_frequencies = optimizer_frequencies
-
         # MODEL
         # copy model to each gpu
         if self.trainer.on_gpu:
@@ -130,6 +123,13 @@ class DDP2Backend(object):
             torch.cuda.set_device(self.trainer.root_gpu)
             model.cuda(self.trainer.root_gpu)
 
+        # CHOOSE OPTIMIZER
+        # allow for lr schedulers as well
+        optimizers, lr_schedulers, optimizer_frequencies = self.trainer.init_optimizers(model)
+        self.trainer.optimizers = optimizers
+        self.trainer.lr_schedulers = lr_schedulers
+        self.trainer.optimizer_frequencies = optimizer_frequencies
+
         # set model properties before going into wrapper
         self.trainer.copy_trainer_model_properties(model)
 
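Note on the DDP2 hunks: for built-in torch.optim optimizers the old ordering generally still worked, because nn.Module.cuda() moves parameter data in place and the optimizer keeps references to the same Parameter objects. A quick check of that behavior, as a sketch (the print runs only where CUDA is available):

    import torch
    from torch import nn

    model = nn.Linear(4, 2)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
    if torch.cuda.is_available():
        model.cuda(0)
        # Same Parameter objects as at construction, now on the GPU.
        print(optimizer.param_groups[0]['params'][0].device)  # cuda:0

The reordering removes the reliance on that in-place behavior and covers optimizers that inspect parameter devices at construction time.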
@@ -169,13 +169,6 @@ class DDPBackend(object):
             log.info(f'All DDP processes registered. Starting ddp with {self.trainer.world_size} processes')
             log.info('-' * 100)
 
-        # CHOOSE OPTIMIZER
-        # allow for lr schedulers as well
-        optimizers, lr_schedulers, optimizer_frequencies = self.trainer.init_optimizers(model)
-        self.trainer.optimizers = optimizers
-        self.trainer.lr_schedulers = lr_schedulers
-        self.trainer.optimizer_frequencies = optimizer_frequencies
-
         # call sync_bn before .cuda(), configure_apex and configure_ddp
         if self.trainer.sync_batchnorm:
             model = model.configure_sync_batchnorm(model)
@@ -197,6 +190,13 @@ class DDPBackend(object):
             torch.cuda.set_device(self.trainer.root_gpu)
             model.cuda(self.trainer.root_gpu)
 
+        # CHOOSE OPTIMIZER
+        # allow for lr schedulers as well
+        optimizers, lr_schedulers, optimizer_frequencies = self.trainer.init_optimizers(model)
+        self.trainer.optimizers = optimizers
+        self.trainer.lr_schedulers = lr_schedulers
+        self.trainer.optimizer_frequencies = optimizer_frequencies
+
         # set model properties before going into wrapper
         self.trainer.copy_trainer_model_properties(model)
 
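In DDPBackend (and DDPSpawnBackend below), the move also places optimizer creation after the optional SyncBatchNorm conversion. configure_sync_batchnorm is a hook that replaces modules, and depending on the implementation it may create new parameter objects, so building the optimizer afterwards is the safe ordering. A sketch of that ordering with the stock PyTorch converter:

    import torch
    from torch import nn

    model = nn.Sequential(nn.Linear(8, 8), nn.BatchNorm1d(8))
    # Convert BatchNorm layers before building the optimizer,
    # matching the ordering enforced above.
    model = nn.SyncBatchNorm.convert_sync_batchnorm(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=0.1)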
@@ -110,13 +110,6 @@ class DDPSpawnBackend(object):
             log.info(f'All DDP processes registered. Starting ddp with {self.trainer.world_size} processes')
             log.info('-' * 100)
 
-        # CHOOSE OPTIMIZER
-        # allow for lr schedulers as well
-        optimizers, lr_schedulers, optimizer_frequencies = self.trainer.init_optimizers(model)
-        self.trainer.optimizers = optimizers
-        self.trainer.lr_schedulers = lr_schedulers
-        self.trainer.optimizer_frequencies = optimizer_frequencies
-
         # call sync_bn before .cuda(), configure_apex and configure_ddp
         if self.trainer.sync_batchnorm:
             model = model.configure_sync_batchnorm(model)
@@ -129,6 +122,13 @@ class DDPSpawnBackend(object):
             torch.cuda.set_device(self.trainer.root_gpu)
             model.cuda(self.trainer.root_gpu)
 
+        # CHOOSE OPTIMIZER
+        # allow for lr schedulers as well
+        optimizers, lr_schedulers, optimizer_frequencies = self.trainer.init_optimizers(model)
+        self.trainer.optimizers = optimizers
+        self.trainer.lr_schedulers = lr_schedulers
+        self.trainer.optimizer_frequencies = optimizer_frequencies
+
         # set model properties before going into wrapper
         self.trainer.copy_trainer_model_properties(model)
 
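For the spawn backend the same ordering applies inside each worker process. A sketch of that per-process pattern with raw torch.multiprocessing (this is not Lightning's spawn loop, and it assumes at least one CUDA device):

    import torch
    from torch import nn
    import torch.multiprocessing as mp

    def worker(rank: int, world_size: int) -> None:
        # Each spawned process claims its own device first ...
        torch.cuda.set_device(rank)
        model = nn.Linear(10, 2).cuda(rank)
        # ... and creates its optimizer afterwards, in-process,
        # mirroring the DDPSpawnBackend hunks above.
        optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

    if __name__ == '__main__':
        world_size = torch.cuda.device_count()
        mp.spawn(worker, args=(world_size,), nprocs=world_size)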