train: don't save per-worker checkpoints if we're not doing distributed training

Saves disk space
Giovanni Campagna 2019-03-01 10:51:12 -08:00
parent a36f2efb8c
commit d4b35d7ae6
1 changed file with 1 addition and 2 deletions


@@ -216,7 +216,6 @@ def train(args, model, opt, train_iters, train_iterations, field, rank=0, world_
    if world_size > 1:
        torch.distributed.barrier()
    torch.save(opt.state_dict(), os.path.join(args.log_dir, f'iteration_{iteration}_rank_{rank}_optim.pth'))
    if world_size > 1:
        torch.distributed.barrier()
    # lr update
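
The intent described in the commit message is that rank-suffixed optimizer checkpoints only make sense when several workers are running; in a single-process run they just duplicate data on disk. A minimal sketch of that idea follows (the save_optimizer_checkpoint helper and the unsuffixed file name are assumptions for illustration, not the repository's actual code):

    import os
    import torch

    def save_optimizer_checkpoint(opt, log_dir, iteration, rank=0, world_size=1):
        # Hypothetical helper: write a per-rank optimizer checkpoint only when
        # distributed training is active; otherwise write a single file without
        # a rank suffix, which saves disk space.
        if world_size > 1:
            # one file per worker, with barriers so all ranks reach the save together
            torch.distributed.barrier()
            path = os.path.join(log_dir, f'iteration_{iteration}_rank_{rank}_optim.pth')
            torch.save(opt.state_dict(), path)
            torch.distributed.barrier()
        else:
            # single-process run: one checkpoint, no per-worker copies
            path = os.path.join(log_dir, f'iteration_{iteration}_optim.pth')
            torch.save(opt.state_dict(), path)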