#
# Copyright (c) 2018, Salesforce, Inc.
# The Board of Trustees of the Leland Stanford Junior University
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# * Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# * Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
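"""Command-line argument handling for decaNLP training.

Defines all training, validation, model, and optimization flags;
post-processes the parsed values; and saves the resulting configuration
to config.json in the log directory.
"""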
import os
from argparse import ArgumentParser
import subprocess
import json
import datetime
import logging

from .tasks.registry import get_tasks

logger = logging.getLogger(__name__)

def get_commit():
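    """Return the hash of the git commit that this copy of the code is checked out at."""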
    directory = os.path.dirname(__file__)
    # the first line of `git log` is "commit <hash>"; take the second token to get the hash
    return subprocess.check_output('cd {} && git log | head -n 1'.format(directory),
                                   shell=True).split()[1].decode()

def save_args(args):
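    """Write the parsed arguments to <log_dir>/config.json so the run can be reproduced."""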
    os.makedirs(args.log_dir, exist_ok=args.exist_ok)
    with open(os.path.join(args.log_dir, 'config.json'), 'wt') as f:
        json.dump(vars(args), f, indent=2)

def parse(argv):
    """
    Returns the arguments from the command line.
    """
    parser = ArgumentParser(prog=argv[0])

    # paths
    parser.add_argument('--root', default='./decaNLP', type=str, help='root directory for data, results, embeddings, code, etc.')
    parser.add_argument('--data', default='.data/', type=str, help='where to load data from')
    parser.add_argument('--save', default='results', type=str, help='where to save results')
    parser.add_argument('--embeddings', default='.embeddings', type=str, help='where to save embeddings')
    parser.add_argument('--cached', default='', type=str, help='where to save cached files')
    parser.add_argument('--saved_models', default='./saved_models', type=str, help='directory where cached models should be loaded from')

    # training
    parser.add_argument('--train_tasks', nargs='+', type=str, dest='train_task_names', help='tasks to use for training', required=True)
    parser.add_argument('--train_iterations', nargs='+', type=int, help='number of iterations to focus on each task')
    parser.add_argument('--train_batch_tokens', nargs='+', default=[9000], type=int, help='number of tokens to use for dynamic batching, corresponding to the tasks in --train_tasks')
    parser.add_argument('--jump_start', default=0, type=int, help='number of iterations to give jump-started tasks')
    parser.add_argument('--n_jump_start', default=0, type=int, help='how many tasks to jump-start (presented in order)')
    parser.add_argument('--num_print', default=15, type=int, help='how many validation examples with greedy output to print to stdout')
    parser.add_argument('--no_tensorboard', action='store_false', dest='tensorboard', help='turn off tensorboard logging')
    parser.add_argument('--tensorboard_dir', default=None, help='directory where to save tensorboard logs (defaults to --save)')
    parser.add_argument('--max_to_keep', default=5, type=int, help='number of checkpoints to keep')
    parser.add_argument('--log_every', default=int(1e2), type=int, help='how often to log results, in # of iterations')
    parser.add_argument('--save_every', default=int(1e3), type=int, help='how often to save a checkpoint, in # of iterations')

    # validation
    parser.add_argument('--val_tasks', nargs='+', type=str, dest='val_task_names', help='tasks to collect evaluation metrics for')
    parser.add_argument('--val_every', default=int(1e3), type=int, help='how often to run validation, in # of iterations')
    parser.add_argument('--val_no_filter', action='store_false', dest='val_filter', help='whether to allow filtering on the validation sets')
    parser.add_argument('--val_batch_size', nargs='+', default=[256], type=int, help='batch size for validation, corresponding to the tasks in --val_tasks')

    # data and vocabulary limits
    parser.add_argument('--vocab_tasks', nargs='+', type=str, help='tasks to use in the construction of the vocabulary')
    parser.add_argument('--max_output_length', default=100, type=int, help='maximum output length for generation')
    parser.add_argument('--max_generative_vocab', default=50000, type=int, help='max vocabulary for the generative softmax')
    parser.add_argument('--max_train_context_length', default=500, type=int, help='maximum length of the contexts during training')
    parser.add_argument('--max_val_context_length', default=500, type=int, help='maximum length of the contexts during validation')
    parser.add_argument('--max_answer_length', default=50, type=int, help='maximum length of answers during training and validation')
    parser.add_argument('--subsample', default=20000000, type=int, help='subsample the datasets')
    parser.add_argument('--preserve_case', action='store_false', dest='lower', help='whether to preserve casing for all text')

    # model architecture
    parser.add_argument('--model', type=str, choices=['Seq2Seq'], default='Seq2Seq', help='which model to import')
    parser.add_argument('--seq2seq_encoder', type=str, choices=['MQANEncoder', 'BiLSTM', 'Identity'],
                        default='MQANEncoder', help='which encoder to use for the Seq2Seq model')
    parser.add_argument('--seq2seq_decoder', type=str, choices=['MQANDecoder'], default='MQANDecoder',
                        help='which decoder to use for the Seq2Seq model')
    parser.add_argument('--dimension', default=200, type=int, help='output dimensions for all layers')
    parser.add_argument('--rnn_dimension', default=None, type=int, help='output dimensions for RNN layers')
    parser.add_argument('--rnn_layers', default=1, type=int, help='number of layers for RNN modules')
    parser.add_argument('--rnn_zero_state', default='zero', choices=['zero', 'average'],
                        help='how to construct the RNN zero state (for the Identity encoder)')
    parser.add_argument('--transformer_layers', default=2, type=int, help='number of layers for transformer modules')
    parser.add_argument('--transformer_hidden', default=150, type=int, help='hidden size of the transformer modules')
    parser.add_argument('--transformer_heads', default=3, type=int, help='number of heads for transformer modules')
    parser.add_argument('--dropout_ratio', default=0.2, type=float, help='dropout for the model')

    # embeddings
    parser.add_argument('--encoder_embeddings', default='glove+char', help='which word embeddings to use on the encoder side; use a bert-* pretrained model for BERT; multiple embeddings can be concatenated with +')
    parser.add_argument('--train_encoder_embeddings', action='store_true', default=False, help='back-propagate into the pretrained encoder embeddings (recommended for BERT)')
    parser.add_argument('--decoder_embeddings', default='glove+char', help='which pretrained word embeddings to use on the decoder side')
    parser.add_argument('--trainable_decoder_embeddings', default=0, type=int, help='size of the trainable portion of the decoder embeddings (0 or omit to disable)')

    # optimization
    parser.add_argument('--warmup', default=800, type=int, help='warmup for the learning rate')
    parser.add_argument('--grad_clip', default=1.0, type=float, help='gradient clipping')
    parser.add_argument('--beta0', default=0.9, type=float, help='alternative momentum for Adam (only when not using transformer_lr)')
    parser.add_argument('--optimizer', default='adam', type=str, help='Adam or SGD')
    parser.add_argument('--no_transformer_lr', action='store_false', dest='transformer_lr', help='turn off the transformer learning rate strategy')
    parser.add_argument('--transformer_lr_multiply', default=1.0, type=float, help='multiplier for the transformer learning rate (if using Adam)')
    parser.add_argument('--lr_rate', default=0.001, type=float, help='fixed learning rate (if not using warmup)')
    parser.add_argument('--weight_decay', default=0.0, type=float, help='weight L2 regularization')

    # checkpointing and reproducibility
    parser.add_argument('--load', default=None, type=str, help='path to the checkpoint to load the model from, inside args.save')
    parser.add_argument('--resume', action='store_true', help='whether to resume training with past optimizers')
    parser.add_argument('--seed', default=123, type=int, help='random seed')
    parser.add_argument('--devices', default=[0], nargs='+', type=int, help='a list of devices that can be used for training')
    parser.add_argument('--no_commit', action='store_false', dest='commit', help='do not track the git commit associated with this training run')
    parser.add_argument('--exist_ok', action='store_true', help='ok if the save directory already exists, i.e. overwriting is ok')
    parser.add_argument('--skip_cache', action='store_true', dest='skip_cache_bool', help='ignore existing cached splits and generate new ones')

    # curriculum learning
    parser.add_argument('--use_curriculum', action='store_true', help='use curriculum learning')
    parser.add_argument('--aux_dataset', default='', type=str, help='path to the auxiliary dataset (ignored if the curriculum is not used)')
    parser.add_argument('--curriculum_max_frac', default=1.0, type=float, help='max fraction of the harder dataset to keep for the curriculum')
    parser.add_argument('--curriculum_rate', default=0.1, type=float, help='growth rate for the curriculum')
    parser.add_argument('--curriculum_strategy', default='linear', type=str, choices=['linear', 'exp'], help='growth strategy for the curriculum')

    parser.add_argument('--question', type=str, help='provide a fixed question')
    parser.add_argument('--use_google_translate', action='store_true', help='use Google Translate instead of a pre-trained machine translator')

    args = parser.parse_args(argv[1:])

    # by default, validate on the training tasks, minus imdb
    if args.val_task_names is None:
        args.val_task_names = []
        for t in args.train_task_names:
            if t not in args.val_task_names:
                args.val_task_names.append(t)
    if 'imdb' in args.val_task_names:
        args.val_task_names.remove('imdb')

    args.timestamp = datetime.datetime.now(tz=datetime.timezone.utc).isoformat()

    if args.use_google_translate:
        args.data = args.data + '_google_translate'

    # when per-task lists are shorter than the number of tasks, repeat them so
    # that every task gets a value
    if len(args.train_task_names) > 1:
        if args.train_iterations is None:
            args.train_iterations = [1]
        if len(args.train_iterations) < len(args.train_task_names):
            args.train_iterations = len(args.train_task_names) * args.train_iterations
        if len(args.train_batch_tokens) < len(args.train_task_names):
            args.train_batch_tokens = len(args.train_task_names) * args.train_batch_tokens
    if len(args.val_batch_size) < len(args.val_task_names):
        args.val_batch_size = len(args.val_task_names) * args.val_batch_size

    # postprocess arguments
    if args.commit:
        args.commit = get_commit()
    else:
        args.commit = ''
    if args.rnn_dimension is None:
        args.rnn_dimension = args.dimension
    args.log_dir = args.save
    if args.tensorboard_dir is None:
        args.tensorboard_dir = args.log_dir
    args.dist_sync_file = os.path.join(args.log_dir, 'distributed_sync_file')

    # make all paths relative to --root
    for x in ['data', 'save', 'embeddings', 'log_dir', 'dist_sync_file']:
        setattr(args, x, os.path.join(args.root, getattr(args, x)))

    save_args(args)

    # create the task objects after we saved the configuration to the JSON file,
    # because tasks are not JSON serializable
    args.train_tasks = get_tasks(args.train_task_names, args)
    args.val_tasks = get_tasks(args.val_task_names, args)

    return args
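

# A minimal sketch (not part of the original module) of how parse() is meant to
# be driven from a training entry point; 'train.py' and the flag values below
# are illustrative assumptions, not calls this package necessarily makes:
#
#     import sys
#     args = parse(sys.argv)
#     # e.g. sys.argv == ['train.py', '--train_tasks', 'squad', '--save', 'results/squad']
#     print(args.log_dir, args.commit, args.train_tasks)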