update tests

mehrad 2019-03-04 15:08:56 -08:00
parent ef94113a67
commit b3bbad24c5
9 changed files with 36 additions and 16 deletions

View File

@@ -17,13 +17,14 @@ install:
   - pip install flake8 # pytest # add another testing frameworks later
   - pip install pipenv
   - pipenv install --dev
+  - python3 ./setup.py install
 before_script:
   # stop the build if there are Python syntax errors or undefined names
   - flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics
   # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
   - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
 script:
-  - "./tests/test.sh" # pytest --capture=sys # add other tests here
+  - "./decanlp/tests/test.sh" # pytest --capture=sys # add other tests here
 notifications:
   on_success: change
   on_failure: change # `always` will be the setting once code changes slow down

View File

@@ -127,6 +127,7 @@ def parse(argv):
     parser.add_argument('--use_bleu_loss', action='store_true', help='whether to use differentiable BLEU loss or not')
     parser.add_argument('--use_maxmargin_loss', action='store_true', help='whether to use max-margin loss or not')
     parser.add_argument('--loss_switch', default=0.666, type=float, help='switch to BLEU loss after certain iterations controlled by this ratio')
+    parser.add_argument('--small_glove', action='store_true', help='Use glove.6B.50d instead of glove.840B.300d')
     args = parser.parse_args(argv[1:])
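
Note: a `store_true` flag defaults to False, so existing configurations keep the full embeddings. A standalone sketch of the flag's behavior; only the flag itself comes from this commit, the minimal parser around it is illustrative:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--small_glove', action='store_true',
                    help='Use glove.6B.50d instead of glove.840B.300d')

print(parser.parse_args([]).small_glove)                 # False -> default glove.840B.300d
print(parser.parse_args(['--small_glove']).small_glove)  # True  -> small glove.6B.50d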

View File

@@ -71,9 +71,12 @@ def prepare_data(args, FIELD):
         args.max_generative_vocab = min(len(FIELD.vocab), args.max_generative_vocab)
         FIELD.append_vocab(new_vocab)
         logger.info(f'Vocabulary has expanded to {len(FIELD.vocab)} tokens')
     logger.info(f'Getting pretrained word vectors')
     char_vectors = torchtext.vocab.CharNGram(cache=args.embeddings)
-    glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings)
+    if args.small_glove:
+        glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings, name="6B", dim=50)
+    else:
+        glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings)
     vectors = [char_vectors, glove_vectors]
     FIELD.vocab.load_vectors(vectors, True)
     FIELD.decoder_to_vocab = {idx: FIELD.vocab.stoi[word] for idx, word in enumerate(FIELD.decoder_itos)}
@@ -268,7 +271,8 @@ def get_args(argv):
                 'transformer_layers', 'rnn_layers', 'transformer_hidden',
                 'dimension', 'load', 'max_val_context_length', 'val_batch_size',
                 'transformer_heads', 'max_output_length', 'max_generative_vocab',
-                'lower', 'cove', 'intermediate_cove', 'elmo', 'glove_and_char', 'use_maxmargin_loss']
+                'lower', 'cove', 'intermediate_cove', 'elmo', 'glove_and_char',
+                'use_maxmargin_loss', 'small_glove']
     for r in retrieve:
         if r in config:
             setattr(args, r, config[r])
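
The same `if args.small_glove` branch appears at every call site that builds vectors (here, in the server, and in train below). A self-contained sketch of that pattern, assuming a torchtext release that still ships `torchtext.vocab.GloVe` and `torchtext.vocab.CharNGram`; the `Namespace` stub stands in for the parsed arguments:

from argparse import Namespace
import torchtext

args = Namespace(embeddings='.embeddings', small_glove=True)  # stub for parsed args

# instantiating these downloads the vectors into the cache directory on first use
char_vectors = torchtext.vocab.CharNGram(cache=args.embeddings)
if args.small_glove:
    # glove.6B.50d: 400K-word vocabulary, 50-dim vectors, a far smaller download
    glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings, name="6B", dim=50)
else:
    # torchtext's default GloVe: 840B-token corpus, 300-dim vectors
    glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings)

vectors = [char_vectors, glove_vectors]  # handed to FIELD.vocab.load_vectors(vectors, True)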

View File

@@ -68,7 +68,10 @@ class Server():
         logger.info(f'Vocabulary has {len(self.field.vocab)} tokens from training')
         char_vectors = torchtext.vocab.CharNGram(cache=self.args.embeddings)
-        glove_vectors = torchtext.vocab.GloVe(cache=self.args.embeddings)
+        if args.small_glove:
+            glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings, name="6B", dim=50)
+        else:
+            glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings)
         self._vector_collections = [char_vectors, glove_vectors]
         self._limited_idx_to_full_idx = deepcopy(self.field.decoder_to_vocab) # should avoid this with a conditional in map to full
@@ -222,8 +225,8 @@ def get_args(argv):
                 'transformer_layers', 'rnn_layers', 'transformer_hidden',
                 'dimension', 'load', 'max_val_context_length', 'val_batch_size',
                 'transformer_heads', 'max_output_length', 'max_generative_vocab',
-                'lower', 'cove', 'intermediate_cove', 'elmo', 'glove_and_char', 'use_maxmargin_loss',
-                'reverse_task_bool']
+                'lower', 'cove', 'intermediate_cove', 'elmo', 'glove_and_char',
+                'use_maxmargin_loss', 'reverse_task_bool', 'small_glove']
     for r in retrieve:
         if r in config:
             setattr(args, r, config[r])
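
Keeping 'small_glove' in the `retrieve` whitelist is what makes inference rebuild the same embeddings it trained with: predict and server both restore saved hyperparameters onto the runtime args with this loop. A minimal sketch of the pattern; the helper name and JSON path are illustrative:

import json

def restore_saved_args(args, config_path, retrieve):
    # copy whitelisted training-time settings onto the runtime args
    with open(config_path) as f:
        config = json.load(f)
    for r in retrieve:
        if r in config:
            setattr(args, r, config[r])
    return args

# e.g.: args = restore_saved_args(args, 'model/config.json', ['small_glove'])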

View File

@@ -6,10 +6,12 @@ SRCDIR=`dirname $0`
 # functional tests
 #mkdir ./embeddings
 #wget --no-verbose http://nlp.stanford.edu/data/glove.840B.300d.zip ; unzip glove.840B.300d.zip ; mv glove.840B.300d.zip embeddings/ ; rm glove.42B.300d.zip
 #wget --no-verbose http://www.logos.t.u-tokyo.ac.jp/~hassy/publications/arxiv2016jmt/jmt_pre-trained_embeddings.tar.gz ; tar -xzvf jmt_pre-trained_embeddings.tar.gz; mv jmt_pre-trained_embeddings embeddings/; rm jmt_pre-trained_embeddings.tar.gz
+function delete {
+    rm -rf $1
+}
+mkdir -p $SRCDIR/embeddings
+curl -O "https://parmesan.stanford.edu/glove/glove.6B.50d.txt.pt" ; mv glove.6B.50d.txt.pt $SRCDIR/embeddings/
+curl -O "https://parmesan.stanford.edu/glove/charNgram.txt.pt" ; mv charNgram.txt.pt $SRCDIR/embeddings/
 TMPDIR=`pwd`
 workdir=`mktemp -d $TMPDIR/decaNLP-tests-XXXXXX`
@@ -19,15 +21,21 @@ SRCDIR=`dirname $0`
 for hparams in "" ; do
     # train
-    pipenv run python3 $SRCDIR/../train.py --train_tasks almond --train_iterations 4 --preserve_case --save_every 2--log_every 2 --val_every 2 --save $workdir/model_$i --data dataset/ $hparams --exist_ok --skip_cache --no_glove_and_char --elmo 0
+    pipenv run decanlp train --train_tasks almond --train_iterations 2 --preserve_case --save_every 2 --log_every 2 --val_every 2 --save $workdir/model_$i --data $SRCDIR/dataset/ $hparams --exist_ok --skip_cache --root "" --embeddings $SRCDIR/embeddings --small_glove
     # greedy decode
-    pipenv run python3 $SRCDIR/../predict.py --tasks almond --evaluate test --path ~/$workdir/model_$i --overwrite --eval_dir $workdir/model_$i/eval_results/ --data dataset/ --no_glove_and_char --elmo 0
+    pipenv run decanlp predict --tasks almond --evaluate test --path $workdir/model_$i --overwrite --eval_dir $workdir/model_$i/eval_results/ --data $SRCDIR/dataset/ --embeddings $SRCDIR/embeddings
     # export prediction results
-    pipenv run python3 $SRCDIR/../utils/post_process_decoded_results.py --original_data dataset/test.tsv --gold_program $workdir/model_$i/eval_results/almond.gold.txt --predicted_program $workdir/model_$i/eval_results/almond.txt --output_file $workdir/model_$i/results.tsv
+    pipenv run python3 $SRCDIR/../utils/post_process_decoded_results.py --original_data $SRCDIR/dataset/almond/test.tsv --gold_program $workdir/model_$i/best/test/almond.gold.txt --predicted_program $workdir/model_$i/best/test/almond.txt --output_file $workdir/model_$i/results.tsv
+    # check if result files exist
+    if [ ! -f $workdir/model_$i/results.tsv ] && [ ! -f $workdir/model_$i/results_raw.tsv ]; then
+        echo "File not found!"
+        exit
+    fi
     i=$((i+1))
 done
-trap { rm -rf $workdir } EXIT
+trap "delete $workdir" EXIT

View File

@@ -115,7 +115,10 @@ def prepare_data(args, field, logger):
     if args.load is None:
         logger.info(f'Getting pretrained word vectors')
         char_vectors = torchtext.vocab.CharNGram(cache=args.embeddings)
-        glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings)
+        if args.small_glove:
+            glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings, name="6B", dim=50)
+        else:
+            glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings)
         vectors = [char_vectors, glove_vectors]
         vocab_sets = (train_sets + val_sets) if len(vocab_sets) == 0 else vocab_sets
         logger.info(f'Building vocabulary')