update tests
This commit is contained in:
parent
ef94113a67
commit
b3bbad24c5
|
@ -17,13 +17,14 @@ install:
|
|||
- pip install flake8 # pytest # add other testing frameworks later
|
||||
- pip install pipenv
|
||||
- pipenv install --dev
|
||||
- python3 ./setup.py install
|
||||
before_script:
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
- flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics
|
||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||
- flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
||||
script:
|
||||
- "./tests/test.sh" # pytest --capture=sys # add other tests here
|
||||
- "./decanlp/tests/test.sh" # pytest --capture=sys # add other tests here
|
||||
notifications:
|
||||
on_success: change
|
||||
on_failure: change # `always` will be the setting once code changes slow down
|
||||
|
|
|
@ -127,6 +127,7 @@ def parse(argv):
|
|||
parser.add_argument('--use_bleu_loss', action='store_true', help='whether to use differentiable BLEU loss or not')
|
||||
parser.add_argument('--use_maxmargin_loss', action='store_true', help='whether to use max-margin loss or not')
|
||||
parser.add_argument('--loss_switch', default=0.666, type=float, help='switch to BLEU loss after certain iterations controlled by this ratio')
|
||||
parser.add_argument('--small_glove', action='store_true', help='Use glove.6B.50d instead of glove.840B.300d')
|
||||
|
||||
|
||||
args = parser.parse_args(argv[1:])
|
||||
|
|
|
@ -71,9 +71,12 @@ def prepare_data(args, FIELD):
|
|||
args.max_generative_vocab = min(len(FIELD.vocab), args.max_generative_vocab)
|
||||
FIELD.append_vocab(new_vocab)
|
||||
logger.info(f'Vocabulary has expanded to {len(FIELD.vocab)} tokens')
|
||||
|
||||
logger.info(f'Getting pretrained word vectors')
|
||||
char_vectors = torchtext.vocab.CharNGram(cache=args.embeddings)
|
||||
glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings)
|
||||
if args.small_glove:
|
||||
glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings, name="6B", dim=50)
|
||||
else:
|
||||
glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings)
|
||||
vectors = [char_vectors, glove_vectors]
|
||||
FIELD.vocab.load_vectors(vectors, True)
|
||||
FIELD.decoder_to_vocab = {idx: FIELD.vocab.stoi[word] for idx, word in enumerate(FIELD.decoder_itos)}
|
||||
|
@ -268,7 +271,8 @@ def get_args(argv):
|
|||
'transformer_layers', 'rnn_layers', 'transformer_hidden',
|
||||
'dimension', 'load', 'max_val_context_length', 'val_batch_size',
|
||||
'transformer_heads', 'max_output_length', 'max_generative_vocab',
|
||||
'lower', 'cove', 'intermediate_cove', 'elmo', 'glove_and_char', 'use_maxmargin_loss']
|
||||
'lower', 'cove', 'intermediate_cove', 'elmo', 'glove_and_char',
|
||||
'use_maxmargin_loss', 'small_glove']
|
||||
for r in retrieve:
|
||||
if r in config:
|
||||
setattr(args, r, config[r])
|
||||
|
|
|
@ -68,7 +68,10 @@ class Server():
|
|||
logger.info(f'Vocabulary has {len(self.field.vocab)} tokens from training')
|
||||
|
||||
char_vectors = torchtext.vocab.CharNGram(cache=self.args.embeddings)
|
||||
glove_vectors = torchtext.vocab.GloVe(cache=self.args.embeddings)
|
||||
if args.small_glove:
|
||||
glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings, name="6B", dim=50)
|
||||
else:
|
||||
glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings)
|
||||
self._vector_collections = [char_vectors, glove_vectors]
|
||||
|
||||
self._limited_idx_to_full_idx = deepcopy(self.field.decoder_to_vocab) # should avoid this with a conditional in map to full
|
||||
|
@ -222,8 +225,8 @@ def get_args(argv):
|
|||
'transformer_layers', 'rnn_layers', 'transformer_hidden',
|
||||
'dimension', 'load', 'max_val_context_length', 'val_batch_size',
|
||||
'transformer_heads', 'max_output_length', 'max_generative_vocab',
|
||||
'lower', 'cove', 'intermediate_cove', 'elmo', 'glove_and_char', 'use_maxmargin_loss',
|
||||
'reverse_task_bool']
|
||||
'lower', 'cove', 'intermediate_cove', 'elmo', 'glove_and_char',
|
||||
'use_maxmargin_loss', 'reverse_task_bool', 'small_glove']
|
||||
for r in retrieve:
|
||||
if r in config:
|
||||
setattr(args, r, config[r])
|
||||
|
|
Can't render this file because it contains an unexpected character in line 1 and column 189.
|
Can't render this file because it contains an unexpected character in line 3 and column 230.
|
Can't render this file because it contains an unexpected character in line 3 and column 397.
|
|
@ -6,10 +6,12 @@ SRCDIR=`dirname $0`
|
|||
|
||||
# functional tests
|
||||
|
||||
|
||||
#mkdir ./embeddings
|
||||
#wget --no-verbose http://nlp.stanford.edu/data/glove.840B.300d.zip ; unzip glove.840B.300d.zip ; mv glove.840B.300d.zip embeddings/ ; rm glove.42B.300d.zip
|
||||
#wget --no-verbose http://www.logos.t.u-tokyo.ac.jp/~hassy/publications/arxiv2016jmt/jmt_pre-trained_embeddings.tar.gz ; tar -xzvf jmt_pre-trained_embeddings.tar.gz; mv jmt_pre-trained_embeddings embeddings/; rm jmt_pre-trained_embeddings.tar.gz
|
||||
function delete {
|
||||
rm -rf $1
|
||||
}
|
||||
mkdir -p $SRCDIR/embeddings
|
||||
curl -O "https://parmesan.stanford.edu/glove/glove.6B.50d.txt.pt" ; mv glove.6B.50d.txt.pt $SRCDIR/embeddings/
|
||||
curl -O "https://parmesan.stanford.edu/glove/charNgram.txt.pt" ; mv charNgram.txt.pt $SRCDIR/embeddings/
|
||||
|
||||
TMPDIR=`pwd`
|
||||
workdir=`mktemp -d $TMPDIR/decaNLP-tests-XXXXXX`
|
||||
|
@ -19,15 +21,21 @@ SRCDIR=`dirname $0`
|
|||
for hparams in "" ; do
|
||||
|
||||
# train
|
||||
pipenv run python3 $SRCDIR/../train.py --train_tasks almond --train_iterations 4 --preserve_case --save_every 2--log_every 2 --val_every 2 --save $workdir/model_$i --data dataset/ $hparams --exist_ok --skip_cache --no_glove_and_char --elmo 0
|
||||
pipenv run decanlp train --train_tasks almond --train_iterations 2 --preserve_case --save_every 2 --log_every 2 --val_every 2 --save $workdir/model_$i --data $SRCDIR/dataset/ $hparams --exist_ok --skip_cache --root "" --embeddings $SRCDIR/embeddings --small_glove
|
||||
|
||||
# greedy decode
|
||||
pipenv run python3 $SRCDIR/../predict.py --tasks almond --evaluate test --path ~/$workdir/model_$i --overwrite --eval_dir $workdir/model_$i/eval_results/ --data dataset/ --no_glove_and_char --elmo 0
|
||||
pipenv run decanlp predict --tasks almond --evaluate test --path $workdir/model_$i --overwrite --eval_dir $workdir/model_$i/eval_results/ --data $SRCDIR/dataset/ --embeddings $SRCDIR/embeddings
|
||||
|
||||
# export prediction results
|
||||
pipenv run python3 $SRCDIR/../utils/post_process_decoded_results.py --original_data dataset/test.tsv --gold_program $workdir/model_$i/eval_results/almond.gold.txt --predicted_program $workdir/model_$i/eval_results/almond.txt --output_file $workdir/model_$i/results.tsv
|
||||
pipenv run python3 $SRCDIR/../utils/post_process_decoded_results.py --original_data $SRCDIR/dataset/almond/test.tsv --gold_program $workdir/model_$i/best/test/almond.gold.txt --predicted_program $workdir/model_$i/best/test/almond.txt --output_file $workdir/model_$i/results.tsv
|
||||
|
||||
# check if result files exist
|
||||
if [ ! -f $workdir/model_$i/results.tsv ] && [ ! -f $workdir/model_$i/results_raw.tsv ]; then
|
||||
echo "File not found!"
|
||||
exit
|
||||
fi
|
||||
|
||||
i=$((i+1))
|
||||
done
|
||||
|
||||
trap { rm -rf $workdir } EXIT
|
||||
trap "delete $workdir" EXIT
|
|
@ -115,7 +115,10 @@ def prepare_data(args, field, logger):
|
|||
if args.load is None:
|
||||
logger.info(f'Getting pretrained word vectors')
|
||||
char_vectors = torchtext.vocab.CharNGram(cache=args.embeddings)
|
||||
glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings)
|
||||
if args.small_glove:
|
||||
glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings, name="6B", dim=50)
|
||||
else:
|
||||
glove_vectors = torchtext.vocab.GloVe(cache=args.embeddings)
|
||||
vectors = [char_vectors, glove_vectors]
|
||||
vocab_sets = (train_sets + val_sets) if len(vocab_sets) == 0 else vocab_sets
|
||||
logger.info(f'Building vocabulary')
|
||||
|
|
Loading…
Reference in New Issue