Factor out some shared parameters in test cases

This commit is contained in:
Sina 2022-08-24 21:20:05 -07:00
parent c5d04f31bc
commit 25ebee3964
11 changed files with 66 additions and 88 deletions

View File

@@ -4,8 +4,8 @@ SRCDIR=`dirname $0`
on_error () {
rm -fr $workdir
rm -rf $SRCDIR/torch-shm-file-*
rm -fr $workdir
rm -rf $SRCDIR/torch-shm-file-*
}
# allow faster local testing
@@ -17,6 +17,8 @@ else
fi
export SENTENCE_TRANSFORMERS_HOME="$EMBEDDING_DIR"
# parameters that are commonly passed to `genienlp train` test cases
export SHARED_TRAIN_HPARAMS="--embeddings $EMBEDDING_DIR --exist_ok --no_commit --preserve_case --save_every 2 --log_every 2 --val_every 2"
TMPDIR=`pwd`
workdir=`mktemp -d $TMPDIR/genieNLP-tests-XXXXXX`

View File

@@ -16,21 +16,15 @@ do
# train
genienlp train \
$SHARED_TRAIN_HPARAMS \
--train_tasks almond_dialogue_nlu \
--train_batch_tokens 100 \
--val_batch_size 100 \
--train_iterations 6 \
--preserve_case \
--save_every 2 \
--log_every 2 \
--val_every 2 \
--save $workdir/model_$i \
--database_dir $SRCDIR/database/ \
--data $SRCDIR/dataset/thingpedia_99/ \
--bootleg_output_dir $SRCDIR/dataset/thingpedia_99/bootleg/ \
--exist_ok \
--embeddings $EMBEDDING_DIR \
--no_commit \
--do_ned \
--min_entity_len 2 \
--max_entity_len 4 \

View File

@@ -10,19 +10,13 @@ do
# train
genienlp train \
$SHARED_TRAIN_HPARAMS \
--train_tasks almond \
--train_batch_tokens 100 \
--val_batch_size 100 \
--train_iterations 6 \
--preserve_case \
--save_every 2 \
--log_every 2 \
--val_every 2 \
--save $workdir/model_$i \
--data $SRCDIR/dataset/ \
--exist_ok \
--embeddings $EMBEDDING_DIR \
--no_commit \
$hparams
# greedy prediction

View File

@@ -10,19 +10,13 @@ do
# train
genienlp train \
$SHARED_TRAIN_HPARAMS \
--train_tasks almond \
--train_batch_tokens 100 \
--val_batch_size 100 \
--train_iterations 2 \
--preserve_case \
--save_every 2 \
--log_every 2 \
--val_every 2 \
--save $workdir/model_$i \
--data $SRCDIR/dataset/ \
--exist_ok \
--embeddings $EMBEDDING_DIR \
--no_commit \
$hparams
# generate a long sequence

View File

@@ -19,20 +19,14 @@ for i in ${!hparams[*]};
do
# train
genienlp train \
$SHARED_TRAIN_HPARAMS \
--train_tasks ${tasks[i]} \
--train_batch_tokens 100 \
--val_batch_size 300 \
--train_iterations 4 \
--min_output_length 2 \
--preserve_case \
--save_every 2 \
--log_every 2 \
--val_every 2 \
--save $workdir/model_$i \
--data $SRCDIR/dataset/bitod \
--exist_ok \
--embeddings $EMBEDDING_DIR \
--no_commit ${hparams[i]}
--data $SRCDIR/dataset/bitod
# greedy prediction
genienlp predict \

View File

@@ -10,19 +10,13 @@ do
# train
genienlp train \
$SHARED_TRAIN_HPARAMS \
--train_tasks almond \
--train_batch_tokens 100 \
--val_batch_size 100 \
--train_iterations 6 \
--preserve_case \
--save_every 2 \
--log_every 2 \
--val_every 2 \
--save $workdir/model_$i \
--data $SRCDIR/dataset/ \
--exist_ok \
--embeddings $EMBEDDING_DIR \
--no_commit \
$hparams
# run kfserver in background

View File

@@ -15,18 +15,13 @@ do
# train
genienlp train \
$SHARED_TRAIN_HPARAMS \
--train_tasks almond \
--train_batch_tokens 100 \
--val_batch_size 100 \
--train_iterations 4 \
--preserve_case \
--save_every 2 \
--log_every 2 --val_every 2 \
--save $workdir/model_$i \
--data $SRCDIR/dataset/ \
--exist_ok \
--embeddings $EMBEDDING_DIR \
--no_commit \
$hparams
# greedy prediction

View File

@@ -10,21 +10,15 @@ for model in \
# train
genienlp train \
$SHARED_TRAIN_HPARAMS \
--train_tasks almond_natural_seq2seq \
--train_batch_tokens 100 \
--val_batch_size 100 \
--train_iterations 6 \
--preserve_case \
--save_every 2 \
--log_every 2 \
--val_every 2 \
--save $workdir/model_$i \
--data $SRCDIR/dataset/ \
--model TransformerSeq2Seq \
--pretrained_model $model \
--exist_ok \
--embeddings $EMBEDDING_DIR \
--no_commit
--pretrained_model $model
# greedy prediction
genienlp predict \

View File

@@ -4,7 +4,20 @@
# Test ood task
# train
genienlp train --train_tasks ood_task --model TransformerForSequenceClassification --pretrained_model distilbert-base-uncased --min_output_length 1 --save $workdir/model --train_iterations 20 --save_every 10 --log_every 10 --val_every 10 --data $SRCDIR/dataset/ood/ --force_fast_tokenizer --train_batch_tokens 200 --num_print 0
genienlp train \
--train_tasks ood_task \
--model TransformerForSequenceClassification \
--pretrained_model distilbert-base-uncased \
--min_output_length 1 \
--save $workdir/model \
--train_iterations 20 \
--save_every 10 \
--log_every 10 \
--val_every 10 \
--data $SRCDIR/dataset/ood/ \
--force_fast_tokenizer \
--train_batch_tokens 200 \
--num_print 0
# greedy prediction
genienlp predict --tasks ood_task --evaluate valid --pred_set_name eval --path $workdir/model --overwrite --eval_dir $workdir/model/eval_results/ --data $SRCDIR/dataset/ood/ --embeddings $EMBEDDING_DIR --val_batch_size 200
@@ -23,15 +36,37 @@ rm -rf $workdir/model
# Test bitod_error_cls task
# train
genienlp train --train_tasks bitod_error_cls --model TransformerForSequenceClassification --pretrained_model distilbert-base-uncased --min_output_length 1 --save $workdir/model_error/ --train_iterations 100 --save_every 50 --log_every 50 --val_every 50 --data $SRCDIR/dataset/bitod_error/ --force_fast_tokenizer --train_batch_tokens 200 --num_print 0
genienlp train \
--train_tasks bitod_error_cls \
--model TransformerForSequenceClassification \
--pretrained_model distilbert-base-uncased \
--min_output_length 1 \
--save $workdir/model_error/ \
--train_iterations 100 \
--save_every 50 \
--log_every 50 \
--val_every 50 \
--data $SRCDIR/dataset/bitod_error/ \
--force_fast_tokenizer \
--train_batch_tokens 200 \
--num_print 0
# greedy prediction
genienlp predict --tasks bitod_error_cls --evaluate valid --pred_set_name valid --path $workdir/model_error --overwrite --eval_dir $workdir/model_error/eval_results/ --data $SRCDIR/dataset/bitod_error/ --embeddings $EMBEDDING_DIR --val_batch_size 200
genienlp predict \
--tasks bitod_error_cls \
--evaluate valid \
--pred_set_name valid \
--path $workdir/model_error \
--overwrite \
--eval_dir $workdir/model_error/eval_results/ \
--data $SRCDIR/dataset/bitod_error/ \
--embeddings $EMBEDDING_DIR \
--val_batch_size 200
# check if result file exists
if test ! -f $workdir/model_error/eval_results/valid/bitod_error_cls.tsv ; then
echo "File not found!"
exit 1
echo "File not found!"
exit 1
fi
# check if predictions matches expected_results

View File

@@ -11,20 +11,16 @@ do
# train
genienlp train \
$SHARED_TRAIN_HPARAMS \
--train_tasks cross_ner \
--model TransformerForTokenClassification \
--pretrained_model bert-base-cased \
--force_fast_tokenizer --train_batch_tokens 200 \
--force_fast_tokenizer \
--train_batch_tokens 200 \
--val_batch_size 200 \
--train_iterations 4 --preserve_case \
--save_every 2 \
--log_every 2 \
--val_every 2 \
--train_iterations 4 \
--save $workdir/model_$i \
--data $SRCDIR/dataset/cross_ner/ \
--embeddings $EMBEDDING_DIR $hparams \
--exist_ok \
--no_commit
--data $SRCDIR/dataset/cross_ner/
# greedy prediction
genienlp predict \
@@ -59,6 +55,7 @@ do
# train
genienlp train \
$SHARED_TRAIN_HPARAMS \
--train_tasks conll2003 \
--crossner_domains music \
--model TransformerForTokenClassification \
@@ -68,15 +65,8 @@ do
--train_batch_tokens 100 \
--val_batch_size 100 \
--train_iterations 4 \
--preserve_case \
--save_every 2 \
--log_every 2 \
--val_every 2 \
--save $workdir/model_$i \
--data $SRCDIR/dataset/cross_ner/ \
--embeddings $EMBEDDING_DIR $hparams \
--exist_ok \
--no_commit
--data $SRCDIR/dataset/cross_ner/
# greedy prediction
genienlp predict \

View File

@@ -21,6 +21,7 @@ for model in "Helsinki-NLP/opus-mt-en-de" ; do
# save model
genienlp train \
$SHARED_TRAIN_HPARAMS \
--train_tasks almond_translate \
--train_languages en \
--train_tgt_languages de \
@@ -31,11 +32,7 @@ for model in "Helsinki-NLP/opus-mt-en-de" ; do
--train_batch_tokens 100 \
--val_batch_size 100 \
--train_iterations 0 \
--preserve_case \
--save $workdir/model_$i \
--exist_ok \
--embeddings $EMBEDDING_DIR \
--no_commit
--save $workdir/model_$i
# translate entities
genienlp predict \
@@ -89,7 +86,9 @@ for model in "Helsinki-NLP/opus-mt-en-de" "sshleifer/tiny-mbart" ; do
cp $workdir/translation/almond/train.tsv $workdir/translation/almond/eval.tsv
# train
genienlp train --train_tasks almond_translate \
genienlp train \
$SHARED_TRAIN_HPARAMS \
--train_tasks almond_translate \
--do_alignment \
--train_languages en \
--train_tgt_languages de \
@@ -100,15 +99,8 @@ for model in "Helsinki-NLP/opus-mt-en-de" "sshleifer/tiny-mbart" ; do
--train_batch_tokens 100 \
--val_batch_size 100 \
--train_iterations 6 \
--preserve_case \
--save_every 2 \
--log_every 2 \
--val_every 2 \
--save $workdir/model_$i \
--data $workdir/translation/ \
--exist_ok \
--embeddings $EMBEDDING_DIR \
--no_commit
--data $workdir/translation/
# greedy prediction
genienlp predict \