Factor out some shared parameters in test cases
This commit is contained in:
parent
c5d04f31bc
commit
25ebee3964
|
@ -4,8 +4,8 @@ SRCDIR=`dirname $0`
|
|||
|
||||
|
||||
# Cleanup helper: removes the temporary work directory ($workdir) and any
# torch-shm-file-* entries located under $SRCDIR.
# NOTE(review): presumably registered as an error/exit trap by the script;
# the registration is not visible in this excerpt, so confirm.
on_error () {
|
||||
rm -fr $workdir
|
||||
rm -rf $SRCDIR/torch-shm-file-*
|
||||
rm -fr $workdir
|
||||
rm -rf $SRCDIR/torch-shm-file-*
|
||||
}
|
||||
|
||||
# allow faster local testing
|
||||
|
@ -17,6 +17,8 @@ else
|
|||
fi
|
||||
|
||||
export SENTENCE_TRANSFORMERS_HOME="$EMBEDDING_DIR"
|
||||
# parameters shared by most `genienlp train` invocations in the test cases
|
||||
export SHARED_TRAIN_HPARAMS="--embeddings $EMBEDDING_DIR --exist_ok --no_commit --preserve_case --save_every 2 --log_every 2 --val_every 2"
|
||||
|
||||
TMPDIR=`pwd`
|
||||
workdir=`mktemp -d $TMPDIR/genieNLP-tests-XXXXXX`
|
||||
|
|
|
@ -16,21 +16,15 @@ do
|
|||
|
||||
# train
|
||||
genienlp train \
|
||||
$SHARED_TRAIN_HPARAMS \
|
||||
--train_tasks almond_dialogue_nlu \
|
||||
--train_batch_tokens 100 \
|
||||
--val_batch_size 100 \
|
||||
--train_iterations 6 \
|
||||
--preserve_case \
|
||||
--save_every 2 \
|
||||
--log_every 2 \
|
||||
--val_every 2 \
|
||||
--save $workdir/model_$i \
|
||||
--database_dir $SRCDIR/database/ \
|
||||
--data $SRCDIR/dataset/thingpedia_99/ \
|
||||
--bootleg_output_dir $SRCDIR/dataset/thingpedia_99/bootleg/ \
|
||||
--exist_ok \
|
||||
--embeddings $EMBEDDING_DIR \
|
||||
--no_commit \
|
||||
--do_ned \
|
||||
--min_entity_len 2 \
|
||||
--max_entity_len 4 \
|
||||
|
|
|
@ -10,19 +10,13 @@ do
|
|||
|
||||
# train
|
||||
genienlp train \
|
||||
$SHARED_TRAIN_HPARAMS \
|
||||
--train_tasks almond \
|
||||
--train_batch_tokens 100 \
|
||||
--val_batch_size 100 \
|
||||
--train_iterations 6 \
|
||||
--preserve_case \
|
||||
--save_every 2 \
|
||||
--log_every 2 \
|
||||
--val_every 2 \
|
||||
--save $workdir/model_$i \
|
||||
--data $SRCDIR/dataset/ \
|
||||
--exist_ok \
|
||||
--embeddings $EMBEDDING_DIR \
|
||||
--no_commit \
|
||||
$hparams
|
||||
|
||||
# greedy prediction
|
||||
|
|
|
@ -10,19 +10,13 @@ do
|
|||
|
||||
# train
|
||||
genienlp train \
|
||||
$SHARED_TRAIN_HPARAMS \
|
||||
--train_tasks almond \
|
||||
--train_batch_tokens 100 \
|
||||
--val_batch_size 100 \
|
||||
--train_iterations 2 \
|
||||
--preserve_case \
|
||||
--save_every 2 \
|
||||
--log_every 2 \
|
||||
--val_every 2 \
|
||||
--save $workdir/model_$i \
|
||||
--data $SRCDIR/dataset/ \
|
||||
--exist_ok \
|
||||
--embeddings $EMBEDDING_DIR \
|
||||
--no_commit \
|
||||
$hparams
|
||||
|
||||
# generate a long sequence
|
||||
|
|
|
@ -19,20 +19,14 @@ for i in ${!hparams[*]};
|
|||
do
|
||||
# train
|
||||
genienlp train \
|
||||
$SHARED_TRAIN_HPARAMS \
|
||||
--train_tasks ${tasks[i]} \
|
||||
--train_batch_tokens 100 \
|
||||
--val_batch_size 300 \
|
||||
--train_iterations 4 \
|
||||
--min_output_length 2 \
|
||||
--preserve_case \
|
||||
--save_every 2 \
|
||||
--log_every 2 \
|
||||
--val_every 2 \
|
||||
--save $workdir/model_$i \
|
||||
--data $SRCDIR/dataset/bitod \
|
||||
--exist_ok \
|
||||
--embeddings $EMBEDDING_DIR \
|
||||
--no_commit ${hparams[i]}
|
||||
--data $SRCDIR/dataset/bitod
|
||||
|
||||
# greedy prediction
|
||||
genienlp predict \
|
||||
|
|
|
@ -10,19 +10,13 @@ do
|
|||
|
||||
# train
|
||||
genienlp train \
|
||||
$SHARED_TRAIN_HPARAMS \
|
||||
--train_tasks almond \
|
||||
--train_batch_tokens 100 \
|
||||
--val_batch_size 100 \
|
||||
--train_iterations 6 \
|
||||
--preserve_case \
|
||||
--save_every 2 \
|
||||
--log_every 2 \
|
||||
--val_every 2 \
|
||||
--save $workdir/model_$i \
|
||||
--data $SRCDIR/dataset/ \
|
||||
--exist_ok \
|
||||
--embeddings $EMBEDDING_DIR \
|
||||
--no_commit \
|
||||
$hparams
|
||||
|
||||
# run kfserver in background
|
||||
|
|
|
@ -15,18 +15,13 @@ do
|
|||
|
||||
# train
|
||||
genienlp train \
|
||||
$SHARED_TRAIN_HPARAMS \
|
||||
--train_tasks almond \
|
||||
--train_batch_tokens 100 \
|
||||
--val_batch_size 100 \
|
||||
--train_iterations 4 \
|
||||
--preserve_case \
|
||||
--save_every 2 \
|
||||
--log_every 2 --val_every 2 \
|
||||
--save $workdir/model_$i \
|
||||
--data $SRCDIR/dataset/ \
|
||||
--exist_ok \
|
||||
--embeddings $EMBEDDING_DIR \
|
||||
--no_commit \
|
||||
$hparams
|
||||
|
||||
# greedy prediction
|
||||
|
|
|
@ -10,21 +10,15 @@ for model in \
|
|||
|
||||
# train
|
||||
genienlp train \
|
||||
$SHARED_TRAIN_HPARAMS \
|
||||
--train_tasks almond_natural_seq2seq \
|
||||
--train_batch_tokens 100 \
|
||||
--val_batch_size 100 \
|
||||
--train_iterations 6 \
|
||||
--preserve_case \
|
||||
--save_every 2 \
|
||||
--log_every 2 \
|
||||
--val_every 2 \
|
||||
--save $workdir/model_$i \
|
||||
--data $SRCDIR/dataset/ \
|
||||
--model TransformerSeq2Seq \
|
||||
--pretrained_model $model \
|
||||
--exist_ok \
|
||||
--embeddings $EMBEDDING_DIR \
|
||||
--no_commit
|
||||
--pretrained_model $model
|
||||
|
||||
# greedy prediction
|
||||
genienlp predict \
|
||||
|
|
|
@ -4,7 +4,20 @@
|
|||
|
||||
# Test ood task
|
||||
# train
|
||||
genienlp train --train_tasks ood_task --model TransformerForSequenceClassification --pretrained_model distilbert-base-uncased --min_output_length 1 --save $workdir/model --train_iterations 20 --save_every 10 --log_every 10 --val_every 10 --data $SRCDIR/dataset/ood/ --force_fast_tokenizer --train_batch_tokens 200 --num_print 0
|
||||
genienlp train \
|
||||
--train_tasks ood_task \
|
||||
--model TransformerForSequenceClassification \
|
||||
--pretrained_model distilbert-base-uncased \
|
||||
--min_output_length 1 \
|
||||
--save $workdir/model \
|
||||
--train_iterations 20 \
|
||||
--save_every 10 \
|
||||
--log_every 10 \
|
||||
--val_every 10 \
|
||||
--data $SRCDIR/dataset/ood/ \
|
||||
--force_fast_tokenizer \
|
||||
--train_batch_tokens 200 \
|
||||
--num_print 0
|
||||
|
||||
# greedy prediction
|
||||
genienlp predict --tasks ood_task --evaluate valid --pred_set_name eval --path $workdir/model --overwrite --eval_dir $workdir/model/eval_results/ --data $SRCDIR/dataset/ood/ --embeddings $EMBEDDING_DIR --val_batch_size 200
|
||||
|
@ -23,15 +36,37 @@ rm -rf $workdir/model
|
|||
|
||||
# Test bitod_error_cls task
|
||||
# train
|
||||
genienlp train --train_tasks bitod_error_cls --model TransformerForSequenceClassification --pretrained_model distilbert-base-uncased --min_output_length 1 --save $workdir/model_error/ --train_iterations 100 --save_every 50 --log_every 50 --val_every 50 --data $SRCDIR/dataset/bitod_error/ --force_fast_tokenizer --train_batch_tokens 200 --num_print 0
|
||||
genienlp train \
|
||||
--train_tasks bitod_error_cls \
|
||||
--model TransformerForSequenceClassification \
|
||||
--pretrained_model distilbert-base-uncased \
|
||||
--min_output_length 1 \
|
||||
--save $workdir/model_error/ \
|
||||
--train_iterations 100 \
|
||||
--save_every 50 \
|
||||
--log_every 50 \
|
||||
--val_every 50 \
|
||||
--data $SRCDIR/dataset/bitod_error/ \
|
||||
--force_fast_tokenizer \
|
||||
--train_batch_tokens 200 \
|
||||
--num_print 0
|
||||
|
||||
# greedy prediction
|
||||
genienlp predict --tasks bitod_error_cls --evaluate valid --pred_set_name valid --path $workdir/model_error --overwrite --eval_dir $workdir/model_error/eval_results/ --data $SRCDIR/dataset/bitod_error/ --embeddings $EMBEDDING_DIR --val_batch_size 200
|
||||
genienlp predict \
|
||||
--tasks bitod_error_cls \
|
||||
--evaluate valid \
|
||||
--pred_set_name valid \
|
||||
--path $workdir/model_error \
|
||||
--overwrite \
|
||||
--eval_dir $workdir/model_error/eval_results/ \
|
||||
--data $SRCDIR/dataset/bitod_error/ \
|
||||
--embeddings $EMBEDDING_DIR \
|
||||
--val_batch_size 200
|
||||
|
||||
# check if result file exists
|
||||
if test ! -f $workdir/model_error/eval_results/valid/bitod_error_cls.tsv ; then
|
||||
echo "File not found!"
|
||||
exit 1
|
||||
echo "File not found!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# check if predictions match expected_results
|
||||
|
|
|
@ -11,20 +11,16 @@ do
|
|||
|
||||
# train
|
||||
genienlp train \
|
||||
$SHARED_TRAIN_HPARAMS \
|
||||
--train_tasks cross_ner \
|
||||
--model TransformerForTokenClassification \
|
||||
--pretrained_model bert-base-cased \
|
||||
--force_fast_tokenizer --train_batch_tokens 200 \
|
||||
--force_fast_tokenizer \
|
||||
--train_batch_tokens 200 \
|
||||
--val_batch_size 200 \
|
||||
--train_iterations 4 --preserve_case \
|
||||
--save_every 2 \
|
||||
--log_every 2 \
|
||||
--val_every 2 \
|
||||
--train_iterations 4 \
|
||||
--save $workdir/model_$i \
|
||||
--data $SRCDIR/dataset/cross_ner/ \
|
||||
--embeddings $EMBEDDING_DIR $hparams \
|
||||
--exist_ok \
|
||||
--no_commit
|
||||
--data $SRCDIR/dataset/cross_ner/
|
||||
|
||||
# greedy prediction
|
||||
genienlp predict \
|
||||
|
@ -59,6 +55,7 @@ do
|
|||
|
||||
# train
|
||||
genienlp train \
|
||||
$SHARED_TRAIN_HPARAMS \
|
||||
--train_tasks conll2003 \
|
||||
--crossner_domains music \
|
||||
--model TransformerForTokenClassification \
|
||||
|
@ -68,15 +65,8 @@ do
|
|||
--train_batch_tokens 100 \
|
||||
--val_batch_size 100 \
|
||||
--train_iterations 4 \
|
||||
--preserve_case \
|
||||
--save_every 2 \
|
||||
--log_every 2 \
|
||||
--val_every 2 \
|
||||
--save $workdir/model_$i \
|
||||
--data $SRCDIR/dataset/cross_ner/ \
|
||||
--embeddings $EMBEDDING_DIR $hparams \
|
||||
--exist_ok \
|
||||
--no_commit
|
||||
--data $SRCDIR/dataset/cross_ner/
|
||||
|
||||
# greedy prediction
|
||||
genienlp predict \
|
||||
|
|
|
@ -21,6 +21,7 @@ for model in "Helsinki-NLP/opus-mt-en-de" ; do
|
|||
|
||||
# save model
|
||||
genienlp train \
|
||||
$SHARED_TRAIN_HPARAMS \
|
||||
--train_tasks almond_translate \
|
||||
--train_languages en \
|
||||
--train_tgt_languages de \
|
||||
|
@ -31,11 +32,7 @@ for model in "Helsinki-NLP/opus-mt-en-de" ; do
|
|||
--train_batch_tokens 100 \
|
||||
--val_batch_size 100 \
|
||||
--train_iterations 0 \
|
||||
--preserve_case \
|
||||
--save $workdir/model_$i \
|
||||
--exist_ok \
|
||||
--embeddings $EMBEDDING_DIR \
|
||||
--no_commit
|
||||
--save $workdir/model_$i
|
||||
|
||||
# translate entities
|
||||
genienlp predict \
|
||||
|
@ -89,7 +86,9 @@ for model in "Helsinki-NLP/opus-mt-en-de" "sshleifer/tiny-mbart" ; do
|
|||
cp $workdir/translation/almond/train.tsv $workdir/translation/almond/eval.tsv
|
||||
|
||||
# train
|
||||
genienlp train --train_tasks almond_translate \
|
||||
genienlp train \
|
||||
$SHARED_TRAIN_HPARAMS \
|
||||
--train_tasks almond_translate \
|
||||
--do_alignment \
|
||||
--train_languages en \
|
||||
--train_tgt_languages de \
|
||||
|
@ -100,15 +99,8 @@ for model in "Helsinki-NLP/opus-mt-en-de" "sshleifer/tiny-mbart" ; do
|
|||
--train_batch_tokens 100 \
|
||||
--val_batch_size 100 \
|
||||
--train_iterations 6 \
|
||||
--preserve_case \
|
||||
--save_every 2 \
|
||||
--log_every 2 \
|
||||
--val_every 2 \
|
||||
--save $workdir/model_$i \
|
||||
--data $workdir/translation/ \
|
||||
--exist_ok \
|
||||
--embeddings $EMBEDDING_DIR \
|
||||
--no_commit
|
||||
--data $workdir/translation/
|
||||
|
||||
# greedy prediction
|
||||
genienlp predict \
|
||||
|
|
Loading…
Reference in New Issue