genienlp/tests/test_NED.sh

68 lines
3.1 KiB
Bash
Raw Normal View History

2021-03-02 23:19:46 +00:00
#!/usr/bin/env bash
#
# Named-entity-disambiguation (NED) tests for genienlp.
#
# For every hyperparameter set listed below this script:
#   1. trains a model on almond_dialogue_nlu for 6 iterations,
#   2. runs prediction on the valid split,
#   3. fails if the prediction .tsv file was not written,
#   4. for the first set only (i == 0), diffs the results JSON against the
#      checked-in expected results,
#   5. deletes the model directory.
#
# SRCDIR, workdir and EMBEDDING_DIR are not defined in this file; presumably
# they are provided by tests/lib.sh -- verify there.
# NOTE(review): apart from the explicit result-file check, the exit status of
# genienlp and diff is not tested here, so aborting on failure relies on
# whatever error handling tests/lib.sh installs (e.g. `set -e`) -- confirm.

. ./tests/lib.sh

# Index of the current hyperparameter set; also used to name the model dir.
i=0

# test NED
for hparams in \
  "--model TransformerSeq2Seq --pretrained_model sshleifer/bart-tiny-random --ned_retrieve_method bootleg --ned_domains thingpedia --bootleg_model bootleg_uncased_mini --add_entities_to_text append --ned_normalize_types soft --ned_dump_entity_type_pairs" \
  "--model TransformerSeq2Seq --pretrained_model sshleifer/bart-tiny-random --ned_retrieve_method bootleg --ned_domains thingpedia --bootleg_model bootleg_uncased_mini --add_entities_to_text off --ned_normalize_types soft" \
  "--model TransformerSeq2Seq --pretrained_model sshleifer/bart-tiny-random --ned_retrieve_method naive --ned_domains thingpedia --add_entities_to_text insert" \
  "--model TransformerSeq2Seq --pretrained_model sshleifer/bart-tiny-random --ned_retrieve_method entity-oracle --ned_domains thingpedia --add_entities_to_text insert --ned_dump_entity_type_pairs" \
  "--model TransformerSeq2Seq --pretrained_model sshleifer/bart-tiny-random --ned_retrieve_method type-oracle --ned_domains thingpedia --add_entities_to_text insert" \
  "--model TransformerLSTM --pretrained_model bert-base-cased --ned_retrieve_method bootleg --ned_domains thingpedia --bootleg_model bootleg_uncased_mini --add_entities_to_text off --ned_normalize_types soft" \
  "--model TransformerLSTM --pretrained_model bert-base-cased --ned_retrieve_method bootleg --ned_domains thingpedia --bootleg_model bootleg_uncased_mini --add_entities_to_text append --ned_normalize_types soft --override_context ."; do

  model_dir="$workdir/model_$i"

  # Each entry above is one whitespace-separated flag string. Split it into
  # an array so the flags reach genienlp as separate arguments without
  # relying on an unquoted expansion (which would also glob-expand).
  read -r -a hparam_args <<< "$hparams"

  # train
  genienlp train \
    --train_tasks almond_dialogue_nlu \
    --train_batch_tokens 100 \
    --val_batch_size 100 \
    --train_iterations 6 \
    --preserve_case \
    --save_every 2 \
    --log_every 2 \
    --val_every 2 \
    --save "$model_dir" \
    --database_dir "$SRCDIR/database/" \
    --data "$SRCDIR/dataset/thingpedia_99/" \
    --bootleg_output_dir "$SRCDIR/dataset/thingpedia_99/bootleg/" \
    --exist_ok \
    --embeddings "$EMBEDDING_DIR" \
    --no_commit \
    --do_ned \
    --min_entity_len 2 \
    --max_entity_len 4 \
    "${hparam_args[@]}"

  # greedy prediction
  genienlp predict \
    --tasks almond_dialogue_nlu \
    --evaluate valid \
    --path "$model_dir" \
    --overwrite \
    --eval_dir "$model_dir/eval_results/" \
    --database_dir "$SRCDIR/database/" \
    --data "$SRCDIR/dataset/thingpedia_99/" \
    --embeddings "$EMBEDDING_DIR"

  # check if result file exists
  if [[ ! -f "$model_dir/eval_results/valid/almond_dialogue_nlu.tsv" ]]; then
    echo "File not found!" >&2
    exit 1
  fi

  # test server for bootleg
  # due to travis memory limitations, uncomment and run this test locally
  # echo '{"task": "almond_dialogue_nlu", "id": "dummy_example_1", "context": "show me .", "question": "translate to thingtalk", "answer": "now => () => notify"}' | genienlp server --database_dir $SRCDIR/../database/ --path $workdir/model_$i --stdin

  # Only the first hyperparameter set has a checked-in expected-results file.
  if [[ "$i" -eq 0 ]]; then
    # check if predictions match expected_results
    diff -u \
      "$SRCDIR/expected_results/NED/bart_tiny_random_0.json" \
      "$model_dir/eval_results/valid/almond_dialogue_nlu.results.json"
  fi

  # ${workdir:?} aborts instead of deleting /model_N if workdir is empty/unset.
  rm -rf -- "${workdir:?}/model_$i"
  i=$((i+1))
done