#!/usr/bin/env bash
#
# Token-classification tests: trains and evaluates small models with
# `genienlp` and compares their predictions against stored expected results.

# Shared test setup. Variables used further down but never assigned in this
# file (workdir, SRCDIR, EMBEDDING_DIR) presumably come from this library or
# from the environment -- confirm in tests/lib.sh.
. ./tests/lib.sh

# Running index of the configuration under test. It is part of both the
# per-run model directory name (model_<i>) and the expected-results file name.
i=0

# test cross_ner task
#
# For each extra-argument string below: train a token-classification model on
# cross_ner for a few iterations, predict on the validation split, check that
# a predictions file was written, and diff it against the expected results.
#
# NOTE(review): a failing `genienlp` or `diff` only aborts the run if errexit
# (`set -e`) is enabled elsewhere, presumably in tests/lib.sh -- confirm.

# workdir, SRCDIR and EMBEDDING_DIR are not assigned in this file. Stop early
# if one is missing rather than expanding it to an empty string, which would
# among other things turn the cleanup below into `rm -rf /model_N`.
: "${workdir:?workdir must be set}"
: "${SRCDIR:?SRCDIR must be set}"
: "${EMBEDDING_DIR:?EMBEDDING_DIR must be set}"

for hparams in \
    "--crossner_domains music" \
    "--crossner_domains news --do_ned --entity_attributes type_id type_prob --bootleg_output_dir ${SRCDIR}/dataset/cross_ner/bootleg/ --database_dir ${SRCDIR}/database/ --add_entities_to_text append --ned_domains thingpedia" ;
do
    model_dir="$workdir/model_$i"
    result_file="$model_dir/eval_results/valid/cross_ner.tsv"

    # train
    # $hparams stays unquoted on purpose: it packs several options into one
    # string and has to be word-split into separate arguments.
    # shellcheck disable=SC2086
    genienlp train \
        --train_tasks cross_ner \
        --model TransformerForTokenClassification \
        --pretrained_model bert-base-cased \
        --force_fast_tokenizer \
        --train_batch_tokens 200 \
        --val_batch_size 200 \
        --train_iterations 4 \
        --preserve_case \
        --save_every 2 \
        --log_every 2 \
        --val_every 2 \
        --save "$model_dir" \
        --data "$SRCDIR/dataset/cross_ner/" \
        --embeddings "$EMBEDDING_DIR" \
        $hparams \
        --exist_ok \
        --no_commit

    # greedy prediction
    genienlp predict \
        --tasks cross_ner \
        --evaluate valid \
        --pred_set_name dev \
        --path "$model_dir" \
        --overwrite \
        --eval_dir "$model_dir/eval_results/" \
        --data "$SRCDIR/dataset/cross_ner/" \
        --embeddings "$EMBEDDING_DIR" \
        --val_batch_size 2000

    # check if result file exists
    if [[ ! -f "$result_file" ]]; then
        echo "File not found!" >&2
        exit 1
    fi

    # check if predictions matches expected_results
    # (the expected file is named cross_ner_news_<i> for every configuration
    # in the list above, including the music one)
    diff -u "$SRCDIR/expected_results/token_classification/cross_ner_news_$i.tsv" "$result_file"

    # Remove this run's model directory before moving to the next configuration.
    rm -rf -- "$model_dir"

    i=$((i+1))
done

# test conll2003 task
#
# Same train / predict / compare cycle as a regular task test, run on a
# 5-example subsample of conll2003. The configuration list currently holds a
# single entry with no extra arguments.
#
# `i` is not reset here: it keeps whatever value the preceding code left, and
# that value selects both model_<i> and conll2003_<i>.tsv.
#
# NOTE(review): a failing `genienlp` or `diff` only aborts the run if errexit
# (`set -e`) is enabled elsewhere, presumably in tests/lib.sh -- confirm.

# workdir, SRCDIR and EMBEDDING_DIR are not assigned in this file. Stop early
# if one is missing rather than expanding it to an empty string, which would
# among other things turn the cleanup below into `rm -rf /model_N`.
: "${workdir:?workdir must be set}"
: "${SRCDIR:?SRCDIR must be set}"
: "${EMBEDDING_DIR:?EMBEDDING_DIR must be set}"

for hparams in \
    "" ;
do
    model_dir="$workdir/model_$i"
    result_file="$model_dir/eval_results/valid/conll2003.tsv"

    # train
    # $hparams stays unquoted on purpose: an empty entry must contribute no
    # argument at all (quoting it would pass an empty-string argument), and a
    # non-empty entry must be word-split into separate options.
    # shellcheck disable=SC2086
    genienlp train \
        --train_tasks conll2003 \
        --crossner_domains music \
        --model TransformerForTokenClassification \
        --pretrained_model bert-base-cased \
        --force_fast_tokenizer \
        --subsample 5 \
        --train_batch_tokens 100 \
        --val_batch_size 100 \
        --train_iterations 4 \
        --preserve_case \
        --save_every 2 \
        --log_every 2 \
        --val_every 2 \
        --save "$model_dir" \
        --data "$SRCDIR/dataset/cross_ner/" \
        --embeddings "$EMBEDDING_DIR" \
        $hparams \
        --exist_ok \
        --no_commit

    # greedy prediction
    genienlp predict \
        --tasks conll2003 \
        --evaluate valid \
        --pred_set_name validation \
        --subsample 5 \
        --path "$model_dir" \
        --overwrite \
        --eval_dir "$model_dir/eval_results/" \
        --data "$SRCDIR/dataset/cross_ner/" \
        --embeddings "$EMBEDDING_DIR" \
        --val_batch_size 2000

    # check if result file exists
    if [[ ! -f "$result_file" ]]; then
        echo "File not found!" >&2
        exit 1
    fi

    # check if predictions matches expected_results
    diff -u "$SRCDIR/expected_results/token_classification/conll2003_$i.tsv" "$result_file"

    # Remove this run's model directory before moving to the next configuration.
    rm -rf -- "$model_dir"

    i=$((i+1))
done