From 1f1ceef39e60c9e0981ebf29db8495fea6bf8afa Mon Sep 17 00:00:00 2001
From: mehrad
Date: Tue, 16 Feb 2021 00:15:35 -0800
Subject: [PATCH] Allow prediction on test sets without prepped bootleg
 features

Useful for calibration, as new out-of-domain (OOD) data is fed in at runtime
---
 genienlp/calibrate.py          |  2 +-
 genienlp/data_utils/bootleg.py | 72 ++++++++++++++++++++++++-
 genienlp/predict.py            | 16 ++++--
 genienlp/server.py             | 99 ++++++++-----------------------
 tests/test.sh                  |  2 +-
 5 files changed, 105 insertions(+), 86 deletions(-)

diff --git a/genienlp/calibrate.py b/genienlp/calibrate.py
index 17a8bf6b..ca93a74b 100644
--- a/genienlp/calibrate.py
+++ b/genienlp/calibrate.py
@@ -554,7 +554,7 @@ def main(args):
 
     if args.plot:
         from matplotlib import pyplot  # lazy import
-        confidences = torch.load(args.confidence_path, map_location=torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
+        confidences = torch.load(args.confidence_path, map_location=torch.device('cpu'))
 
         all_estimators = []
         train_confidences, dev_confidences = train_test_split(confidences, test_size=args.dev_split, random_state=args.seed)
diff --git a/genienlp/data_utils/bootleg.py b/genienlp/data_utils/bootleg.py
index 19cc3d8e..0e7403d8 100644
--- a/genienlp/data_utils/bootleg.py
+++ b/genienlp/data_utils/bootleg.py
@@ -27,11 +27,13 @@
 # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-
+import functools
 import os
 import ujson
 import numpy as np
 import logging
+import torch
+from bootleg.annotator import Annotator
 
 from .database_utils import is_banned
 
@@ -39,6 +41,7 @@ from .database_utils import is_banned
 from bootleg.extract_mentions import extract_mentions
 from bootleg.utils.parser_utils import get_full_config
 from bootleg import run
+from .progbar import progress_bar
 
 logger = logging.getLogger(__name__)
 
@@ -60,6 +63,73 @@ def reverse_bisect_left(a, x, lo=None, hi=None):
     return lo
 
 
+def bootleg_process_examples(ex, bootleg_annotator, args, label, task):
+    line = {}
+    line['sentence'] = getattr(ex, task.utterance_field())
+
+    assert len(label) == 7
+    line['cands'] = label[3]
+    line['cand_probs'] = list(map(lambda item: list(item), label[4]))
+    line['spans'] = label[5]
+    line['aliases'] = label[6]
+    tokens_type_ids, tokens_type_probs = bootleg_annotator.bootleg.collect_features_per_line(line, args.bootleg_prob_threshold)
+
+    if task.utterance_field() == 'question':
+        for i in range(len(tokens_type_ids)):
+            ex.question_feature[i].type_id = tokens_type_ids[i]
+            ex.question_feature[i].type_prob = tokens_type_probs[i]
+            ex.context_plus_question_feature[i + len(ex.context.split(' '))].type_id = tokens_type_ids[i]
+            ex.context_plus_question_feature[i + len(ex.context.split(' '))].type_prob = tokens_type_probs[i]
+
+    else:
+        for i in range(len(tokens_type_ids)):
+            ex.context_feature[i].type_id = tokens_type_ids[i]
+            ex.context_feature[i].type_prob = tokens_type_probs[i]
+            ex.context_plus_question_feature[i].type_id = tokens_type_ids[i]
+            ex.context_plus_question_feature[i].type_prob = tokens_type_probs[i]
+
+    context_plus_question_with_types = task.create_sentence_plus_types_tokens(ex.context_plus_question,
+                                                                              ex.context_plus_question_feature,
+                                                                              args.add_types_to_text)
+    ex = ex._replace(context_plus_question_with_types=context_plus_question_with_types)
+
+    return ex
+
+
+def extract_features_with_annotator(examples, bootleg_annotator, args, task):
+    with torch.no_grad():
+        bootleg_inputs = []
+        for ex in examples:
+            bootleg_inputs.append(getattr(ex, task.utterance_field()))
+
+        bootleg_labels = bootleg_annotator.label_mentions(bootleg_inputs)
+        bootleg_labels_unpacked = list(zip(*bootleg_labels))
+
+        for i in range(len(examples)):
+            ex = examples[i]
+            label = bootleg_labels_unpacked[i]
+            examples[i] = bootleg_process_examples(ex, bootleg_annotator, args, label, task)
+
+
+def init_bootleg_annotator(args, device):
+    # instantiate a bootleg object to load config and relevant databases
+    bootleg = Bootleg(args)
+    bootleg_config = bootleg.create_config(bootleg.fixed_overrides)
+
+    # instantiate the annotator class; we use it in server mode and for on-the-fly prediction
+    # for training we use bootleg functions that preprocess and cache data, with multiprocessing and batching to speed up NED
+    bootleg_annotator = Annotator(config_args=bootleg_config,
+                                  device='cpu' if device.type == 'cpu' else 'cuda',
+                                  max_alias_len=args.max_entity_len,
+                                  cand_map=bootleg.cand_map,
+                                  threshold=args.bootleg_prob_threshold,
+                                  progbar_func=functools.partial(progress_bar, disable=True))
+    # collect all outputs now; we will filter later
+    bootleg_annotator.set_threshold(0.0)
+    setattr(bootleg_annotator, 'bootleg', bootleg)
+    return bootleg_annotator
+
+
 def post_process_bootleg_types(qid, type, title, almond_domains):
     # TODO if training on multiple domains (in one run) these mapping should be modified
     # e.g. song is mapped to book which is not correct if training on music domain too
diff --git a/genienlp/predict.py b/genienlp/predict.py
index f45c9f89..06ac00b8 100644
--- a/genienlp/predict.py
+++ b/genienlp/predict.py
@@ -39,7 +39,7 @@ import shutil
 
 # multiprocessing with CUDA
 from torch.multiprocessing import Process, set_start_method
-from .data_utils.bootleg import Bootleg
+from .data_utils.bootleg import Bootleg, init_bootleg_annotator, extract_features_with_annotator
 from .run_bootleg import bootleg_process_splits
 
 try:
@@ -48,7 +48,6 @@
 except RuntimeError:
     pass
 import torch
-import pickle
 
 from . import models
 from .tasks.registry import get_tasks
@@ -61,7 +60,8 @@ from .arguments import check_and_update_generation_args
 
 logger = logging.getLogger(__name__)
 
-def prepare_data(args):
+
+def prepare_data(args, device):
     # initialize bootleg
     bootleg = None
     if args.do_ned and args.ned_retrieve_method == 'bootleg':
@@ -110,7 +110,13 @@ def prepare_data(args):
             data = split.test
             path = path.test
         if bootleg:
-            bootleg_process_splits(args, data.examples, path, task, bootleg)
+            if split.train or split.eval:
+                bootleg_process_splits(args, data.examples, path, task, bootleg)
+            else:
+                # no prepped bootleg features are available
+                # extract features on the fly using the bootleg annotator
+                bootleg_annotator = init_bootleg_annotator(args, device)
+                extract_features_with_annotator(data.examples, bootleg_annotator, args, task)
         task_data_processed.append(data)
         task_path_processed.append(path)
     datasets.append(task_data_processed)
@@ -165,7 +171,7 @@ def run(args, device):
             locale=locale
         )
 
-    val_sets = prepare_data(args)
+    val_sets = prepare_data(args, device)
     model.add_new_vocab_from_data(args.tasks)
     iters = prepare_data_iterators(args, val_sets, model.numericalizer, device)
diff --git a/genienlp/server.py b/genienlp/server.py
index 496a661b..603deb5f 100644
--- a/genienlp/server.py
+++ b/genienlp/server.py
@@ -35,25 +35,22 @@ import logging
 import sys
 import os
 from pprint import pformat
-import functools
 
 import torch
 
 from . import models
 from .data_utils.example import Example, NumericalizedExamples
+from .data_utils.bootleg import init_bootleg_annotator, extract_features_with_annotator
 from .tasks.registry import get_tasks
 from .util import set_seed, init_devices, load_config_json, log_model_size
 from .validate import generate_with_model
 from .calibrate import ConfidenceEstimator
-from bootleg.annotator import Annotator
-from .data_utils.bootleg import Bootleg
-from .data_utils.progbar import progress_bar
 
 logger = logging.getLogger(__name__)
 
 
-class Server:
+class Server(object):
     def __init__(self, args, numericalizer, model, device, confidence_estimators, estimator_filenames, bootleg_annotator=None):
         self.args = args
         self.device = device
@@ -71,37 +68,6 @@ class Server:
         # make a single batch with all examples
         return NumericalizedExamples.collate_batches(all_features, self.numericalizer, device=self.device, db_unk_id=self.args.db_unk_id)
 
-    def bootleg_process_examples(self, ex, label, task):
-        line = {}
-        line['sentence'] = getattr(ex, task.utterance_field())
-
-        assert len(label) == 7
-        line['cands'] = label[3]
-        line['cand_probs'] = list(map(lambda item: list(item), label[4]))
-        line['spans'] = label[5]
-        line['aliases'] = label[6]
-        tokens_type_ids, tokens_type_probs = self.bootleg_annotator.bootleg.collect_features_per_line(line, self.args.bootleg_prob_threshold)
-
-        if task.utterance_field() == 'question':
-            for i in range(len(tokens_type_ids)):
-                ex.question_feature[i].type_id = tokens_type_ids[i]
-                ex.question_feature[i].type_prob = tokens_type_probs[i]
-                ex.context_plus_question_feature[i + len(ex.context.split(' '))].type_id = tokens_type_ids[i]
-                ex.context_plus_question_feature[i + len(ex.context.split(' '))].type_prob = tokens_type_probs[i]
-
-        else:
-            for i in range(len(tokens_type_ids)):
-                ex.context_feature[i].type_id = tokens_type_ids[i]
-                ex.context_feature[i].type_prob = tokens_type_probs[i]
-                ex.context_plus_question_feature[i].type_id = tokens_type_ids[i]
-                ex.context_plus_question_feature[i].type_prob = tokens_type_probs[i]
-
-        context_plus_question_with_types = task.create_sentence_plus_types_tokens(ex.context_plus_question,
-                                                                                  ex.context_plus_question_feature,
-                                                                                  self.args.add_types_to_text)
-        ex = ex._replace(context_plus_question_with_types=context_plus_question_with_types)
-
-        return ex
 
     def handle_request(self, request):
         task_name = request['task'] if 'task' in request else 'generic'
@@ -125,36 +91,28 @@ class Server:
             ex = Example.from_raw(str(example_id), context, question, answer, preprocess=task.preprocess_field, lower=self.args.lower)
 
             examples.append(ex)
-
-        with torch.no_grad():
-            bootleg_inputs = []
-            if self.bootleg_annotator:
-                for ex in examples:
-                    bootleg_inputs.append(getattr(ex, task.utterance_field()))
-                bootleg_labels = self.bootleg_annotator.label_mentions(bootleg_inputs)
-                bootleg_labels_unpacked = list(zip(*bootleg_labels))
-
-                for i in range(len(examples)):
-                    ex = examples[i]
-                    label = bootleg_labels_unpacked[i]
-                    examples[i] = self.bootleg_process_examples(ex, label, task)
+        # process bootleg features
+        if self.bootleg_annotator:
+            extract_features_with_annotator(examples, self.bootleg_annotator, self.args, task)
 
         self.model.add_new_vocab_from_data([task])
         batch = self.numericalize_examples(examples)
-        if self.args.calibrator_paths is not None:
-            output = generate_with_model(self.model, [batch], self.numericalizer, task, self.args,
-                                         output_predictions_only=True,
-                                         confidence_estimators=self.confidence_estimators)
-            response = []
-            for idx, p in enumerate(output.predictions):
-                instance = {'answer': p[0], 'score': {}}
-                for e_idx, estimator_scores in enumerate(output.confidence_scores):
-                    instance['score'][self.estimator_filenames[e_idx]] = float(estimator_scores[idx])
-                response.append(instance)
-        else:
-            output = generate_with_model(self.model, [batch], self.numericalizer, task, self.args, output_predictions_only=True)
-            response = [{'answer': p[0]} for p in output.predictions]
+
+        with torch.no_grad():
+            if self.args.calibrator_paths is not None:
+                output = generate_with_model(self.model, [batch], self.numericalizer, task, self.args,
+                                             output_predictions_only=True,
+                                             confidence_estimators=self.confidence_estimators)
+                response = []
+                for idx, p in enumerate(output.predictions):
+                    instance = {'answer': p[0], 'score': {}}
+                    for e_idx, estimator_scores in enumerate(output.confidence_scores):
+                        instance['score'][self.estimator_filenames[e_idx]] = float(estimator_scores[idx])
+                    response.append(instance)
+            else:
+                output = generate_with_model(self.model, [batch], self.numericalizer, task, self.args, output_predictions_only=True)
+                response = [{'answer': p[0]} for p in output.predictions]
 
         return response
@@ -244,22 +202,7 @@ def init(args):
 
     bootleg_annotator = None
     if args.do_ned and args.ned_retrieve_method == 'bootleg':
-        # instantiate a bootleg object to load config and relevant databases
-        bootleg = Bootleg(args)
-        bootleg_config = bootleg.create_config(bootleg.fixed_overrides)
-
-        # instantiate the annotator class. we use annotator only in server mode
-        # for training we use bootleg functions which preprocess and cache data using multiprocessing, and batching to speed up NED
-        bootleg_annotator = Annotator(config_args=bootleg_config,
-                                      device='cpu' if device.type=='cpu' else 'cuda',
-                                      max_alias_len=args.max_entity_len,
-                                      cand_map=bootleg.cand_map,
-                                      threshold=args.bootleg_prob_threshold,
-                                      progbar_func=functools.partial(progress_bar, disable=True))
-        # collect all outputs now; we will filter later
-        bootleg_annotator.set_threshold(0.0)
-        setattr(bootleg_annotator, 'bootleg', bootleg)
-
+        bootleg_annotator = init_bootleg_annotator(args, device)
 
     logger.info(f'Arguments:\n{pformat(vars(args))}')
     logger.info(f'Loading from {args.best_checkpoint}')
diff --git a/tests/test.sh b/tests/test.sh
index ca2dc5ef..b04419c5 100755
--- a/tests/test.sh
+++ b/tests/test.sh
@@ -128,7 +128,7 @@ do
 
     # test server for bootleg
    # due to travis memory limitations, uncomment and run this test locally
-    # echo '{"id": "dummy_example_1", "context": "show me .", "question": "translate to thingtalk", "answer": "now => () => notify"}' | pipenv run python3 -m genienlp server --database_dir $SRCDIR/database/ --path $workdir/model_$i --stdin
+    # echo '{"task": "almond", "id": "dummy_example_1", "context": "show me .", "question": "translate to thingtalk", "answer": "now => () => notify"}' | pipenv run python3 -m genienlp server --database_dir $SRCDIR/database/ --path $workdir/model_$i --stdin
 
     rm -rf $workdir/model_$i
     i=$((i+1))
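
Note (illustrative, not part of the patch): a minimal sketch of how the two new
helpers in genienlp/data_utils/bootleg.py are meant to be used together when no
prepped bootleg features exist on disk, mirroring what predict.py and server.py
now do. The args, device, task, and examples objects are assumed to be the
usual genienlp ones; only init_bootleg_annotator and
extract_features_with_annotator come from this patch.

    from genienlp.data_utils.bootleg import (
        extract_features_with_annotator,
        init_bootleg_annotator,
    )

    # Build the annotator once; it loads the bootleg config, candidate map,
    # and entity databases, so it is expensive to construct and worth reusing.
    bootleg_annotator = init_bootleg_annotator(args, device)

    # Annotate the examples in place: mentions are detected and linked, and
    # the resulting entity type ids/probs are written into each example's
    # token-level features before numericalization.
    extract_features_with_annotator(examples, bootleg_annotator, args, task)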