diff --git a/.gitignore b/.gitignore
index d50d1667..9831d608 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,7 @@ local_scripts/*
 dataset/*
 generated/*
 *events.out.tfevents*
+*.pkl
 .DS_Store
 .idea/
diff --git a/genienlp/calibrate.py b/genienlp/calibrate.py
index 998c0b45..e4241cca 100644
--- a/genienlp/calibrate.py
+++ b/genienlp/calibrate.py
@@ -230,6 +230,12 @@ class ConfidenceEstimator():
 
         return features, labels
 
+    def estimate(self, confidences: Iterable[ConfidenceOutput]):
+        features, labels = self.convert_to_dataset(confidences, train=False)
+        dataset = xgb.DMatrix(data=features, label=labels)
+        confidence_scores = ConfidenceEstimator._extract_confidence_scores(self.model, dataset)
+        return confidence_scores
+
     def evaluate(self, dev_features, dev_labels):
         dev_dataset = xgb.DMatrix(data=dev_features, label=dev_labels)
         confidence_scores = ConfidenceEstimator._extract_confidence_scores(self.model, dev_dataset)
diff --git a/genienlp/predict.py b/genienlp/predict.py
index 1fa6b98d..372ca9e1 100644
--- a/genienlp/predict.py
+++ b/genienlp/predict.py
@@ -167,9 +167,10 @@ def run(args, device):
             else:
                 raise OSError(f'{results_file_name} already exists')
 
-        generation_outputs = generate_with_model(model, it, model.numericalizer, task, args, prediction_file_name, output_confidences=args.output_confidences, original_order=original_order)
+        generation_outputs = generate_with_model(model, it, model.numericalizer, task, args, output_confidences=args.output_confidences, original_order=original_order)
+
         if args.output_confidences:
-            _, predictions, answers, contexts, confidences = generation_outputs
+            _, example_ids, predictions, answers, contexts, confidences = generation_outputs
             # print('confidences = ', confidences)
             import pickle
@@ -177,7 +178,12 @@ def run(args, device):
                 pickle.dump(confidences, f, protocol=4)
 
         else:
-            _, predictions, answers, contexts = generation_outputs
+            _, example_ids, predictions, answers, contexts = generation_outputs
+
+        # write into file
+        with open(prediction_file_name, 'w' + ('' if args.overwrite else 'x')) as prediction_file:
+            for i in range(len(example_ids)):
+                prediction_file.write(example_ids[i] + '\t' + '\t'.join(predictions[i]) + '\n') # write all outputs in the prediction file, separated by \t
 
         if len(answers) > 0:
             metrics_to_compute = task.metrics
@@ -207,7 +213,7 @@ def run(args, device):
 
 
 def parse_argv(parser):
-    parser.add_argument('--path', required=True)
+    parser.add_argument('--path', type=str, required=True, help='Folder to load the model from')
     parser.add_argument('--evaluate', type=str, required=True, choices=['valid', 'test'], help='Which dataset to do predictions for (test or dev)')
     parser.add_argument('--pred_set_name', type=str, help='Name of dataset to run prediction for; will be ignored if --evaluate is test')
diff --git a/genienlp/server.py b/genienlp/server.py
index 3abcb969..74717e45 100644
--- a/genienlp/server.py
+++ b/genienlp/server.py
@@ -86,7 +86,7 @@ class Server:
         self.model.add_new_vocab_from_data([task])
         batch = self.numericalize_examples(examples)
         # it is a single batch, so wrap it in []
-        predictions = generate_with_model(self.model, [batch], self.numericalizer, task, self.args, prediction_file_name=None, output_predictions_only=True)
+        predictions = generate_with_model(self.model, [batch], self.numericalizer, task, self.args, output_predictions_only=True)
         response = json.dumps({ 'id': request['id'], 'instances': [{ 'answer': p[0] } for p in predictions] })
         return response + '\n'
@@ -103,7 +103,7 @@ class Server:
 
         self.model.add_new_vocab_from_data([task])
         batch = self.numericalize_examples([ex])
-        predictions = generate_with_model(self.model, [batch], self.numericalizer, task, self.args, prediction_file_name=None, output_predictions_only=True)
+        predictions = generate_with_model(self.model, [batch], self.numericalizer, task, self.args, output_predictions_only=True)
         response = json.dumps(dict(id=request['id'], answer=predictions[0][0]))
         return response + '\n'
diff --git a/genienlp/validate.py b/genienlp/validate.py
index 3d0b2953..c2052582 100644
--- a/genienlp/validate.py
+++ b/genienlp/validate.py
@@ -35,7 +35,7 @@ from collections import OrderedDict
 from .metrics import compute_metrics
 
 
-def generate_with_model(model, data_iterator, numericalizer, task, args, prediction_file_name=None, output_predictions_only=False, output_confidences=False, original_order=None):
+def generate_with_model(model, data_iterator, numericalizer, task, args, output_predictions_only=False, output_confidences=False, original_order=None):
     """
     Inputs:
         original_order: List of indices. If provided, we will sort the results according to this order
@@ -96,20 +96,16 @@ def generate_with_model(model, data_iterator, numericalizer, task, args, predict
 
     if original_order is not None:
         # sort back to the original order
         original_order, example_ids, predictions, answers, contexts, confidences = [list(a) for a in tuple(zip(*sorted(list(zip(original_order, example_ids, predictions, answers, contexts, confidences)))))]
-
-    if prediction_file_name is not None:
-        with open(prediction_file_name, 'w' + ('' if args.overwrite else 'x')) as prediction_file:
-            for i in range(len(example_ids)):
-                prediction_file.write(example_ids[i] + '\t' + '\t'.join(predictions[i]) + '\n') # write all outputs in the prediction file, separated by \t
-    if output_predictions_only:
-        return predictions
     # TODO calculate and return loss
     loss = None
+
+    if output_predictions_only:
+        return predictions
     if output_confidences:
-        return loss, predictions, answers, contexts, confidences
+        return loss, example_ids, predictions, answers, contexts, confidences
     else:
-        return loss, predictions, answers, contexts
+        return loss, example_ids, predictions, answers, contexts
 
 
 def calculate_and_reduce_metrics(predictions, answers, metrics_to_compute, args):
@@ -142,7 +138,7 @@ def validate(task, val_iter, model, numericalizer, args, num_print=10):
     with torch.no_grad():
         model.eval()
         names = ['beam search', 'answer', 'context']
-        loss, predictions, answers, contexts = generate_with_model(model, val_iter, numericalizer, task, args, prediction_file_name=None)
+        loss, _, predictions, answers, contexts = generate_with_model(model, val_iter, numericalizer, task, args)
         metrics = calculate_and_reduce_metrics(predictions, answers, task.metrics, args)
         results = [predictions, answers, contexts]