Refactor code that writes predictions to file

Sina 2021-01-04 23:07:05 -08:00
parent 4a080366df
commit a30fcba976
5 changed files with 26 additions and 17 deletions

.gitignore (1 addition)

@@ -13,6 +13,7 @@ local_scripts/*
 dataset/*
 generated/*
 *events.out.tfevents*
+*.pkl
 .DS_Store
 .idea/
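
The new pattern keeps the pickled confidence dumps introduced below out of version control. As a rough illustration only (gitignore matching is not exactly fnmatch, and these file names are made up):

import fnmatch

# Illustrative check of which names the newly ignored pattern would cover.
for name in ['confidences.pkl', 'best_model.pth', 'events.out.tfevents.123']:
    print(name, '->', fnmatch.fnmatch(name, '*.pkl'))
# only confidences.pkl matches the new rule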


@@ -230,6 +230,12 @@ class ConfidenceEstimator():
         return features, labels
 
+    def estimate(self, confidences: Iterable[ConfidenceOutput]):
+        features, labels = self.convert_to_dataset(confidences, train=False)
+        dataset = xgb.DMatrix(data=features, label=labels)
+        confidence_scores = ConfidenceEstimator._extract_confidence_scores(self.model, dataset)
+        return confidence_scores
+
     def evaluate(self, dev_features, dev_labels):
         dev_dataset = xgb.DMatrix(data=dev_features, label=dev_labels)
         confidence_scores = ConfidenceEstimator._extract_confidence_scores(self.model, dev_dataset)
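
The new estimate() mirrors evaluate(): wrap the features in an xgboost DMatrix and ask the trained booster for one score per example. A minimal, self-contained sketch of that pattern with made-up data and parameters, not the repo's ConfidenceEstimator API:

import numpy as np
import xgboost as xgb

# Stand-ins for the features/labels that convert_to_dataset() would produce
features = np.random.rand(64, 8)
labels = np.random.randint(0, 2, size=64)

dataset = xgb.DMatrix(data=features, label=labels)
booster = xgb.train({'objective': 'binary:logistic'}, dataset, num_boost_round=10)

# Analogous to the call to _extract_confidence_scores(model, dataset): one score per example
confidence_scores = booster.predict(dataset)
print(confidence_scores.shape)  # (64,)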


@@ -167,9 +167,10 @@ def run(args, device):
     else:
         raise OSError(f'{results_file_name} already exists')
 
-    generation_outputs = generate_with_model(model, it, model.numericalizer, task, args, prediction_file_name, output_confidences=args.output_confidences, original_order=original_order)
+    generation_outputs = generate_with_model(model, it, model.numericalizer, task, args, output_confidences=args.output_confidences, original_order=original_order)
+
     if args.output_confidences:
-        _, predictions, answers, contexts, confidences = generation_outputs
+        _, example_ids, predictions, answers, contexts, confidences = generation_outputs
         # print('confidences = ', confidences)
         import pickle
@@ -177,7 +178,12 @@ def run(args, device):
             pickle.dump(confidences, f, protocol=4)
     else:
-        _, predictions, answers, contexts = generation_outputs
+        _, example_ids, predictions, answers, contexts = generation_outputs
+
+    # write into file
+    with open(prediction_file_name, 'w' + ('' if args.overwrite else 'x')) as prediction_file:
+        for i in range(len(example_ids)):
+            prediction_file.write(example_ids[i] + '\t' + '\t'.join(predictions[i]) + '\n') # write all outputs in the prediction file, separated by \t
 
     if len(answers) > 0:
         metrics_to_compute = task.metrics
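
run() now writes one line per example: the example id followed by every output for that example, all tab-separated; confidences, when requested, still go to a separate pickle. A hedged sketch of reading both artifacts back, with hypothetical file paths:

import pickle

# Read the tab-separated prediction file written above; the path is a placeholder.
predictions = {}
with open('valid_predictions.tsv') as f:
    for line in f:
        example_id, *outputs = line.rstrip('\n').split('\t')
        predictions[example_id] = outputs  # all outputs for this example

# Load the pickled confidences from the args.output_confidences branch; path is a placeholder.
with open('confidences.pkl', 'rb') as f:
    confidences = pickle.load(f)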
@@ -207,7 +213,7 @@ def run(args, device):
 def parse_argv(parser):
-    parser.add_argument('--path', required=True)
+    parser.add_argument('--path', type=str, required=True, help='Folder to load the model from')
     parser.add_argument('--evaluate', type=str, required=True, choices=['valid', 'test'],
                         help='Which dataset to do predictions for (test or dev)')
     parser.add_argument('--pred_set_name', type=str, help='Name of dataset to run prediction for; will be ignored if --evaluate is test')
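
For reference, a small sketch of how the tightened --path argument behaves; the argument values below are placeholders and this is not the repo's actual entry point:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--path', type=str, required=True, help='Folder to load the model from')
parser.add_argument('--evaluate', type=str, required=True, choices=['valid', 'test'],
                    help='Which dataset to do predictions for (test or dev)')

args = parser.parse_args(['--path', 'saved_models/my_model', '--evaluate', 'valid'])
print(args.path, args.evaluate)  # --help now also documents what --path expects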


@@ -86,7 +86,7 @@ class Server:
             self.model.add_new_vocab_from_data([task])
             batch = self.numericalize_examples(examples)
             # it is a single batch, so wrap it in []
-            predictions = generate_with_model(self.model, [batch], self.numericalizer, task, self.args, prediction_file_name=None, output_predictions_only=True)
+            predictions = generate_with_model(self.model, [batch], self.numericalizer, task, self.args, output_predictions_only=True)
             response = json.dumps({ 'id': request['id'], 'instances': [{ 'answer': p[0] } for p in predictions] })
             return response + '\n'
@@ -103,7 +103,7 @@ class Server:
             self.model.add_new_vocab_from_data([task])
             batch = self.numericalize_examples([ex])
-            predictions = generate_with_model(self.model, [batch], self.numericalizer, task, self.args, prediction_file_name=None, output_predictions_only=True)
+            predictions = generate_with_model(self.model, [batch], self.numericalizer, task, self.args, output_predictions_only=True)
             response = json.dumps(dict(id=request['id'], answer=predictions[0][0]))
             return response + '\n'
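
With output_predictions_only=True the helper returns just the predictions, one list of outputs per example, and the handlers above use the first output as the answer. A stub sketch of that response construction; dummy_generate stands in for generate_with_model and is not part of the repo:

import json

def dummy_generate(batches, output_predictions_only=True):
    # Stand-in: one list of output strings per input example
    return [['first answer'], ['second answer']]

predictions = dummy_generate([None], output_predictions_only=True)
response = json.dumps({'id': 'req-1', 'instances': [{'answer': p[0]} for p in predictions]})
print(response + '\n')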


@@ -35,7 +35,7 @@ from collections import OrderedDict
 from .metrics import compute_metrics
 
-def generate_with_model(model, data_iterator, numericalizer, task, args, prediction_file_name=None, output_predictions_only=False, output_confidences=False, original_order=None):
+def generate_with_model(model, data_iterator, numericalizer, task, args, output_predictions_only=False, output_confidences=False, original_order=None):
     """
     Inputs:
         original_order: List of indices. If provided, we will sort the results according to this order
@@ -96,20 +96,16 @@ def generate_with_model(model, data_iterator, numericalizer, task, args, prediction_file_name=None, output_predictions_only=False, output_confidences=False, original_order=None):
     if original_order is not None:
         # sort back to the original order
         original_order, example_ids, predictions, answers, contexts, confidences = [list(a) for a in tuple(zip(*sorted(list(zip(original_order, example_ids, predictions, answers, contexts, confidences)))))]
 
-    if prediction_file_name is not None:
-        with open(prediction_file_name, 'w' + ('' if args.overwrite else 'x')) as prediction_file:
-            for i in range(len(example_ids)):
-                prediction_file.write(example_ids[i] + '\t' + '\t'.join(predictions[i]) + '\n') # write all outputs in the prediction file, separated by \t
-    if output_predictions_only:
-        return predictions
 
     # TODO calculate and return loss
     loss = None
 
+    if output_predictions_only:
+        return predictions
     if output_confidences:
-        return loss, predictions, answers, contexts, confidences
+        return loss, example_ids, predictions, answers, contexts, confidences
     else:
-        return loss, predictions, answers, contexts
+        return loss, example_ids, predictions, answers, contexts
 
 def calculate_and_reduce_metrics(predictions, answers, metrics_to_compute, args):
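
The zip/sort/unzip line near the top of this hunk restores the original example order before anything is returned. The same idiom in isolation, with toy data:

# Toy illustration of the sort-back-to-original-order idiom used in generate_with_model
original_order = [2, 0, 1]                    # original index of each produced result
predictions = ['third', 'first', 'second']
answers = ['c', 'a', 'b']

original_order, predictions, answers = [
    list(a) for a in tuple(zip(*sorted(zip(original_order, predictions, answers))))
]
print(predictions)  # ['first', 'second', 'third']
print(answers)      # ['a', 'b', 'c']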
@@ -142,7 +138,7 @@ def validate(task, val_iter, model, numericalizer, args, num_print=10):
     with torch.no_grad():
         model.eval()
         names = ['beam search', 'answer', 'context']
-        loss, predictions, answers, contexts = generate_with_model(model, val_iter, numericalizer, task, args, prediction_file_name=None)
+        loss, _, predictions, answers, contexts = generate_with_model(model, val_iter, numericalizer, task, args)
         metrics = calculate_and_reduce_metrics(predictions, answers, task.metrics, args)
 
         results = [predictions, answers, contexts]
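
After this change every full return tuple starts with loss and example_ids; validate() above simply discards the ids with _. A hedged sketch of the calling convention; the helper name and dummy tuple below are illustrative, not part of the repo:

def unpack_generation_outputs(generation_outputs, output_confidences=False):
    # Unpack the tuples generate_with_model now returns (illustrative helper)
    if output_confidences:
        loss, example_ids, predictions, answers, contexts, confidences = generation_outputs
    else:
        loss, example_ids, predictions, answers, contexts = generation_outputs
        confidences = None
    return loss, example_ids, predictions, answers, contexts, confidences

outputs = (None, ['id-0'], [['pred']], [['gold']], ['context'])  # dummy, non-confidence shape
print(unpack_generation_outputs(outputs))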