From 1dc5f7d28ee4c0a16e9eac21e726baa39c7d9147 Mon Sep 17 00:00:00 2001
From: Bryan Marcus McCann
Date: Mon, 10 Dec 2018 21:52:34 +0000
Subject: [PATCH] moving schema raw files to s3; updated pretrained models

---
 README.md                          | 18 ++++++------------
 predict.py                         |  4 ++--
 text/torchtext/datasets/generic.py |  2 +-
 3 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index d2e4543f..24cbdef2 100644
--- a/README.md
+++ b/README.md
@@ -3,7 +3,7 @@
 [![Build Status](https://travis-ci.org/salesforce/decaNLP.svg?branch=master)](https://travis-ci.org/salesforce/decaNLP)
 
 The Natural Language Decathlon is a multitask challenge that spans ten tasks:
-question answering ([SQuAD](https://rajpurkar.github.io/SQuAD-explorer/)), machine translation ([IWSLT](https://wit3.fbk.eu/mt.php?release=2016-01)), summarization ([CNN/DM](https://cs.nyu.edu/~kcho/DMQA/)), natural language inference ([MNLI](https://www.nyu.edu/projects/bowman/multinli/)), sentiment analysis ([SST](https://nlp.stanford.edu/sentiment/treebank.html)), semantic role labeling([QA‑SRL](https://dada.cs.washington.edu/qasrl/)), zero-shot relation extraction ([QA‑ZRE](http://nlp.cs.washington.edu/zeroshot/)), goal-oriented dialogue ([WOZ](https://github.com/nmrksic/neural-belief-tracker/tree/master/data/woz), semantic parsing ([WikiSQL](https://github.com/salesforce/WikiSQL)), and commonsense reasoning ([MWSC](https://github.com/salesforce/decaNLP/blob/d594b2bf127e13d0e61151b6a2af3bf63612f380/local_data/schema.txt)).
+question answering ([SQuAD](https://rajpurkar.github.io/SQuAD-explorer/)), machine translation ([IWSLT](https://wit3.fbk.eu/mt.php?release=2016-01)), summarization ([CNN/DM](https://cs.nyu.edu/~kcho/DMQA/)), natural language inference ([MNLI](https://www.nyu.edu/projects/bowman/multinli/)), sentiment analysis ([SST](https://nlp.stanford.edu/sentiment/treebank.html)), semantic role labeling ([QA‑SRL](https://dada.cs.washington.edu/qasrl/)), zero-shot relation extraction ([QA‑ZRE](http://nlp.cs.washington.edu/zeroshot/)), goal-oriented dialogue ([WOZ](https://github.com/nmrksic/neural-belief-tracker/tree/master/data/woz)), semantic parsing ([WikiSQL](https://github.com/salesforce/WikiSQL)), and commonsense reasoning ([MWSC](https://s3.amazonaws.com/research.metamind.io/decaNLP/data/schema.txt)).
 Each task is cast as question answering, which makes it possible to use our new Multitask Question Answering Network ([MQAN](https://github.com/salesforce/decaNLP/blob/d594b2bf127e13d0e61151b6a2af3bf63612f380/models/multitask_question_answering_network.py)).
 This model jointly learns all tasks in decaNLP without any task-specific modules or parameters in the multitask setting.
 For a more thorough introduction to decaNLP and the tasks, see the main [website](http://decanlp.com/), our [blog post](https://einstein.ai/research/the-natural-language-decathlon), or the [paper](https://arxiv.org/abs/1806.08730).
@@ -11,7 +11,7 @@ While the research direction associated with this repository focused on multitas
 
 ## Leaderboard
 
-| Model | decaNLP | [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) | [IWSLT](https://wit3.fbk.eu/mt.php?release=2016-01) | [CNN/DM](https://cs.nyu.edu/~kcho/DMQA/) | [MNLI](https://www.nyu.edu/projects/bowman/multinli/) | [SST](https://nlp.stanford.edu/sentiment/treebank.html) | [QA‑SRL](https://dada.cs.washington.edu/qasrl/) | [QA‑ZRE](http://nlp.cs.washington.edu/zeroshot/) | [WOZ](https://github.com/nmrksic/neural-belief-tracker/tree/master/data/woz) | [WikiSQL](https://github.com/salesforce/WikiSQL) | [MWSC](https://github.com/salesforce/decaNLP/blob/d594b2bf127e13d0e61151b6a2af3bf63612f380/local_data/schema.txt) |
+| Model | decaNLP | [SQuAD](https://rajpurkar.github.io/SQuAD-explorer/) | [IWSLT](https://wit3.fbk.eu/mt.php?release=2016-01) | [CNN/DM](https://cs.nyu.edu/~kcho/DMQA/) | [MNLI](https://www.nyu.edu/projects/bowman/multinli/) | [SST](https://nlp.stanford.edu/sentiment/treebank.html) | [QA‑SRL](https://dada.cs.washington.edu/qasrl/) | [QA‑ZRE](http://nlp.cs.washington.edu/zeroshot/) | [WOZ](https://github.com/nmrksic/neural-belief-tracker/tree/master/data/woz) | [WikiSQL](https://github.com/salesforce/WikiSQL) | [MWSC](https://s3.amazonaws.com/research.metamind.io/decaNLP/data/schema.txt) |
 | --- | --- | --- | --- | --- | --- | --- | ---- | ---- | --- | --- |--- |
 | [MQAN](https://arxiv.org/abs/1806.08730)(QA‑first+[CoVe](http://papers.nips.cc/paper/7209-learned-in-translation-contextualized-word-vectors)) | 599.9 | 75.5 | 18.9 | 24.4 | 73.6 | 86.4 | 80.8 | 37.4 | 85.8 | 68.5 | 48.8 |
 | [MQAN](https://arxiv.org/abs/1806.08730)(QA‑first) | 590.5 | 74.4 | 18.6 | 24.3 | 71.5 | 87.4 | 78.4 | 37.6 | 84.8 | 64.8 | 48.7 |
@@ -19,12 +19,6 @@ While the research direction associated with this repository focused on multitas
 
 ## Getting Started
 
-First, you will need to make a `.data` directory and move the examples for the Winograd Schemas into it:
-```bash
-mkdir -p .data/schema
-cp local_data/schema.txt .data/schema/
-```
-
 ### GPU vs. CPU
 
 The `devices` argument can be used to specify the devices for training. For CPU training, specify `--devices -1`; for GPU training, specify `--devices DEVICEID`. Note that multi-GPU training is currently a WIP, so `--devices` with a single device ID is sufficient for the commands below. The default is to train on GPU 0, since training on all ten decaNLP tasks would be quite time-consuming on CPU.
@@ -119,12 +113,12 @@ For test performance, please use the original [SQuAD](https://rajpurkar.github.i
 
 ## Pretrained Models
 
-This model is the best MQAN trained on decaNLP so far. It was trained first on SQuAD and then on all of decaNLP. You can obtain this model and run it on the validation sets with the following.
+This model is the best MQAN trained on decaNLP so far. It was trained first on SQuAD and then on all of decaNLP. It uses [CoVe](http://papers.nips.cc/paper/7209-learned-in-translation-contextualized-word-vectors.pdf) as well. You can obtain this model and run it on the validation sets with the following:
 
 ```bash
-wget https://s3.amazonaws.com/research.metamind.io/decaNLP/pretrained/mqan_decanlp_qa_first_cpu.tar.gz
-tar -xvzf mqan_decanlp_qa_first_cpu.tar.gz
-nvidia-docker run -it --rm -v `pwd`:/decaNLP/ -u $(id -u):$(id -g) bmccann/decanlp:cuda9_torch041 bash -c "python /decaNLP/predict.py --evaluate validation --path /decaNLP/mqan_decanlp_qa_first_cpu --checkpoint_name iteration_1140000.pth --device 0"
+wget https://s3.amazonaws.com/research.metamind.io/decaNLP/pretrained/mqan_decanlp_qa_first_cove_cpu.tar.gz
+tar -xvzf mqan_decanlp_qa_first_cove_cpu.tar.gz
+nvidia-docker run -it --rm -v `pwd`:/decaNLP/ -u $(id -u):$(id -g) bmccann/decanlp:cuda9_torch041 bash -c "python /decaNLP/predict.py --evaluate validation --path /decaNLP/mqan_decanlp_qa_first_cove_cpu/ --device 0 --silent"
 ```
 
 This model is the best MQAN trained on WikiSQL alone, which established [a new state-of-the-art performance by several points on that task](https://github.com/salesforce/WikiSQL): 73.2 / 75.4 / 81.4 (ordered test logical form accuracy, unordered test logical form accuracy, test execution accuracy).
diff --git a/predict.py b/predict.py
index ec36986d..1826d47a 100644
--- a/predict.py
+++ b/predict.py
@@ -207,14 +207,14 @@ def get_args():
                 'transformer_layers', 'rnn_layers', 'transformer_hidden',
                 'dimension', 'load', 'max_val_context_length', 'val_batch_size',
                 'transformer_heads', 'max_output_length', 'max_generative_vocab',
-                'lower', 'cove', 'intermediate_cove']
+                'lower', 'cove', 'intermediate_cove', 'elmo', 'glove_and_char']
     for r in retrieve:
         if r in config:
             setattr(args, r, config[r])
         elif 'cove' in r:
             setattr(args, r, False)
         elif 'elmo' in r:
-            setattr(args, r, False)
+            setattr(args, r, [-1])
         elif 'glove_and_char' in r:
             setattr(args, r, True)
         else:
diff --git a/text/torchtext/datasets/generic.py b/text/torchtext/datasets/generic.py
index caee9e10..3b15d709 100644
--- a/text/torchtext/datasets/generic.py
+++ b/text/torchtext/datasets/generic.py
@@ -776,7 +776,7 @@ class WinogradSchema(CQA, data.Dataset):
     def sort_key(ex):
         return data.interleave_keys(len(ex.context), len(ex.answer))
 
-    urls = []
+    urls = ['https://s3.amazonaws.com/research.metamind.io/decaNLP/data/schema.txt']
     name = 'schema'
     dirname = ''
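
The `predict.py` hunk extends `get_args()` so that `elmo` and `glove_and_char` are also restored from the config saved alongside a checkpoint, with per-key fallbacks for older checkpoints that never recorded them. A minimal standalone sketch of that fallback pattern, for readers of the patch (the sample `config` values and the `Namespace` container are illustrative, not taken from the repository; treating `[-1]` as "ELMo disabled" is an assumption based on this diff):

```python
from argparse import Namespace

# Illustrative config, as if loaded from a saved checkpoint's JSON;
# an older checkpoint may lack newer keys such as 'elmo'.
config = {'dimension': 200, 'lower': True}
retrieve = ['dimension', 'lower', 'cove', 'intermediate_cove', 'elmo', 'glove_and_char']

args = Namespace()
for r in retrieve:
    if r in config:
        setattr(args, r, config[r])    # prefer whatever was stored at training time
    elif 'cove' in r:
        setattr(args, r, False)        # CoVe off by default ('intermediate_cove' matches too)
    elif 'elmo' in r:
        setattr(args, r, [-1])         # [-1] taken to mean "no ELMo layers" (assumption)
    elif 'glove_and_char' in r:
        setattr(args, r, True)         # GloVe + character embeddings on by default
    else:
        setattr(args, r, None)

print(vars(args))
# {'dimension': 200, 'lower': True, 'cove': False, 'intermediate_cove': False,
#  'elmo': [-1], 'glove_and_char': True}
```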
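
Populating `WinogradSchema.urls` lets the torchtext-style dataset machinery fetch `schema.txt` from S3 on first use, which is why the manual `mkdir`/`cp` step could be deleted from the README above. A rough sketch of the equivalent download behavior, using only the Python standard library (the `.data/schema` destination mirrors the path the removed instructions used; this is not the repository's own download code):

```python
import os
import urllib.request

# The raw Winograd-schema examples now live on S3 (see WinogradSchema.urls).
URL = 'https://s3.amazonaws.com/research.metamind.io/decaNLP/data/schema.txt'
DEST_DIR = os.path.join('.data', 'schema')

os.makedirs(DEST_DIR, exist_ok=True)
dest = os.path.join(DEST_DIR, 'schema.txt')
if not os.path.exists(dest):
    # Equivalent of the removed `cp local_data/schema.txt .data/schema/` step,
    # except the file is downloaded rather than copied from the working tree.
    urllib.request.urlretrieve(URL, dest)
```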