Edited using Colaboratory (#3601)

2020-09-22 11:15:25 -06:00 · 2020-09-22 11:15:25 -06:00 · ba01ec9dbf
parent 0b222fc6cf
commit ba01ec9dbf
1 changed files with 15 additions and 25 deletions
--- a/notebooks/04-transformers-text-classification.ipynb
+++ b/notebooks/04-transformers-text-classification.ipynb
@ -6,8 +6,7 @@
      "name": "04-transformers-text-classification.ipynb",
      "provenance": [],
      "collapsed_sections": [],
-      "toc_visible": true,
+      "toc_visible": true
-      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
@ -16,16 +15,6 @@
    "accelerator": "GPU"
  },
  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "view-in-github",
-        "colab_type": "text"
-      },
-      "source": [
-        "<a href=\"https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/04-transformers-text-classification.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
-      ]
-    },
    {
      "cell_type": "markdown",
      "metadata": {
@ -42,9 +31,10 @@
        "---\n",
        "  - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n",
        "  - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n",
        "  - Ask a question on [the forum](https://forums.pytorchlightning.ai/)\n",
        "  - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-f6bl2l0l-JYMK3tbAgAmGRrlNr00f1A)\n",
        "\n",
-        "  - [HuggingFace nlp](https://github.com/huggingface/nlp)\n",
+        "  - [HuggingFace datasets](https://github.com/huggingface/datasets)\n",
        "  - [HuggingFace transformers](https://github.com/huggingface/transformers)"
      ]
    },
@ -83,7 +73,7 @@
        "from datetime import datetime\n",
        "from typing import Optional\n",
        "\n",
-        "import nlp\n",
+        "import datasets\n",
        "import numpy as np\n",
        "import pytorch_lightning as pl\n",
        "import torch\n",
@ -97,7 +87,7 @@
        "    glue_compute_metrics\n",
        ")"
      ],
-      "execution_count": null,
+      "execution_count": 2,
      "outputs": []
    },
    {
@ -147,7 +137,7 @@
        "    }\n",
        "\n",
        "    loader_columns = [\n",
-        "        'nlp_idx',\n",
+        "        'datasets_idx',\n",
        "        'input_ids',\n",
        "        'token_type_ids',\n",
        "        'attention_mask',\n",
@ -177,7 +167,7 @@
        "        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=True)\n",
        "\n",
        "    def setup(self, stage):\n",
-        "        self.dataset = nlp.load_dataset('glue', self.task_name)\n",
+        "        self.dataset = datasets.load_dataset('glue', self.task_name)\n",
        "\n",
        "        for split in self.dataset.keys():\n",
        "            self.dataset[split] = self.dataset[split].map(\n",
@ -191,7 +181,7 @@
        "        self.eval_splits = [x for x in self.dataset.keys() if 'validation' in x]\n",
        "\n",
        "    def prepare_data(self):\n",
-        "        nlp.load_dataset('glue', self.task_name)\n",
+        "        datasets.load_dataset('glue', self.task_name)\n",
        "        AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=True)\n",
        "    \n",
        "    def train_dataloader(self):\n",
@ -230,7 +220,7 @@
        "\n",
        "        return features"
      ],
-      "execution_count": null,
+      "execution_count": 3,
      "outputs": []
    },
    {
@ -297,7 +287,7 @@
        "\n",
        "        self.config = AutoConfig.from_pretrained(model_name_or_path, num_labels=num_labels)\n",
        "        self.model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, config=self.config)\n",
-        "        self.metric = nlp.load_metric(\n",
+        "        self.metric = datasets.load_metric(\n",
        "            'glue',\n",
        "            self.hparams.task_name,\n",
        "            experiment_id=datetime.now().strftime(\"%d-%m-%Y_%H-%M-%S\")\n",
@ -335,7 +325,7 @@
        "                if i == 0:\n",
        "                    result = pl.EvalResult(checkpoint_on=loss)\n",
        "                result.log(f'val_loss_{split}', loss, prog_bar=True)\n",
-        "                split_metrics = {f\"{k}_{split}\": v for k, v in self.metric.compute(preds, labels).items()}\n",
+        "                split_metrics = {f\"{k}_{split}\": v for k, v in self.metric.compute(predictions=preds, references=labels).items()}\n",
        "                result.log_dict(split_metrics, prog_bar=True)\n",
        "            return result\n",
        "\n",
@ -344,7 +334,7 @@
        "        loss = torch.stack([x['loss'] for x in outputs]).mean()\n",
        "        result = pl.EvalResult(checkpoint_on=loss)\n",
        "        result.log('val_loss', loss, prog_bar=True)\n",
-        "        result.log_dict(self.metric.compute(preds, labels), prog_bar=True)\n",
+        "        result.log_dict(self.metric.compute(predictions=preds, references=labels), prog_bar=True)\n",
        "        return result\n",
        "\n",
        "    def setup(self, stage):\n",
@ -394,7 +384,7 @@
        "        parser.add_argument(\"--weight_decay\", default=0.0, type=float)\n",
        "        return parser"
      ],
-      "execution_count": null,
+      "execution_count": 5,
      "outputs": []
    },
    {
@ -434,7 +424,7 @@
        "    trainer = pl.Trainer.from_argparse_args(args)\n",
        "    return dm, model, trainer"
      ],
-      "execution_count": null,
+      "execution_count": 6,
      "outputs": []
    },
    {
@ -553,4 +543,4 @@
      "outputs": []
    }
  ]
-}
+}