From ba01ec9dbf3bdac1a26cfbbee67631c3ff1fadad Mon Sep 17 00:00:00 2001 From: Nathan Raw Date: Tue, 22 Sep 2020 11:15:25 -0600 Subject: [PATCH] Edited using Colaboratory (#3601) --- .../04-transformers-text-classification.ipynb | 40 +++++++------------ 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/notebooks/04-transformers-text-classification.ipynb b/notebooks/04-transformers-text-classification.ipynb index d2649c1a8d..e92673e66d 100644 --- a/notebooks/04-transformers-text-classification.ipynb +++ b/notebooks/04-transformers-text-classification.ipynb @@ -6,8 +6,7 @@ "name": "04-transformers-text-classification.ipynb", "provenance": [], "collapsed_sections": [], - "toc_visible": true, - "include_colab_link": true + "toc_visible": true }, "kernelspec": { "name": "python3", @@ -16,16 +15,6 @@ "accelerator": "GPU" }, "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, { "cell_type": "markdown", "metadata": { @@ -42,9 +31,10 @@ "---\n", " - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n", " - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n", + " - Ask a question on [the forum](https://forums.pytorchlightning.ai/)\n", " - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-f6bl2l0l-JYMK3tbAgAmGRrlNr00f1A)\n", "\n", - " - [HuggingFace nlp](https://github.com/huggingface/nlp)\n", + " - [HuggingFace datasets](https://github.com/huggingface/datasets)\n", " - [HuggingFace transformers](https://github.com/huggingface/transformers)" ] }, @@ -83,7 +73,7 @@ "from datetime import datetime\n", "from typing import Optional\n", "\n", - "import nlp\n", + "import datasets\n", "import numpy as np\n", "import pytorch_lightning as pl\n", "import torch\n", @@ -97,7 +87,7 @@ " glue_compute_metrics\n", ")" ], - "execution_count": null, + "execution_count": 2, "outputs": [] }, { @@ -147,7 +137,7 @@ " }\n", "\n", " loader_columns = [\n", - " 'nlp_idx',\n", + " 'datasets_idx',\n", " 'input_ids',\n", " 'token_type_ids',\n", " 'attention_mask',\n", @@ -177,7 +167,7 @@ " self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=True)\n", "\n", " def setup(self, stage):\n", - " self.dataset = nlp.load_dataset('glue', self.task_name)\n", + " self.dataset = datasets.load_dataset('glue', self.task_name)\n", "\n", " for split in self.dataset.keys():\n", " self.dataset[split] = self.dataset[split].map(\n", @@ -191,7 +181,7 @@ " self.eval_splits = [x for x in self.dataset.keys() if 'validation' in x]\n", "\n", " def prepare_data(self):\n", - " nlp.load_dataset('glue', self.task_name)\n", + " datasets.load_dataset('glue', self.task_name)\n", " AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=True)\n", " \n", " def train_dataloader(self):\n", @@ -230,7 +220,7 @@ "\n", " return features" ], - "execution_count": null, + "execution_count": 3, "outputs": [] }, { @@ -297,7 +287,7 @@ "\n", " self.config = AutoConfig.from_pretrained(model_name_or_path, num_labels=num_labels)\n", " self.model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, config=self.config)\n", - " self.metric = nlp.load_metric(\n", + " self.metric = datasets.load_metric(\n", " 'glue',\n", " self.hparams.task_name,\n", " experiment_id=datetime.now().strftime(\"%d-%m-%Y_%H-%M-%S\")\n", @@ -335,7 +325,7 @@ " if i == 0:\n", " result = pl.EvalResult(checkpoint_on=loss)\n", " result.log(f'val_loss_{split}', loss, prog_bar=True)\n", - " split_metrics = {f\"{k}_{split}\": v for k, v in self.metric.compute(preds, labels).items()}\n", + " split_metrics = {f\"{k}_{split}\": v for k, v in self.metric.compute(predictions=preds, references=labels).items()}\n", " result.log_dict(split_metrics, prog_bar=True)\n", " return result\n", "\n", @@ -344,7 +334,7 @@ " loss = torch.stack([x['loss'] for x in outputs]).mean()\n", " result = pl.EvalResult(checkpoint_on=loss)\n", " result.log('val_loss', loss, prog_bar=True)\n", - " result.log_dict(self.metric.compute(preds, labels), prog_bar=True)\n", + " result.log_dict(self.metric.compute(predictions=preds, references=labels), prog_bar=True)\n", " return result\n", "\n", " def setup(self, stage):\n", @@ -394,7 +384,7 @@ " parser.add_argument(\"--weight_decay\", default=0.0, type=float)\n", " return parser" ], - "execution_count": null, + "execution_count": 5, "outputs": [] }, { @@ -434,7 +424,7 @@ " trainer = pl.Trainer.from_argparse_args(args)\n", " return dm, model, trainer" ], - "execution_count": null, + "execution_count": 6, "outputs": [] }, { @@ -553,4 +543,4 @@ "outputs": [] } ] -} +} \ No newline at end of file