Edited using Colaboratory (#3601)
parent 0b222fc6cf
commit ba01ec9dbf
@@ -6,8 +6,7 @@
 "name": "04-transformers-text-classification.ipynb",
 "provenance": [],
 "collapsed_sections": [],
-"toc_visible": true,
-"include_colab_link": true
+"toc_visible": true
 },
 "kernelspec": {
 "name": "python3",
@@ -16,16 +15,6 @@
 "accelerator": "GPU"
 },
 "cells": [
-{
-"cell_type": "markdown",
-"metadata": {
-"id": "view-in-github",
-"colab_type": "text"
-},
-"source": [
-"<a href=\"https://colab.research.google.com/github/PytorchLightning/pytorch-lightning/blob/master/notebooks/04-transformers-text-classification.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
-]
-},
 {
 "cell_type": "markdown",
 "metadata": {
@@ -42,9 +31,10 @@
 "---\n",
 " - Give us a ⭐ [on Github](https://www.github.com/PytorchLightning/pytorch-lightning/)\n",
 " - Check out [the documentation](https://pytorch-lightning.readthedocs.io/en/latest/)\n",
+" - Ask a question on [the forum](https://forums.pytorchlightning.ai/)\n",
 " - Join us [on Slack](https://join.slack.com/t/pytorch-lightning/shared_invite/zt-f6bl2l0l-JYMK3tbAgAmGRrlNr00f1A)\n",
 "\n",
-" - [HuggingFace nlp](https://github.com/huggingface/nlp)\n",
+" - [HuggingFace datasets](https://github.com/huggingface/datasets)\n",
 " - [HuggingFace transformers](https://github.com/huggingface/transformers)"
 ]
 },
@@ -83,7 +73,7 @@
 "from datetime import datetime\n",
 "from typing import Optional\n",
 "\n",
-"import nlp\n",
+"import datasets\n",
 "import numpy as np\n",
 "import pytorch_lightning as pl\n",
 "import torch\n",
@@ -97,7 +87,7 @@
 " glue_compute_metrics\n",
 ")"
 ],
-"execution_count": null,
+"execution_count": 2,
 "outputs": []
 },
 {
@@ -147,7 +137,7 @@
 " }\n",
 "\n",
 " loader_columns = [\n",
-" 'nlp_idx',\n",
+" 'datasets_idx',\n",
 " 'input_ids',\n",
 " 'token_type_ids',\n",
 " 'attention_mask',\n",
@@ -177,7 +167,7 @@
 " self.tokenizer = AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=True)\n",
 "\n",
 " def setup(self, stage):\n",
-" self.dataset = nlp.load_dataset('glue', self.task_name)\n",
+" self.dataset = datasets.load_dataset('glue', self.task_name)\n",
 "\n",
 " for split in self.dataset.keys():\n",
 " self.dataset[split] = self.dataset[split].map(\n",
@@ -191,7 +181,7 @@
 " self.eval_splits = [x for x in self.dataset.keys() if 'validation' in x]\n",
 "\n",
 " def prepare_data(self):\n",
-" nlp.load_dataset('glue', self.task_name)\n",
+" datasets.load_dataset('glue', self.task_name)\n",
 " AutoTokenizer.from_pretrained(self.model_name_or_path, use_fast=True)\n",
 " \n",
 " def train_dataloader(self):\n",
@@ -230,7 +220,7 @@
 "\n",
 " return features"
 ],
-"execution_count": null,
+"execution_count": 3,
 "outputs": []
 },
 {
@@ -297,7 +287,7 @@
 "\n",
 " self.config = AutoConfig.from_pretrained(model_name_or_path, num_labels=num_labels)\n",
 " self.model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, config=self.config)\n",
-" self.metric = nlp.load_metric(\n",
+" self.metric = datasets.load_metric(\n",
 " 'glue',\n",
 " self.hparams.task_name,\n",
 " experiment_id=datetime.now().strftime(\"%d-%m-%Y_%H-%M-%S\")\n",
@@ -335,7 +325,7 @@
 " if i == 0:\n",
 " result = pl.EvalResult(checkpoint_on=loss)\n",
 " result.log(f'val_loss_{split}', loss, prog_bar=True)\n",
-" split_metrics = {f\"{k}_{split}\": v for k, v in self.metric.compute(preds, labels).items()}\n",
+" split_metrics = {f\"{k}_{split}\": v for k, v in self.metric.compute(predictions=preds, references=labels).items()}\n",
 " result.log_dict(split_metrics, prog_bar=True)\n",
 " return result\n",
 "\n",
@@ -344,7 +334,7 @@
 " loss = torch.stack([x['loss'] for x in outputs]).mean()\n",
 " result = pl.EvalResult(checkpoint_on=loss)\n",
 " result.log('val_loss', loss, prog_bar=True)\n",
-" result.log_dict(self.metric.compute(preds, labels), prog_bar=True)\n",
+" result.log_dict(self.metric.compute(predictions=preds, references=labels), prog_bar=True)\n",
 " return result\n",
 "\n",
 " def setup(self, stage):\n",
@@ -394,7 +384,7 @@
 " parser.add_argument(\"--weight_decay\", default=0.0, type=float)\n",
 " return parser"
 ],
-"execution_count": null,
+"execution_count": 5,
 "outputs": []
 },
 {
@@ -434,7 +424,7 @@
 " trainer = pl.Trainer.from_argparse_args(args)\n",
 " return dm, model, trainer"
 ],
-"execution_count": null,
+"execution_count": 6,
 "outputs": []
 },
 {
@@ -553,4 +543,4 @@
 "outputs": []
 }
 ]
-}
+}