CI: Force docs warnings to be raised as errors (+ fix all) (#1191)
* add argument to force warn
* fix automodule error
* fix permalink error
* fix indentation warning
* fix warning
* fix import warnings
* fix duplicate label warning
* fix bullet point indentation warning
* fix duplicate label warning
* fix "import not top level" warning
* line too long
* fix indentation
* fix bullet points indentation warning
* fix hooks warnings
* fix reference problem with excluded test_tube
* fix indentation in print
* change imports for trains logger
* remove pandas type annotation
* Update pytorch_lightning/core/lightning.py
* include bullet points inside note
* remove old quick start guide (unused)
* fix unused warning
* fix formatting
* fix duplicate label issue
* fix duplicate label warning (replaced by class ref)
* fix tick
* fix indentation warnings
* docstring ticks
* remove obsolete docstring typing
* Revert "remove old quick start guide (unused)"
This reverts commit d51bb40695.
* added old quick start guide to navigation
* remove unused tutorials file
* ignore some modules that got deprecated and are not used anymore
* fix duplicate label warning
* move examples doc and exclude pl_examples from autodoc
* fix formatting for configure_optimizer
* fix no blank line warnings
* fix "see also" labels and add paramref extension
* fix more reference problems
* fix multi-gpu reference
* fix weird warning
* fix indentation and unrecognized characters in code block
* fix warning "... not included in toctree"
* fix PIL import error
* fix duplicate target "here" warning
* fix broken link
* revert accidentally moved pl_examples
* changelog
* stdout
* note some things to know
Co-Authored-By: Jirka Borovec <Borda@users.noreply.github.com>
Co-authored-by: J. Borovec <jirka.borovec@seznam.cz>
Co-authored-by: Jirka Borovec <Borda@users.noreply.github.com>
This commit is contained in:
parent 732eaee4d7
commit 792962ecc9
@@ -56,7 +56,7 @@ references:
pip install -r requirements.txt --user
sudo pip install -r docs/requirements.txt
# sphinx-apidoc -o ./docs/source ./pytorch_lightning **/test_* --force --follow-links
cd docs; make clean ; make html --debug --jobs 2
cd docs; make clean ; make html --debug --jobs 2 SPHINXOPTS="-W"

jobs:
@@ -31,6 +31,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/).

- Fixed bug related to type checking of `ReduceLROnPlateau` lr schedulers ([#1114](https://github.com/PyTorchLightning/pytorch-lightning/issues/1114))
- Fixed a bug to ensure lightning checkpoints to be backward compatible ([#1132](https://github.com/PyTorchLightning/pytorch-lightning/pull/1132))
- Fixed all warnings and errors in the docs build process ([#1191](https://github.com/PyTorchLightning/pytorch-lightning/pull/1191))

## [0.7.1] - 2020-03-07
@@ -8,4 +8,5 @@ sphinxcontrib-fulltoc
sphinxcontrib-mockautodoc
git+https://github.com/PytorchLightning/lightning_sphinx_theme.git
# pip_shims
sphinx-autodoc-typehints
sphinx-autodoc-typehints
sphinx-paramlinks
@@ -1,6 +1,8 @@
.. role:: hidden
:class: hidden-section

.. _callbacks:

Callbacks
=========
@@ -73,7 +73,7 @@ needs_sphinx = '1.4'
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinxcontrib.mockautodoc',
# 'sphinxcontrib.mockautodoc', # raises error: directive 'automodule' is already registered ...
# 'sphinxcontrib.fulltoc', # breaks pytorch-theme with unexpected kw argument 'titles_only'
'sphinx.ext.doctest',
'sphinx.ext.intersphinx',
@@ -87,6 +87,7 @@ extensions = [
# 'm2r',
'nbsphinx',
'sphinx_autodoc_typehints',
'sphinx_paramlinks',
]

# Add any paths that contain templates here, relative to this directory.
@@ -125,7 +126,20 @@ language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ['*.test_*']
exclude_patterns = [
'pytorch_lightning.rst',
'pl_examples.*',
'modules.rst',

# deprecated/renamed:
'pytorch_lightning.loggers.comet_logger.rst', # TODO: remove in v0.8.0
'pytorch_lightning.loggers.mlflow_logger.rst', # TODO: remove in v0.8.0
'pytorch_lightning.loggers.test_tube_logger.rst', # TODO: remove in v0.8.0
'pytorch_lightning.callbacks.pt_callbacks.*', # TODO: remove in v0.8.0
'pytorch_lightning.pt_overrides.*', # TODO: remove in v0.8.0
'pytorch_lightning.root_module.*', # TODO: remove in v0.8.0
'pytorch_lightning.logging.*', # TODO: remove in v0.8.0
]

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None
@@ -297,8 +311,17 @@ with open(os.path.join(PATH_ROOT, 'requirements.txt'), 'r') as fp:
MOCK_REQUIRE_PACKAGES.append(pkg.rstrip())

# TODO: better parse from package since the import name and package name may differ
MOCK_MANUAL_PACKAGES = ['torch', 'torchvision', 'test_tube',
'mlflow', 'comet_ml', 'wandb', 'neptune', 'trains']
MOCK_MANUAL_PACKAGES = [
'torch',
'torchvision',
'PIL',
'test_tube',
'mlflow',
'comet_ml',
'wandb',
'neptune',
'trains',
]
autodoc_mock_imports = MOCK_REQUIRE_PACKAGES + MOCK_MANUAL_PACKAGES
# for mod_name in MOCK_REQUIRE_PACKAGES:
# sys.modules[mod_name] = mock.Mock()
@@ -369,7 +392,7 @@ autodoc_default_options = {
# This value determines the text for the permalink; it defaults to "¶". Set it to None or the empty
# string to disable permalinks.
# https://www.sphinx-doc.org/en/master/usage/configuration.html#confval-html_add_permalinks
html_add_permalinks = True
html_add_permalinks = "¶"

# True to prefix each section label with the name of the document it is in, followed by a colon.
# For example, index:Introduction for a section called Introduction that appears in document index.rst.
@@ -8,6 +8,9 @@ This flag runs a "unit test" by running 1 training batch and 1 validation batch.
The point is to detect any bugs in the training/validation loop without having to wait for
a full epoch to crash.

(See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.fast_dev_run`
argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)

.. code-block:: python

trainer = pl.Trainer(fast_dev_run=True)
@@ -16,6 +19,9 @@ Inspect gradient norms
----------------------
Logs (to a logger), the norm of each weight matrix.

(See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.track_grad_norm`
argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)

.. code-block:: python

# the 2-norm
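For reference (not part of the diff, which truncates the example here): a minimal sketch of the flag this hunk documents, assuming the 0.7-era ``Trainer`` API; the value ``2`` is illustrative.

.. code-block:: python

    import pytorch_lightning as pl

    # log the 2-norm of each weight matrix's gradients to the attached logger
    trainer = pl.Trainer(track_grad_norm=2)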
@@ -25,7 +31,8 @@ Log GPU usage
-------------
Logs (to a logger) the GPU usage for each GPU on the master machine.

(See: :ref:`trainer`)
(See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.log_gpu_memory`
argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)

.. code-block:: python
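For reference (not part of the diff): a hedged sketch of the flag this hunk documents; the option string is an assumption based on the 0.7-era docs, not confirmed by this hunk.

.. code-block:: python

    import pytorch_lightning as pl

    # assumed option 'all' logs memory for every GPU on the master machine;
    # 'min_max' would log only the extremes
    trainer = pl.Trainer(log_gpu_memory='all')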
@@ -37,7 +44,8 @@ Make model overfit on subset of data
A good debugging technique is to take a tiny portion of your data (say 2 samples per class),
and try to get your model to overfit. If it can't, it's a sign it won't work with large datasets.

(See: :ref:`trainer`)
(See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.overfit_pct`
argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)

.. code-block:: python
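For reference (not part of the diff): a minimal sketch of the flag this hunk documents; ``overfit_pct`` takes a fraction of the data, and ``0.01`` is an illustrative value.

.. code-block:: python

    import pytorch_lightning as pl

    # use only 1% of the data to check that the model can overfit it
    trainer = pl.Trainer(overfit_pct=0.01)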
@@ -48,28 +56,23 @@ Print the parameter count by layer
Whenever the .fit() function gets called, the Trainer will print the weights summary for the lightningModule.
To disable this behavior, turn off this flag:

(See: :ref:`trainer.weights_summary`)
(See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.weights_summary`
argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)

.. code-block:: python

trainer = pl.Trainer(weights_summary=None)

Print which gradients are nan
-----------------------------
Prints the tensors with nan gradients.

(See: :meth:`trainer.print_nan_grads`)

.. code-block:: python

trainer = pl.Trainer(print_nan_grads=False)

Set the number of validation sanity steps
-----------------------------------------
Lightning runs a few steps of validation in the beginning of training.
This avoids crashing in the validation loop sometime deep into a lengthy training loop.

(See: :paramref:`~pytorch_lightning.trainer.trainer.Trainer.num_sanity_val_steps`
argument of :class:`~pytorch_lightning.trainer.trainer.Trainer`)

.. code-block:: python

# DEFAULT
trainer = Trainer(nb_sanity_val_steps=5)
trainer = Trainer(num_sanity_val_steps=5)
@@ -11,7 +11,8 @@ Enable Early Stopping
---------------------
There are two ways to enable early stopping.

.. seealso:: :ref:`trainer`
.. seealso::
:class:`~pytorch_lightning.trainer.trainer.Trainer`

.. code-block:: python
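For reference (not part of the diff): a hedged sketch of the two ways mentioned above, assuming the 0.7-era API; monitor and patience values are illustrative.

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import EarlyStopping

    # 1) rely on the default early stopping behaviour
    trainer = Trainer(early_stop_callback=True)

    # 2) pass a customized EarlyStopping callback
    early_stop = EarlyStopping(monitor='val_loss', patience=3, mode='min')
    trainer = Trainer(early_stop_callback=early_stop)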
@@ -35,4 +36,5 @@ To disable early stopping pass ``False`` to the `early_stop_callback`.
Note that ``None`` will not disable early stopping but will lead to the
default behaviour.

.. seealso:: :ref:`trainer`
.. seealso::
:class:`~pytorch_lightning.trainer.trainer.Trainer`
@@ -1,18 +0,0 @@
.. toctree::
:maxdepth: 1
:name: Community Examples
:caption: Community Examples

Contextual Emotion Detection (DoubleDistilBert) <https://github.com/PyTorchLightning/emotion_transformer>
Generative Adversarial Network <https://colab.research.google.com/drive/1F_RNcHzTfFuQf-LeKvSlud6x7jXYkG31#scrollTo=TyYOdg8g77P0>
Hyperparameter optimization with Optuna <https://github.com/optuna/optuna/blob/master/examples/pytorch_lightning_simple.py>
Image Inpainting using Partial Convolutions <https://github.com/ryanwongsa/Image-Inpainting>
MNIST on TPU <https://colab.research.google.com/drive/1-_LKx4HwAxl5M6xPJmqAAu444LTDQoa3#scrollTo=BHBz1_AnamN_>
NER (transformers, TPU) <https://colab.research.google.com/drive/1dBN-wwYUngLYVt985wGs_OKPlK_ANB9D>
NeuralTexture (CVPR) <https://github.com/PyTorchLightning/neuraltexture>
Recurrent Attentive Neural Process <https://github.com/PyTorchLightning/attentive-neural-processes>
Siamese Nets for One-shot Image Recognition <https://github.com/PyTorchLightning/Siamese-Neural-Networks>
Speech Transformers <https://github.com/PyTorchLightning/speech-transformer-pytorch_lightning>
Transformers transfer learning (Huggingface) <https://colab.research.google.com/drive/1F_RNcHzTfFuQf-LeKvSlud6x7jXYkG31#scrollTo=yr7eaxkF-djf>
Transformers text classification <https://github.com/ricardorei/lightning-text-classification>
VAE Library of over 18+ VAE flavors <https://github.com/AntixK/PyTorch-VAE>
@@ -7,7 +7,8 @@ Comet.ml
`Comet.ml <https://www.comet.ml/site/>`_ is a third-party logger.
To use CometLogger as your logger do the following.

.. seealso:: :ref:`comet` docs.
.. seealso::
:class:`~pytorch_lightning.loggers.CometLogger` docs.

.. code-block:: python

@@ -38,7 +39,8 @@ Neptune.ai
`Neptune.ai <https://neptune.ai/>`_ is a third-party logger.
To use Neptune.ai as your logger do the following.

.. seealso:: :ref:`neptune` docs.
.. seealso::
:class:`~pytorch_lightning.loggers.NeptuneLogger` docs.

.. code-block:: python

@@ -68,7 +70,8 @@ allegro.ai TRAINS
`allegro.ai <https://github.com/allegroai/trains/>`_ is a third-party logger.
To use TRAINS as your logger do the following.

.. seealso:: :ref:`trains` docs.
.. seealso::
:class:`~pytorch_lightning.loggers.TrainsLogger` docs.

.. code-block:: python

@@ -95,7 +98,8 @@ Tensorboard

To use `Tensorboard <https://pytorch.org/docs/stable/tensorboard.html>`_ as your logger do the following.

.. seealso:: TensorBoardLogger :ref:`tf-logger`
.. seealso::
:class:`~pytorch_lightning.loggers.TensorBoardLogger` docs.

.. code-block:: python

@@ -121,7 +125,8 @@ Test Tube
`Test Tube <https://github.com/williamFalcon/test-tube>`_ is a tensorboard logger but with nicer file structure.
To use TestTube as your logger do the following.

.. seealso:: TestTube :ref:`testTube`
.. seealso::
:class:`~pytorch_lightning.loggers.TestTubeLogger` docs.

.. code-block:: python

@@ -146,7 +151,8 @@ Wandb
`Wandb <https://www.wandb.com/>`_ is a third-party logger.
To use Wandb as your logger do the following.

.. seealso:: :ref:`wandb` docs
.. seealso::
:class:`~pytorch_lightning.loggers.WandbLogger` docs.

.. code-block:: python
@@ -167,7 +173,7 @@ The Wandb logger is available anywhere except ``__init__`` in your LightningModule


Multiple Loggers
^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^

PyTorch-Lightning supports use of multiple loggers, just pass a list to the `Trainer`.
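For reference (not part of the diff): a minimal sketch of passing several loggers at once, as described above; the save directories and names are illustrative.

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import TensorBoardLogger, TestTubeLogger

    tb_logger = TensorBoardLogger(save_dir='logs/', name='my_model')
    tt_logger = TestTubeLogger(save_dir='logs/', name='my_model')

    # both loggers receive every metric
    trainer = Trainer(logger=[tb_logger, tt_logger])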
@@ -22,7 +22,8 @@ Control log writing frequency
Writing to a logger can be expensive. In Lightning you can set the interval at which you
want to log using this trainer flag.

.. seealso:: :ref:`trainer`
.. seealso::
:class:`~pytorch_lightning.trainer.trainer.Trainer`

.. code-block:: python
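For reference (not part of the diff): a hedged sketch, assuming the flag referred to above is the 0.7-era ``row_log_interval`` argument; the value is illustrative.

.. code-block:: python

    from pytorch_lightning import Trainer

    # write logged metrics to the logger every 10 steps instead of every step
    trainer = Trainer(row_log_interval=10)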
@@ -16,7 +16,8 @@ Force training for min or max epochs
-------------------------------------
It can be useful to force training for a minimum number of epochs or limit to a max number.

.. seealso:: :ref:`trainer`
.. seealso::
:class:`~pytorch_lightning.trainer.trainer.Trainer`

.. code-block:: python
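For reference (not part of the diff): a minimal sketch of the flags this section documents, with illustrative values.

.. code-block:: python

    from pytorch_lightning import Trainer

    # train for at least 1 epoch and at most 100 epochs
    trainer = Trainer(min_epochs=1, max_epochs=100)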
@@ -2,6 +2,7 @@ Hooks
=====

.. automodule:: pytorch_lightning.core.hooks
:noindex:

Hooks lifecycle
---------------
@@ -11,6 +11,7 @@ PyTorch Lightning Documentation
:name: start
:caption: Start Here

new-project
introduction_guide

.. toctree::
@@ -24,13 +25,24 @@ PyTorch Lightning Documentation
loggers
trainer


.. toctree::
:maxdepth: 1
:name: Community Examples
:caption: Community Examples

examples
Contextual Emotion Detection (DoubleDistilBert) <https://github.com/PyTorchLightning/emotion_transformer>
Generative Adversarial Network <https://colab.research.google.com/drive/1F_RNcHzTfFuQf-LeKvSlud6x7jXYkG31#scrollTo=TyYOdg8g77P0>
Hyperparameter optimization with Optuna <https://github.com/optuna/optuna/blob/master/examples/pytorch_lightning_simple.py>
Image Inpainting using Partial Convolutions <https://github.com/ryanwongsa/Image-Inpainting>
MNIST on TPU <https://colab.research.google.com/drive/1-_LKx4HwAxl5M6xPJmqAAu444LTDQoa3#scrollTo=BHBz1_AnamN_>
NER (transformers, TPU) <https://colab.research.google.com/drive/1dBN-wwYUngLYVt985wGs_OKPlK_ANB9D>
NeuralTexture (CVPR) <https://github.com/PyTorchLightning/neuraltexture>
Recurrent Attentive Neural Process <https://github.com/PyTorchLightning/attentive-neural-processes>
Siamese Nets for One-shot Image Recognition <https://github.com/PyTorchLightning/Siamese-Neural-Networks>
Speech Transformers <https://github.com/PyTorchLightning/speech-transformer-pytorch_lightning>
Transformers transfer learning (Huggingface) <https://colab.research.google.com/drive/1F_RNcHzTfFuQf-LeKvSlud6x7jXYkG31#scrollTo=yr7eaxkF-djf>
Transformers text classification <https://github.com/ricardorei/lightning-text-classification>
VAE Library of over 18+ VAE flavors <https://github.com/AntixK/PyTorch-VAE>

.. toctree::
:maxdepth: 1
@@ -83,3 +95,17 @@ Indices and tables
* :ref:`genindex`
* :ref:`modindex`
* :ref:`search`


.. This is here to make sphinx aware of the modules but not throw an error/warning
.. toctree::
:hidden:

pytorch_lightning.core
pytorch_lightning.callbacks
pytorch_lightning.loggers
pytorch_lightning.overrides
pytorch_lightning.profiler
pytorch_lightning.trainer
pytorch_lightning.utilities
@@ -472,7 +472,7 @@ First, change the runtime to TPU (and reinstall lightning).

Next, install the required xla library (adds support for PyTorch on TPUs)

.. code-block::
.. code-block:: python

import collections
from datetime import datetime, timedelta
@@ -504,6 +504,8 @@ Next, install the required xla library (adds support for PyTorch on TPUs)
update = threading.Thread(target=update_server_xrt)
update.start()

.. code-block::

# Install Colab TPU compat PyTorch/TPU wheels and dependencies
!pip uninstall -y torch torchvision
!gsutil cp "$DIST_BUCKET/$TORCH_WHEEL" .
@@ -981,7 +983,8 @@ And pass the callbacks into the trainer

Trainer(callbacks=[MyPrintingCallback()])

.. note:: See full list of 12+ hooks in the `Callback docs <callbacks.rst#callback-class>`_
.. note::
See full list of 12+ hooks in the :ref:`callbacks`.

---------
@@ -1,5 +1,7 @@
.. _multi-gpu-training:

Multi-GPU training
===================
==================
Lightning supports multiple ways of doing distributed training.

Preparing your code
@@ -235,7 +237,7 @@ Validation and test step also have the same option when using dp
...

Implement Your Own Distributed (DDP) training
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
If you need your own way to init PyTorch DDP you can override :meth:`pytorch_lightning.core.LightningModule.`.

If you also need to use your own DDP implementation, override: :meth:`pytorch_lightning.core.LightningModule.configure_ddp`.
@@ -41,65 +41,66 @@ hosted on GCP.

To get a TPU on colab, follow these steps:

1. Go to https://colab.research.google.com/.
1. Go to `https://colab.research.google.com/ <https://colab.research.google.com/>`_.

2. Click "new notebook" (bottom right of pop-up).
2. Click "new notebook" (bottom right of pop-up).

3. Click runtime > change runtime settings. Select Python 3,
and hardware accelerator "TPU". This will give you a TPU with 8 cores.
3. Click runtime > change runtime settings. Select Python 3, and hardware accelerator "TPU".
This will give you a TPU with 8 cores.

4. Next, insert this code into the first cell and execute. This
will install the xla library that interfaces between PyTorch and
the TPU.
4. Next, insert this code into the first cell and execute.
This will install the xla library that interfaces between PyTorch and the TPU.

.. code-block:: python
.. code-block:: python

import collections
from datetime import datetime, timedelta
import os
import requests
import threading
import collections
from datetime import datetime, timedelta
import os
import requests
import threading

_VersionConfig = collections.namedtuple('_VersionConfig', 'wheels,server')
VERSION = "xrt==1.15.0" #@param ["xrt==1.15.0", "torch_xla==nightly"]
CONFIG = {
'xrt==1.15.0': _VersionConfig('1.15', '1.15.0'),
'torch_xla==nightly': _VersionConfig('nightly', 'XRT-dev{}'.format(
(datetime.today() - timedelta(1)).strftime('%Y%m%d'))),
}[VERSION]
DIST_BUCKET = 'gs://tpu-pytorch/wheels'
TORCH_WHEEL = 'torch-{}-cp36-cp36m-linux_x86_64.whl'.format(CONFIG.wheels)
TORCH_XLA_WHEEL = 'torch_xla-{}-cp36-cp36m-linux_x86_64.whl'.format(CONFIG.wheels)
TORCHVISION_WHEEL = 'torchvision-{}-cp36-cp36m-linux_x86_64.whl'.format(CONFIG.wheels)
_VersionConfig = collections.namedtuple('_VersionConfig', 'wheels,server')
VERSION = "xrt==1.15.0" #@param ["xrt==1.15.0", "torch_xla==nightly"]
CONFIG = {
'xrt==1.15.0': _VersionConfig('1.15', '1.15.0'),
'torch_xla==nightly': _VersionConfig('nightly', 'XRT-dev{}'.format(
(datetime.today() - timedelta(1)).strftime('%Y%m%d'))),
}[VERSION]
DIST_BUCKET = 'gs://tpu-pytorch/wheels'
TORCH_WHEEL = 'torch-{}-cp36-cp36m-linux_x86_64.whl'.format(CONFIG.wheels)
TORCH_XLA_WHEEL = 'torch_xla-{}-cp36-cp36m-linux_x86_64.whl'.format(CONFIG.wheels)
TORCHVISION_WHEEL = 'torchvision-{}-cp36-cp36m-linux_x86_64.whl'.format(CONFIG.wheels)

# Update TPU XRT version
def update_server_xrt():
print('Updating server-side XRT to {} ...'.format(CONFIG.server))
url = 'http://{TPU_ADDRESS}:8475/requestversion/{XRT_VERSION}'.format(
TPU_ADDRESS=os.environ['COLAB_TPU_ADDR'].split(':')[0],
XRT_VERSION=CONFIG.server,
)
print('Done updating server-side XRT: {}'.format(requests.post(url)))
# Update TPU XRT version
def update_server_xrt():
print('Updating server-side XRT to {} ...'.format(CONFIG.server))
url = 'http://{TPU_ADDRESS}:8475/requestversion/{XRT_VERSION}'.format(
TPU_ADDRESS=os.environ['COLAB_TPU_ADDR'].split(':')[0],
XRT_VERSION=CONFIG.server,
)
print('Done updating server-side XRT: {}'.format(requests.post(url)))

update = threading.Thread(target=update_server_xrt)
update.start()
update = threading.Thread(target=update_server_xrt)
update.start()

# Install Colab TPU compat PyTorch/TPU wheels and dependencies
!pip uninstall -y torch torchvision
!gsutil cp "$DIST_BUCKET/$TORCH_WHEEL" .
!gsutil cp "$DIST_BUCKET/$TORCH_XLA_WHEEL" .
!gsutil cp "$DIST_BUCKET/$TORCHVISION_WHEEL" .
!pip install "$TORCH_WHEEL"
!pip install "$TORCH_XLA_WHEEL"
!pip install "$TORCHVISION_WHEEL"
!sudo apt-get install libomp5
update.join()
.. code-block::

5. Once the above is done, install PyTorch Lightning (v 0.7.0+).
# Install Colab TPU compat PyTorch/TPU wheels and dependencies
!pip uninstall -y torch torchvision
!gsutil cp "$DIST_BUCKET/$TORCH_WHEEL" .
!gsutil cp "$DIST_BUCKET/$TORCH_XLA_WHEEL" .
!gsutil cp "$DIST_BUCKET/$TORCHVISION_WHEEL" .
!pip install "$TORCH_WHEEL"
!pip install "$TORCH_XLA_WHEEL"
!pip install "$TORCHVISION_WHEEL"
!sudo apt-get install libomp5
update.join()

.. code-block::
5. Once the above is done, install PyTorch Lightning (v 0.7.0+).

! pip install pytorch-lightning
.. code-block::

!pip install pytorch-lightning

6. Then set up your LightningModule as normal.
@@ -147,8 +148,8 @@ for TPU use

return loader

8. Configure the number of TPU cores in the trainer. You can only choose
1 or 8. To use a full TPU pod skip to the TPU pod section.
Configure the number of TPU cores in the trainer. You can only choose 1 or 8.
To use a full TPU pod skip to the TPU pod section.

.. code-block:: python
@@ -7,7 +7,7 @@ Accumulate gradients
Accumulated gradients runs K small batches of size N before doing a backwards pass.
The effect is a large effective batch size of size KxN.

.. seealso:: :ref:`trainer`
.. seealso:: :class:`~pytorch_lightning.trainer.trainer.Trainer`

.. code-block:: python
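For reference (not part of the diff): a minimal sketch of gradient accumulation with an illustrative K.

.. code-block:: python

    from pytorch_lightning import Trainer

    # accumulate gradients over 4 batches before each optimizer step (effective batch size 4 * N)
    trainer = Trainer(accumulate_grad_batches=4)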
@@ -20,7 +20,7 @@ Gradient Clipping
Gradient clipping may be enabled to avoid exploding gradients. Specifically, this will `clip the gradient
norm <https://pytorch.org/docs/stable/nn.html#torch.nn.utils.clip_grad_norm_>`_ computed over all model parameters together.

.. seealso:: :ref:`trainer`
.. seealso:: :class:`~pytorch_lightning.trainer.trainer.Trainer`

.. code-block:: python
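For reference (not part of the diff): a minimal sketch of the flag this section documents, with an illustrative threshold.

.. code-block:: python

    from pytorch_lightning import Trainer

    # clip the global gradient norm to 0.5 before each optimizer step
    trainer = Trainer(gradient_clip_val=0.5)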
@@ -1,4 +0,0 @@
From PyTorch to Lightning
=========================

Talk about how to convert
@@ -10,6 +10,10 @@ __homepage__ = 'https://github.com/PyTorchLightning/pytorch-lightning'
__docs__ = "PyTorch Lightning is the lightweight PyTorch wrapper for ML researchers." \
" Scale your models. Write less boilerplate."

from logging import getLogger

_logger = getLogger("lightning")

try:
# This variable is injected in the __builtins__ by the build
# process. It used to enable importing subpackages of skimage when
@@ -20,12 +24,9 @@ except NameError:

if __LIGHTNING_SETUP__:
import sys # pragma: no-cover
sys.stderr.write('Partial import of `torchlightning` during the build process.\n') # pragma: no-cover
sys.stdout.write(f'Partial import of `{__name__}` during the build process.\n') # pragma: no-cover
# We are not importing the rest of the lightning during the build process, as it may not be compiled yet
else:
from logging import getLogger
_logger = getLogger("lightning")

from pytorch_lightning.core import LightningModule
from pytorch_lightning.trainer import Trainer
from pytorch_lightning.callbacks import Callback
@@ -15,7 +15,9 @@ class GradientAccumulationScheduler(Callback):

Args:
scheduling: scheduling in format {epoch: accumulation_factor}
.. warning:: Epochs indexing starts from "1" until v0.6.x,

.. warning::
Epochs indexing starts from "1" until v0.6.x,
but will start from "0" in v0.8.0.

Example::
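For reference (not part of the diff): a hedged sketch of the scheduling format documented above; the epochs and factors are illustrative, and passing the callback via ``Trainer(callbacks=...)`` follows the pattern shown elsewhere in these docs.

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import GradientAccumulationScheduler

    # accumulate every 4 batches from epoch 5, and every 8 batches from epoch 10
    accumulator = GradientAccumulationScheduler(scheduling={5: 4, 10: 8})
    trainer = Trainer(callbacks=[accumulator])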
@@ -1,6 +1,6 @@
"""
Hooks
=====
Model Hooks
===========

There are cases when you might want to do something different at different parts of the training/validation loop.
To enable a hook, simply override the method in your LightningModule and the trainer will call it at the correct time.
@@ -81,11 +81,11 @@ class LightningModule(ABC, GradInformation, ModelIO, ModelHooks):
x (object): The thing to print

Examples:
.. code-block:: python
.. code-block:: python

# example if we were using this model as a feature extractor
def forward(self, x):
self.print(x, 'in loader')
# example if we were using this model as a feature extractor
def forward(self, x):
self.print(x, 'in loader')

"""
if self.trainer.proc_rank == 0:
@@ -288,7 +288,8 @@ class LightningModule(ABC, GradInformation, ModelIO, ModelHooks):
loss = nce_loss(loss)
return {'loss': loss}

.. seealso:: see the `multi-gpu guide for more details <multi_gpu.rst#caveats>`_.
.. seealso::
see the :ref:`multi-gpu-training` guide for more details.
"""

def validation_step(self, *args, **kwargs) -> Dict[str, Tensor]:
@@ -434,7 +435,8 @@ class LightningModule(ABC, GradInformation, ModelIO, ModelHooks):
loss = nce_loss(loss)
return {'loss': loss}

.. seealso:: see the `multi-gpu guide for more details <multi_gpu.rst#caveats>`_.
.. seealso::
see the :ref:`multi-gpu-training` guide for more details.
"""

def validation_end(self, outputs):
@@ -656,7 +658,8 @@ class LightningModule(ABC, GradInformation, ModelIO, ModelHooks):
loss = nce_loss(loss)
return {'loss': loss}

.. seealso:: see the `multi-gpu guide for more details <multi_gpu.rst#caveats>`_.
.. seealso::
see the :ref:`multi-gpu-training` guide for more details.
"""

def test_end(self, outputs):
@@ -942,27 +945,38 @@ class LightningModule(ABC, GradInformation, ModelIO, ModelHooks):
dis_sched = CosineAnnealing(discriminator_opt, T_max=10) # called every epoch
return [gen_opt, dis_opt], [gen_sched, dis_sched]

.. note:: Some things to note:
- Lightning calls ``.backward()`` and ``.step()`` on each optimizer
and learning rate scheduler as needed.
- If you use 16-bit precision (``precision=16``), Lightning will automatically
handle the optimizers for you.
- If you use multiple optimizers, training_step will have an additional ``optimizer_idx`` parameter.
- If you use LBFGS lightning handles the closure function automatically for you.
- If you use multiple optimizers, gradients will be calculated only
for the parameters of current optimizer at each training step.
- If you need to control how often those optimizers step or override the
default .step() schedule, override the `optimizer_step` hook.
- If you only want to call a learning rate scheduler every `x` step or epoch,
or want to monitor a custom metric, you can specify these in a dictionary:
.. code-block:: python
Note:

{
'scheduler': lr_scheduler,
'interval': 'step' # or 'epoch'
'monitor': 'val_f1',
'frequency': x
}
Some things to know:

- Lightning calls ``.backward()`` and ``.step()`` on each optimizer
and learning rate scheduler as needed.

- If you use 16-bit precision (``precision=16``), Lightning will automatically
handle the optimizers for you.

- If you use multiple optimizers, training_step will have an additional
``optimizer_idx`` parameter.

- If you use LBFGS lightning handles the closure function automatically for you

- If you use multiple optimizers, gradients will be calculated only
for the parameters of current optimizer at each training step.

- If you need to control how often those optimizers step or override the
default .step() schedule, override the `optimizer_step` hook.

- If you only want to call a learning rate scheduler every `x` step or epoch,
or want to monitor a custom metric, you can specify these in a dictionary:

.. code-block:: python

{
'scheduler': lr_scheduler,
'interval': 'step' # or 'epoch'
'monitor': 'val_f1',
'frequency': x
}

"""
return Adam(self.parameters(), lr=1e-3)
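For reference (not part of the diff): a hedged sketch pulling together the points in the note above, combining two optimizers with a scheduler dictionary; the model attributes and the monitored metric name are illustrative.

.. code-block:: python

    from torch.optim import Adam
    from torch.optim.lr_scheduler import ReduceLROnPlateau

    def configure_optimizers(self):
        gen_opt = Adam(self.generator.parameters(), lr=1e-3)
        dis_opt = Adam(self.discriminator.parameters(), lr=1e-3)
        # dictionary form: step this scheduler once per epoch and watch a custom metric
        dis_sched = {
            'scheduler': ReduceLROnPlateau(dis_opt, mode='max'),
            'interval': 'epoch',
            'monitor': 'val_f1',
            'frequency': 1,
        }
        return [gen_opt, dis_opt], [dis_sched]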
@@ -132,40 +132,40 @@ class NeptuneLogger(LightningLoggerBase):
It is recommended to keep it in the `NEPTUNE_API_TOKEN`
environment variable and then you can leave `api_key=None`
project_name: Required in online mode. Qualified name of a project in a form of
"namespace/project_name" for example "tom/minst-classification".
If None, the value of NEPTUNE_PROJECT environment variable will be taken.
You need to create the project in https://neptune.ai first.
"namespace/project_name" for example "tom/minst-classification".
If None, the value of NEPTUNE_PROJECT environment variable will be taken.
You need to create the project in https://neptune.ai first.
offline_mode: Optional default False. If offline_mode=True no logs will be send
to neptune. Usually used for debug purposes.
to neptune. Usually used for debug purposes.
close_after_fit: Optional default True. If close_after_fit=False the experiment
will not be closed after training and additional metrics,
images or artifacts can be logged. Also, remember to close the experiment explicitly
by running neptune_logger.experiment.stop().
will not be closed after training and additional metrics,
images or artifacts can be logged. Also, remember to close the experiment explicitly
by running neptune_logger.experiment.stop().
experiment_name: Optional. Editable name of the experiment.
Name is displayed in the experiment’s Details (Metadata section) and i
n experiments view as a column.
Name is displayed in the experiment’s Details (Metadata section) and
in experiments view as a column.
upload_source_files: Optional. List of source files to be uploaded.
Must be list of str or single str. Uploaded sources are displayed
in the experiment’s Source code tab.
If None is passed, Python file from which experiment was created will be uploaded.
Pass empty list ([]) to upload no files.
Unix style pathname pattern expansion is supported.
For example, you can pass '\*.py'
Must be list of str or single str. Uploaded sources are displayed
in the experiment’s Source code tab.
If None is passed, Python file from which experiment was created will be uploaded.
Pass empty list ([]) to upload no files.
Unix style pathname pattern expansion is supported.
For example, you can pass '\*.py'
to upload all python source files from the current directory.
For recursion lookup use '\**/\*.py' (for Python 3.5 and later).
For more information see glob library.
For recursion lookup use '\**/\*.py' (for Python 3.5 and later).
For more information see glob library.
params: Optional. Parameters of the experiment.
After experiment creation params are read-only.
Parameters are displayed in the experiment’s Parameters section and
each key-value pair can be viewed in experiments view as a column.
After experiment creation params are read-only.
Parameters are displayed in the experiment’s Parameters section and
each key-value pair can be viewed in experiments view as a column.
properties: Optional default is {}. Properties of the experiment.
They are editable after experiment is created.
Properties are displayed in the experiment’s Details and
each key-value pair can be viewed in experiments view as a column.
They are editable after experiment is created.
Properties are displayed in the experiment’s Details and
each key-value pair can be viewed in experiments view as a column.
tags: Optional default []. Must be list of str. Tags of the experiment.
They are editable after experiment is created (see: append_tag() and remove_tag()).
Tags are displayed in the experiment’s Details and can be viewed
in experiments view as a column.
They are editable after experiment is created (see: append_tag() and remove_tag()).
Tags are displayed in the experiment’s Details and can be viewed
in experiments view as a column.
"""
super().__init__()
self.api_key = api_key
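For reference (not part of the diff): a hedged usage sketch based only on the parameters documented in the docstring above; the project name, experiment name and params are illustrative.

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import NeptuneLogger

    neptune_logger = NeptuneLogger(
        api_key=None,  # read from the NEPTUNE_API_TOKEN environment variable
        project_name='namespace/project_name',
        experiment_name='default',
        params={'max_epochs': 10},
        tags=['pytorch-lightning'],
    )
    trainer = Trainer(logger=neptune_logger)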
@@ -17,9 +17,7 @@ class TensorBoardLogger(LightningLoggerBase):
Log to local file system in TensorBoard format

Implemented using :class:`torch.utils.tensorboard.SummaryWriter`. Logs are saved to
`os.path.join(save_dir, name, version)`

.. _tf-logger:
``os.path.join(save_dir, name, version)``

Example:
.. code-block:: python
@@ -29,14 +27,14 @@ class TensorBoardLogger(LightningLoggerBase):
trainer.train(model)

Args:
save_dir (str): Save directory
name (str): Experiment name. Defaults to "default". If it is the empty string then no per-experiment
save_dir: Save directory
name: Experiment name. Defaults to "default". If it is the empty string then no per-experiment
subdirectory is used.
version (int|str): Experiment version. If version is not specified the logger inspects the save
version: Experiment version. If version is not specified the logger inspects the save
directory for existing versions, then automatically assigns the next available version.
If it is a string then it is used as the run-specific subdirectory name,
otherwise version_${version} is used.
\**kwargs (dict): Other arguments are passed directly to the :class:`SummaryWriter` constructor.
\**kwargs: Other arguments are passed directly to the :class:`SummaryWriter` constructor.

"""
NAME_CSV_TAGS = 'meta_tags.csv'
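For reference (not part of the diff): a hedged usage sketch based on the arguments documented above; the save directory and experiment name are illustrative.

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import TensorBoardLogger

    # logs are written to os.path.join('tb_logs', 'my_model', 'version_<n>')
    logger = TensorBoardLogger(save_dir='tb_logs', name='my_model')
    trainer = Trainer(logger=logger)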
@@ -24,8 +24,6 @@ class TestTubeLogger(LightningLoggerBase):
):
r"""

.. _testTube:

Example
----------
@@ -1,5 +1,5 @@
"""
Log using `allegro.ai TRAINS <https://github.com/allegroai/trains>'_
Log using `allegro.ai TRAINS <https://github.com/allegroai/trains>`_

.. code-block:: python

@@ -23,13 +23,13 @@ Use the logger anywhere in you LightningModule as follows:
self.logger.experiment.whatever_trains_supports(...)

"""

from argparse import Namespace
from pathlib import Path
from typing import Any, Dict, Optional, Union

import numpy as np
import torch
from PIL.Image import Image

try:
import trains
@@ -205,7 +205,7 @@ class TrainsLogger(LightningLoggerBase):
@rank_zero_only
def log_image(
self, title: str, series: str,
image: Union[str, np.ndarray, 'PIL.Image', torch.Tensor],
image: Union[str, np.ndarray, Image, torch.Tensor],
step: Optional[int] = None) -> None:
"""Log Debug image in TRAINS experiment

@@ -242,7 +242,7 @@ class TrainsLogger(LightningLoggerBase):
@rank_zero_only
def log_artifact(
self, name: str,
artifact: Union[str, Path, Dict[str, Any], 'pandas.DataFrame', 'numpy.ndarray', 'PIL.Image.Image'],
artifact: Union[str, Path, Dict[str, Any], np.ndarray, Image],
metadata: Optional[Dict[str, Any]] = None, delete_after_upload: bool = False) -> None:
"""Save an artifact (file/object) in TRAINS experiment storage.

@@ -250,13 +250,15 @@ class TrainsLogger(LightningLoggerBase):
name: Artifact name. Notice! it will override previous artifact
if name already exists
artifact: Artifact object to upload. Currently supports:

- string / pathlib2.Path are treated as path to artifact file to upload
If wildcard or a folder is passed, zip file containing the
local files will be created and uploaded
If wildcard or a folder is passed, zip file containing the
local files will be created and uploaded
- dict will be stored as .json file and uploaded
- pandas.DataFrame will be stored as .csv.gz (compressed CSV file) and uploaded
- numpy.ndarray will be stored as .npz and uploaded
- PIL.Image will be stored to .png file and uploaded

metadata:
Simple key/value dictionary to store on the artifact. Defaults to None.
delete_after_upload:
@@ -347,6 +347,7 @@ Example::
gradient_clip:

.. warning:: .. deprecated:: 0.5.0

Use `gradient_clip_val` instead. Will remove 0.8.0.

log_gpu_memory
@@ -412,6 +413,7 @@ Example::
max_nb_epochs:

.. warning:: .. deprecated:: 0.5.0

Use `max_epochs` instead. Will remove 0.8.0.

min_epochs
@@ -475,6 +477,7 @@ Example::
nb_gpu_nodes:

.. warning:: .. deprecated:: 0.5.0

Use `num_nodes` instead. Will remove 0.8.0.

num_sanity_val_steps
@@ -495,6 +498,7 @@ Example::
nb_sanity_val_steps:

.. warning:: .. deprecated:: 0.5.0

Use `num_sanity_val_steps` instead. Will remove 0.8.0.

num_tpu_cores
@@ -674,11 +678,13 @@ Example::
add_row_log_interval:

.. warning:: .. deprecated:: 0.5.0

Use `row_log_interval` instead. Will remove 0.8.0.

use_amp:

.. warning:: .. deprecated:: 0.7.0

Use `precision` instead. Will remove 0.9.0.

show_progress_bar
@@ -114,7 +114,7 @@ However, when using a cluster, Lightning will NOT set these flags (and you should not either).

16 bit precision can cut your memory footprint by half. If using volta architecture GPUs
it can give a dramatic training speed-up as well.
First, install apex (if install fails, look `here <https://github.com/NVIDIA/apex>`_::
First, install apex (if install fails, look `here <https://github.com/NVIDIA/apex>`__)::

$ git clone https://github.com/NVIDIA/apex
$ cd apex
@@ -276,7 +276,7 @@ in a `HyperOptArgumentParser

Here is an example where you run a grid search of 9 combinations of hyperparams.
The full examples are
`here <https://git.io/Jv87p>`_.
`here <https://github.com/PyTorchLightning/pytorch-lightning/tree/master/pl_examples/multi_node_examples>`__.

.. code-block:: python
@@ -218,11 +218,11 @@ class TrainerEvaluationLoopMixin(ABC):
def evaluate(self, model, dataloaders, max_batches, test_mode: bool = False):
"""Run evaluation code.

:param model: PT model
:param dataloaders: list of PT dataloaders
:param max_batches: Scalar
:param test_mode
:return:
Args:
model: PT model
dataloaders: list of PT dataloaders
max_batches: Scalar
test_mode:
"""
# enable eval mode
model.zero_grad()
@@ -140,7 +140,9 @@ class Trainer(
gradient_clip_val: 0 means don't clip.

gradient_clip:
.. warning:: deprecated 0.7.0 Use `gradient_clip_val` instead. Will remove 0.9.0.
.. warning:: .. deprecated:: 0.7.0

Use `gradient_clip_val` instead. Will remove 0.9.0.

process_position: orders the tqdm bar when running multiple models on same machine.

@@ -148,6 +150,7 @@ class Trainer(

nb_gpu_nodes:
.. warning:: .. deprecated:: 0.7.0

Use `num_nodes` instead. Will remove 0.9.0.

gpus: Which GPUs to train on.
@@ -172,12 +175,14 @@ class Trainer(

max_nb_epochs:
.. warning:: .. deprecated:: 0.7.0

Use `max_epochs` instead. Will remove 0.9.0.

min_epochs: Force training for at least these many epochs

min_nb_epochs:
.. warning:: .. deprecated:: 0.7.0

Use `min_epochs` instead. Will remove 0.9.0.

max_steps: Stop training after this number of steps. Disabled by default (None).
@@ -198,18 +203,21 @@ class Trainer(

add_row_log_interval:
.. warning:: .. deprecated:: 0.7.0

Use `row_log_interval` instead. Will remove 0.9.0.

distributed_backend: The distributed backend to use.

use_amp:
.. warning:: .. deprecated:: 0.7.0

Use `precision` instead. Will remove 0.9.0.

precision: Full precision (32), half precision (16).

print_nan_grads:
.. warning:: .. deprecated:: 0.7.2

Has no effect. When detected, NaN grads will be printed automatically.
Will remove 0.9.0.

@@ -223,6 +231,7 @@ class Trainer(

nb_sanity_val_steps:
.. warning:: .. deprecated:: 0.7.0

Use `num_sanity_val_steps` instead. Will remove 0.8.0.

truncated_bptt_steps: Truncated back prop breaks performs backprop every k steps of
@@ -503,10 +512,10 @@ class Trainer(
def tng_tqdm_dic(self):
"""Read-only for tqdm metrics.

:return: dictionary

.. warning:: .. deprecated:: 0.5.0
Use `training_tqdm_dict` instead. Will remove 0.8.0.

Use `training_tqdm_dict` instead. Will remove 0.8.0.

"""
warnings.warn("`tng_tqdm_dic` has renamed to `training_tqdm_dict` since v0.5.0"
" and this method will be removed in v0.8.0", DeprecationWarning)
@@ -841,7 +850,7 @@ class Trainer(
Separates from fit to make sure you never run on your test set until you want to.

Args:
model (:class:`.LightningModule`): The model to test.
model: The model to test.

Example::

@@ -879,13 +888,14 @@ class Trainer(


class _PatchDataLoader(object):
r'''
r"""
Callable object for patching dataloaders passed into trainer.fit().
Use this class to override model.*_dataloader() and be pickle-compatible.

Args:
dataloader: Dataloader object to return when called.
'''

"""
def __init__(self, dataloader: Union[List[DataLoader], DataLoader]):
self.dataloader = dataloader