parent 3ad6169f18
commit 774d9be357

@@ -11,7 +11,7 @@ Enable Early Stopping
 ---------------------
 There are two ways to enable early stopping.
 
-.. note:: See: :ref:`trainer`
+.. seealso:: :ref:`trainer`
 
 .. code-block:: python
 
@@ -35,4 +35,4 @@ To disable early stopping pass ``False`` to the `early_stop_callback`.
 Note that ``None`` will not disable early stopping but will lead to the
 default behaviour.
 
-.. note:: See: :ref:`trainer`
+.. seealso:: :ref:`trainer`
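
Aside, not part of the commit: a minimal sketch of the behaviour these hunks document, enabling early stopping with a configured callback and disabling it with ``False``. Class and argument names follow the 0.7-era API and should be treated as assumptions.

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.callbacks import EarlyStopping

    # one way to enable it: pass a configured callback
    early_stop = EarlyStopping(monitor='val_loss', patience=3)
    trainer = Trainer(early_stop_callback=early_stop)

    # pass False to disable; None keeps the default behaviour
    trainer = Trainer(early_stop_callback=False)
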
@@ -7,7 +7,7 @@ Comet.ml
 `Comet.ml <https://www.comet.ml/site/>`_ is a third-party logger.
 To use CometLogger as your logger do the following.
 
-.. note:: See: :ref:`comet` docs.
+.. seealso:: :ref:`comet` docs.
 
 .. code-block:: python
 
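
Not in the diff: a hedged sketch of the setup the elided code block likely showed; the constructor argument name is an assumption.

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import CometLogger

    comet_logger = CometLogger(api_key='YOUR_COMET_API_KEY')  # key name assumed
    trainer = Trainer(logger=comet_logger)
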
@@ -38,7 +38,7 @@ Neptune.ai
 `Neptune.ai <https://neptune.ai/>`_ is a third-party logger.
 To use Neptune.ai as your logger do the following.
 
-.. note:: See: :ref:`neptune` docs.
+.. seealso:: :ref:`neptune` docs.
 
 .. code-block:: python
 
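
Again as an aside, a sketch under the same caveat (argument names assumed):

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import NeptuneLogger

    neptune_logger = NeptuneLogger(
        api_key='YOUR_NEPTUNE_API_TOKEN',       # assumed parameter name
        project_name='your_workspace/your_project',
    )
    trainer = Trainer(logger=neptune_logger)
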
@@ -68,7 +68,7 @@ allegro.ai TRAINS
 `allegro.ai <https://github.com/allegroai/trains/>`_ is a third-party logger.
 To use TRAINS as your logger do the following.
 
-.. note:: See: :ref:`trains` docs.
+.. seealso:: :ref:`trains` docs.
 
 .. code-block:: python
 
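
A sketch of the analogous TRAINS setup (constructor arguments are my assumption):

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import TrainsLogger

    trains_logger = TrainsLogger(project_name='examples', task_name='lightning demo')
    trainer = Trainer(logger=trains_logger)
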
@@ -95,7 +95,7 @@ Tensorboard
 
 To use `Tensorboard <https://pytorch.org/docs/stable/tensorboard.html>`_ as your logger do the following.
 
-.. note:: See: TensorBoardLogger :ref:`tf-logger`
+.. seealso:: TensorBoardLogger :ref:`tf-logger`
 
 .. code-block:: python
 
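
For TensorBoard the sketch looks like this (``save_dir``/``name`` values are illustrative):

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import TensorBoardLogger

    tb_logger = TensorBoardLogger(save_dir='lightning_logs', name='my_model')
    trainer = Trainer(logger=tb_logger)
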
@@ -121,7 +121,7 @@ Test Tube
 `Test Tube <https://github.com/williamFalcon/test-tube>`_ is a tensorboard logger but with nicer file structure.
 To use TestTube as your logger do the following.
 
-.. note:: See: TestTube :ref:`testTube`
+.. seealso:: TestTube :ref:`testTube`
 
 .. code-block:: python
 
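
And for Test Tube, with the same caveats on names:

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import TestTubeLogger

    tt_logger = TestTubeLogger(save_dir='logs', name='my_exp')
    trainer = Trainer(logger=tt_logger)
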
@@ -146,7 +146,7 @@ Wandb
 `Wandb <https://www.wandb.com/>`_ is a third-party logger.
 To use Wandb as your logger do the following.
 
-.. note:: See: :ref:`wandb` docs
+.. seealso:: :ref:`wandb` docs
 
 .. code-block:: python
 
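
Finally, a hedged Wandb sketch (run and project names are placeholders):

.. code-block:: python

    from pytorch_lightning import Trainer
    from pytorch_lightning.loggers import WandbLogger

    wandb_logger = WandbLogger(name='my_run', project='my_project')
    trainer = Trainer(logger=wandb_logger)
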
@@ -22,7 +22,7 @@ Control log writing frequency
 Writing to a logger can be expensive. In Lightning you can set the interval at which you
 want to log using this trainer flag.
 
-.. note:: See: :ref:`trainer`
+.. seealso:: :ref:`trainer`
 
 .. code-block:: python
 
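
Sketch, not from the commit: the hunk context elides the flag itself; ``log_save_interval`` below is my best guess at the 0.7-era name and should be treated as an assumption.

.. code-block:: python

    from pytorch_lightning import Trainer

    # write logs to disk only every k steps (flag name assumed)
    k = 100
    trainer = Trainer(log_save_interval=k)
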
@@ -16,7 +16,7 @@ Force training for min or max epochs
 -------------------------------------
 It can be useful to force training for a minimum number of epochs or limit to a max number.
 
-.. note:: See: :ref:`trainer`
+.. seealso:: :ref:`trainer`
 
 .. code-block:: python
 
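
Aside: the flags this section documents are ``min_epochs`` and ``max_epochs``; a minimal sketch:

.. code-block:: python

    from pytorch_lightning import Trainer

    # force at least 1 epoch, stop after at most 1000
    trainer = Trainer(min_epochs=1, max_epochs=1000)
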
@@ -472,7 +472,7 @@ First, change the runtime to TPU (and reinstall lightning).
 
 Next, install the required xla library (adds support for PyTorch on TPUs)
 
-.. code-block:: python
+.. code-block::
 
     import collections
     from datetime import datetime, timedelta
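
Aside, not from the commit: the elided install block of that era fetched pytorch/xla's Colab env-setup script; the pinned version below is hypothetical.

.. code-block::

    VERSION = "20200325"  # hypothetical date pin
    !curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
    !python pytorch-xla-env-setup.py --version $VERSION
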
@@ -7,7 +7,7 @@ Accumulate gradients
 Accumulated gradients runs K small batches of size N before doing a backwards pass.
 The effect is a large effective batch size of size KxN.
 
-.. note:: See: :ref:`trainer`
+.. seealso:: :ref:`trainer`
 
 .. code-block:: python
 
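
Aside: the flag behind this section is ``accumulate_grad_batches``; a minimal sketch with K=4:

.. code-block:: python

    from pytorch_lightning import Trainer

    # run 4 batches of size N before each backward/optimizer step,
    # giving an effective batch size of 4xN
    trainer = Trainer(accumulate_grad_batches=4)
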
@@ -20,7 +20,7 @@ Gradient Clipping
 Gradient clipping may be enabled to avoid exploding gradients. Specifically, this will `clip the gradient
 norm <https://pytorch.org/docs/stable/nn.html#torch.nn.utils.clip_grad_norm_>`_ computed over all model parameters together.
 
-.. note:: See: :ref:`trainer`
+.. seealso:: :ref:`trainer`
 
 .. code-block:: python
 
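
Aside: the corresponding flag is ``gradient_clip_val``; a minimal sketch:

.. code-block:: python

    from pytorch_lightning import Trainer

    # clip the gradient norm, computed over all parameters together, at 0.5
    trainer = Trainer(gradient_clip_val=0.5)
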
@@ -8,8 +8,8 @@ from argparse import Namespace
 from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 
 import torch
-import torch.distributed as dist
 from torch import Tensor
+from torch.distributed import init_process_group
 from torch.nn.parallel import DistributedDataParallel
 from torch.optim import Adam
 from torch.optim.optimizer import Optimizer
@@ -859,7 +859,7 @@ class LightningModule(ABC, GradInformation, ModelIO, ModelHooks):
 
         root_node = self.trainer.resolve_root_node_address(root_node)
         os.environ['MASTER_ADDR'] = root_node
-        dist.init_process_group('nccl', rank=proc_rank, world_size=world_size)
+        init_process_group('nccl', rank=proc_rank, world_size=world_size)
 
     def configure_apex(
             self,
@@ -29,9 +29,7 @@ from argparse import Namespace
 from pathlib import Path
 from typing import Any, Dict, Optional, Union
 
-import PIL
 import numpy as np
-import pandas as pd
 import torch
 
 try:
@@ -79,6 +77,7 @@ class TrainsLogger(LightningLoggerBase):
 
     Example:
         .. code-block:: python
 
            self.logger.experiment.some_trains_function()
 
     """
@@ -180,7 +179,7 @@ class TrainsLogger(LightningLoggerBase):
     @rank_zero_only
     def log_image(
             self, title: str, series: str,
-            image: Union[str, np.ndarray, PIL.Image.Image, torch.Tensor],
+            image: Union[str, np.ndarray, 'PIL.Image', torch.Tensor],
             step: Optional[int] = None) -> None:
         """Log Debug image in TRAINS experiment
 
@@ -217,7 +216,7 @@ class TrainsLogger(LightningLoggerBase):
     @rank_zero_only
     def log_artifact(
             self, name: str,
-            artifact: Union[str, Path, Dict[str, Any], pd.DataFrame, np.ndarray, PIL.Image.Image],
+            artifact: Union[str, Path, Dict[str, Any], 'pandas.DataFrame', 'numpy.ndarray', 'PIL.Image.Image'],
             metadata: Optional[Dict[str, Any]] = None, delete_after_upload: bool = False) -> None:
         """Save an artifact (file/object) in TRAINS experiment storage.
 
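
A side note, not part of the commit: the hunks above drop hard imports of PIL and pandas by switching the annotations to strings, which only type checkers resolve. A minimal sketch of that pattern (the ``TYPE_CHECKING`` guard and names here are illustrative, not what the commit itself does):

.. code-block:: python

    from typing import TYPE_CHECKING, Union

    if TYPE_CHECKING:  # imported for type checkers only, never at runtime
        import PIL.Image

    def show(image: Union[str, 'PIL.Image.Image']) -> None:
        """Accept a path or an already-loaded PIL image."""
        ...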