lightning/pytorch_lightning/loggers/mlflow.py

212 lines
7.2 KiB
Python
Raw Normal View History

2020-08-20 02:03:22 +00:00
# Copyright The PyTorch Lightning team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
MLflow Logger
-------------
"""
import logging
import re
from argparse import Namespace
from time import time
from typing import Any, Dict, Optional, Union
from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_experiment
from pytorch_lightning.utilities import _module_available, rank_zero_only, rank_zero_warn
# Module-level logger used for non-rank-aware warnings emitted by this file.
log = logging.getLogger(__name__)

# URI scheme prefix that marks a tracking URI as a local file store.
LOCAL_FILE_URI_PREFIX = "file:"

# First ask the utility helper whether `mlflow` looks importable, then try the
# real import: the helper's answer is overridden if the import itself fails.
_MLFLOW_AVAILABLE = _module_available("mlflow")
try:
    import mlflow
    from mlflow.tracking import MlflowClient
# todo: there seems to be still some remaining import error with Conda env
except ImportError:
    # Keep the names defined so that later `mlflow is None` checks work.
    _MLFLOW_AVAILABLE = False
    mlflow = None
    MlflowClient = None
class MLFlowLogger(LightningLoggerBase):
    """
    Log using `MLflow <https://mlflow.org>`_.

    Install it with pip:

    .. code-block:: bash

        pip install mlflow

    .. code-block:: python

        from pytorch_lightning import Trainer
        from pytorch_lightning.loggers import MLFlowLogger
        mlf_logger = MLFlowLogger(
            experiment_name="default",
            tracking_uri="file:./ml-runs"
        )
        trainer = Trainer(logger=mlf_logger)

    Use the logger anywhere in your :class:`~pytorch_lightning.core.lightning.LightningModule` as follows:

    .. code-block:: python

        from pytorch_lightning import LightningModule
        class LitModel(LightningModule):
            def training_step(self, batch, batch_idx):
                # example
                self.logger.experiment.whatever_ml_flow_supports(...)

            def any_lightning_module_function_or_hook(self):
                self.logger.experiment.whatever_ml_flow_supports(...)

    Args:
        experiment_name: The name of the experiment
        tracking_uri: Address of local or remote tracking server.
            If not provided, defaults to `file:<save_dir>`.
        tags: A dictionary of tags attached to the run created by this logger.
        save_dir: A path to a local directory where the MLflow runs get saved.
            Defaults to `./mlruns` if `tracking_uri` is not provided.
            Has no effect if `tracking_uri` is provided.
        prefix: A string to put at the beginning of metric keys.

    Raises:
        ImportError:
            If required MLFlow package is not installed on the device.
    """

    LOGGER_JOIN_CHAR = '-'

    def __init__(
        self,
        experiment_name: str = 'default',
        tracking_uri: Optional[str] = None,
        tags: Optional[Dict[str, Any]] = None,
        save_dir: Optional[str] = './mlruns',
        prefix: str = '',
    ):
        # The module-level import guard sets `mlflow` to None when unavailable.
        if mlflow is None:
            raise ImportError(
                'You want to use `mlflow` logger which is not installed yet,'
                ' install it with `pip install mlflow`.'
            )
        super().__init__()
        # Without an explicit tracking URI, fall back to a local file store.
        if not tracking_uri:
            tracking_uri = f'{LOCAL_FILE_URI_PREFIX}{save_dir}'

        self._experiment_name = experiment_name
        # Experiment and run ids are resolved lazily by the `experiment` property.
        self._experiment_id = None
        self._tracking_uri = tracking_uri
        self._run_id = None
        self.tags = tags
        self._prefix = prefix
        self._mlflow_client = MlflowClient(tracking_uri)

    @property
    @rank_zero_experiment
    def experiment(self) -> MlflowClient:
        r"""
        Actual MLflow object. To use MLflow features in your
        :class:`~pytorch_lightning.core.lightning.LightningModule` do the following.

        Example::

            self.logger.experiment.some_mlflow_function()

        On first access this looks up the experiment by name (creating it if it
        is not found) and creates a run in it; both ids are cached on the logger.
        """
        if self._experiment_id is None:
            expt = self._mlflow_client.get_experiment_by_name(self._experiment_name)
            if expt is not None:
                self._experiment_id = expt.experiment_id
            else:
                log.warning(f'Experiment with name {self._experiment_name} not found. Creating it.')
                self._experiment_id = self._mlflow_client.create_experiment(name=self._experiment_name)

        if self._run_id is None:
            run = self._mlflow_client.create_run(experiment_id=self._experiment_id, tags=self.tags)
            self._run_id = run.info.run_id
        return self._mlflow_client

    @property
    def run_id(self):
        """Id of the MLflow run used by this logger."""
        # create the experiment if it does not exist to get the run id
        _ = self.experiment
        return self._run_id

    @property
    def experiment_id(self):
        """Id of the MLflow experiment used by this logger."""
        # create the experiment if it does not exist to get the experiment id
        _ = self.experiment
        return self._experiment_id

    @rank_zero_only
    def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None:
        """
        Record hyperparameters as MLflow params on the current run.

        Args:
            params: Hyperparameters as a dict or :class:`argparse.Namespace`.
                They are converted and flattened with the base-class helpers.
                Entries whose string form is longer than 250 characters are
                skipped with a warning.
        """
        params = self._convert_params(params)
        params = self._flatten_dict(params)
        for k, v in params.items():
            if len(str(v)) > 250:
                rank_zero_warn(
                    f"Mlflow only allows parameters with up to 250 characters. Discard {k}={v}", RuntimeWarning
                )
                continue

            self.experiment.log_param(self.run_id, k, v)

    @rank_zero_only
    def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
        """
        Record metrics on the current run.

        Args:
            metrics: Mapping of metric name to value. The configured prefix is
                added to each key. String values are discarded with a warning,
                and characters not accepted in metric names are removed from keys.
            step: Optional step number forwarded to MLflow.
        """
        assert rank_zero_only.rank == 0, 'experiment tried to log from global_rank != 0'

        metrics = self._add_prefix(metrics)

        # One timestamp for the whole batch so all metrics of a call line up.
        timestamp_ms = int(time() * 1000)
        for k, v in metrics.items():
            if isinstance(v, str):
                log.warning(f'Discarding metric with string value {k}={v}.')
                continue

            new_k = re.sub(r"[^a-zA-Z0-9_/. -]+", "", k)
            if k != new_k:
                # The message lists every special character the pattern above keeps.
                rank_zero_warn(
                    "MLFlow only allows '_', '/', '.', '-' and ' ' special characters in metric name."
                    f" Replacing {k} with {new_k}.", RuntimeWarning
                )
                k = new_k

            self.experiment.log_metric(self.run_id, k, v, timestamp_ms, step)

    @rank_zero_only
    def finalize(self, status: str = 'FINISHED') -> None:
        """
        Mark the current run as terminated.

        Args:
            status: Final status. The lowercase values ``'success'``,
                ``'finished'`` and ``'failed'`` are translated to the upper-case
                MLflow run-status names; any other value is forwarded unchanged.
        """
        super().finalize(status)
        # MLflow run-status names are upper-case (e.g. FINISHED, FAILED); translate
        # the lowercase spellings and pass anything else through untouched.
        status_aliases = {'success': 'FINISHED', 'finished': 'FINISHED', 'failed': 'FAILED'}
        status = status_aliases.get(status, status)
        if self.experiment.get_run(self.run_id):
            self.experiment.set_terminated(self.run_id, status)

    @property
    def save_dir(self) -> Optional[str]:
        """
        The root file directory in which MLflow experiments are saved.

        Return:
            Local path to the root experiment directory if the tracking uri is local.
            Otherwise returns `None`.
        """
        if self._tracking_uri.startswith(LOCAL_FILE_URI_PREFIX):
            # Slice off the prefix: `str.lstrip` would strip any leading run of the
            # characters 'f', 'i', 'l', 'e', ':' and so mangle paths like "file:logs".
            return self._tracking_uri[len(LOCAL_FILE_URI_PREFIX):]
        return None

    @property
    def name(self) -> str:
        """Logger name; this is the MLflow experiment id."""
        return self.experiment_id

    @property
    def version(self) -> str:
        """Logger version; this is the MLflow run id."""
        return self.run_id