2020-08-20 02:03:22 +00:00
|
|
|
# Copyright The PyTorch Lightning team.
|
|
|
|
#
|
|
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
# you may not use this file except in compliance with the License.
|
|
|
|
# You may obtain a copy of the License at
|
|
|
|
#
|
|
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
#
|
|
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
# See the License for the specific language governing permissions and
|
|
|
|
# limitations under the License.
|
2019-11-28 17:48:55 +00:00
|
|
|
"""
|
2020-11-13 15:05:54 +00:00
|
|
|
MLflow Logger
|
|
|
|
-------------
|
2019-11-28 17:48:55 +00:00
|
|
|
"""
|
2021-03-02 09:47:55 +00:00
|
|
|
import logging
|
2021-05-12 09:26:57 +00:00
|
|
|
import os
|
2020-11-10 11:50:25 +00:00
|
|
|
import re
|
2020-03-04 14:33:39 +00:00
|
|
|
from argparse import Namespace
|
2019-11-27 03:39:18 +00:00
|
|
|
from time import time
|
2020-09-19 16:51:43 +00:00
|
|
|
from typing import Any, Dict, Optional, Union
|
2019-11-27 03:39:18 +00:00
|
|
|
|
2020-06-30 22:09:16 +00:00
|
|
|
from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_experiment
|
2021-01-15 17:23:56 +00:00
|
|
|
from pytorch_lightning.utilities import _module_available, rank_zero_only, rank_zero_warn
|
2019-11-27 03:39:18 +00:00
|
|
|
|
2021-03-02 09:47:55 +00:00
|
|
|
# Module-level logger used for all messages emitted by this integration.
log = logging.getLogger(__name__)

# Scheme prefix that marks a tracking URI as pointing at a local file store.
LOCAL_FILE_URI_PREFIX = "file:"

# Provisional availability flag; it is reset to ``False`` below when the real import fails.
_MLFLOW_AVAILABLE = _module_available("mlflow")

try:
    import mlflow
    from mlflow.tracking import context, MlflowClient
    from mlflow.utils.mlflow_tags import MLFLOW_RUN_NAME
# todo: there seems to be still some remaining import error with Conda env
except ImportError:
    # Keep the module importable without MLflow: placeholders for the imported names
    # and a literal stand-in for the run-name tag key.
    _MLFLOW_AVAILABLE = False
    mlflow = MlflowClient = context = None
    MLFLOW_RUN_NAME = "mlflow.runName"

# Select ``resolve_tags`` from wherever the available ``context`` module exposes it.
if hasattr(context, "resolve_tags"):
    # before v1.1.0
    from mlflow.tracking.context import resolve_tags
elif hasattr(context, "registry"):
    # since v1.1.0
    from mlflow.tracking.context.registry import resolve_tags
else:

    def resolve_tags(tags=None):
        # Fallback when neither location is available (including ``context is None``):
        # hand the tags back unchanged.
        return tags
|
2021-01-05 19:34:47 +00:00
|
|
|
|
|
|
|
|
2019-11-27 03:39:18 +00:00
|
|
|
class MLFlowLogger(LightningLoggerBase):
    """
    Log using `MLflow <https://mlflow.org>`_.

    Install it with pip:

    .. code-block:: bash

        pip install mlflow

    .. code-block:: python

        from pytorch_lightning import Trainer
        from pytorch_lightning.loggers import MLFlowLogger

        mlf_logger = MLFlowLogger(experiment_name="default", tracking_uri="file:./ml-runs")
        trainer = Trainer(logger=mlf_logger)

    Use the logger anywhere in your :class:`~pytorch_lightning.core.lightning.LightningModule` as follows:

    .. code-block:: python

        from pytorch_lightning import LightningModule


        class LitModel(LightningModule):
            def training_step(self, batch, batch_idx):
                # example
                self.logger.experiment.whatever_ml_flow_supports(...)

            def any_lightning_module_function_or_hook(self):
                self.logger.experiment.whatever_ml_flow_supports(...)

    Args:
        experiment_name: The name of the experiment
        run_name: Name of the new run. The `run_name` is internally stored as a ``mlflow.runName`` tag.
            If the ``mlflow.runName`` tag has already been set in `tags`, the value is overridden by the `run_name`.
        tracking_uri: Address of local or remote tracking server.
            If not provided, defaults to `MLFLOW_TRACKING_URI` environment variable if set, otherwise it falls
            back to `file:<save_dir>`. Note that the environment variable is read once, when this module is
            imported, because it is the default value of the argument.
        tags: A dictionary of tags for the experiment.
        save_dir: A path to a local directory where the MLflow runs get saved.
            Defaults to `./mlruns` if `tracking_uri` is not provided.
            Has no effect if `tracking_uri` is provided.
        prefix: A string to put at the beginning of metric keys.
        artifact_location: The location to store run artifacts. If not provided, the server picks an appropriate
            default.

    Raises:
        ImportError:
            If required MLFlow package is not installed on the device.
    """

    LOGGER_JOIN_CHAR = "-"

    def __init__(
        self,
        experiment_name: str = "default",
        run_name: Optional[str] = None,
        tracking_uri: Optional[str] = os.getenv("MLFLOW_TRACKING_URI"),
        tags: Optional[Dict[str, Any]] = None,
        save_dir: Optional[str] = "./mlruns",
        prefix: str = "",
        artifact_location: Optional[str] = None,
    ):
        if mlflow is None:
            raise ImportError(
                "You want to use `mlflow` logger which is not installed yet, install it with `pip install mlflow`."
            )
        super().__init__()
        # An empty or missing tracking URI means "store runs locally under `save_dir`".
        if not tracking_uri:
            tracking_uri = f"{LOCAL_FILE_URI_PREFIX}{save_dir}"

        self._experiment_name = experiment_name
        # Experiment and run ids are resolved lazily on first access to `experiment`.
        self._experiment_id = None
        self._tracking_uri = tracking_uri
        self._run_name = run_name
        self._run_id = None
        self.tags = tags
        self._prefix = prefix
        self._artifact_location = artifact_location

        self._mlflow_client = MlflowClient(tracking_uri)

    @property
    @rank_zero_experiment
    def experiment(self) -> MlflowClient:
        r"""
        Actual MLflow object. To use MLflow features in your
        :class:`~pytorch_lightning.core.lightning.LightningModule` do the following.

        Example::

            self.logger.experiment.some_mlflow_function()

        On first access this looks up the experiment by name (creating it if it does not exist)
        and creates a run, caching both ids on the logger.
        """
        if self._experiment_id is None:
            expt = self._mlflow_client.get_experiment_by_name(self._experiment_name)
            if expt is not None:
                self._experiment_id = expt.experiment_id
            else:
                log.warning(f"Experiment with name {self._experiment_name} not found. Creating it.")
                self._experiment_id = self._mlflow_client.create_experiment(
                    name=self._experiment_name, artifact_location=self._artifact_location
                )

        if self._run_id is None:
            if self._run_name is not None:
                # The run name is stored as a tag; an explicit `run_name` wins over a tag of the same key.
                self.tags = self.tags or {}
                if MLFLOW_RUN_NAME in self.tags:
                    log.warning(
                        f"The tag {MLFLOW_RUN_NAME} is found in tags. The value will be overridden by {self._run_name}."
                    )
                self.tags[MLFLOW_RUN_NAME] = self._run_name
            run = self._mlflow_client.create_run(experiment_id=self._experiment_id, tags=resolve_tags(self.tags))
            self._run_id = run.info.run_id
        return self._mlflow_client

    @property
    def run_id(self) -> str:
        """
        Create the experiment if it does not exist to get the run id.

        Returns:
            The run id.
        """
        # Accessing `experiment` triggers the lazy experiment/run creation.
        _ = self.experiment
        return self._run_id

    @property
    def experiment_id(self) -> str:
        """
        Create the experiment if it does not exist to get the experiment id.

        Returns:
            The experiment id.
        """
        # Accessing `experiment` triggers the lazy experiment/run creation.
        _ = self.experiment
        return self._experiment_id

    @rank_zero_only
    def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None:
        """
        Log hyperparameters to the current run, one MLflow parameter per flattened key.

        Args:
            params: Hyperparameters as a dictionary or an ``argparse.Namespace``. They are passed through
                ``self._convert_params`` and ``self._flatten_dict`` before logging.

        Values whose string representation is longer than 250 characters are skipped with a warning.
        """
        params = self._convert_params(params)
        params = self._flatten_dict(params)
        for k, v in params.items():
            if len(str(v)) > 250:
                rank_zero_warn(
                    f"Mlflow only allows parameters with up to 250 characters. Discard {k}={v}", RuntimeWarning
                )
                continue

            self.experiment.log_param(self.run_id, k, v)

    @rank_zero_only
    def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None:
        """
        Log metrics to the current run.

        Args:
            metrics: Mapping of metric name to value. Names are passed through ``self._add_prefix`` first.
            step: Optional step to associate with the logged values.

        String-valued metrics are discarded with a warning. Characters outside the set accepted by the
        sanitizing pattern below are removed from metric names, with a warning when a name changes.
        """
        assert rank_zero_only.rank == 0, "experiment tried to log from global_rank != 0"

        metrics = self._add_prefix(metrics)

        # A single timestamp (milliseconds) is shared by every metric logged in this call.
        timestamp_ms = int(time() * 1000)
        for k, v in metrics.items():
            if isinstance(v, str):
                log.warning(f"Discarding metric with string value {k}={v}.")
                continue

            new_k = re.sub("[^a-zA-Z0-9_/. -]+", "", k)
            if k != new_k:
                rank_zero_warn(
                    "MLFlow only allows '_', '/', '.' and ' ' special characters in metric name."
                    f" Replacing {k} with {new_k}.",
                    RuntimeWarning,
                )
                k = new_k

            self.experiment.log_metric(self.run_id, k, v, timestamp_ms, step)

    @rank_zero_only
    def finalize(self, status: str = "FINISHED") -> None:
        """
        Mark the current run as terminated.

        Args:
            status: Final status of the run. The value ``"success"`` is translated to ``"FINISHED"``;
                any other value is passed to MLflow unchanged.
        """
        super().finalize(status)
        status = "FINISHED" if status == "success" else status
        if self.experiment.get_run(self.run_id):
            self.experiment.set_terminated(self.run_id, status)

    @property
    def save_dir(self) -> Optional[str]:
        """
        The root file directory in which MLflow experiments are saved.

        Return:
            Local path to the root experiment directory if the tracking uri is local.
            Otherwise returns `None`.
        """
        if self._tracking_uri.startswith(LOCAL_FILE_URI_PREFIX):
            # Slice the prefix off instead of using `str.lstrip`: `lstrip` treats its argument as a set of
            # characters and would also eat leading "f", "i", "l", "e" or ":" characters of the path itself.
            return self._tracking_uri[len(LOCAL_FILE_URI_PREFIX):]
        return None

    @property
    def name(self) -> str:
        """
        Get the experiment id.

        Returns:
            The experiment id.
        """
        return self.experiment_id

    @property
    def version(self) -> str:
        """
        Get the run id.

        Returns:
            The run id.
        """
        return self.run_id
|