""" Log using `allegro.ai TRAINS `_ .. code-block:: python from pytorch_lightning.loggers import TrainsLogger trains_logger = TrainsLogger( project_name="pytorch lightning", task_name="default", ) trainer = Trainer(logger=trains_logger) Use the logger anywhere in you LightningModule as follows: .. code-block:: python def train_step(...): # example self.logger.experiment.whatever_trains_supports(...) def any_lightning_module_function_or_hook(...): self.logger.experiment.whatever_trains_supports(...) """ from argparse import Namespace from os import environ from pathlib import Path from typing import Any, Dict, Optional, Union import numpy as np import torch from PIL.Image import Image try: import trains from trains import Task except ImportError: # pragma: no-cover raise ImportError('You want to use `TRAINS` logger which is not installed yet,' # pragma: no-cover ' install it with `pip install trains`.') from pytorch_lightning import _logger as log from pytorch_lightning.loggers.base import LightningLoggerBase, rank_zero_only class TrainsLogger(LightningLoggerBase): """Logs using TRAINS Args: project_name: The name of the experiment's project. Defaults to None. task_name: The name of the experiment. Defaults to None. task_type: The name of the experiment. Defaults to 'training'. reuse_last_task_id: Start with the previously used task id. Defaults to True. output_uri: Default location for output models. Defaults to None. auto_connect_arg_parser: Automatically grab the ArgParser and connect it with the task. Defaults to True. auto_connect_frameworks: If True, automatically patch to trains backend. Defaults to True. auto_resource_monitoring: If true, machine vitals will be sent along side the task scalars. Defaults to True. Examples: >>> logger = TrainsLogger("lightning_log", "my-lightning-test", output_uri=".") # doctest: +ELLIPSIS TRAINS Task: ... TRAINS results page: ... >>> logger.log_metrics({"val_loss": 1.23}, step=0) >>> logger.log_text("sample test") sample test >>> import numpy as np >>> logger.log_artifact("confusion matrix", np.ones((2, 3))) >>> logger.log_image("passed", "Image 1", np.random.randint(0, 255, (200, 150, 3), dtype=np.uint8)) """ _bypass = None def __init__( self, project_name: Optional[str] = None, task_name: Optional[str] = None, task_type: str = 'training', reuse_last_task_id: bool = True, output_uri: Optional[str] = None, auto_connect_arg_parser: bool = True, auto_connect_frameworks: bool = True, auto_resource_monitoring: bool = True ) -> None: super().__init__() if self.bypass_mode(): self._trains = None print('TRAINS Task: running in bypass mode') print('TRAINS results page: disabled') class _TaskStub(object): def __call__(self, *args, **kwargs): return self def __getattr__(self, attr): if attr in ('name', 'id'): return '' return self def __setattr__(self, attr, val): pass self._trains = _TaskStub() else: self._trains = Task.init( project_name=project_name, task_name=task_name, task_type=task_type, reuse_last_task_id=reuse_last_task_id, output_uri=output_uri, auto_connect_arg_parser=auto_connect_arg_parser, auto_connect_frameworks=auto_connect_frameworks, auto_resource_monitoring=auto_resource_monitoring ) @property def experiment(self) -> Task: r"""Actual TRAINS object. To use TRAINS features do the following. Example: .. code-block:: python self.logger.experiment.some_trains_function() """ return self._trains @property def id(self) -> Union[str, None]: """ ID is a uuid (string) representing this specific experiment in the entire system. """ if not self._trains: return None return self._trains.id @rank_zero_only def log_hyperparams(self, params: Union[Dict[str, Any], Namespace]) -> None: """Log hyperparameters (numeric values) in TRAINS experiments Args: params: The hyperparameters that passed through the model. """ if not self._trains: return if not params: return params = self._convert_params(params) params = self._flatten_dict(params) self._trains.connect(params) @rank_zero_only def log_metrics(self, metrics: Dict[str, float], step: Optional[int] = None) -> None: """Log metrics (numeric values) in TRAINS experiments. This method will be called by Trainer. Args: metrics: The dictionary of the metrics. If the key contains "/", it will be split by the delimiter, then the elements will be logged as "title" and "series" respectively. step: Step number at which the metrics should be recorded. Defaults to None. """ if not self._trains: return if not step: step = self._trains.get_last_iteration() for k, v in metrics.items(): if isinstance(v, str): log.warning("Discarding metric with string value {}={}".format(k, v)) continue if isinstance(v, torch.Tensor): v = v.item() parts = k.split('/') if len(parts) <= 1: series = title = k else: title = parts[0] series = '/'.join(parts[1:]) self._trains.get_logger().report_scalar( title=title, series=series, value=v, iteration=step) @rank_zero_only def log_metric(self, title: str, series: str, value: float, step: Optional[int] = None) -> None: """Log metrics (numeric values) in TRAINS experiments. This method will be called by the users. Args: title: The title of the graph to log, e.g. loss, accuracy. series: The series name in the graph, e.g. classification, localization. value: The value to log. step: Step number at which the metrics should be recorded. Defaults to None. """ if not self._trains: return if not step: step = self._trains.get_last_iteration() if isinstance(value, torch.Tensor): value = value.item() self._trains.get_logger().report_scalar( title=title, series=series, value=value, iteration=step) @rank_zero_only def log_text(self, text: str) -> None: """Log console text data in TRAINS experiment Args: text: The value of the log (data-point). """ if self.bypass_mode(): print(text) return if not self._trains: return self._trains.get_logger().report_text(text) @rank_zero_only def log_image( self, title: str, series: str, image: Union[str, np.ndarray, Image, torch.Tensor], step: Optional[int] = None) -> None: """Log Debug image in TRAINS experiment Args: title: The title of the debug image, i.e. "failed", "passed". series: The series name of the debug image, i.e. "Image 0", "Image 1". image: Debug image to log. Can be one of the following types: Torch, Numpy, PIL image, path to image file (str) If Numpy or Torch, the image is assume to be the following: shape: CHW color space: RGB value range: [0., 1.] (float) or [0, 255] (uint8) step: Step number at which the metrics should be recorded. Defaults to None. """ if not self._trains: return if not step: step = self._trains.get_last_iteration() if isinstance(image, str): self._trains.get_logger().report_image( title=title, series=series, local_path=image, iteration=step) else: if isinstance(image, torch.Tensor): image = image.cpu().numpy() if isinstance(image, np.ndarray): image = image.transpose(1, 2, 0) self._trains.get_logger().report_image( title=title, series=series, image=image, iteration=step) @rank_zero_only def log_artifact( self, name: str, artifact: Union[str, Path, Dict[str, Any], np.ndarray, Image], metadata: Optional[Dict[str, Any]] = None, delete_after_upload: bool = False) -> None: """Save an artifact (file/object) in TRAINS experiment storage. Arguments: name: Artifact name. Notice! it will override previous artifact if name already exists artifact: Artifact object to upload. Currently supports: - string / pathlib2.Path are treated as path to artifact file to upload If wildcard or a folder is passed, zip file containing the local files will be created and uploaded - dict will be stored as .json file and uploaded - pandas.DataFrame will be stored as .csv.gz (compressed CSV file) and uploaded - numpy.ndarray will be stored as .npz and uploaded - PIL.Image will be stored to .png file and uploaded metadata: Simple key/value dictionary to store on the artifact. Defaults to None. delete_after_upload: If True local artifact will be deleted (only applies if artifact_object is a local file). Defaults to False. """ if not self._trains: return self._trains.upload_artifact( name=name, artifact_object=artifact, metadata=metadata, delete_after_upload=delete_after_upload ) def save(self) -> None: pass @rank_zero_only def finalize(self, status: str = None) -> None: if self.bypass_mode() or not self._trains: return self._trains.close() self._trains = None @property def name(self) -> Union[str, None]: """ Name is a human readable non-unique name (str) of the experiment. """ if not self._trains: return '' return self._trains.name @property def version(self) -> Union[str, None]: if not self._trains: return None return self._trains.id @classmethod def set_credentials(cls, api_host: str = None, web_host: str = None, files_host: str = None, key: str = None, secret: str = None) -> None: """ Set new default TRAINS-server host and credentials These configurations could be overridden by either OS environment variables or trains.conf configuration file Notice! credentials needs to be set *prior* to Logger initialization :param api_host: Trains API server url, example: host='http://localhost:8008' :param web_host: Trains WEB server url, example: host='http://localhost:8080' :param files_host: Trains Files server url, example: host='http://localhost:8081' :param key: user key/secret pair, example: key='thisisakey123' :param secret: user key/secret pair, example: secret='thisisseceret123' """ Task.set_credentials(api_host=api_host, web_host=web_host, files_host=files_host, key=key, secret=secret) @classmethod def set_bypass_mode(cls, bypass: bool) -> None: """ set_bypass_mode will bypass all outside communication, and will drop all logs. Should only be used in "standalone mode", when there is no access to the *trains-server* :param bypass: If True, all outside communication is skipped """ cls._bypass = bypass @classmethod def bypass_mode(cls) -> bool: """ bypass_mode returns the bypass mode state. Notice GITHUB_ACTIONS env will automatically set bypass_mode to True unless overridden specifically with set_bypass_mode(False) :return: If True, all outside communication is skipped """ return cls._bypass if cls._bypass is not None else bool(environ.get('CI')) def __getstate__(self) -> Union[str, None]: if self.bypass_mode() or not self._trains: return '' return self._trains.id def __setstate__(self, state: str) -> None: self._rank = 0 self._trains = None if state: self._trains = Task.get_task(task_id=state)