2020-10-03 12:57:46 +00:00
|
|
|
from typing import TYPE_CHECKING, Dict, Any, Tuple, Callable, List, Optional, IO
|
2020-10-03 14:31:58 +00:00
|
|
|
from wasabi import Printer
|
2020-10-03 12:57:46 +00:00
|
|
|
import tqdm
|
|
|
|
import sys
|
2020-08-26 13:24:33 +00:00
|
|
|
|
|
|
|
from ..util import registry
|
2020-08-28 11:55:32 +00:00
|
|
|
from .. import util
|
2020-08-26 13:24:33 +00:00
|
|
|
from ..errors import Errors
|
|
|
|
|
2020-10-03 14:31:58 +00:00
|
|
|
if TYPE_CHECKING:
|
|
|
|
from ..language import Language # noqa: F401
|
|
|
|
|
2020-08-26 13:24:33 +00:00
|
|
|
|
|
|
|
@registry.loggers("spacy.ConsoleLogger.v1")
def console_logger(progress_bar: bool = False):
    """Create a logger setup function that prints training results as a table.

    Args:
        progress_bar: If true, a tqdm progress bar is (re)created after every
            logged table row and advanced once for every step that has no
            new results.

    Returns:
        A ``setup_printer(nlp, stdout, stderr)`` callable which writes the
        table header and returns a ``(log_step, finalize)`` pair.
    """

    def setup_printer(
        nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
    ) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:
        """Write the table header to ``stdout`` and build the callbacks.

        Args:
            nlp: Pipeline whose ``pipe_names`` and ``config["training"]``
                determine the table columns.
            stdout: Stream the table rows are written to.
            stderr: Stream handed to tqdm for the progress bar.

        Returns:
            ``(log_step, finalize)``.
        """
        msg = Printer(no_print=True)
        # we assume here that only components are enabled that should be trained & logged
        logged_pipes = nlp.pipe_names
        eval_frequency = nlp.config["training"]["eval_frequency"]
        score_weights = nlp.config["training"]["score_weights"]
        # Scores whose weight is None are not shown in the table.
        score_cols = [col for col, value in score_weights.items() if value is not None]
        score_widths = [max(len(col), 6) for col in score_cols]
        loss_cols = [f"Loss {pipe}" for pipe in logged_pipes]
        loss_widths = [max(len(col), 8) for col in loss_cols]
        table_header = ["E", "#"] + loss_cols + score_cols + ["Score"]
        table_header = [col.upper() for col in table_header]
        table_widths = [3, 6] + loss_widths + score_widths + [6]
        table_aligns = ["r" for _ in table_widths]
        stdout.write(msg.row(table_header, widths=table_widths) + "\n")
        stdout.write(msg.row(["-" * width for width in table_widths]) + "\n")
        # Cell shown for a pipe that reported no loss in a given step.
        missing_loss = "-"
        progress = None

        def log_step(info: Optional[Dict[str, Any]]) -> None:
            """Write one table row for ``info``, or tick the progress bar.

            Args:
                info: Results of the current step, or None if the step
                    produced no new results. Reads the keys "losses",
                    "other_scores", "epoch", "step" and "score".

            Raises:
                ValueError: If a score listed in ``score_cols`` cannot be
                    converted to a float.
            """
            nonlocal progress

            if info is None:
                # If we don't have a new checkpoint, just return.
                if progress is not None:
                    progress.update(1)
                return
            # The header always contains one loss column per logged pipe, so
            # emit exactly one cell per pipe. Skipping pipes without a loss
            # would make the row shorter than the header and shift the
            # scores underneath the loss headings.
            losses = [
                "{0:.2f}".format(float(info["losses"][pipe_name]))
                if pipe_name in info["losses"]
                else missing_loss
                for pipe_name in logged_pipes
            ]

            scores = []
            for col in score_cols:
                score = info["other_scores"].get(col, 0.0)
                try:
                    score = float(score)
                except (TypeError, ValueError):
                    # float() raises TypeError for e.g. None or a dict and
                    # ValueError for a non-numeric string; report both with
                    # the same descriptive error.
                    err = Errors.E916.format(name=col, score_type=type(score))
                    raise ValueError(err) from None
                # Every score except "speed" is scaled by 100 for display.
                if col != "speed":
                    score *= 100
                scores.append("{0:.2f}".format(score))

            data = (
                [info["epoch"], info["step"]]
                + losses
                + scores
                + ["{0:.2f}".format(float(info["score"]))]
            )
            # Close the running bar before the row is written; a new bar is
            # created below.
            if progress is not None:
                progress.close()
            stdout.write(msg.row(data, widths=table_widths, aligns=table_aligns) + "\n")
            if progress_bar:
                # Set disable=None, so that it disables on non-TTY
                progress = tqdm.tqdm(
                    total=eval_frequency, disable=None, leave=False, file=stderr
                )
                progress.set_description(f"Epoch {info['epoch']+1}")

        def finalize() -> None:
            """Close the progress bar that is still open, if any."""
            nonlocal progress

            # log_step opens a fresh bar after every row, so one is normally
            # still open when logging ends.
            if progress is not None:
                progress.close()
                progress = None

        return log_step, finalize

    return setup_printer
|
|
|
|
|
|
|
|
|
|
|
|
@registry.loggers("spacy.WandbLogger.v1")
def wandb_logger(project_name: str, remove_config_values: List[str] = []):
    """Create a logger setup function that logs to the console and to wandb.

    Args:
        project_name: Passed to ``wandb.init`` as ``project``.
        remove_config_values: Keys deleted from the ``util.dict_to_dot``
            form of the config before it is passed to ``wandb.init``. A key
            that is not present raises ``KeyError``. The default list is
            only iterated, never mutated.

    Returns:
        A ``setup_logger(nlp, stdout, stderr)`` callable which initializes
        the wandb run and returns a ``(log_step, finalize)`` pair.
    """
    # Imported here rather than at module level, so the import only runs
    # when this logger is created.
    import wandb

    console = console_logger(progress_bar=False)

    def setup_logger(
        nlp: "Language", stdout: IO = sys.stdout, stderr: IO = sys.stderr
    ) -> Tuple[Callable[[Optional[Dict[str, Any]]], None], Callable[[], None]]:
        """Start the wandb run and set up the wrapped console logger.

        Args:
            nlp: Pipeline whose interpolated config is sent to wandb.
            stdout: Forwarded to the console logger.
            stderr: Forwarded to the console logger.

        Returns:
            ``(log_step, finalize)``.
        """
        config = nlp.config.interpolate()
        # Round-trip through the dot form so the keys listed in
        # remove_config_values can be deleted individually.
        config_dot = util.dict_to_dot(config)
        for field in remove_config_values:
            del config_dot[field]
        config = util.dot_to_dict(config_dot)
        wandb.init(project=project_name, config=config, reinit=True)
        console_log_step, console_finalize = console(nlp, stdout, stderr)

        def log_step(info: Optional[Dict[str, Any]]) -> None:
            """Forward ``info`` to the console logger, then log it to wandb.

            Args:
                info: Results of the current step, or None if the step
                    produced no new results. None is still forwarded to the
                    console logger but nothing is sent to wandb.
            """
            console_log_step(info)
            if info is None:
                return
            score = info["score"]
            other_scores = info["other_scores"]
            losses = info["losses"]
            wandb.log({"score": score})
            if losses:
                wandb.log({f"loss_{k}": v for k, v in losses.items()})
            if isinstance(other_scores, dict):
                wandb.log(other_scores)

        def finalize() -> None:
            """Finalize the console logger, then call ``wandb.join()``."""
            console_finalize()
            wandb.join()

        return log_step, finalize

    return setup_logger
|