mirror of https://github.com/explosion/spaCy.git
Remove scores list from config and document
This commit is contained in:
parent
9b704c3db3
commit
0094cb0d04
|
@ -445,9 +445,8 @@ def setup_printer(
|
|||
def update_meta(
    training: Union[Dict[str, Any], Config], nlp: Language, info: Dict[str, Any]
) -> None:
    """Record the latest evaluation scores and losses in ``nlp.meta``.

    ``nlp.meta["performance"]`` is reset to a fresh dict and then filled with
    one entry per metric named in ``training["score_weights"]`` (value taken
    from ``info["other_scores"]``) and one ``"<pipe_name>_loss"`` entry per
    pipeline component (value taken from ``info["losses"]``).

    training: The training config section; must provide "score_weights".
    nlp: The pipeline whose ``meta`` is updated in place.
    info: Training step info; must provide "other_scores" and "losses".
    RAISES (KeyError): If a metric or pipe name is missing from ``info``.
    """
    performance = {}
    # The separate "scores" list no longer exists in the config: the keys of
    # "score_weights" define which metrics get reported.
    for metric in training["score_weights"]:
        performance[metric] = info["other_scores"][metric]
    for pipe_name in nlp.pipe_names:
        performance[f"{pipe_name}_loss"] = info["losses"][pipe_name]
    nlp.meta["performance"] = performance
|
||||
|
|
|
@ -34,7 +34,6 @@ seed = 0
|
|||
accumulate_gradient = 1
|
||||
use_pytorch_for_gpu_memory = false
|
||||
# Control how scores are printed and checkpoints are evaluated.
|
||||
scores = ["token_acc", "speed"]
|
||||
score_weights = {}
|
||||
# These settings are invalid for the transformer models.
|
||||
init_tok2vec = null
|
||||
|
|
|
@ -224,19 +224,15 @@ class Language:
|
|||
# We're storing the filled config for each pipeline component and so
|
||||
# we can populate the config again later
|
||||
pipeline = {}
|
||||
scores = self._config["training"].get("scores", [])
|
||||
score_weights = []
|
||||
for pipe_name in self.pipe_names:
|
||||
pipe_meta = self.get_pipe_meta(pipe_name)
|
||||
pipe_config = self.get_pipe_config(pipe_name)
|
||||
pipeline[pipe_name] = {"factory": pipe_meta.factory, **pipe_config}
|
||||
scores.extend(pipe_meta.scores)
|
||||
if pipe_meta.default_score_weights:
|
||||
score_weights.append(pipe_meta.default_score_weights)
|
||||
self._config["nlp"]["pipeline"] = self.pipe_names
|
||||
self._config["components"] = pipeline
|
||||
self._config["training"]["scores"] = sorted(set(scores))
|
||||
combined_score_weights = combine_score_weights(score_weights)
|
||||
self._config["training"]["score_weights"] = combine_score_weights(score_weights)
|
||||
if not srsly.is_json_serializable(self._config):
|
||||
raise ValueError(Errors.E961.format(config=self._config))
|
||||
|
@ -376,6 +372,12 @@ class Language:
|
|||
e.g. "token.ent_id". Used for pipeline analysis.
|
||||
retokenizes (bool): Whether the component changes the tokenization.
|
||||
Used for pipeline analysis.
|
||||
scores (Iterable[str]): All scores set by the component if it's trainable,
|
||||
e.g. ["ents_f", "ents_r", "ents_p"].
|
||||
default_score_weights (Dict[str, float]): The scores to report during
|
||||
training, and their default weight towards the final score used to
|
||||
select the best model. Weights should sum to 1.0 per component and
|
||||
will be combined and normalized for the whole pipeline.
|
||||
func (Optional[Callable]): Factory function if not used as a decorator.
|
||||
"""
|
||||
if not isinstance(name, str):
|
||||
|
@ -448,6 +450,12 @@ class Language:
|
|||
e.g. "token.ent_id". Used for pipeline analysis.
|
||||
retokenizes (bool): Whether the component changes the tokenization.
|
||||
Used for pipeline analysis.
|
||||
scores (Iterable[str]): All scores set by the component if it's trainable,
|
||||
e.g. ["ents_f", "ents_r", "ents_p"].
|
||||
default_score_weights (Dict[str, float]): The scores to report during
|
||||
training, and their default weight towards the final score used to
|
||||
select the best model. Weights should sum to 1.0 per component and
|
||||
will be combined and normalized for the whole pipeline.
|
||||
func (Optional[Callable]): Factory function if not used as a decorator.
|
||||
"""
|
||||
if name is not None and not isinstance(name, str):
|
||||
|
@ -1505,7 +1513,7 @@ class FactoryMeta:
|
|||
requires: Iterable[str] = tuple()
|
||||
retokenizes: bool = False
|
||||
scores: Iterable[str] = tuple()
|
||||
default_score_weights: Dict[str, float] = None
|
||||
default_score_weights: Optional[Dict[str, float]] = None # noqa: E704
|
||||
|
||||
|
||||
def _get_config_overrides(
|
||||
|
|
|
@ -207,8 +207,7 @@ class ConfigSchemaTraining(BaseModel):
|
|||
seed: Optional[StrictInt] = Field(..., title="Random seed")
|
||||
accumulate_gradient: StrictInt = Field(..., title="Whether to divide the batch up into substeps")
|
||||
use_pytorch_for_gpu_memory: StrictBool = Field(..., title="Allocate memory via PyTorch")
|
||||
scores: List[StrictStr] = Field(..., title="Score types to be printed in overview")
|
||||
score_weights: Dict[StrictStr, Union[StrictFloat, StrictInt]] = Field(..., title="Weights of each score type for selecting final model")
|
||||
score_weights: Dict[StrictStr, Union[StrictFloat, StrictInt]] = Field(..., title="Scores to report and their weights for selecting final model")
|
||||
init_tok2vec: Optional[StrictStr] = Field(..., title="Path to pretrained tok2vec weights")
|
||||
discard_oversize: StrictBool = Field(..., title="Whether to skip examples longer than batch size")
|
||||
batch_by: StrictStr = Field(..., title="Batch examples by type")
|
||||
|
|
|
@ -343,10 +343,7 @@ def test_language_factories_invalid():
|
|||
[{"a": 100, "b": 400}, {"c": 0.5, "d": 0.5}],
|
||||
{"a": 0.1, "b": 0.4, "c": 0.25, "d": 0.25},
|
||||
),
|
||||
(
|
||||
[{"a": 0.5, "b": 0.5}, {"b": 1.0}],
|
||||
{"a": 0.25, "b": 0.75},
|
||||
),
|
||||
([{"a": 0.5, "b": 0.5}, {"b": 1.0}], {"a": 0.25, "b": 0.75},),
|
||||
],
|
||||
)
|
||||
def test_language_factories_combine_score_weights(weights, expected):
|
||||
|
@ -371,11 +368,9 @@ def test_language_factories_scores():
|
|||
meta2 = Language.get_factory_meta(f"{name}2")
|
||||
assert meta2.default_score_weights == weights2
|
||||
nlp = Language()
|
||||
nlp._config["training"]["scores"] = ["speed"]
|
||||
nlp._config["training"]["score_weights"] = {}
|
||||
nlp.add_pipe(f"{name}1")
|
||||
nlp.add_pipe(f"{name}2")
|
||||
cfg = nlp.config["training"]
|
||||
assert cfg["scores"] == sorted(["speed", *list(weights1.keys()), *list(weights2.keys())])
|
||||
expected_weights = {"a1": 0.25, "a2": 0.25, "b1": 0.1, "b2": 0.35, "b3": 0.05}
|
||||
assert cfg["score_weights"] == expected_weights
|
||||
|
|
|
@ -42,14 +42,16 @@ decorator. For more details and examples, see the
|
|||
> Language.component("my_component2", func=my_component)
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| -------------- | -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `name` | str | The name of the component factory. |
|
||||
| _keyword-only_ | | |
|
||||
| `assigns` | `Iterable[str]` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `requires` | `Iterable[str]` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `retokenizes` | bool | Whether the component changes tokenization. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `func` | `Optional[Callable]` | Optional function if not used as a decorator. |
|
||||
| Name | Type | Description |
|
||||
| ----------------------- | -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `name` | str | The name of the component factory. |
|
||||
| _keyword-only_ | | |
|
||||
| `assigns` | `Iterable[str]` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `requires` | `Iterable[str]` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `retokenizes` | bool | Whether the component changes tokenization. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `scores` | `Iterable[str]` | All scores set by the component if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. |
|
||||
| `default_score_weights` | `Dict[str, float]` | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. |
|
||||
| `func` | `Optional[Callable]` | Optional function if not used as a decorator. |
|
||||
|
||||
## Language.factory {#factory tag="classmethod"}
|
||||
|
||||
|
@ -87,15 +89,17 @@ examples, see the
|
|||
> )
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ---------------- | -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `name` | str | The name of the component factory. |
|
||||
| _keyword-only_ | | |
|
||||
| `default_config` | `Dict[str, Any]` | The default config, describing the default values of the factory arguments. |
|
||||
| `assigns` | `Iterable[str]` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `requires` | `Iterable[str]` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `retokenizes` | bool | Whether the component changes tokenization. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `func` | `Optional[Callable]` | Optional function if not used as a decorator. |
|
||||
| Name | Type | Description |
|
||||
| ----------------------- | -------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `name` | str | The name of the component factory. |
|
||||
| _keyword-only_ | | |
|
||||
| `default_config` | `Dict[str, Any]` | The default config, describing the default values of the factory arguments. |
|
||||
| `assigns` | `Iterable[str]` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `requires` | `Iterable[str]` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `retokenizes` | bool | Whether the component changes tokenization. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `scores` | `Iterable[str]` | All scores set by the component if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. |
|
||||
| `default_score_weights` | `Dict[str, float]` | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. |
|
||||
| `func` | `Optional[Callable]` | Optional function if not used as a decorator. |
|
||||
|
||||
## Language.\_\_init\_\_ {#init tag="method"}
|
||||
|
||||
|
@ -767,10 +771,12 @@ provided by the [`@Language.component`](/api/language#component) or
|
|||
component is added to the pipeline and stored on the `Language` class for each
|
||||
component instance and factory instance.
|
||||
|
||||
| Name | Type | Description |
|
||||
| ---------------- | ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `factory` | str | The name of the registered component factory. |
|
||||
| `default_config` | `Dict[str, Any]` | The default config, describing the default values of the factory arguments. |
|
||||
| `assigns` | `Iterable[str]` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `requires` | `Iterable[str]` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `retokenizes` | bool | Whether the component changes tokenization. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| Name | Type | Description |
|
||||
| ----------------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `factory` | str | The name of the registered component factory. |
|
||||
| `default_config` | `Dict[str, Any]` | The default config, describing the default values of the factory arguments. |
|
||||
| `assigns` | `Iterable[str]` | `Doc` or `Token` attributes assigned by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `requires` | `Iterable[str]` | `Doc` or `Token` attributes required by this component, e.g. `["token.ent_id"]`. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `retokenizes` | bool | Whether the component changes tokenization. Used for pipeline analysis. <!-- TODO: link to something --> |
|
||||
| `scores` | `Iterable[str]` | All scores set by the component if it's trainable, e.g. `["ents_f", "ents_r", "ents_p"]`. |
|
||||
| `default_score_weights` | `Dict[str, float]` | The scores to report during training, and their default weight towards the final score used to select the best model. Weights should sum to `1.0` per component and will be combined and normalized for the whole pipeline. |
|
||||
|
|
Loading…
Reference in New Issue