From 3ecec1324c31e8531b50304b59a1972c3e0961b3 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 23 Oct 2024 12:42:54 +0200 Subject: [PATCH] Usage page on memory management, explaining memory zones and doc_cleaner (#13643) [ci skip] Co-authored-by: Ines Montani --- website/docs/api/language.mdx | 22 ++ website/docs/usage/memory-management.mdx | 131 ++++++++ website/meta/sidebars.json | 376 ++++++++++++++++++----- 3 files changed, 454 insertions(+), 75 deletions(-) create mode 100644 website/docs/usage/memory-management.mdx diff --git a/website/docs/api/language.mdx b/website/docs/api/language.mdx index 068e8ea78..b969ddc53 100644 --- a/website/docs/api/language.mdx +++ b/website/docs/api/language.mdx @@ -890,6 +890,28 @@ when loading a config with | `pipe_name` | Name of pipeline component to replace listeners for. ~~str~~ | | `listeners` | The paths to the listeners, relative to the component config, e.g. `["model.tok2vec"]`. Typically, implementations will only connect to one tok2vec component, `model.tok2vec`, but in theory, custom models can use multiple listeners. The value here can either be an empty list to not replace any listeners, or a _complete_ list of the paths to all listener layers used by the model that should be replaced.~~Iterable[str]~~ | +## Language.memory_zone {id="memory_zone",tag="contextmanager",version="3.8"} + +Begin a block where all resources allocated during the block will be freed at +the end of it. If a resource was created within the memory zone block, +accessing it outside the block is invalid. Behavior of this invalid access is +undefined. Memory zones should not be nested. The memory zone is helpful for +services that need to process large volumes of text with a defined memory budget. 
+ +> ```python +> ### Example +> counts = Counter() +> with nlp.memory_zone(): +> for doc in nlp.pipe(texts): +> for token in doc: +> counts[token.text] += 1 +> ``` + +| Name | Description | +| --- | --- | +| `mem` | Optional `cymem.Pool` object to own allocations (created if not provided). This argument is not required for ordinary usage. Defaults to `None`. ~~Optional[cymem.Pool]~~ | +| **RETURNS** | The memory pool that owns the allocations. This object is not required for ordinary usage. ~~Iterator[cymem.Pool]~~ | + ## Language.meta {id="meta",tag="property"} Meta data for the `Language` class, including name, version, data sources, diff --git a/website/docs/usage/memory-management.mdx b/website/docs/usage/memory-management.mdx new file mode 100644 index 000000000..330c117cd --- /dev/null +++ b/website/docs/usage/memory-management.mdx @@ -0,0 +1,131 @@ +--- +title: Memory Management +teaser: Managing Memory for persistent services +version: 3.8 +menu: + - ['Memory Zones', 'memoryzones'] + - ['Clearing Doc attributes', 'doc-attrs'] +--- + +spaCy maintains a few internal caches that improve speed, +but cause memory to increase slightly over time. If you're +running a batch process that you don't need to be long-lived, +the increase in memory usage generally isn't a problem. +However, if you're running spaCy inside a web service, you'll +often want spaCy's memory usage to stay consistent. Transformer +models can also run into memory problems sometimes, especially when +used on a GPU. + +## Memory zones {id="memoryzones"} + +You can tell spaCy to free data from its internal caches (especially the +[`Vocab`](/api/vocab)) using the [`Language.memory_zone`](/api/language#memory_zone) context manager. Enter +the contextmanager and process your text within it, and spaCy will +**reset its internal caches** (freeing up the associated memory) at the +end of the block. spaCy objects created inside the memory zone must +not be accessed once the memory zone is finished. 
+ +```python +### Using memory zones +from collections import Counter + +def count_words(nlp, texts): + counts = Counter() + with nlp.memory_zone(): + for doc in nlp.pipe(texts): + for token in doc: + counts[token.text] += 1 + return counts +``` + + + +Exiting the memory-zone invalidates all `Doc`, `Token`, `Span` and `Lexeme` +objects that were created within it. If you access these objects +after the memory zone exits, you may encounter a segmentation fault +due to invalid memory access. + + + +spaCy needs the memory zone contextmanager because the processing pipeline +can't keep track of which [`Doc`](/api/doc) objects are referring to data in the shared +[`Vocab`](/api/vocab) cache. For instance, when spaCy encounters a new word, a new [`Lexeme`](/api/lexeme) +entry is stored in the `Vocab`, and the `Doc` object points to this shared +data. When the `Doc` goes out of scope, the `Vocab` has no way of knowing that +this `Lexeme` is no longer in use. + +The memory zone solves this problem by +allowing you to tell the processing pipeline that all data created +between two points is no longer in use. It is up to you to honor +this agreement. If you access objects that are supposed to no longer be in +use, you may encounter a segmentation fault due to invalid memory access. + +A common use case for memory zones will be **within a web service**. 
The processing +pipeline can be loaded once, either as a context variable or a global, and each +request can be handled within a memory zone: + +```python +### Memory zones with FastAPI {highlight="10,23"} +from fastapi import FastAPI, APIRouter, Depends, Request +import spacy +from spacy.language import Language + +router = APIRouter() + + +def make_app(): + app = FastAPI() + app.state.NLP = spacy.load("en_core_web_sm") + app.include_router(router) + return app + + +def get_nlp(request: Request) -> Language: + return request.app.state.NLP + + +@router.post("/parse") +def parse_texts( + *, text_batch: list[str], nlp: Language = Depends(get_nlp) +) -> list[dict]: + with nlp.memory_zone(): + # Put the spaCy call within a separate function, so we can't + # leak the Doc objects outside the scope of the memory zone. + output = _process_text(nlp, text_batch) + return output + + +def _process_text(nlp: Language, texts: list[str]) -> list[dict]: + # Call spaCy, and transform the output into our own data + # structures. This function is called from inside a memory + # zone, so must not return the spaCy objects. + docs = list(nlp.pipe(texts)) + return [ + { + "tokens": [{"text": t.text} for t in doc], + "entities": [ + {"start": e.start, "end": e.end, "label": e.label_} for e in doc.ents + ], + } + for doc in docs + ] + + +app = make_app() +``` + +## Clearing transformer tensors and other Doc attributes {id="doc-attrs"} + +The [`Transformer`](/api/transformer) and [`Tok2Vec`](/api/tok2vec) components set intermediate values onto the `Doc` +object during parsing. This can cause GPU memory to be exhausted if many `Doc` +objects are kept in memory together. + +To resolve this, you can add the [`doc_cleaner`](/api/pipeline-functions#doc_cleaner) component to your pipeline. By default +this will clean up the [`Doc._.trf_data`](/api/transformer#custom_attributes) extension attribute and the [`Doc.tensor`](/api/doc#attributes) attribute. 
+You can have it clean up other intermediate extension attributes you use in custom +pipeline components as well. + +```python +### Adding the doc_cleaner +nlp.add_pipe("doc_cleaner", config={"attrs": {"tensor": None}}) +``` diff --git a/website/meta/sidebars.json b/website/meta/sidebars.json index 2df120ffa..74a9eac05 100644 --- a/website/meta/sidebars.json +++ b/website/meta/sidebars.json @@ -5,45 +5,96 @@ { "label": "Get started", "items": [ - { "text": "Installation", "url": "/usage" }, - { "text": "Models & Languages", "url": "/usage/models" }, - { "text": "Facts & Figures", "url": "/usage/facts-figures" }, - { "text": "spaCy 101", "url": "/usage/spacy-101" }, - { "text": "New in v3.7", "url": "/usage/v3-7" }, - { "text": "New in v3.6", "url": "/usage/v3-6" }, - { "text": "New in v3.5", "url": "/usage/v3-5" } + { + "text": "Installation", + "url": "/usage" + }, + { + "text": "Models & Languages", + "url": "/usage/models" + }, + { + "text": "Facts & Figures", + "url": "/usage/facts-figures" + }, + { + "text": "spaCy 101", + "url": "/usage/spacy-101" + }, + { + "text": "New in v3.7", + "url": "/usage/v3-7" + }, + { + "text": "New in v3.6", + "url": "/usage/v3-6" + }, + { + "text": "New in v3.5", + "url": "/usage/v3-5" + } ] }, { "label": "Guides", "items": [ - { "text": "Linguistic Features", "url": "/usage/linguistic-features" }, - { "text": "Rule-based Matching", "url": "/usage/rule-based-matching" }, - { "text": "Processing Pipelines", "url": "/usage/processing-pipelines" }, + { + "text": "Linguistic Features", + "url": "/usage/linguistic-features" + }, + { + "text": "Rule-based Matching", + "url": "/usage/rule-based-matching" + }, + { + "text": "Processing Pipelines", + "url": "/usage/processing-pipelines" + }, { "text": "Embeddings & Transformers", "url": "/usage/embeddings-transformers" }, { "text": "Large Language Models", - "url": "/usage/large-language-models", - "tag": "new" + "url": "/usage/large-language-models" + }, + { + "text": "Training Models", 
+ "url": "/usage/training" }, - { "text": "Training Models", "url": "/usage/training" }, { "text": "Layers & Model Architectures", "url": "/usage/layers-architectures" }, - { "text": "spaCy Projects", "url": "/usage/projects" }, - { "text": "Saving & Loading", "url": "/usage/saving-loading" }, - { "text": "Visualizers", "url": "/usage/visualizers" } + { + "text": "spaCy Projects", + "url": "/usage/projects" + }, + { + "text": "Saving & Loading", + "url": "/usage/saving-loading" + }, + { + "text": "Memory Management", + "url": "/usage/memory-management" + }, + { + "text": "Visualizers", + "url": "/usage/visualizers" + } ] }, { "label": "Resources", "items": [ - { "text": "Project Templates", "url": "https://github.com/explosion/projects" }, - { "text": "v2.x Documentation", "url": "https://v2.spacy.io" }, + { + "text": "Project Templates", + "url": "https://github.com/explosion/projects" + }, + { + "text": "v2.x Documentation", + "url": "https://v2.spacy.io" + }, { "text": "Custom Solutions", "url": "https://explosion.ai/custom-solutions" @@ -57,7 +108,12 @@ "items": [ { "label": "Models", - "items": [{ "text": "Overview", "url": "/models" }] + "items": [ + { + "text": "Overview", + "url": "/models" + } + ] }, { "label": "Trained Pipelines", @@ -71,91 +127,261 @@ { "label": "Overview", "items": [ - { "text": "Library Architecture", "url": "/api" }, - { "text": "Model Architectures", "url": "/api/architectures" }, - { "text": "Data Formats", "url": "/api/data-formats" }, - { "text": "Command Line", "url": "/api/cli" }, - { "text": "Functions", "url": "/api/top-level" } + { + "text": "Library Architecture", + "url": "/api" + }, + { + "text": "Model Architectures", + "url": "/api/architectures" + }, + { + "text": "Data Formats", + "url": "/api/data-formats" + }, + { + "text": "Command Line", + "url": "/api/cli" + }, + { + "text": "Functions", + "url": "/api/top-level" + } ] }, { "label": "Containers", "items": [ - { "text": "Doc", "url": "/api/doc" }, - { "text": 
"DocBin", "url": "/api/docbin" }, - { "text": "Example", "url": "/api/example" }, - { "text": "Language", "url": "/api/language" }, - { "text": "Lexeme", "url": "/api/lexeme" }, - { "text": "Span", "url": "/api/span" }, - { "text": "SpanGroup", "url": "/api/spangroup" }, - { "text": "Token", "url": "/api/token" } + { + "text": "Doc", + "url": "/api/doc" + }, + { + "text": "DocBin", + "url": "/api/docbin" + }, + { + "text": "Example", + "url": "/api/example" + }, + { + "text": "Language", + "url": "/api/language" + }, + { + "text": "Lexeme", + "url": "/api/lexeme" + }, + { + "text": "Span", + "url": "/api/span" + }, + { + "text": "SpanGroup", + "url": "/api/spangroup" + }, + { + "text": "Token", + "url": "/api/token" + } ] }, { "label": "Pipeline", "items": [ - { "text": "AttributeRuler", "url": "/api/attributeruler" }, - { "text": "CoreferenceResolver", "url": "/api/coref" }, - { "text": "CuratedTransformer", "url": "/api/curatedtransformer" }, - { "text": "DependencyParser", "url": "/api/dependencyparser" }, - { "text": "EditTreeLemmatizer", "url": "/api/edittreelemmatizer" }, - { "text": "EntityLinker", "url": "/api/entitylinker" }, - { "text": "EntityRecognizer", "url": "/api/entityrecognizer" }, - { "text": "EntityRuler", "url": "/api/entityruler" }, - { "text": "Large Language Models", "url": "/api/large-language-models" }, - { "text": "Lemmatizer", "url": "/api/lemmatizer" }, - { "text": "Morphologizer", "url": "/api/morphologizer" }, - { "text": "SentenceRecognizer", "url": "/api/sentencerecognizer" }, - { "text": "Sentencizer", "url": "/api/sentencizer" }, - { "text": "SpanCategorizer", "url": "/api/spancategorizer" }, - { "text": "SpanFinder", "url": "/api/spanfinder" }, - { "text": "SpanResolver", "url": "/api/span-resolver" }, - { "text": "SpanRuler", "url": "/api/spanruler" }, - { "text": "Tagger", "url": "/api/tagger" }, - { "text": "TextCategorizer", "url": "/api/textcategorizer" }, - { "text": "Tok2Vec", "url": "/api/tok2vec" }, - { "text": 
"Tokenizer", "url": "/api/tokenizer" }, - { "text": "TrainablePipe", "url": "/api/pipe" }, - { "text": "Transformer", "url": "/api/transformer" }, - { "text": "Other Functions", "url": "/api/pipeline-functions" } + { + "text": "AttributeRuler", + "url": "/api/attributeruler" + }, + { + "text": "CoreferenceResolver", + "url": "/api/coref" + }, + { + "text": "CuratedTransformer", + "url": "/api/curatedtransformer" + }, + { + "text": "DependencyParser", + "url": "/api/dependencyparser" + }, + { + "text": "EditTreeLemmatizer", + "url": "/api/edittreelemmatizer" + }, + { + "text": "EntityLinker", + "url": "/api/entitylinker" + }, + { + "text": "EntityRecognizer", + "url": "/api/entityrecognizer" + }, + { + "text": "EntityRuler", + "url": "/api/entityruler" + }, + { + "text": "Large Language Models", + "url": "/api/large-language-models" + }, + { + "text": "Lemmatizer", + "url": "/api/lemmatizer" + }, + { + "text": "Morphologizer", + "url": "/api/morphologizer" + }, + { + "text": "SentenceRecognizer", + "url": "/api/sentencerecognizer" + }, + { + "text": "Sentencizer", + "url": "/api/sentencizer" + }, + { + "text": "SpanCategorizer", + "url": "/api/spancategorizer" + }, + { + "text": "SpanFinder", + "url": "/api/spanfinder" + }, + { + "text": "SpanResolver", + "url": "/api/span-resolver" + }, + { + "text": "SpanRuler", + "url": "/api/spanruler" + }, + { + "text": "Tagger", + "url": "/api/tagger" + }, + { + "text": "TextCategorizer", + "url": "/api/textcategorizer" + }, + { + "text": "Tok2Vec", + "url": "/api/tok2vec" + }, + { + "text": "Tokenizer", + "url": "/api/tokenizer" + }, + { + "text": "TrainablePipe", + "url": "/api/pipe" + }, + { + "text": "Transformer", + "url": "/api/transformer" + }, + { + "text": "Other Functions", + "url": "/api/pipeline-functions" + } ] }, { "label": "Matchers", "items": [ - { "text": "DependencyMatcher", "url": "/api/dependencymatcher" }, - { "text": "Matcher", "url": "/api/matcher" }, - { "text": "PhraseMatcher", "url": 
"/api/phrasematcher" } + { + "text": "DependencyMatcher", + "url": "/api/dependencymatcher" + }, + { + "text": "Matcher", + "url": "/api/matcher" + }, + { + "text": "PhraseMatcher", + "url": "/api/phrasematcher" + } ] }, { "label": "Other", "items": [ - { "text": "Attributes", "url": "/api/attributes" }, - { "text": "BaseVectors", "url": "/api/basevectors" }, - { "text": "Corpus", "url": "/api/corpus" }, - { "text": "InMemoryLookupKB", "url": "/api/inmemorylookupkb" }, - { "text": "KnowledgeBase", "url": "/api/kb" }, - { "text": "Lookups", "url": "/api/lookups" }, - { "text": "MorphAnalysis", "url": "/api/morphology#morphanalysis" }, - { "text": "Morphology", "url": "/api/morphology" }, - { "text": "Scorer", "url": "/api/scorer" }, - { "text": "StringStore", "url": "/api/stringstore" }, - { "text": "Vectors", "url": "/api/vectors" }, - { "text": "Vocab", "url": "/api/vocab" } + { + "text": "Attributes", + "url": "/api/attributes" + }, + { + "text": "BaseVectors", + "url": "/api/basevectors" + }, + { + "text": "Corpus", + "url": "/api/corpus" + }, + { + "text": "InMemoryLookupKB", + "url": "/api/inmemorylookupkb" + }, + { + "text": "KnowledgeBase", + "url": "/api/kb" + }, + { + "text": "Lookups", + "url": "/api/lookups" + }, + { + "text": "MorphAnalysis", + "url": "/api/morphology#morphanalysis" + }, + { + "text": "Morphology", + "url": "/api/morphology" + }, + { + "text": "Scorer", + "url": "/api/scorer" + }, + { + "text": "StringStore", + "url": "/api/stringstore" + }, + { + "text": "Vectors", + "url": "/api/vectors" + }, + { + "text": "Vocab", + "url": "/api/vocab" + } ] }, { "label": "Cython", "items": [ - { "text": "Architecture", "url": "/api/cython" }, - { "text": "Classes", "url": "/api/cython-classes" }, - { "text": "Structs", "url": "/api/cython-structs" } + { + "text": "Architecture", + "url": "/api/cython" + }, + { + "text": "Classes", + "url": "/api/cython-classes" + }, + { + "text": "Structs", + "url": "/api/cython-structs" + } ] }, { "label": 
"Legacy", - "items": [{ "text": "Legacy functions", "url": "/api/legacy" }] + "items": [ + { + "text": "Legacy functions", + "url": "/api/legacy" + } + ] } ] }