From a9cb2882cb98674614e72232c4bc5133b92fa501 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Thu, 21 May 2020 15:17:39 +0200
Subject: [PATCH] Rename argument: doc_or_span/obj -> doclike (#5463)

* doc_or_span -> obj

* Revert "doc_or_span -> obj"

This reverts commit 78bb9ff5e0e4adc01bd30e227657118d87546f83.

* obj -> doclike

* Refer to correct object
---
 spacy/lang/de/syntax_iterators.py |  6 +++---
 spacy/lang/el/syntax_iterators.py |  6 +++---
 spacy/lang/en/syntax_iterators.py |  6 +++---
 spacy/lang/es/syntax_iterators.py |  6 +++---
 spacy/lang/fa/syntax_iterators.py |  6 +++---
 spacy/lang/fr/syntax_iterators.py |  6 +++---
 spacy/lang/id/syntax_iterators.py |  6 +++---
 spacy/lang/nb/syntax_iterators.py |  6 +++---
 spacy/lang/sv/syntax_iterators.py |  6 +++---
 spacy/matcher/matcher.pyx         | 24 ++++++++++++------------
 10 files changed, 39 insertions(+), 39 deletions(-)

diff --git a/spacy/lang/de/syntax_iterators.py b/spacy/lang/de/syntax_iterators.py
index 13bb857ca..73c1b1a6e 100644
--- a/spacy/lang/de/syntax_iterators.py
+++ b/spacy/lang/de/syntax_iterators.py
@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases from a dependency parse. Works on both Doc and Span.
     """
@@ -28,7 +28,7 @@ def noun_chunks(obj):
         "og",
         "app",
     ]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
 
@@ -38,7 +38,7 @@ def noun_chunks(obj):
     close_app = doc.vocab.strings.add("nk")
 
     rbracket = 0
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if i < rbracket:
             continue
         if word.pos in (NOUN, PROPN, PRON) and word.dep in np_deps:
diff --git a/spacy/lang/el/syntax_iterators.py b/spacy/lang/el/syntax_iterators.py
index f02619ac9..4317bdeb4 100644
--- a/spacy/lang/el/syntax_iterators.py
+++ b/spacy/lang/el/syntax_iterators.py
@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases. Works on both Doc and Span.
     """
@@ -14,7 +14,7 @@ def noun_chunks(obj):
     # obj tag corrects some DEP tagger mistakes.
     # Further improvement of the models will eliminate the need for this tag.
     labels = ["nsubj", "obj", "iobj", "appos", "ROOT", "obl"]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
 
@@ -24,7 +24,7 @@ def noun_chunks(obj):
     nmod = doc.vocab.strings.add("nmod")
     np_label = doc.vocab.strings.add("NP")
     seen = set()
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if word.pos not in (NOUN, PROPN, PRON):
             continue
         # Prevent nested chunks from being produced
diff --git a/spacy/lang/en/syntax_iterators.py b/spacy/lang/en/syntax_iterators.py
index 5ff848124..6d366ec90 100644
--- a/spacy/lang/en/syntax_iterators.py
+++ b/spacy/lang/en/syntax_iterators.py
@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases from a dependency parse. Works on both Doc and Span.
     """
@@ -20,7 +20,7 @@ def noun_chunks(obj):
         "attr",
         "ROOT",
     ]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
 
@@ -29,7 +29,7 @@ def noun_chunks(obj):
     conj = doc.vocab.strings.add("conj")
     np_label = doc.vocab.strings.add("NP")
     seen = set()
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if word.pos not in (NOUN, PROPN, PRON):
             continue
         # Prevent nested chunks from being produced
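The docstrings above spell out the contract that the `doclike` name makes explicit: the iterator accepts either a `Doc` or a `Span` and normalizes to the underlying `Doc` via `doclike.doc`. A minimal sketch of that contract from the caller's side, assuming an installed English model (the model name and sentence are illustrative, not part of this patch):

    import spacy

    nlp = spacy.load("en_core_web_sm")  # assumption: this model is installed
    doc = nlp("The quick brown fox jumps over the lazy dog.")

    # The iterator runs over the whole Doc ...
    print([chunk.text for chunk in doc.noun_chunks])

    # ... and over a Span as well, since it only needs `doclike.doc` internally.
    span = doc[0:5]
    print([chunk.text for chunk in span.noun_chunks])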
diff --git a/spacy/lang/es/syntax_iterators.py b/spacy/lang/es/syntax_iterators.py
index 0badddca1..5fda35211 100644
--- a/spacy/lang/es/syntax_iterators.py
+++ b/spacy/lang/es/syntax_iterators.py
@@ -5,8 +5,8 @@ from ...symbols import NOUN, PROPN, PRON, VERB, AUX
 from ...errors import Errors
 
 
-def noun_chunks(obj):
-    doc = obj.doc
+def noun_chunks(doclike):
+    doc = doclike.doc
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
 
@@ -21,7 +21,7 @@ def noun_chunks(obj):
     np_right_deps = [doc.vocab.strings.add(label) for label in right_labels]
     stop_deps = [doc.vocab.strings.add(label) for label in stop_labels]
     token = doc[0]
-    while token and token.i < len(doc):
+    while token and token.i < len(doclike):
         if token.pos in [PROPN, NOUN, PRON]:
             left, right = noun_bounds(
                 doc, token, np_left_deps, np_right_deps, stop_deps
diff --git a/spacy/lang/fa/syntax_iterators.py b/spacy/lang/fa/syntax_iterators.py
index 5ff848124..6d366ec90 100644
--- a/spacy/lang/fa/syntax_iterators.py
+++ b/spacy/lang/fa/syntax_iterators.py
@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases from a dependency parse. Works on both Doc and Span.
     """
@@ -20,7 +20,7 @@ def noun_chunks(obj):
         "attr",
         "ROOT",
     ]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
 
@@ -29,7 +29,7 @@ def noun_chunks(obj):
     conj = doc.vocab.strings.add("conj")
     np_label = doc.vocab.strings.add("NP")
     seen = set()
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if word.pos not in (NOUN, PROPN, PRON):
             continue
         # Prevent nested chunks from being produced
diff --git a/spacy/lang/fr/syntax_iterators.py b/spacy/lang/fr/syntax_iterators.py
index 9495dcf1e..2ed2c1b35 100644
--- a/spacy/lang/fr/syntax_iterators.py
+++ b/spacy/lang/fr/syntax_iterators.py
@@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON
 from ...errors import Errors
 
 
-def noun_chunks(obj):
+def noun_chunks(doclike):
     """
     Detect base noun phrases from a dependency parse. Works on both Doc and Span.
     """
@@ -19,7 +19,7 @@ def noun_chunks(obj):
         "nmod",
         "nmod:poss",
     ]
-    doc = obj.doc  # Ensure works on both Doc and Span.
+    doc = doclike.doc  # Ensure works on both Doc and Span.
     if not doc.is_parsed:
         raise ValueError(Errors.E029)
 
@@ -28,7 +28,7 @@ def noun_chunks(obj):
     conj = doc.vocab.strings.add("conj")
     np_label = doc.vocab.strings.add("NP")
     seen = set()
-    for i, word in enumerate(obj):
+    for i, word in enumerate(doclike):
         if word.pos not in (NOUN, PROPN, PRON):
             continue
         # Prevent nested chunks from being produced
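The Spanish iterator above carries the one behavioral fix in this patch (the "Refer to correct object" commit): the loop bound is now `len(doclike)` rather than `len(doc)`, so iteration stops at the end of a `Span` instead of running on to the end of the underlying `Doc`. A short sketch of why the two bounds differ (the model name and sentence are illustrative):

    import spacy

    nlp = spacy.load("es_core_news_sm")  # assumption: Spanish model installed
    doc = nlp("El perro grande persigue al gato pequeño en el parque.")
    span = doc[0:3]

    # len() counts all tokens of a Doc but only the tokens inside a Span,
    # so bounding the loop by len(doc) would overrun a Span argument.
    print(len(doc), len(span))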
""" @@ -19,7 +19,7 @@ def noun_chunks(obj): "nmod", "nmod:poss", ] - doc = obj.doc # Ensure works on both Doc and Span. + doc = doclike.doc # Ensure works on both Doc and Span. if not doc.is_parsed: raise ValueError(Errors.E029) @@ -28,7 +28,7 @@ def noun_chunks(obj): conj = doc.vocab.strings.add("conj") np_label = doc.vocab.strings.add("NP") seen = set() - for i, word in enumerate(obj): + for i, word in enumerate(doclike): if word.pos not in (NOUN, PROPN, PRON): continue # Prevent nested chunks from being produced diff --git a/spacy/lang/nb/syntax_iterators.py b/spacy/lang/nb/syntax_iterators.py index 9495dcf1e..2ed2c1b35 100644 --- a/spacy/lang/nb/syntax_iterators.py +++ b/spacy/lang/nb/syntax_iterators.py @@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON from ...errors import Errors -def noun_chunks(obj): +def noun_chunks(doclike): """ Detect base noun phrases from a dependency parse. Works on both Doc and Span. """ @@ -19,7 +19,7 @@ def noun_chunks(obj): "nmod", "nmod:poss", ] - doc = obj.doc # Ensure works on both Doc and Span. + doc = doclike.doc # Ensure works on both Doc and Span. if not doc.is_parsed: raise ValueError(Errors.E029) @@ -28,7 +28,7 @@ def noun_chunks(obj): conj = doc.vocab.strings.add("conj") np_label = doc.vocab.strings.add("NP") seen = set() - for i, word in enumerate(obj): + for i, word in enumerate(doclike): if word.pos not in (NOUN, PROPN, PRON): continue # Prevent nested chunks from being produced diff --git a/spacy/lang/sv/syntax_iterators.py b/spacy/lang/sv/syntax_iterators.py index 148884efe..84493ae79 100644 --- a/spacy/lang/sv/syntax_iterators.py +++ b/spacy/lang/sv/syntax_iterators.py @@ -5,7 +5,7 @@ from ...symbols import NOUN, PROPN, PRON from ...errors import Errors -def noun_chunks(obj): +def noun_chunks(doclike): """ Detect base noun phrases from a dependency parse. Works on both Doc and Span. """ @@ -20,7 +20,7 @@ def noun_chunks(obj): "nmod", "nmod:poss", ] - doc = obj.doc # Ensure works on both Doc and Span. + doc = doclike.doc # Ensure works on both Doc and Span. if not doc.is_parsed: raise ValueError(Errors.E029) @@ -29,7 +29,7 @@ def noun_chunks(obj): conj = doc.vocab.strings.add("conj") np_label = doc.vocab.strings.add("NP") seen = set() - for i, word in enumerate(obj): + for i, word in enumerate(doclike): if word.pos not in (NOUN, PROPN, PRON): continue # Prevent nested chunks from being produced diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 4cfab915f..0c1a56187 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -213,28 +213,28 @@ cdef class Matcher: else: yield doc - def __call__(self, object doc_or_span): + def __call__(self, object doclike): """Find all token sequences matching the supplied pattern. - doc_or_span (Doc or Span): The document to match over. + doclike (Doc or Span): The document to match over. RETURNS (list): A list of `(key, start, end)` tuples, describing the matches. A match tuple describes a span `doc[start:end]`. The `label_id` and `key` are both integers. 
""" - if isinstance(doc_or_span, Doc): - doc = doc_or_span + if isinstance(doclike, Doc): + doc = doclike length = len(doc) - elif isinstance(doc_or_span, Span): - doc = doc_or_span.doc - length = doc_or_span.end - doc_or_span.start + elif isinstance(doclike, Span): + doc = doclike.doc + length = doclike.end - doclike.start else: - raise ValueError(Errors.E195.format(good="Doc or Span", got=type(doc_or_span).__name__)) + raise ValueError(Errors.E195.format(good="Doc or Span", got=type(doclike).__name__)) if len(set([LEMMA, POS, TAG]) & self._seen_attrs) > 0 \ and not doc.is_tagged: raise ValueError(Errors.E155.format()) if DEP in self._seen_attrs and not doc.is_parsed: raise ValueError(Errors.E156.format()) - matches = find_matches(&self.patterns[0], self.patterns.size(), doc_or_span, length, + matches = find_matches(&self.patterns[0], self.patterns.size(), doclike, length, extensions=self._extensions, predicates=self._extra_predicates) for i, (key, start, end) in enumerate(matches): on_match = self._callbacks.get(key, None) @@ -257,7 +257,7 @@ def unpickle_matcher(vocab, patterns, callbacks): return matcher -cdef find_matches(TokenPatternC** patterns, int n, object doc_or_span, int length, extensions=None, predicates=tuple()): +cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, extensions=None, predicates=tuple()): """Find matches in a doc, with a compiled array of patterns. Matches are returned as a list of (id, start, end) tuples. @@ -286,7 +286,7 @@ cdef find_matches(TokenPatternC** patterns, int n, object doc_or_span, int lengt else: nr_extra_attr = 0 extra_attr_values = mem.alloc(length, sizeof(attr_t)) - for i, token in enumerate(doc_or_span): + for i, token in enumerate(doclike): for name, index in extensions.items(): value = token._.get(name) if isinstance(value, basestring): @@ -298,7 +298,7 @@ cdef find_matches(TokenPatternC** patterns, int n, object doc_or_span, int lengt for j in range(n): states.push_back(PatternStateC(patterns[j], i, 0)) transition_states(states, matches, predicate_cache, - doc_or_span[i], extra_attr_values, predicates) + doclike[i], extra_attr_values, predicates) extra_attr_values += nr_extra_attr predicate_cache += len(predicates) # Handle matches that end in 0-width patterns