mirror of https://github.com/explosion/spaCy.git
Add Span.as_doc method
This commit is contained in:
parent
20309fb9db
commit
7ae67ec6a1
|
@ -111,6 +111,30 @@ cdef class Span:
|
||||||
for i in range(self.start, self.end):
|
for i in range(self.start, self.end):
|
||||||
yield self.doc[i]
|
yield self.doc[i]
|
||||||
|
|
||||||
|
def as_doc(self):
    """Build a Doc that exposes this Span's tokens without copying them.

    The returned Doc's token pointer is aimed into the parent Doc's token
    array at the span's first token, and it holds the parent's `mem`
    object. Parse/tag flags, the noun chunks iterator and the user hooks
    are taken from the parent Doc; the vector and vector norm are taken
    from the span. Mostly useful for C-typed interfaces.
    """
    cdef Doc view = Doc(self.doc.vocab)
    # Window onto the parent's token array: same storage, offset by start.
    view.length = self.end - self.start
    view.c = &self.doc.c[self.start]
    # NOTE(review): presumably holding the parent's `mem` keeps the shared
    # token storage alive for as long as the view exists -- confirm.
    view.mem = self.doc.mem
    # State mirrored from the parent document.
    view.is_parsed = self.doc.is_parsed
    view.is_tagged = self.doc.is_tagged
    view.noun_chunks_iterator = self.doc.noun_chunks_iterator
    view.user_hooks = self.doc.user_hooks
    view.user_span_hooks = self.doc.user_span_hooks
    view.user_token_hooks = self.doc.user_token_hooks
    # Vector data comes from the span itself, not from the parent Doc.
    view.vector = self.vector
    view.vector_norm = self.vector_norm
    # Parent categories keyed by a 3-item (start_char, end_char, label)
    # sequence are carried over under `label` when their character
    # offsets coincide exactly with this span; other keys are ignored.
    for key, value in self.doc.cats.items():
        if not hasattr(key, '__len__') or len(key) != 3:
            continue
        cat_start, cat_end, cat_label = key
        if cat_start == self.start_char and cat_end == self.end_char:
            view.cats[cat_label] = value
    return view
|
||||||
|
|
||||||
def merge(self, *args, **attributes):
|
def merge(self, *args, **attributes):
|
||||||
"""Retokenize the document, such that the span is merged into a single
|
"""Retokenize the document, such that the span is merged into a single
|
||||||
token.
|
token.
|
||||||
|
|
Loading…
Reference in New Issue