mirror of https://github.com/explosion/spaCy.git
Added string manipulation for spans
This commit is contained in:
parent
a16aff17aa
commit
426d17167f
|
@ -105,3 +105,8 @@ website/package.json
|
||||||
website/announcement.jade
|
website/announcement.jade
|
||||||
website/www/
|
website/www/
|
||||||
website/.gitignore
|
website/.gitignore
|
||||||
|
|
||||||
|
# Personal (Eric)
|
||||||
|
venv
|
||||||
|
venv/*
|
||||||
|
.gitignore
|
||||||
|
|
|
@ -31,6 +31,13 @@ def test_spans_root(doc):
|
||||||
assert span.root.text == 'sentence'
|
assert span.root.text == 'sentence'
|
||||||
assert span.root.head.text == 'is'
|
assert span.root.head.text == 'is'
|
||||||
|
|
||||||
|
def test_spans_string_fn(doc):
    """Check the string helpers on a four-token span: text, mapStr, upper_ and lower_."""
    span = doc[0:4]
    assert len(span) == 4
    assert span.text == 'This is a sentence'
    # mapStr passes each token's `string` as the first argument, followed by
    # the extra positional arguments ("y" and "z" here), and concatenates the results.
    # NOTE(review): the expected value has a space between 'sentence' and 'yz',
    # which only holds if the last token's `string` ends in whitespace (mapStr
    # strips just the outer whitespace of the joined result) -- confirm against
    # the `doc` fixture.
    assert span.mapStr((lambda x, i, arg="_": x + i + arg), "y", "z") == 'This yzis yza yzsentence yz'
    assert span.upper_ == 'THIS IS A SENTENCE'
    assert span.lower_ == 'this is a sentence'
|
||||||
|
|
||||||
def test_spans_root2(en_tokenizer):
|
def test_spans_root2(en_tokenizer):
|
||||||
text = "through North and South Carolina"
|
text = "through North and South Carolina"
|
||||||
|
|
|
@ -118,6 +118,17 @@ cdef class Span:
|
||||||
return 0.0
|
return 0.0
|
||||||
return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
|
return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||||
|
|
||||||
|
def mapStr(self, fn, *argv, **kargs):
    """Apply a function to the string of each token in this span and join the results.

    Arguments:
        fn (function): Called once per token. Its first argument is always
            the token's `string`; any further arguments are the *argv and
            **kargs passed to this mapStr() call.
        *argv (unpacked tuple): Positional arguments forwarded to fn.
        **kargs (unpacked dict): Keyword arguments forwarded to fn.

    Returns:
        The values returned by fn, concatenated in token order with no
        separator, with leading and trailing whitespace stripped from the
        joined result.
    """
    return ''.join([fn(t.string, *argv, **kargs) for t in self]).strip()
|
||||||
|
|
||||||
cpdef int _recalculate_indices(self) except -1:
|
cpdef int _recalculate_indices(self) except -1:
|
||||||
if self.end > self.doc.length \
|
if self.end > self.doc.length \
|
||||||
or self.doc.c[self.start].idx != self.start_char \
|
or self.doc.c[self.start].idx != self.start_char \
|
||||||
|
@ -365,6 +376,14 @@ cdef class Span:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return ' '.join([t.lemma_ for t in self]).strip()
|
return ' '.join([t.lemma_ for t in self]).strip()
|
||||||
|
|
||||||
|
property upper_:
    """The upper-cased `string` of every token in the span, concatenated and
    stripped of leading and trailing whitespace."""
    def __get__(self):
        return ''.join([t.string.upper() for t in self]).strip()
|
||||||
|
|
||||||
|
property lower_:
    """The lower-cased `string` of every token in the span, concatenated and
    stripped of leading and trailing whitespace."""
    def __get__(self):
        return ''.join([t.string.lower() for t in self]).strip()
|
||||||
|
|
||||||
property string:
|
property string:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return ''.join([t.string for t in self])
|
return ''.join([t.string for t in self])
|
||||||
|
|
Loading…
Reference in New Issue