mirror of https://github.com/explosion/spaCy.git
Added string manipulation for spans
This commit is contained in:
parent
a16aff17aa
commit
426d17167f
|
@ -105,3 +105,8 @@ website/package.json
|
|||
website/announcement.jade
|
||||
website/www/
|
||||
website/.gitignore
|
||||
|
||||
# Personal (Eric)
|
||||
venv
|
||||
venv/*
|
||||
.gitignore
|
||||
|
|
|
@ -31,6 +31,13 @@ def test_spans_root(doc):
|
|||
assert span.root.text == 'sentence'
|
||||
assert span.root.head.text == 'is'
|
||||
|
||||
def test_spans_string_fn(doc):
    '''Exercise Span.mapStr and the upper_/lower_ properties on doc[0:4].'''
    def append_both(token_str, first, arg="_"):
        # Mirrors the call shape mapStr uses: the token string first, then
        # the forwarded extra arguments.
        return token_str + first + arg

    span = doc[0:4]
    assert len(span) == 4
    assert span.text == 'This is a sentence'

    mapped = span.mapStr(append_both, "y", "z")
    assert mapped == 'This yzis yza yzsentence yz'

    assert span.upper_ == 'THIS IS A SENTENCE'
    assert span.lower_ == 'this is a sentence'
|
||||
|
||||
def test_spans_root2(en_tokenizer):
|
||||
text = "through North and South Carolina"
|
||||
|
|
|
@ -118,6 +118,17 @@ cdef class Span:
|
|||
return 0.0
|
||||
return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||
|
||||
def mapStr(self, fn, *argv, **kargs):
    '''Apply a function to the string of every token in this span and join the results.

    Arguments:
        fn (function): Called once per token. Its first argument is always the
            token's `string`; the remaining arguments are whatever was passed
            to this mapStr() call through *argv and **kargs.
        *argv (unpacked tuple): Extra positional arguments forwarded to fn.
        **kargs (unpacked dict): Extra keyword arguments forwarded to fn.
    Returns:
        The values returned by fn, concatenated in token order, with leading
        and trailing whitespace stripped from the final result.
    '''
    joined = ''.join(fn(token.string, *argv, **kargs) for token in self)
    return joined.strip()
|
||||
|
||||
cpdef int _recalculate_indices(self) except -1:
|
||||
if self.end > self.doc.length \
|
||||
or self.doc.c[self.start].idx != self.start_char \
|
||||
|
@ -365,6 +376,14 @@ cdef class Span:
|
|||
def __get__(self):
|
||||
return ' '.join([t.lemma_ for t in self]).strip()
|
||||
|
||||
property upper_:
    def __get__(self):
        # Upper-case each token's string (as exposed by `t.string`), glue the
        # pieces together, then trim whitespace from both ends of the result.
        pieces = [t.string.upper() for t in self]
        joined = ''.join(pieces)
        return joined.strip()
|
||||
|
||||
property lower_:
    def __get__(self):
        # Lower-case each token's string (as exposed by `t.string`), glue the
        # pieces together, then trim whitespace from both ends of the result.
        pieces = [t.string.lower() for t in self]
        joined = ''.join(pieces)
        return joined.strip()
|
||||
|
||||
property string:
    def __get__(self):
        # Concatenate every token's `string` in order; unlike upper_/lower_
        # the result is returned as-is, without stripping.
        pieces = [t.string for t in self]
        return ''.join(pieces)
|
||||
|
|
Loading…
Reference in New Issue