mirror of https://github.com/explosion/spaCy.git
* Avoid shipping the spacy.munge package
This commit is contained in:
parent
7d265a9c62
commit
00a0dfcb59
2
setup.py
2
setup.py
|
@ -103,7 +103,7 @@ def cython_setup(mod_names, language, includes, compile_args, link_args):
|
||||||
def run_setup(exts):
|
def run_setup(exts):
|
||||||
setup(
|
setup(
|
||||||
name='spacy',
|
name='spacy',
|
||||||
packages=['spacy', 'spacy.en', 'spacy.syntax', "spacy.munge"],
|
packages=['spacy', 'spacy.en', 'spacy.syntax'],
|
||||||
description="Industrial-strength NLP",
|
description="Industrial-strength NLP",
|
||||||
author='Matthew Honnibal',
|
author='Matthew Honnibal',
|
||||||
author_email='honnibal@gmail.com',
|
author_email='honnibal@gmail.com',
|
||||||
|
|
|
@ -7,10 +7,36 @@ import re
|
||||||
import os
|
import os
|
||||||
from os import path
|
from os import path
|
||||||
|
|
||||||
from spacy.munge.read_ner import tags_to_entities
|
|
||||||
from libc.string cimport memset
|
from libc.string cimport memset
|
||||||
|
|
||||||
|
|
||||||
|
def tags_to_entities(tags):
    """Convert a sequence of per-token entity tags into entity spans.

    Tags are dispatched on their first character:

    * ``O...`` -- outside any entity; any entity currently open is dropped.
    * ``-``    -- ignored (exact match only).
    * ``B...`` -- opens an entity at this index.
    * ``I...`` -- continues an entity; one must already be open.
    * ``L...`` -- closes the open entity and records it.
    * ``U...`` -- records a single-token entity.

    The label is everything after the first two characters of the tag
    (e.g. ``'PER'`` for ``'U-PER'``).

    Args:
        tags: Sequence of tag strings, one per token.

    Returns:
        A list of ``(label, start, end)`` tuples, in order of appearance,
        where ``start`` and ``end`` are inclusive token indices.

    Raises:
        AssertionError: If an ``I`` tag appears while no entity is open.
            The exception argument is the slice of tags preceding it.
        Exception: If a tag matches none of the recognised forms. The
            exception argument is the offending tag.
    """
    entities = []
    start = None
    for i, tag in enumerate(tags):
        if tag.startswith('O'):
            # TODO: We shouldn't be getting these malformed inputs. Fix this.
            # An 'O' while an entity is still open silently discards it.
            start = None
        elif tag == '-':
            pass
        elif tag.startswith('I'):
            # Raised explicitly rather than via ``assert`` so the check is
            # not stripped when running with optimisations (-O) enabled.
            if start is None:
                raise AssertionError(tags[:i])
        elif tag.startswith('U'):
            entities.append((tag[2:], i, i))
        elif tag.startswith('B'):
            start = i
        elif tag.startswith('L'):
            # NOTE: if no 'B' preceded this tag, ``start`` is None and is
            # recorded as-is; malformed input is tolerated here.
            entities.append((tag[2:], start, i))
            start = None
        else:
            raise Exception(tag)
    return entities
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def align(cand_words, gold_words):
|
def align(cand_words, gold_words):
|
||||||
cost, edit_path = _min_edit_path(cand_words, gold_words)
|
cost, edit_path = _min_edit_path(cand_words, gold_words)
|
||||||
alignment = []
|
alignment = []
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
|
|
||||||
from spacy.munge.read_ner import tags_to_entities
|
from .gold import tags_to_entities
|
||||||
|
|
||||||
|
|
||||||
class PRFScore(object):
|
class PRFScore(object):
|
||||||
|
|
Loading…
Reference in New Issue