* Avoid shipping the spacy.munge package

This commit is contained in:
Matthew Honnibal 2015-06-08 00:54:13 +02:00
parent 7d265a9c62
commit 00a0dfcb59
3 changed files with 29 additions and 3 deletions

View File

@ -103,7 +103,7 @@ def cython_setup(mod_names, language, includes, compile_args, link_args):
def run_setup(exts):
setup(
name='spacy',
packages=['spacy', 'spacy.en', 'spacy.syntax', "spacy.munge"],
packages=['spacy', 'spacy.en', 'spacy.syntax'],
description="Industrial-strength NLP",
author='Matthew Honnibal',
author_email='honnibal@gmail.com',

View File

@ -7,10 +7,36 @@ import re
import os
from os import path
from spacy.munge.read_ner import tags_to_entities
from libc.string cimport memset
def tags_to_entities(tags):
    """Convert a sequence of B/I/L/U/O-prefixed tags into entity spans.

    Each tag is a string whose first character selects its role:

    * ``O...`` -- outside any entity; also discards a pending ``B`` start.
    * ``-``    -- ignored entirely (any pending start is left untouched).
    * ``B...`` -- begins a multi-token entity at this index.
    * ``I...`` -- continues an entity; a ``B`` must already be pending.
    * ``L...`` -- ends the pending entity at this index.
    * ``U...`` -- a single-token entity.

    The label is everything after the two-character prefix (``tag[2:]``).

    Args:
        tags: Sequence of tag strings, e.g.
            ``['O', 'B-PER', 'I-PER', 'L-PER', 'U-LOC']``.

    Returns:
        A list of ``(label, start, end)`` tuples, where ``start`` and ``end``
        are inclusive indices into ``tags``. Note that an ``L`` tag with no
        pending ``B`` produces a tuple whose ``start`` is ``None``.

    Raises:
        AssertionError: If an ``I`` tag occurs with no pending ``B``. The
            argument is the prefix of ``tags`` preceding the offending tag.
        Exception: If a tag starts with none of the recognised prefixes. The
            argument is the offending tag.
    """
    entities = []
    start = None
    for i, tag in enumerate(tags):
        if tag.startswith('O'):
            # TODO: We shouldn't be getting these malformed inputs. Fix this.
            # An 'O' after an unterminated 'B' silently drops the open span.
            start = None
            continue
        elif tag == '-':
            continue
        elif tag.startswith('I'):
            # Raise explicitly rather than via an `assert` statement, so the
            # check is not stripped when Python runs with -O. The exception
            # type and payload match what the assert statement produced.
            if start is None:
                raise AssertionError(tags[:i])
            continue
        if tag.startswith('U'):
            entities.append((tag[2:], i, i))
        elif tag.startswith('B'):
            start = i
        elif tag.startswith('L'):
            entities.append((tag[2:], start, i))
            start = None
        else:
            raise Exception(tag)
    return entities
def align(cand_words, gold_words):
cost, edit_path = _min_edit_path(cand_words, gold_words)
alignment = []

View File

@ -1,6 +1,6 @@
from __future__ import division
from spacy.munge.read_ner import tags_to_entities
from .gold import tags_to_entities
class PRFScore(object):