From b58bace84b56cc3dcc4f78e0b9dae15effdcd51e Mon Sep 17 00:00:00 2001 From: svlandeg Date: Mon, 24 Jun 2019 10:55:04 +0200 Subject: [PATCH] small fixes --- bin/ud/conll17_ud_eval.py | 4 ++-- bin/wiki_entity_linking/kb_creator.py | 18 +++++++++--------- .../training_set_creator.py | 2 +- examples/pipeline/wikidata_entity_linking.py | 4 ++-- spacy/pipeline/pipes.pyx | 15 +++++++-------- 5 files changed, 21 insertions(+), 22 deletions(-) diff --git a/bin/ud/conll17_ud_eval.py b/bin/ud/conll17_ud_eval.py index 78a976a6d..88acfabac 100644 --- a/bin/ud/conll17_ud_eval.py +++ b/bin/ud/conll17_ud_eval.py @@ -292,8 +292,8 @@ def evaluate(gold_ud, system_ud, deprel_weights=None, check_parse=True): def spans_score(gold_spans, system_spans): correct, gi, si = 0, 0, 0 - undersegmented = list() - oversegmented = list() + undersegmented = [] + oversegmented = [] combo = 0 previous_end_si_earlier = False previous_end_gi_earlier = False diff --git a/bin/wiki_entity_linking/kb_creator.py b/bin/wiki_entity_linking/kb_creator.py index bd82e5b4e..6ee139174 100644 --- a/bin/wiki_entity_linking/kb_creator.py +++ b/bin/wiki_entity_linking/kb_creator.py @@ -42,9 +42,9 @@ def create_kb(nlp, max_entities_per_alias, min_entity_freq, min_occ, # filter the entities for in the KB by frequency, because there's just too much data (8M entities) otherwise filtered_title_to_id = dict() - entity_list = list() - description_list = list() - frequency_list = list() + entity_list = [] + description_list = [] + frequency_list = [] for title, entity in title_to_id.items(): freq = entity_frequencies.get(title, 0) desc = id_to_descr.get(entity, None) @@ -131,8 +131,8 @@ def _add_aliases(kb, title_to_id, max_entities_per_alias, min_occ, prior_prob_in line = prior_file.readline() previous_alias = None total_count = 0 - counts = list() - entities = list() + counts = [] + entities = [] while line: splits = line.replace('\n', "").split(sep='|') new_alias = splits[0] @@ -142,8 +142,8 @@ def _add_aliases(kb, title_to_id, max_entities_per_alias, min_occ, prior_prob_in if new_alias != previous_alias and previous_alias: # done reading the previous alias --> output if len(entities) > 0: - selected_entities = list() - prior_probs = list() + selected_entities = [] + prior_probs = [] for ent_count, ent_string in zip(counts, entities): if ent_string in wp_titles: wd_id = title_to_id[ent_string] @@ -157,8 +157,8 @@ def _add_aliases(kb, title_to_id, max_entities_per_alias, min_occ, prior_prob_in except ValueError as e: print(e) total_count = 0 - counts = list() - entities = list() + counts = [] + entities = [] total_count += count diff --git a/bin/wiki_entity_linking/training_set_creator.py b/bin/wiki_entity_linking/training_set_creator.py index d9600048c..51105ce09 100644 --- a/bin/wiki_entity_linking/training_set_creator.py +++ b/bin/wiki_entity_linking/training_set_creator.py @@ -343,7 +343,7 @@ def read_training(nlp, training_dir, dev, limit): # currently feeding the gold data one entity per sentence at a time gold_start = int(start) - found_ent.sent.start_char gold_end = int(end) - found_ent.sent.start_char - gold_entities = list() + gold_entities = [] gold_entities.append((gold_start, gold_end, wp_title)) gold = GoldParse(doc=sent, links=gold_entities) data.append((sent, gold)) diff --git a/examples/pipeline/wikidata_entity_linking.py b/examples/pipeline/wikidata_entity_linking.py index aa1c00996..2759da135 100644 --- a/examples/pipeline/wikidata_entity_linking.py +++ b/examples/pipeline/wikidata_entity_linking.py @@ -147,7 +147,7 @@ def run_pipeline(): if train_pipe: print("STEP 6: training Entity Linking pipe", datetime.datetime.now()) # define the size (nr of entities) of training and dev set - train_limit = 10000 + train_limit = 5000 dev_limit = 5000 train_data = training_set_creator.read_training(nlp=nlp_2, @@ -332,7 +332,7 @@ def _measure_baselines(data, kb): best_candidate = "" random_candidate = "" if candidates: - scores = list() + scores = [] for c in candidates: scores.append(c.prior_prob) diff --git a/spacy/pipeline/pipes.pyx b/spacy/pipeline/pipes.pyx index 2f7856fe0..2eaedd73a 100644 --- a/spacy/pipeline/pipes.pyx +++ b/spacy/pipeline/pipes.pyx @@ -1131,8 +1131,8 @@ class EntityLinker(Pipe): docs = [docs] golds = [golds] - context_docs = list() - entity_encodings = list() + context_docs = [] + entity_encodings = [] for doc, gold in zip(docs, golds): for entity in gold.links: @@ -1198,8 +1198,8 @@ class EntityLinker(Pipe): self.require_model() self.require_kb() - final_entities = list() - final_kb_ids = list() + final_entities = [] + final_kb_ids = [] if not docs: return final_entities, final_kb_ids @@ -1214,7 +1214,7 @@ class EntityLinker(Pipe): for ent in doc.ents: candidates = self.kb.get_candidates(ent.text) if candidates: - scores = list() + scores = [] for c in candidates: prior_prob = c.prior_prob * self.prior_weight kb_id = c.entity_ @@ -1259,11 +1259,10 @@ class EntityLinker(Pipe): return self def rehearse(self, docs, sgd=None, losses=None, **config): - # TODO - pass + raise NotImplementedError def add_label(self, label): - pass + raise NotImplementedError class Sentencizer(object):