From f6a82da9074c68854e4577fce8aabe7d22f0c69a Mon Sep 17 00:00:00 2001 From: atomobianco Date: Sun, 26 Nov 2017 23:55:25 +0100 Subject: [PATCH] Use character offsets instead of token indices for the mock entity The 'start' and 'end' of the mock 'MATCH' entity are now computed from character offsets (`start_char`/`end_char`) rather than token indices, because `displacy.render` apparently expects character offsets into the text --- website/usage/_linguistic-features/_rule-based-matching.jade | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/website/usage/_linguistic-features/_rule-based-matching.jade b/website/usage/_linguistic-features/_rule-based-matching.jade index aa81106e6..d2bead022 100644 --- a/website/usage/_linguistic-features/_rule-based-matching.jade +++ b/website/usage/_linguistic-features/_rule-based-matching.jade @@ -354,7 +354,8 @@ p # append mock entity for match in displaCy style to matched_sents # get the match span by ofsetting the start and end of the span with the # start and end of the sentence in the doc - match_ents = [{'start': span.start-sent.start, 'end': span.end-sent.start, + match_ents = [{'start': span.start_char - sent.start_char, + 'end': span.end_char - sent.start_char, 'label': 'MATCH'}] matched_sents.append({'text': sent.text, 'ents': match_ents })