From 6255e386954778ea745aef4cf9ae3c3051a643bd Mon Sep 17 00:00:00 2001
From: Sergiu Nisioi
Date: Thu, 28 Sep 2023 16:06:50 +0700
Subject: [PATCH] Adding rolegal model to the spaCy universe (#13017)

* adding rolegal model to the spaCy universe

* Fix formatting

* Use raw URL

* update image url and example

* fix pip and update url to raw

* okay, let's add thumb instead of image :octopus:

* Update website/meta/universe.json

---------

Co-authored-by: Adriane Boyd
---
 website/meta/universe.json | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/website/meta/universe.json b/website/meta/universe.json
index 46de8121c..b2868c084 100644
--- a/website/meta/universe.json
+++ b/website/meta/universe.json
@@ -4469,6 +4469,37 @@
             },
             "category": ["pipeline", "standalone"],
             "tags": ["spans", "rules", "ner"]
+        },
+        {
+            "id": "rolegal",
+            "title": "A spaCy Package for Romanian Legal Document Processing",
+            "thumb": "https://raw.githubusercontent.com/senisioi/rolegal/main/img/paper200x200.jpeg",
+            "slogan": "rolegal: a spaCy Package for Noisy Romanian Legal Document Processing",
+            "description": "This is a spaCy language model for Romanian legal domain trained with floret 4-gram to 5-gram embeddings and `LEGAL` entity recognition. Useful for processing OCR-resulted noisy legal documents.",
+            "github": "senisioi/rolegal",
+            "pip": "ro-legal-fl",
+            "tags": ["legal", "floret", "ner", "romanian"],
+            "code_example": [
+                "import spacy",
+                "nlp = spacy.load(\"ro_legal_fl\")",
+                "",
+                "doc = nlp(\"Titlul III din LEGEA nr. 255 din 19 iulie 2013, publicată în MONITORUL OFICIAL\")",
+                "# legal entity identification",
+                "for entity in doc.ents:",
+                " print('entity: ', entity, '; entity type: ', entity.label_)",
+                "",
+                "# floret n-gram embeddings robust to typos",
+                "print(nlp('achizit1e public@').similarity(nlp('achiziții publice')))",
+                "# 0.7393895566928835",
+                "print(nlp('achizitii publice').similarity(nlp('achiziții publice')))",
+                "# 0.8996480808279399"
+            ],
+            "author": "Sergiu Nisioi",
+            "author_links": {
+                "github": "senisioi",
+                "website": "https://nlp.unibuc.ro/people/snisioi.html"
+            },
+            "category": ["pipeline", "training", "models"]
         }
     ],