mirror of https://github.com/explosion/spaCy.git
Merge remote-tracking branch 'upstream/master' into chore/update-develop-from-master-v3.5
This commit is contained in:
commit
6c380d4fc6
|
@ -243,6 +243,27 @@ pipelines.
|
|||
> python -m spacy project run test . --vars.foo bar
|
||||
> ```
|
||||
|
||||
> #### Tip: Environment Variables
|
||||
>
|
||||
> Commands in a project file are not executed in a shell, so they don't have
|
||||
> direct access to environment variables. But you can insert environment
|
||||
> variables using the `env` dictionary to make values available for
|
||||
> interpolation, just like values in `vars`. Here's an example `env` dict that
|
||||
> makes `$PATH` available as `ENV_PATH`:
|
||||
>
|
||||
> ```yaml
|
||||
> env:
|
||||
>   ENV_PATH: PATH
|
||||
> ```
|
||||
>
|
||||
> This can be used in a project command like so:
|
||||
>
|
||||
> ```yaml
|
||||
> - name: "echo-path"
|
||||
>   script:
|
||||
>     - "echo ${env.ENV_PATH}"
|
||||
> ```
|
||||
|
||||
| Section | Description |
|
||||
| --------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
|
||||
| `title` | An optional project title used in `--help` message and [auto-generated docs](#custom-docs). |
|
||||
|
|
|
@ -1,5 +1,46 @@
|
|||
{
|
||||
"resources": [
|
||||
{
|
||||
"id": "spacy-cleaner",
|
||||
"title": "spacy-cleaner",
|
||||
"slogan": "Easily clean text with spaCy!",
|
||||
"description": "**spacy-cleaner** utilises spaCy `Language` models to replace, remove, and \n mutate spaCy tokens. Cleaning actions available are:\n\n* Remove/replace stopwords.\n* Remove/replace punctuation.\n* Remove/replace numbers.\n* Remove/replace emails.\n* Remove/replace URLs.\n* Perform lemmatisation.\n\nSee our [docs](https://ce11an.github.io/spacy-cleaner/) for more information.",
|
||||
"github": "Ce11an/spacy-cleaner",
|
||||
"pip": "spacy-cleaner",
|
||||
"code_example": [
|
||||
"import spacy",
|
||||
"import spacy_cleaner",
|
||||
"from spacy_cleaner.processing import removers, replacers, mutators",
|
||||
"",
|
||||
"model = spacy.load(\"en_core_web_sm\")",
|
||||
"pipeline = spacy_cleaner.Pipeline(",
|
||||
" model,",
|
||||
" removers.remove_stopword_token,",
|
||||
" replacers.replace_punctuation_token,",
|
||||
" mutators.mutate_lemma_token,",
|
||||
")",
|
||||
"",
|
||||
"texts = [\"Hello, my name is Cellan! I love to swim!\"]",
|
||||
"",
|
||||
"pipeline.clean(texts)",
|
||||
"# ['hello _IS_PUNCT_ Cellan _IS_PUNCT_ love swim _IS_PUNCT_']"
|
||||
],
|
||||
"code_language": "python",
|
||||
"url": "https://ce11an.github.io/spacy-cleaner/",
|
||||
"image": "https://raw.githubusercontent.com/Ce11an/spacy-cleaner/main/docs/assets/images/spacemen.png",
|
||||
"author": "Cellan Hall",
|
||||
"author_links": {
|
||||
"twitter": "Ce11an",
|
||||
"github": "Ce11an",
|
||||
"website": "https://www.linkedin.com/in/cellan-hall/"
|
||||
},
|
||||
"category": [
|
||||
"extension"
|
||||
],
|
||||
"tags": [
|
||||
"text-processing"
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": "Zshot",
|
||||
"title": "Zshot",
|
||||
|
|
Loading…
Reference in New Issue