mirror of https://github.com/explosion/spaCy.git
Add logistic regression sentiment analysis
This commit is contained in:
parent
dd9bfa8d33
commit
422e383d8f
21
README.md
21
README.md
|
@ -227,6 +227,9 @@ nlp = en_core_web_sm.load()
|
|||
doc = nlp("This is a sentence.")
|
||||
```
|
||||
|
||||
📖 **For more info and examples, check out the
|
||||
[models documentation](https://spacy.io/docs/usage/models).**
|
||||
|
||||
## 📊 Custom Sentiment Analysis with Logistic Regression (spaCy-based)
|
||||
This repository also includes a custom **Logistic Regression** sentiment analysis model built using spaCy, without using scikit-learn. The model classifies text as positive or negative based on a dataset such as IMDb reviews.
|
||||
|
||||
|
@ -234,24 +237,28 @@ This repository also includes a custom **Logistic Regression** sentiment analysi
|
|||
To run the logistic regression model:
|
||||
```bash
|
||||
python pure_Logistic.py
|
||||
```

This script processes the dataset using spaCy, trains the logistic regression model, and outputs the results.
|
||||
### Testing and Evaluation
|
||||
To run tests and evaluate the model's performance, use:
|
||||
|
||||
```bash
|
||||
python test_pure_logistic.py
|
||||
```
|
||||
|
||||
To use the model in your own code, import the `PureLogisticTextCategorizer` class for evaluation:

```python
from pure_Logistic import PureLogisticTextCategorizer

# Initialize and use the classifier
categorizer = PureLogisticTextCategorizer()
```
|
||||
This enables you to evaluate the logistic regression classifier on your test cases.
|
||||
|
||||
|
||||
📖 **For more info and examples, check out the
|
||||
[models documentation](https://spacy.io/docs/usage/models).**
|
||||
|
||||
## ⚒ Compile from source
|
||||
|
||||
The other way to install spaCy is to clone its
|
||||
|
|
|
@ -0,0 +1,138 @@
|
|||
import spacy
|
||||
from spacy.training import Example
|
||||
from spacy.tokens import Doc
|
||||
from typing import Dict, List
|
||||
|
||||
# Import the custom logistic classifier
|
||||
from pure_Logistic import make_pure_logistic_textcat
|
||||
|
||||
|
||||
# Register the custom 'textcat' extension on Doc so the pipeline component
# has a slot to store its per-document category scores.
# NOTE(review): the mutable default {} is shared across all Docs that never
# get an explicit assignment — safe only if the component always assigns a
# fresh dict to doc._.textcat rather than mutating the default in place.
if not Doc.has_extension("textcat"):
    Doc.set_extension("textcat", default={})
|
||||
|
||||
|
||||
# Sample training data.
# Each item is a (text, annotations) pair in spaCy's textcat format:
# {"cats": {label: score}} with mutually exclusive "positive"/"negative" labels.
TRAIN_DATA = [
    ("This product is amazing! I love it.", {"cats": {"positive": 1.0, "negative": 0.0}}),
    ("The service was excellent and staff very friendly.", {"cats": {"positive": 1.0, "negative": 0.0}}),
    ("I'm really impressed with the quality.", {"cats": {"positive": 1.0, "negative": 0.0}}),
    ("Best purchase I've made in years!", {"cats": {"positive": 1.0, "negative": 0.0}}),
    ("The features work exactly as advertised.", {"cats": {"positive": 1.0, "negative": 0.0}}),
    ("This is terrible, complete waste of money.", {"cats": {"positive": 0.0, "negative": 1.0}}),
    ("Poor customer service, very disappointing.", {"cats": {"positive": 0.0, "negative": 1.0}}),
    ("The product broke after one week.", {"cats": {"positive": 0.0, "negative": 1.0}}),
    ("Would not recommend to anyone.", {"cats": {"positive": 0.0, "negative": 1.0}}),
    ("Save your money and avoid this.", {"cats": {"positive": 0.0, "negative": 1.0}})
]
|
||||
|
||||
# Held-out evaluation data in the same (text, {"cats": ...}) format as TRAIN_DATA.
TEST_DATA = [
    ("Great product, highly recommend!", {"cats": {"positive": 1.0, "negative": 0.0}}),
    ("Not worth the price at all.", {"cats": {"positive": 0.0, "negative": 1.0}}),
    ("Everything works perfectly.", {"cats": {"positive": 1.0, "negative": 0.0}}),
    ("Disappointed with the results.", {"cats": {"positive": 0.0, "negative": 1.0}})
]
|
||||
|
||||
def calculate_metrics(true_positives: int, true_negatives: int, false_positives: int, false_negatives: int) -> Dict[str, float]:
    """Compute accuracy, precision, recall and F1 from confusion-matrix counts.

    Every ratio falls back to 0 when its denominator is zero, so the function
    is safe to call with an empty confusion matrix.
    """
    def _safe_ratio(numerator, denominator):
        # Guard against division by zero: an empty denominator yields 0.
        return numerator / denominator if denominator > 0 else 0

    total = true_positives + true_negatives + false_positives + false_negatives
    accuracy = _safe_ratio(true_positives + true_negatives, total)
    precision = _safe_ratio(true_positives, true_positives + false_positives)
    recall = _safe_ratio(true_positives, true_positives + false_negatives)
    f1 = _safe_ratio(2 * precision * recall, precision + recall)

    return {
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1": f1,
    }
|
||||
|
||||
def evaluate_model(nlp, test_data):
    """Score *nlp* on (text, annotations) pairs; return (metrics, predictions).

    A score above 0.5 for the "positive" label counts as a positive call,
    both for the gold annotations and for the model's prediction stored in
    ``doc._.textcat``.
    """
    tp = tn = fp = fn = 0
    predictions = []

    for text, annotations in test_data:
        doc = nlp(text)
        gold = annotations["cats"]
        scores = doc._.textcat  # scores written by the custom component

        predicted_positive = float(scores.get("positive", 0.0)) > 0.5
        gold_positive = float(gold.get("positive", 0.0)) > 0.5

        # Tally the confusion-matrix cell this example falls into.
        if gold_positive:
            if predicted_positive:
                tp += 1
            else:
                fn += 1
        else:
            if predicted_positive:
                fp += 1
            else:
                tn += 1

        predictions.append({
            "text": text,
            "true": "positive" if gold_positive else "negative",
            "predicted": "positive" if predicted_positive else "negative",
            "scores": scores,
        })

    return calculate_metrics(tp, tn, fp, fn), predictions
|
||||
|
||||
|
||||
def main():
    """Build the pipeline, train the custom categorizer, and print an evaluation report."""
    try:
        print("Loading spaCy model...")
        nlp = spacy.load("en_core_web_lg")
    except OSError:
        # Model not installed locally: download once, then load again.
        print("Downloading spaCy model...")
        spacy.cli.download("en_core_web_lg")
        nlp = spacy.load("en_core_web_lg")

    print("Adding custom text categorizer...")
    # Hyperparameters forwarded to the pure_logistic_textcat component factory.
    config = {
        "learning_rate": 0.001,
        "max_iterations": 100,
        "batch_size": 1000
    }
    if "pure_logistic_textcat" not in nlp.pipe_names:
        textcat = nlp.add_pipe("pure_logistic_textcat", config=config)
        textcat.labels = {"positive", "negative"}

    print("Preparing training examples...")
    train_examples = []
    for text, annotations in TRAIN_DATA:
        # make_doc tokenizes without running the pipeline; Example pairs the
        # doc with its gold-standard category annotations.
        doc = nlp.make_doc(text)
        example = Example.from_dict(doc, annotations)
        train_examples.append(example)

    print("Training the model...")
    textcat = nlp.get_pipe("pure_logistic_textcat")
    # NOTE(review): a single update() call here — presumably the component
    # iterates internally up to max_iterations; confirm it does not expect an
    # outer epoch loop.
    losses = textcat.update(train_examples)
    print(f"Training losses: {losses}")

    print("\nEvaluating the model...")
    metrics, predictions = evaluate_model(nlp, TEST_DATA)

    print("\nEvaluation Metrics:")
    print(f"Accuracy: {metrics['accuracy']:.3f}")
    print(f"Precision: {metrics['precision']:.3f}")
    print(f"Recall: {metrics['recall']:.3f}")
    print(f"F1 Score: {metrics['f1']:.3f}")

    print("\nDetailed Predictions:")
    for pred in predictions:
        print(f"\nText: {pred['text']}")
        print(f"True label: {pred['true']}")
        print(f"Predicted: {pred['predicted']}")
        print(f"Positive score: {pred['scores']['positive']:.3f}")
        print(f"Negative score: {pred['scores']['negative']:.3f}")


if __name__ == "__main__":
    main()
|
|
@ -0,0 +1,443 @@
|
|||
<#
|
||||
.Synopsis
|
||||
Activate a Python virtual environment for the current PowerShell session.
|
||||
|
||||
.Description
|
||||
Pushes the python executable for a virtual environment to the front of the
|
||||
$Env:PATH environment variable and sets the prompt to signify that you are
|
||||
in a Python virtual environment. Makes use of the command line switches as
|
||||
well as the `pyvenv.cfg` file values present in the virtual environment.
|
||||
|
||||
.Parameter VenvDir
|
||||
Path to the directory that contains the virtual environment to activate. The
|
||||
default value for this is the parent of the directory that the Activate.ps1
|
||||
script is located within.
|
||||
|
||||
.Parameter Prompt
|
||||
The prompt prefix to display when this virtual environment is activated. By
|
||||
default, this prompt is the name of the virtual environment folder (VenvDir)
|
||||
surrounded by parentheses and followed by a single space (ie. '(.venv) ').
|
||||
|
||||
.Example
|
||||
Activate.ps1
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Verbose
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and shows extra information about the activation as it executes.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
|
||||
Activates the Python virtual environment located in the specified location.
|
||||
|
||||
.Example
|
||||
Activate.ps1 -Prompt "MyPython"
|
||||
Activates the Python virtual environment that contains the Activate.ps1 script,
|
||||
and prefixes the current prompt with the specified string (surrounded in
|
||||
parentheses) while the virtual environment is active.
|
||||
|
||||
.Notes
|
||||
On Windows, it may be required to enable this Activate.ps1 script by setting the
|
||||
execution policy for the user. You can do this by issuing the following PowerShell
|
||||
command:
|
||||
|
||||
PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser
|
||||
|
||||
For more information on Execution Policies:
|
||||
https://go.microsoft.com/fwlink/?LinkID=135170
|
||||
|
||||
#>
|
||||
# Script parameters. Both are optional; when omitted, VenvDir is derived from
# the script's own location and Prompt from pyvenv.cfg or the folder name.
Param(
    [Parameter(Mandatory = $false)]
    [String]
    $VenvDir,
    [Parameter(Mandatory = $false)]
    [String]
    $Prompt
)
|
||||
|
||||
<# Function declarations --------------------------------------------------- #>
|
||||
|
||||
<#
|
||||
.Synopsis
|
||||
Remove all shell session elements added by the Activate script, including the
|
||||
addition of the virtual environment's Python executable from the beginning of
|
||||
the PATH variable.
|
||||
|
||||
.Parameter NonDestructive
|
||||
If present, do not remove this function from the global namespace for the
|
||||
session.
|
||||
|
||||
#>
|
||||
# Undo everything Activate.ps1 changed: restore prompt, PYTHONHOME and PATH,
# and drop the VIRTUAL_ENV* variables. With -NonDestructive, the deactivate
# function itself is left defined so activation can call it pre-emptively.
function global:deactivate ([switch]$NonDestructive) {
    # Revert to original values

    # The prior prompt:
    if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) {
        Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt
        Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT
    }

    # The prior PYTHONHOME:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME
        Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME
    }

    # The prior PATH:
    if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) {
        Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH
        Remove-Item -Path Env:_OLD_VIRTUAL_PATH
    }

    # Just remove the VIRTUAL_ENV altogether:
    if (Test-Path -Path Env:VIRTUAL_ENV) {
        Remove-Item -Path env:VIRTUAL_ENV
    }

    # Just remove VIRTUAL_ENV_PROMPT altogether.
    if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) {
        Remove-Item -Path env:VIRTUAL_ENV_PROMPT
    }

    # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether:
    if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) {
        Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force
    }

    # Leave deactivate function in the global namespace if requested:
    if (-not $NonDestructive) {
        Remove-Item -Path function:deactivate
    }
}
|
||||
|
||||
<#
|
||||
.Description
|
||||
Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the
|
||||
given folder, and returns them in a map.
|
||||
|
||||
For each line in the pyvenv.cfg file, if that line can be parsed into exactly
|
||||
two strings separated by `=` (with any amount of whitespace surrounding the =)
|
||||
then it is considered a `key = value` line. The left hand string is the key,
|
||||
the right hand is the value.
|
||||
|
||||
If the value starts with a `'` or a `"` then the first and last character is
|
||||
stripped from the value before being captured.
|
||||
|
||||
.Parameter ConfigDir
|
||||
Path to the directory that contains the `pyvenv.cfg` file.
|
||||
#>
|
||||
# Parse `key = value` pairs from pyvenv.cfg in $ConfigDir into a hashtable.
# Returns an empty hashtable when the file is missing or has no parsable lines.
function Get-PyVenvConfig(
    [String]
    $ConfigDir
) {
    Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg"

    # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue).
    $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue

    # An empty map will be returned if no config file is found.
    $pyvenvConfig = @{ }

    if ($pyvenvConfigPath) {

        Write-Verbose "File exists, parse `key = value` lines"
        $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath

        $pyvenvConfigContent | ForEach-Object {
            # Split on the first '=' only, so values may themselves contain '='.
            $keyval = $PSItem -split "\s*=\s*", 2
            if ($keyval[0] -and $keyval[1]) {
                $val = $keyval[1]

                # Remove extraneous quotations around a string value.
                if ("'""".Contains($val.Substring(0, 1))) {
                    $val = $val.Substring(1, $val.Length - 2)
                }

                $pyvenvConfig[$keyval[0]] = $val
                Write-Verbose "Adding Key: '$($keyval[0])'='$val'"
            }
        }
    }
    return $pyvenvConfig
}
|
||||
|
||||
|
||||
<# Begin Activate script --------------------------------------------------- #>

# Determine the containing directory of this script
$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition
$VenvExecDir = Get-Item -Path $VenvExecPath

Write-Verbose "Activation script is located in path: '$VenvExecPath'"
# NOTE(review): the two messages below are missing a closing quote in the
# upstream file; left as-is because this is a signed, generated script.
Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)"
Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)"

# Set values required in priority: CmdLine, ConfigFile, Default
# First, get the location of the virtual environment, it might not be
# VenvExecDir if specified on the command line.
if ($VenvDir) {
    Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values"
}
else {
    Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir."
    $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/")
    Write-Verbose "VenvDir=$VenvDir"
}

# Next, read the `pyvenv.cfg` file to determine any required value such
# as `prompt`.
$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir

# Next, set the prompt from the command line, or the config file, or
# just use the name of the virtual environment folder.
if ($Prompt) {
    Write-Verbose "Prompt specified as argument, using '$Prompt'"
}
else {
    Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value"
    if ($pyvenvCfg -and $pyvenvCfg['prompt']) {
        Write-Verbose "  Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'"
        $Prompt = $pyvenvCfg['prompt'];
    }
    else {
        Write-Verbose "  Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)"
        Write-Verbose "  Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'"
        $Prompt = Split-Path -Path $venvDir -Leaf
    }
}

Write-Verbose "Prompt = '$Prompt'"
Write-Verbose "VenvDir='$VenvDir'"

# Deactivate any currently active virtual environment, but leave the
# deactivate function in place.
deactivate -nondestructive

# Now set the environment variable VIRTUAL_ENV, used by many tools to determine
# that there is an activated venv.
$env:VIRTUAL_ENV = $VenvDir

if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) {

    Write-Verbose "Setting prompt to '$Prompt'"

    # Set the prompt to include the env name
    # Make sure _OLD_VIRTUAL_PROMPT is global
    function global:_OLD_VIRTUAL_PROMPT { "" }
    Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT
    New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt

    function global:prompt {
        Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) "
        _OLD_VIRTUAL_PROMPT
    }
    $env:VIRTUAL_ENV_PROMPT = $Prompt
}

# Clear PYTHONHOME
if (Test-Path -Path Env:PYTHONHOME) {
    Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME
    Remove-Item -Path Env:PYTHONHOME
}

# Add the venv to the PATH
Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH
$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH"
||||
# SIG # Begin signature block
|
||||
# MIIj/wYJKoZIhvcNAQcCoIIj8DCCI+wCAQExDzANBglghkgBZQMEAgEFADB5Bgor
|
||||
# BgEEAYI3AgEEoGswaTA0BgorBgEEAYI3AgEeMCYCAwEAAAQQH8w7YFlLCE63JNLG
|
||||
# KX7zUQIBAAIBAAIBAAIBAAIBADAxMA0GCWCGSAFlAwQCAQUABCBnL745ElCYk8vk
|
||||
# dBtMuQhLeWJ3ZGfzKW4DHCYzAn+QB6CCDi8wggawMIIEmKADAgECAhAIrUCyYNKc
|
||||
# TJ9ezam9k67ZMA0GCSqGSIb3DQEBDAUAMGIxCzAJBgNVBAYTAlVTMRUwEwYDVQQK
|
||||
# EwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xITAfBgNV
|
||||
# BAMTGERpZ2lDZXJ0IFRydXN0ZWQgUm9vdCBHNDAeFw0yMTA0MjkwMDAwMDBaFw0z
|
||||
# NjA0MjgyMzU5NTlaMGkxCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5EaWdpQ2VydCwg
|
||||
# SW5jLjFBMD8GA1UEAxM4RGlnaUNlcnQgVHJ1c3RlZCBHNCBDb2RlIFNpZ25pbmcg
|
||||
# UlNBNDA5NiBTSEEzODQgMjAyMSBDQTEwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAw
|
||||
# ggIKAoICAQDVtC9C0CiteLdd1TlZG7GIQvUzjOs9gZdwxbvEhSYwn6SOaNhc9es0
|
||||
# JAfhS0/TeEP0F9ce2vnS1WcaUk8OoVf8iJnBkcyBAz5NcCRks43iCH00fUyAVxJr
|
||||
# Q5qZ8sU7H/Lvy0daE6ZMswEgJfMQ04uy+wjwiuCdCcBlp/qYgEk1hz1RGeiQIXhF
|
||||
# LqGfLOEYwhrMxe6TSXBCMo/7xuoc82VokaJNTIIRSFJo3hC9FFdd6BgTZcV/sk+F
|
||||
# LEikVoQ11vkunKoAFdE3/hoGlMJ8yOobMubKwvSnowMOdKWvObarYBLj6Na59zHh
|
||||
# 3K3kGKDYwSNHR7OhD26jq22YBoMbt2pnLdK9RBqSEIGPsDsJ18ebMlrC/2pgVItJ
|
||||
# wZPt4bRc4G/rJvmM1bL5OBDm6s6R9b7T+2+TYTRcvJNFKIM2KmYoX7BzzosmJQay
|
||||
# g9Rc9hUZTO1i4F4z8ujo7AqnsAMrkbI2eb73rQgedaZlzLvjSFDzd5Ea/ttQokbI
|
||||
# YViY9XwCFjyDKK05huzUtw1T0PhH5nUwjewwk3YUpltLXXRhTT8SkXbev1jLchAp
|
||||
# QfDVxW0mdmgRQRNYmtwmKwH0iU1Z23jPgUo+QEdfyYFQc4UQIyFZYIpkVMHMIRro
|
||||
# OBl8ZhzNeDhFMJlP/2NPTLuqDQhTQXxYPUez+rbsjDIJAsxsPAxWEQIDAQABo4IB
|
||||
# WTCCAVUwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUaDfg67Y7+F8Rhvv+
|
||||
# YXsIiGX0TkIwHwYDVR0jBBgwFoAU7NfjgtJxXWRM3y5nP+e6mK4cD08wDgYDVR0P
|
||||
# AQH/BAQDAgGGMBMGA1UdJQQMMAoGCCsGAQUFBwMDMHcGCCsGAQUFBwEBBGswaTAk
|
||||
# BggrBgEFBQcwAYYYaHR0cDovL29jc3AuZGlnaWNlcnQuY29tMEEGCCsGAQUFBzAC
|
||||
# hjVodHRwOi8vY2FjZXJ0cy5kaWdpY2VydC5jb20vRGlnaUNlcnRUcnVzdGVkUm9v
|
||||
# dEc0LmNydDBDBgNVHR8EPDA6MDigNqA0hjJodHRwOi8vY3JsMy5kaWdpY2VydC5j
|
||||
# b20vRGlnaUNlcnRUcnVzdGVkUm9vdEc0LmNybDAcBgNVHSAEFTATMAcGBWeBDAED
|
||||
# MAgGBmeBDAEEATANBgkqhkiG9w0BAQwFAAOCAgEAOiNEPY0Idu6PvDqZ01bgAhql
|
||||
# +Eg08yy25nRm95RysQDKr2wwJxMSnpBEn0v9nqN8JtU3vDpdSG2V1T9J9Ce7FoFF
|
||||
# UP2cvbaF4HZ+N3HLIvdaqpDP9ZNq4+sg0dVQeYiaiorBtr2hSBh+3NiAGhEZGM1h
|
||||
# mYFW9snjdufE5BtfQ/g+lP92OT2e1JnPSt0o618moZVYSNUa/tcnP/2Q0XaG3Ryw
|
||||
# YFzzDaju4ImhvTnhOE7abrs2nfvlIVNaw8rpavGiPttDuDPITzgUkpn13c5Ubdld
|
||||
# AhQfQDN8A+KVssIhdXNSy0bYxDQcoqVLjc1vdjcshT8azibpGL6QB7BDf5WIIIJw
|
||||
# 8MzK7/0pNVwfiThV9zeKiwmhywvpMRr/LhlcOXHhvpynCgbWJme3kuZOX956rEnP
|
||||
# LqR0kq3bPKSchh/jwVYbKyP/j7XqiHtwa+aguv06P0WmxOgWkVKLQcBIhEuWTatE
|
||||
# QOON8BUozu3xGFYHKi8QxAwIZDwzj64ojDzLj4gLDb879M4ee47vtevLt/B3E+bn
|
||||
# KD+sEq6lLyJsQfmCXBVmzGwOysWGw/YmMwwHS6DTBwJqakAwSEs0qFEgu60bhQji
|
||||
# WQ1tygVQK+pKHJ6l/aCnHwZ05/LWUpD9r4VIIflXO7ScA+2GRfS0YW6/aOImYIbq
|
||||
# yK+p/pQd52MbOoZWeE4wggd3MIIFX6ADAgECAhAHHxQbizANJfMU6yMM0NHdMA0G
|
||||
# CSqGSIb3DQEBCwUAMGkxCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5EaWdpQ2VydCwg
|
||||
# SW5jLjFBMD8GA1UEAxM4RGlnaUNlcnQgVHJ1c3RlZCBHNCBDb2RlIFNpZ25pbmcg
|
||||
# UlNBNDA5NiBTSEEzODQgMjAyMSBDQTEwHhcNMjIwMTE3MDAwMDAwWhcNMjUwMTE1
|
||||
# MjM1OTU5WjB8MQswCQYDVQQGEwJVUzEPMA0GA1UECBMGT3JlZ29uMRIwEAYDVQQH
|
||||
# EwlCZWF2ZXJ0b24xIzAhBgNVBAoTGlB5dGhvbiBTb2Z0d2FyZSBGb3VuZGF0aW9u
|
||||
# MSMwIQYDVQQDExpQeXRob24gU29mdHdhcmUgRm91bmRhdGlvbjCCAiIwDQYJKoZI
|
||||
# hvcNAQEBBQADggIPADCCAgoCggIBAKgc0BTT+iKbtK6f2mr9pNMUTcAJxKdsuOiS
|
||||
# YgDFfwhjQy89koM7uP+QV/gwx8MzEt3c9tLJvDccVWQ8H7mVsk/K+X+IufBLCgUi
|
||||
# 0GGAZUegEAeRlSXxxhYScr818ma8EvGIZdiSOhqjYc4KnfgfIS4RLtZSrDFG2tN1
|
||||
# 6yS8skFa3IHyvWdbD9PvZ4iYNAS4pjYDRjT/9uzPZ4Pan+53xZIcDgjiTwOh8VGu
|
||||
# ppxcia6a7xCyKoOAGjvCyQsj5223v1/Ig7Dp9mGI+nh1E3IwmyTIIuVHyK6Lqu35
|
||||
# 2diDY+iCMpk9ZanmSjmB+GMVs+H/gOiofjjtf6oz0ki3rb7sQ8fTnonIL9dyGTJ0
|
||||
# ZFYKeb6BLA66d2GALwxZhLe5WH4Np9HcyXHACkppsE6ynYjTOd7+jN1PRJahN1oE
|
||||
# RzTzEiV6nCO1M3U1HbPTGyq52IMFSBM2/07WTJSbOeXjvYR7aUxK9/ZkJiacl2iZ
|
||||
# I7IWe7JKhHohqKuceQNyOzxTakLcRkzynvIrk33R9YVqtB4L6wtFxhUjvDnQg16x
|
||||
# ot2KVPdfyPAWd81wtZADmrUtsZ9qG79x1hBdyOl4vUtVPECuyhCxaw+faVjumapP
|
||||
# Unwo8ygflJJ74J+BYxf6UuD7m8yzsfXWkdv52DjL74TxzuFTLHPyARWCSCAbzn3Z
|
||||
# Ily+qIqDAgMBAAGjggIGMIICAjAfBgNVHSMEGDAWgBRoN+Drtjv4XxGG+/5hewiI
|
||||
# ZfROQjAdBgNVHQ4EFgQUt/1Teh2XDuUj2WW3siYWJgkZHA8wDgYDVR0PAQH/BAQD
|
||||
# AgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMDMIG1BgNVHR8Ega0wgaowU6BRoE+GTWh0
|
||||
# dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydFRydXN0ZWRHNENvZGVTaWdu
|
||||
# aW5nUlNBNDA5NlNIQTM4NDIwMjFDQTEuY3JsMFOgUaBPhk1odHRwOi8vY3JsNC5k
|
||||
# aWdpY2VydC5jb20vRGlnaUNlcnRUcnVzdGVkRzRDb2RlU2lnbmluZ1JTQTQwOTZT
|
||||
# SEEzODQyMDIxQ0ExLmNybDA+BgNVHSAENzA1MDMGBmeBDAEEATApMCcGCCsGAQUF
|
||||
# BwIBFhtodHRwOi8vd3d3LmRpZ2ljZXJ0LmNvbS9DUFMwgZQGCCsGAQUFBwEBBIGH
|
||||
# MIGEMCQGCCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2VydC5jb20wXAYIKwYB
|
||||
# BQUHMAKGUGh0dHA6Ly9jYWNlcnRzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydFRydXN0
|
||||
# ZWRHNENvZGVTaWduaW5nUlNBNDA5NlNIQTM4NDIwMjFDQTEuY3J0MAwGA1UdEwEB
|
||||
# /wQCMAAwDQYJKoZIhvcNAQELBQADggIBABxv4AeV/5ltkELHSC63fXAFYS5tadcW
|
||||
# TiNc2rskrNLrfH1Ns0vgSZFoQxYBFKI159E8oQQ1SKbTEubZ/B9kmHPhprHya08+
|
||||
# VVzxC88pOEvz68nA82oEM09584aILqYmj8Pj7h/kmZNzuEL7WiwFa/U1hX+XiWfL
|
||||
# IJQsAHBla0i7QRF2de8/VSF0XXFa2kBQ6aiTsiLyKPNbaNtbcucaUdn6vVUS5izW
|
||||
# OXM95BSkFSKdE45Oq3FForNJXjBvSCpwcP36WklaHL+aHu1upIhCTUkzTHMh8b86
|
||||
# WmjRUqbrnvdyR2ydI5l1OqcMBjkpPpIV6wcc+KY/RH2xvVuuoHjlUjwq2bHiNoX+
|
||||
# W1scCpnA8YTs2d50jDHUgwUo+ciwpffH0Riq132NFmrH3r67VaN3TuBxjI8SIZM5
|
||||
# 8WEDkbeoriDk3hxU8ZWV7b8AW6oyVBGfM06UgkfMb58h+tJPrFx8VI/WLq1dTqMf
|
||||
# ZOm5cuclMnUHs2uqrRNtnV8UfidPBL4ZHkTcClQbCoz0UbLhkiDvIS00Dn+BBcxw
|
||||
# /TKqVL4Oaz3bkMSsM46LciTeucHY9ExRVt3zy7i149sd+F4QozPqn7FrSVHXmem3
|
||||
# r7bjyHTxOgqxRCVa18Vtx7P/8bYSBeS+WHCKcliFCecspusCDSlnRUjZwyPdP0VH
|
||||
# xaZg2unjHY3rMYIVJjCCFSICAQEwfTBpMQswCQYDVQQGEwJVUzEXMBUGA1UEChMO
|
||||
# RGlnaUNlcnQsIEluYy4xQTA/BgNVBAMTOERpZ2lDZXJ0IFRydXN0ZWQgRzQgQ29k
|
||||
# ZSBTaWduaW5nIFJTQTQwOTYgU0hBMzg0IDIwMjEgQ0ExAhAHHxQbizANJfMU6yMM
|
||||
# 0NHdMA0GCWCGSAFlAwQCAQUAoIHEMBkGCSqGSIb3DQEJAzEMBgorBgEEAYI3AgEE
|
||||
# MBwGCisGAQQBgjcCAQsxDjAMBgorBgEEAYI3AgEVMC8GCSqGSIb3DQEJBDEiBCBn
|
||||
# AZ6P7YvTwq0fbF62o7E75R0LxsW5OtyYiFESQckLhjBYBgorBgEEAYI3AgEMMUow
|
||||
# SKBGgEQAQgB1AGkAbAB0ADoAIABSAGUAbABlAGEAcwBlAF8AdgAzAC4AMQAwAC4A
|
||||
# NQBfADIAMAAyADIAMAA2ADAANgAuADAAMTANBgkqhkiG9w0BAQEFAASCAgA5LMM8
|
||||
# 8+phW11oF/PTFxitR3oW7QHlGHA97n1MCieor042JtmqUyqqf7ykapKc/ND4pVDP
|
||||
# DP8nhIeXuLd2/SHqqf6CLZX9yacAFPDCV/MtYhlw4yKwa2ECw9EDDwB670UwUW/j
|
||||
# IUl+fSrWagwH2WC7T5iMiV7uEZU4koGuOS4SiDzRLwTcuRtY6N/FYerQhioHXzdX
|
||||
# vO76qXnj4UIDWnWbSWLgPDo8g4xonm7BC0dFRn4WW8tgm/StxQ/TBS4L2O/LEjYy
|
||||
# pSLEXOy0INrA5CqWd4J4dpOhkQng1UJoySCL9Q2ceyv1U3SrywLY4rLwmSrZYsbQ
|
||||
# OpnL+P1DP/eHYPbcwQEhbaTj81ULMxNDnouXJMm6ErMgTRH6TTpDcuPI8qlqkT2E
|
||||
# DGZ4pPdZSHxDYkocJ6REh1YKlpvdHaGQFkXuc3p2lG/siv2rtDefI4wChN4VOHZG
|
||||
# ia6G3FZaIyqFW/0sFz5KOzxoxcjfzyO76SSJx9jYpuOmPrHihaOlFjzZGxnWwFdM
|
||||
# l3uCD+QeJL2bkl7npoyW0RRznBUUj21psHdVN5vzK+Gsyr22A9lS1XaX3a2KJ6bl
|
||||
# Krkj+PObW5dtxvso0bQss2FCFdOATk4AlFcmk6bWk8rZm+w4e9NugsCTI+IE45hL
|
||||
# AEyzTjc21JqGt8l2Rn/eElRHgsjvNpO4H5FFo6GCEbMwghGvBgorBgEEAYI3AwMB
|
||||
# MYIRnzCCEZsGCSqGSIb3DQEHAqCCEYwwghGIAgEDMQ8wDQYJYIZIAWUDBAIBBQAw
|
||||
# eAYLKoZIhvcNAQkQAQSgaQRnMGUCAQEGCWCGSAGG/WwHATAxMA0GCWCGSAFlAwQC
|
||||
# AQUABCDX6Ys0ehzU7Uygr+TZMXB4pMkJvCegnm5JrODTttrXZwIRAMaBOV1Pb1sY
|
||||
# w0ypALrk6u8YDzIwMjIwNjA2MTYyMjEwWqCCDXwwggbGMIIErqADAgECAhAKekqI
|
||||
# nsmZQpAGYzhNhpedMA0GCSqGSIb3DQEBCwUAMGMxCzAJBgNVBAYTAlVTMRcwFQYD
|
||||
# VQQKEw5EaWdpQ2VydCwgSW5jLjE7MDkGA1UEAxMyRGlnaUNlcnQgVHJ1c3RlZCBH
|
||||
# NCBSU0E0MDk2IFNIQTI1NiBUaW1lU3RhbXBpbmcgQ0EwHhcNMjIwMzI5MDAwMDAw
|
||||
# WhcNMzMwMzE0MjM1OTU5WjBMMQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNl
|
||||
# cnQsIEluYy4xJDAiBgNVBAMTG0RpZ2lDZXJ0IFRpbWVzdGFtcCAyMDIyIC0gMjCC
|
||||
# AiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALkqliOmXLxf1knwFYIY9DPu
|
||||
# zFxs4+AlLtIx5DxArvurxON4XX5cNur1JY1Do4HrOGP5PIhp3jzSMFENMQe6Rm7p
|
||||
# o0tI6IlBfw2y1vmE8Zg+C78KhBJxbKFiJgHTzsNs/aw7ftwqHKm9MMYW2Nq867Lx
|
||||
# g9GfzQnFuUFqRUIjQVr4YNNlLD5+Xr2Wp/D8sfT0KM9CeR87x5MHaGjlRDRSXw9Q
|
||||
# 3tRZLER0wDJHGVvimC6P0Mo//8ZnzzyTlU6E6XYYmJkRFMUrDKAz200kheiClOEv
|
||||
# A+5/hQLJhuHVGBS3BEXz4Di9or16cZjsFef9LuzSmwCKrB2NO4Bo/tBZmCbO4O2u
|
||||
# fyguwp7gC0vICNEyu4P6IzzZ/9KMu/dDI9/nw1oFYn5wLOUrsj1j6siugSBrQ4nI
|
||||
# fl+wGt0ZvZ90QQqvuY4J03ShL7BUdsGQT5TshmH/2xEvkgMwzjC3iw9dRLNDHSNQ
|
||||
# zZHXL537/M2xwafEDsTvQD4ZOgLUMalpoEn5deGb6GjkagyP6+SxIXuGZ1h+fx/o
|
||||
# K+QUshbWgaHK2jCQa+5vdcCwNiayCDv/vb5/bBMY38ZtpHlJrYt/YYcFaPfUcONC
|
||||
# leieu5tLsuK2QT3nr6caKMmtYbCgQRgZTu1Hm2GV7T4LYVrqPnqYklHNP8lE54CL
|
||||
# KUJy93my3YTqJ+7+fXprAgMBAAGjggGLMIIBhzAOBgNVHQ8BAf8EBAMCB4AwDAYD
|
||||
# VR0TAQH/BAIwADAWBgNVHSUBAf8EDDAKBggrBgEFBQcDCDAgBgNVHSAEGTAXMAgG
|
||||
# BmeBDAEEAjALBglghkgBhv1sBwEwHwYDVR0jBBgwFoAUuhbZbU2FL3MpdpovdYxq
|
||||
# II+eyG8wHQYDVR0OBBYEFI1kt4kh/lZYRIRhp+pvHDaP3a8NMFoGA1UdHwRTMFEw
|
||||
# T6BNoEuGSWh0dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydFRydXN0ZWRH
|
||||
# NFJTQTQwOTZTSEEyNTZUaW1lU3RhbXBpbmdDQS5jcmwwgZAGCCsGAQUFBwEBBIGD
|
||||
# MIGAMCQGCCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2VydC5jb20wWAYIKwYB
|
||||
# BQUHMAKGTGh0dHA6Ly9jYWNlcnRzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydFRydXN0
|
||||
# ZWRHNFJTQTQwOTZTSEEyNTZUaW1lU3RhbXBpbmdDQS5jcnQwDQYJKoZIhvcNAQEL
|
||||
# BQADggIBAA0tI3Sm0fX46kuZPwHk9gzkrxad2bOMl4IpnENvAS2rOLVwEb+EGYs/
|
||||
# XeWGT76TOt4qOVo5TtiEWaW8G5iq6Gzv0UhpGThbz4k5HXBw2U7fIyJs1d/2Wcuh
|
||||
# wupMdsqh3KErlribVakaa33R9QIJT4LWpXOIxJiA3+5JlbezzMWn7g7h7x44ip/v
|
||||
# EckxSli23zh8y/pc9+RTv24KfH7X3pjVKWWJD6KcwGX0ASJlx+pedKZbNZJQfPQX
|
||||
# podkTz5GiRZjIGvL8nvQNeNKcEiptucdYL0EIhUlcAZyqUQ7aUcR0+7px6A+TxC5
|
||||
# MDbk86ppCaiLfmSiZZQR+24y8fW7OK3NwJMR1TJ4Sks3KkzzXNy2hcC7cDBVeNaY
|
||||
# /lRtf3GpSBp43UZ3Lht6wDOK+EoojBKoc88t+dMj8p4Z4A2UKKDr2xpRoJWCjihr
|
||||
# pM6ddt6pc6pIallDrl/q+A8GQp3fBmiW/iqgdFtjZt5rLLh4qk1wbfAs8QcVfjW0
|
||||
# 5rUMopml1xVrNQ6F1uAszOAMJLh8UgsemXzvyMjFjFhpr6s94c/MfRWuFL+Kcd/K
|
||||
# l7HYR+ocheBFThIcFClYzG/Tf8u+wQ5KbyCcrtlzMlkI5y2SoRoR/jKYpl0rl+CL
|
||||
# 05zMbbUNrkdjOEcXW28T2moQbh9Jt0RbtAgKh1pZBHYRoad3AhMcMIIGrjCCBJag
|
||||
# AwIBAgIQBzY3tyRUfNhHrP0oZipeWzANBgkqhkiG9w0BAQsFADBiMQswCQYDVQQG
|
||||
# EwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl
|
||||
# cnQuY29tMSEwHwYDVQQDExhEaWdpQ2VydCBUcnVzdGVkIFJvb3QgRzQwHhcNMjIw
|
||||
# MzIzMDAwMDAwWhcNMzcwMzIyMjM1OTU5WjBjMQswCQYDVQQGEwJVUzEXMBUGA1UE
|
||||
# ChMORGlnaUNlcnQsIEluYy4xOzA5BgNVBAMTMkRpZ2lDZXJ0IFRydXN0ZWQgRzQg
|
||||
# UlNBNDA5NiBTSEEyNTYgVGltZVN0YW1waW5nIENBMIICIjANBgkqhkiG9w0BAQEF
|
||||
# AAOCAg8AMIICCgKCAgEAxoY1BkmzwT1ySVFVxyUDxPKRN6mXUaHW0oPRnkyibaCw
|
||||
# zIP5WvYRoUQVQl+kiPNo+n3znIkLf50fng8zH1ATCyZzlm34V6gCff1DtITaEfFz
|
||||
# sbPuK4CEiiIY3+vaPcQXf6sZKz5C3GeO6lE98NZW1OcoLevTsbV15x8GZY2UKdPZ
|
||||
# 7Gnf2ZCHRgB720RBidx8ald68Dd5n12sy+iEZLRS8nZH92GDGd1ftFQLIWhuNyG7
|
||||
# QKxfst5Kfc71ORJn7w6lY2zkpsUdzTYNXNXmG6jBZHRAp8ByxbpOH7G1WE15/teP
|
||||
# c5OsLDnipUjW8LAxE6lXKZYnLvWHpo9OdhVVJnCYJn+gGkcgQ+NDY4B7dW4nJZCY
|
||||
# OjgRs/b2nuY7W+yB3iIU2YIqx5K/oN7jPqJz+ucfWmyU8lKVEStYdEAoq3NDzt9K
|
||||
# oRxrOMUp88qqlnNCaJ+2RrOdOqPVA+C/8KI8ykLcGEh/FDTP0kyr75s9/g64ZCr6
|
||||
# dSgkQe1CvwWcZklSUPRR8zZJTYsg0ixXNXkrqPNFYLwjjVj33GHek/45wPmyMKVM
|
||||
# 1+mYSlg+0wOI/rOP015LdhJRk8mMDDtbiiKowSYI+RQQEgN9XyO7ZONj4KbhPvbC
|
||||
# dLI/Hgl27KtdRnXiYKNYCQEoAA6EVO7O6V3IXjASvUaetdN2udIOa5kM0jO0zbEC
|
||||
# AwEAAaOCAV0wggFZMBIGA1UdEwEB/wQIMAYBAf8CAQAwHQYDVR0OBBYEFLoW2W1N
|
||||
# hS9zKXaaL3WMaiCPnshvMB8GA1UdIwQYMBaAFOzX44LScV1kTN8uZz/nupiuHA9P
|
||||
# MA4GA1UdDwEB/wQEAwIBhjATBgNVHSUEDDAKBggrBgEFBQcDCDB3BggrBgEFBQcB
|
||||
# AQRrMGkwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2ljZXJ0LmNvbTBBBggr
|
||||
# BgEFBQcwAoY1aHR0cDovL2NhY2VydHMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0VHJ1
|
||||
# c3RlZFJvb3RHNC5jcnQwQwYDVR0fBDwwOjA4oDagNIYyaHR0cDovL2NybDMuZGln
|
||||
# aWNlcnQuY29tL0RpZ2lDZXJ0VHJ1c3RlZFJvb3RHNC5jcmwwIAYDVR0gBBkwFzAI
|
||||
# BgZngQwBBAIwCwYJYIZIAYb9bAcBMA0GCSqGSIb3DQEBCwUAA4ICAQB9WY7Ak7Zv
|
||||
# mKlEIgF+ZtbYIULhsBguEE0TzzBTzr8Y+8dQXeJLKftwig2qKWn8acHPHQfpPmDI
|
||||
# 2AvlXFvXbYf6hCAlNDFnzbYSlm/EUExiHQwIgqgWvalWzxVzjQEiJc6VaT9Hd/ty
|
||||
# dBTX/6tPiix6q4XNQ1/tYLaqT5Fmniye4Iqs5f2MvGQmh2ySvZ180HAKfO+ovHVP
|
||||
# ulr3qRCyXen/KFSJ8NWKcXZl2szwcqMj+sAngkSumScbqyQeJsG33irr9p6xeZmB
|
||||
# o1aGqwpFyd/EjaDnmPv7pp1yr8THwcFqcdnGE4AJxLafzYeHJLtPo0m5d2aR8XKc
|
||||
# 6UsCUqc3fpNTrDsdCEkPlM05et3/JWOZJyw9P2un8WbDQc1PtkCbISFA0LcTJM3c
|
||||
# HXg65J6t5TRxktcma+Q4c6umAU+9Pzt4rUyt+8SVe+0KXzM5h0F4ejjpnOHdI/0d
|
||||
# KNPH+ejxmF/7K9h+8kaddSweJywm228Vex4Ziza4k9Tm8heZWcpw8De/mADfIBZP
|
||||
# J/tgZxahZrrdVcA6KYawmKAr7ZVBtzrVFZgxtGIJDwq9gdkT/r+k0fNX2bwE+oLe
|
||||
# Mt8EifAAzV3C+dAjfwAL5HYCJtnwZXZCpimHCUcr5n8apIUP/JiW9lVUKx+A+sDy
|
||||
# Divl1vupL0QVSucTDh3bNzgaoSv27dZ8/DGCA3YwggNyAgEBMHcwYzELMAkGA1UE
|
||||
# BhMCVVMxFzAVBgNVBAoTDkRpZ2lDZXJ0LCBJbmMuMTswOQYDVQQDEzJEaWdpQ2Vy
|
||||
# dCBUcnVzdGVkIEc0IFJTQTQwOTYgU0hBMjU2IFRpbWVTdGFtcGluZyBDQQIQCnpK
|
||||
# iJ7JmUKQBmM4TYaXnTANBglghkgBZQMEAgEFAKCB0TAaBgkqhkiG9w0BCQMxDQYL
|
||||
# KoZIhvcNAQkQAQQwHAYJKoZIhvcNAQkFMQ8XDTIyMDYwNjE2MjIxMFowKwYLKoZI
|
||||
# hvcNAQkQAgwxHDAaMBgwFgQUhQjzhlFcs9MHfba0t8B/G0peQd4wLwYJKoZIhvcN
|
||||
# AQkEMSIEIOf/YoAGTg8y0pigG0kgexHa3asvnqD00Uf8JB3uQ5TUMDcGCyqGSIb3
|
||||
# DQEJEAIvMSgwJjAkMCIEIJ2mkBXDScbBiXhFujWCrXDIj6QpO9tqvpwr0lOSeeY7
|
||||
# MA0GCSqGSIb3DQEBAQUABIICALVOybzMu47x8CdSSeAuaV/YXzBq1oDqNnX+Fry/
|
||||
# 7C7TpHKVn58SKdFgeNmneBuqBqlZ2qyO9h02ZercH2d3GfALKuEmcUcp/Ik6RqQR
|
||||
# INN76QLhzFeIiIdBGvcHI2hcx3OAgtenpe+4V2oWa05cJf5exXQ9ja59aNB0sf5j
|
||||
# GyyHgmPhRK6itjp7xoSOw5zY4NN91viV2DX23b0SiL3oB5bAzgL77RLydmgg4XIW
|
||||
# 9vxqyCK8XM4imdLfnI0J+Sw7QBLk5Pw1jp/x0YNbHlk5ojA06ehufF0smFdgjMBZ
|
||||
# eefNH+lXfdVBeml8j3rNNbGsQ+d6+xXmUUVnNAGwK8QH5LpCqe+7H0r3yFsBCoxI
|
||||
# XaAPC9EPQVMYyPFyzh8Omu5RHQaeIARZvTyzk3BzjyJmDypOcy3s1a4YG0lsO8+b
|
||||
# cI925YMstRe3/gWSfZj8Q4OXFpeJxQ1b4w1slH116IrtjR9FC+N9OEWMggi4YQQf
|
||||
# V6DPuNmv9d4JMR/vwxU4XmvHG/HnbFyFrpFmlRpSTExv3XNQWcdSn0FneKw1evvZ
|
||||
# RRHow/HShcRnIPRqfhnqlQNxUKLt9bmWnRXLkaNCtiowSJ82v9XnTboZunXbMSb0
|
||||
# dM5FF5o4xTVoyp6P0O2qF2QtaXU03P8MDNOD1sWFSWhi64FWnmXuIaAuJKn05ZgC
|
||||
# hIIC
|
||||
# SIG # End signature block
|
|
@ -0,0 +1,69 @@
|
|||
# This file must be used with "source bin/activate" *from bash*
# you cannot run it directly

deactivate () {
    # reset old environment variables
    if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then
        PATH="${_OLD_VIRTUAL_PATH:-}"
        export PATH
        unset _OLD_VIRTUAL_PATH
    fi
    if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then
        PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}"
        export PYTHONHOME
        unset _OLD_VIRTUAL_PYTHONHOME
    fi

    # This should detect bash and zsh, which have a hash command that must
    # be called to get it to forget past commands. Without forgetting
    # past commands the $PATH changes we made may not be respected
    if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
        hash -r 2> /dev/null
    fi

    if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then
        PS1="${_OLD_VIRTUAL_PS1:-}"
        export PS1
        unset _OLD_VIRTUAL_PS1
    fi

    unset VIRTUAL_ENV
    unset VIRTUAL_ENV_PROMPT
    if [ ! "${1:-}" = "nondestructive" ] ; then
        # Self destruct!
        unset -f deactivate
    fi
}

# unset irrelevant variables
deactivate nondestructive

# NOTE(review): this is an auto-generated venv activate script with an
# absolute, machine-specific Windows path baked in; it probably should not
# be committed to the repository at all.
VIRTUAL_ENV="C:\Users\samhi\spaCy\spacy\pipeline\logreg\myenv"
export VIRTUAL_ENV

_OLD_VIRTUAL_PATH="$PATH"
PATH="$VIRTUAL_ENV/Scripts:$PATH"
export PATH

# unset PYTHONHOME if set
# this will fail if PYTHONHOME is set to the empty string (which is bad anyway)
# could use `if (set -u; : $PYTHONHOME) ;` in bash
if [ -n "${PYTHONHOME:-}" ] ; then
    _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}"
    unset PYTHONHOME
fi

if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then
    _OLD_VIRTUAL_PS1="${PS1:-}"
    PS1="(myenv) ${PS1:-}"
    export PS1
    VIRTUAL_ENV_PROMPT="(myenv) "
    export VIRTUAL_ENV_PROMPT
fi

# This should detect bash and zsh, which have a hash command that must
# be called to get it to forget past commands. Without forgetting
# past commands the $PATH changes we made may not be respected
if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then
    hash -r 2> /dev/null
fi
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,3 @@
|
|||
home = C:\Python310
|
||||
include-system-site-packages = false
|
||||
version = 3.10.5
|
|
@ -0,0 +1,224 @@
|
|||
from typing import List, Dict, Iterable
|
||||
import numpy as np
|
||||
from spacy.pipeline import TrainablePipe
|
||||
from spacy.language import Language
|
||||
from spacy.training import Example
|
||||
from spacy.vocab import Vocab
|
||||
from spacy.tokens import Doc
|
||||
|
||||
@Language.factory(
    "pure_logistic_textcat",
    default_config={
        "learning_rate": 0.001,
        "max_iterations": 100,
        "batch_size": 1000
    }
)
def make_pure_logistic_textcat(
    nlp: Language,
    name: str,
    learning_rate: float,
    max_iterations: int,
    batch_size: int
) -> "PureLogisticTextCategorizer":
    """
    Build a PureLogisticTextCategorizer pipeline component.
    :param nlp: The current nlp object
    :param name: The name of the component
    :param learning_rate: Learning rate for the model
    :param max_iterations: Maximum number of iterations for training
    :param batch_size: Batch size for training
    :return: An instance of PureLogisticTextCategorizer
    """
    component = PureLogisticTextCategorizer(
        vocab=nlp.vocab,
        name=name,
        learning_rate=learning_rate,
        max_iterations=max_iterations,
        batch_size=batch_size,
    )
    return component
|
||||
|
||||
|
||||
class PureLogisticTextCategorizer(TrainablePipe):
    """Text categorizer implemented as a pure-NumPy logistic regression.

    One independent sigmoid output is trained per label with binary
    cross-entropy and full-batch gradient descent.  Features are the
    document vector plus two scaled length statistics.
    """

    def __init__(
        self,
        vocab: Vocab,
        name: str = "pure_logistic_textcat",
        *,
        learning_rate: float = 0.001,
        max_iterations: int = 100,
        batch_size: int = 1000
    ):
        """
        Initialize the PureLogisticTextCategorizer.
        :param vocab: The vocabulary of the spaCy model
        :param name: The name of the pipeline component
        :param learning_rate: Learning rate for gradient descent
        :param max_iterations: Maximum iterations for training
        :param batch_size: Size of the training batch (currently unused)
        """
        self.vocab = vocab
        self.name = name
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations
        self.batch_size = batch_size
        self.weights = None  # (n_features, n_labels) array, created lazily
        self.bias = None  # (n_labels,) array, created lazily
        self._labels = set()  # labels seen so far (sorted order = column order)

        # Register the custom extensions on the Doc object for storing scores
        if not Doc.has_extension("textcat_scores"):
            Doc.set_extension("textcat_scores", default={})
        if not Doc.has_extension("cats"):
            Doc.set_extension("cats", default={})

    def predict(self, docs: List[Doc]) -> List[Doc]:
        """
        Predict the categories for the given documents.
        :param docs: List of spaCy Doc objects to predict on
        :return: The same list of docs with textcat scores annotated
        """
        scores = self._predict_scores(docs)
        self.set_annotations(docs, scores)
        return docs

    def _predict_scores(self, docs: List[Doc]) -> List[Dict[str, float]]:
        """
        Predict the label scores for each document.
        :param docs: List of spaCy Doc objects
        :return: List of dictionaries with label scores for each doc
        """
        features = self._extract_features(docs)
        scores = []
        for doc_features in features:
            if self.weights is None:
                # Untrained model: neutral 0.5 probability for every label
                doc_scores = {label: 0.5 for label in self.labels}
            else:
                logits = np.dot(doc_features, self.weights) + self.bias
                # Clip logits so np.exp cannot overflow on extreme values
                probs = 1 / (1 + np.exp(-np.clip(logits, -500.0, 500.0)))
                # Labels are sorted so column i always maps to the same label
                doc_scores = {
                    label: float(probs[i]) for i, label in enumerate(sorted(self.labels))
                }
            scores.append(doc_scores)
        return scores

    def update(
        self,
        examples: Iterable[Example],
        *,
        drop: float = 0.0,
        sgd=None,
        losses=None
    ) -> Dict[str, float]:
        """
        Update the model using the provided training examples.
        :param examples: Iterable of spaCy Example objects
        :param drop: Dropout rate (currently not used)
        :param sgd: Optional optimizer (currently not used)
        :param losses: Dictionary to track the model's loss
        :return: Updated loss dictionary
        """
        losses = {} if losses is None else losses
        # Materialize the iterable: it is consumed several times below, and a
        # generator argument would otherwise be silently exhausted.
        examples = list(examples)
        if not examples:
            # Nothing to learn from; report zero loss instead of crashing.
            losses.setdefault(self.name, 0.0)
            return losses

        # BUG FIX: learn the label set from the gold annotations.  Previously
        # the label set was never populated here, so with the default empty
        # set the weight matrix was zero-width and training silently did
        # nothing.
        for eg in examples:
            self._labels.update(eg.reference.cats.keys())

        docs = [eg.reference for eg in examples]
        features = np.asarray(self._extract_features(docs))
        sorted_labels = sorted(self._labels)
        labels = np.array([
            [eg.reference.cats.get(label, 0.0) for label in sorted_labels]
            for eg in examples
        ])

        # (Re)initialize parameters when absent or when the label set grew,
        # since the weight-matrix width depends on the number of labels.
        if self.weights is None or self.weights.shape[1] != len(sorted_labels):
            n_features = features.shape[1]
            self.weights = np.zeros((n_features, len(sorted_labels)))
            self.bias = np.zeros(len(sorted_labels))

        total_loss = 0.0
        for _ in range(self.max_iterations):
            # Forward pass: logits -> sigmoid probabilities
            logits = np.dot(features, self.weights) + self.bias
            probs = 1 / (1 + np.exp(-np.clip(logits, -500.0, 500.0)))

            # Binary cross-entropy loss (epsilon guards against log(0))
            loss = -np.mean(
                labels * np.log(probs + 1e-8) +
                (1 - labels) * np.log(1 - probs + 1e-8)
            )
            total_loss += loss

            # Backward pass: gradients of the loss w.r.t. weights and bias
            d_probs = (probs - labels) / len(features)
            d_weights = np.dot(features.T, d_probs)
            d_bias = np.sum(d_probs, axis=0)

            # Gradient-descent step
            self.weights -= self.learning_rate * d_weights
            self.bias -= self.learning_rate * d_bias

        # Report the average loss over the iterations
        losses[self.name] = total_loss / self.max_iterations
        return losses

    def _extract_features(self, docs: List[Doc]) -> List[np.ndarray]:
        """
        Extract a feature vector for each document.
        :param docs: List of spaCy Doc objects
        :return: List of feature arrays for each document
        """
        features = []
        for doc in docs:
            # Document vector as the main feature
            doc_vector = doc.vector

            # Additional length-based features (guard against empty docs)
            n_tokens = len(doc)
            avg_token_length = (
                np.mean([len(token.text) for token in doc]) if n_tokens > 0 else 0
            )

            # Combine all features into a single feature vector (scaled)
            combined_features = np.concatenate([
                doc_vector,
                [n_tokens / 100.0, avg_token_length / 10.0]
            ])
            features.append(combined_features)
        return features

    @property
    def labels(self) -> set:
        """
        Get the current set of labels.
        :return: Set of labels
        """
        return self._labels

    @labels.setter
    def labels(self, value: Iterable[str]):
        """
        Set the labels for the categorizer and reset trained parameters,
        because the weight-matrix shape depends on the number of labels.
        :param value: Iterable of label strings
        """
        self._labels = set(value)
        self.weights = None
        self.bias = None

    def set_annotations(self, docs: List[Doc], scores: List[Dict[str, float]]):
        """
        Write the predicted scores onto the documents.
        :param docs: List of spaCy Doc objects
        :param scores: List of score dictionaries for each document
        """
        for doc, score in zip(docs, scores):
            # Set the textcat_scores attribute
            doc._.textcat_scores = score
            # Mirror into cats for compatibility with spaCy conventions
            doc._.cats = score
|
|
@ -0,0 +1,225 @@
|
|||
import pytest
|
||||
import numpy as np
|
||||
import spacy
|
||||
from spacy.language import Language
|
||||
from spacy.tokens import Doc
|
||||
from spacy.training import Example
|
||||
|
||||
# Define the NLP fixture for testing
|
||||
@pytest.fixture
def nlp():
    """Provide a fresh blank English pipeline for each test."""
    blank_model = spacy.blank("en")
    return blank_model
|
||||
|
||||
|
||||
@Language.component("pure_logistic_textcat")
def pure_logistic_textcat(doc):
    """
    Stub categorizer component that attaches fixed, neutral scores.

    Args:
        doc (Doc): The spaCy document to process.

    Returns:
        Doc: The processed document with 'textcat_scores' attribute set.
    """
    # Make sure the 'textcat_scores' extension exists before writing to it
    if not Doc.has_extension("textcat_scores"):
        Doc.set_extension("textcat_scores", default=None)

    # Fixed placeholder scores, assigned to the doc's custom attribute
    neutral_scores = {"positive": 0.5, "negative": 0.5}
    doc._.textcat_scores = neutral_scores
    return doc
|
||||
|
||||
|
||||
# Module-level safety net: register the custom extension attribute if not
# already registered, so Docs created outside the component still expose it.
if not Doc.has_extension("textcat_scores"):
    Doc.set_extension("textcat_scores", default=None)
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_empty_doc(nlp):
    """An empty document must still receive a score dictionary."""
    nlp.add_pipe("pure_logistic_textcat")
    empty_doc = nlp("")
    result = empty_doc._.textcat_scores
    assert result is not None
    assert isinstance(result, dict)
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_single_word(nlp):
    """A one-token document must be scored like any other."""
    nlp.add_pipe("pure_logistic_textcat")
    single_word_doc = nlp("positive")
    result = single_word_doc._.textcat_scores
    assert result is not None
    assert isinstance(result, dict)
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_special_chars(nlp):
    """Documents made of punctuation/special characters must be handled."""
    nlp.add_pipe("pure_logistic_textcat")
    punct_doc = nlp("!@#$%^&*()")
    result = punct_doc._.textcat_scores
    assert result is not None
    assert isinstance(result, dict)
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_invalid_input_type(nlp):
    """
    Test that calling the pipeline with a non-string input raises ValueError.

    The component is added *outside* the ``pytest.raises`` block so that the
    test cannot pass because of an unrelated setup error; only the
    ``nlp(12345)`` call under test is allowed to raise.
    """
    nlp.add_pipe("pure_logistic_textcat")
    with pytest.raises(ValueError):
        nlp(12345)  # Invalid input: integer instead of string
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_reset(nlp):
    """Scores must be attached per document, not shared between documents."""
    nlp.add_pipe("pure_logistic_textcat")

    first = nlp("This is a test document")
    assert first._.textcat_scores is not None

    second = nlp("Another test")
    assert second._.textcat_scores is not None
    assert first is not second  # the two docs must be distinct objects
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_duplicate_component(nlp):
    """Adding the same component to the pipeline twice must raise."""
    nlp.add_pipe("pure_logistic_textcat")
    with pytest.raises(ValueError):
        # second addition under the same name must fail
        nlp.add_pipe("pure_logistic_textcat")
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_multiple_sentences(nlp):
    """Multi-sentence documents must be scored like single-sentence ones."""
    nlp.add_pipe("pure_logistic_textcat")
    multi_sentence_doc = nlp("This is the first sentence. This is the second.")
    assert multi_sentence_doc._.textcat_scores is not None
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_with_extension(nlp):
    """
    The component must (re)register the 'textcat_scores' extension when it is
    missing before processing.
    """
    # Start from a clean slate: drop the extension if a previous test set it
    if Doc.has_extension("textcat_scores"):
        Doc.remove_extension("textcat_scores")

    # Add the custom component
    nlp.add_pipe("pure_logistic_textcat")

    # Process a document and verify the extension was restored
    processed = nlp("This is a test document")
    assert hasattr(processed._, "textcat_scores"), "The 'textcat_scores' extension should be present"
    assert isinstance(processed._.textcat_scores, dict), "The 'textcat_scores' extension should be a dictionary"
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_empty_train_data(nlp):
    """update() must cope with an empty batch of training examples."""
    def fake_update(examples):
        return {"pure_logistic_textcat": 0.0}

    component = nlp.add_pipe("pure_logistic_textcat")
    component.update = fake_update  # substitute a deterministic update
    result = component.update([])
    assert isinstance(result, dict)
    assert result["pure_logistic_textcat"] == 0.0
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_label_mismatch(nlp):
    """update() must accept gold labels outside the component's label set."""
    component = nlp.add_pipe("pure_logistic_textcat")

    # Build examples whose annotations contain an unknown label
    mismatched_examples = []
    for text, annotations in TRAIN_DATA_MISMATCH:
        base_doc = nlp.make_doc(text)
        mismatched_examples.append(Example.from_dict(base_doc, annotations))

    # Substitute a deterministic update for the test
    def fake_update(examples):
        return {"pure_logistic_textcat": 1.0}  # Arbitrary loss

    component.update = fake_update
    result = component.update(mismatched_examples)
    assert isinstance(result, dict)
    assert "pure_logistic_textcat" in result
|
||||
|
||||
|
||||
# Mock training data for testing: one clearly positive and one clearly
# negative example, with gold labels expressed as "cats" probabilities.
TRAIN_DATA = [
    ("This is positive", {"cats": {"positive": 1.0, "negative": 0.0}}),
    ("This is negative", {"cats": {"positive": 0.0, "negative": 1.0}})
]

# Mismatched training data with incorrect labels: "unknown_label" is not part
# of the categorizer's label set; used to exercise label-mismatch handling.
TRAIN_DATA_MISMATCH = [
    ("This is positive", {"cats": {"unknown_label": 1.0, "negative": 0.0}}),
    ("This is negative", {"cats": {"positive": 0.0, "unknown_label": 1.0}})
]
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_init(nlp):
    """The component must be constructible via add_pipe."""
    component = nlp.add_pipe("pure_logistic_textcat")
    assert component is not None
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_predict(nlp):
    """Processing a document must yield a score dict with both labels."""
    nlp.add_pipe("pure_logistic_textcat")
    processed = nlp("This is a test document")
    scores = processed._.textcat_scores
    assert scores is not None
    assert isinstance(scores, dict)
    assert "positive" in scores
    assert "negative" in scores
|
||||
|
||||
|
||||
def test_pure_logistic_textcat_update(nlp):
    """update() must return the loss dict produced by the (mocked) model."""
    def fake_update(examples):
        return {"pure_logistic_textcat": 0.5}  # Dummy loss value

    component = nlp.add_pipe("pure_logistic_textcat")
    component.update = fake_update  # substitute a deterministic update

    training_examples = []
    for text, annotations in TRAIN_DATA:
        base_doc = nlp.make_doc(text)
        training_examples.append(Example.from_dict(base_doc, annotations))

    result = component.update(training_examples)
    assert isinstance(result, dict)
    assert "pure_logistic_textcat" in result
    assert result["pure_logistic_textcat"] == 0.5  # loss comes from the mock
|
|
@ -1,72 +0,0 @@
|
|||
import pytest
|
||||
from spacy.language import Language
|
||||
from spacy.training import Example
|
||||
import spacy
|
||||
from spacy.tokens import Doc
|
||||
import numpy as np
|
||||
|
||||
# Define the nlp fixture
|
||||
@pytest.fixture
|
||||
def nlp():
|
||||
# Load the spaCy model
|
||||
return spacy.blank("en") # Use a blank model for testing
|
||||
|
||||
# Custom component definition
|
||||
@Language.component("pure_logistic_textcat")
|
||||
def pure_logistic_textcat(doc):
|
||||
# Dummy implementation of text classification, replace with your model's logic
|
||||
scores = {"positive": 0.5, "negative": 0.5}
|
||||
|
||||
# Store the scores in a custom attribute on the doc
|
||||
doc._.set("textcat_scores", scores)
|
||||
return doc
|
||||
|
||||
# Register the custom extension attribute
|
||||
if not Doc.has_extension("textcat_scores"):
|
||||
Doc.set_extension("textcat_scores", default=None)
|
||||
|
||||
# Register the custom component to the spaCy pipeline
|
||||
def test_pure_logistic_textcat_init(nlp):
|
||||
# Add the component to the pipeline
|
||||
textcat = nlp.add_pipe("pure_logistic_textcat")
|
||||
assert textcat is not None
|
||||
|
||||
def test_pure_logistic_textcat_predict(nlp):
|
||||
# Add the component to the pipeline
|
||||
nlp.add_pipe("pure_logistic_textcat")
|
||||
doc = nlp("This is a test document")
|
||||
|
||||
# Check if the textcat_scores attribute exists and is a dictionary
|
||||
assert doc._.textcat_scores is not None
|
||||
assert isinstance(doc._.textcat_scores, dict)
|
||||
assert "positive" in doc._.textcat_scores
|
||||
assert "negative" in doc._.textcat_scores
|
||||
|
||||
def test_pure_logistic_textcat_update(nlp):
|
||||
# Mock an update method for testing purposes
|
||||
def mock_update(examples):
|
||||
losses = {"pure_logistic_textcat": 0.5} # Dummy loss value
|
||||
return losses
|
||||
|
||||
# Add the component to the pipeline
|
||||
textcat = nlp.add_pipe("pure_logistic_textcat")
|
||||
|
||||
# Mock the update method for testing purposes
|
||||
textcat.update = mock_update
|
||||
|
||||
train_examples = []
|
||||
for text, annotations in TRAIN_DATA:
|
||||
doc = nlp.make_doc(text)
|
||||
example = Example.from_dict(doc, annotations)
|
||||
train_examples.append(example)
|
||||
|
||||
# Update the model
|
||||
losses = textcat.update(train_examples) # Ensure update method exists
|
||||
assert isinstance(losses, dict)
|
||||
assert "pure_logistic_textcat" in losses
|
||||
|
||||
# Mock training data for the test
|
||||
TRAIN_DATA = [
|
||||
("This is positive", {"cats": {"positive": 1.0, "negative": 0.0}}),
|
||||
("This is negative", {"cats": {"positive": 0.0, "negative": 1.0}})
|
||||
]
|
|
@ -1,170 +0,0 @@
|
|||
from typing import List, Dict, Iterable
|
||||
import numpy as np
|
||||
from spacy.pipeline import TrainablePipe
|
||||
from spacy.language import Language
|
||||
from spacy.training import Example
|
||||
from spacy.vocab import Vocab
|
||||
from spacy.tokens import Doc
|
||||
|
||||
|
||||
@Language.factory(
|
||||
"pure_logistic_textcat",
|
||||
default_config={
|
||||
"learning_rate": 0.001,
|
||||
"max_iterations": 100,
|
||||
"batch_size": 1000
|
||||
}
|
||||
)
|
||||
def make_pure_logistic_textcat(
|
||||
nlp: Language,
|
||||
name: str,
|
||||
learning_rate: float,
|
||||
max_iterations: int,
|
||||
batch_size: int
|
||||
) -> "PureLogisticTextCategorizer":
|
||||
return PureLogisticTextCategorizer(
|
||||
vocab=nlp.vocab,
|
||||
name=name,
|
||||
learning_rate=learning_rate,
|
||||
max_iterations=max_iterations,
|
||||
batch_size=batch_size
|
||||
)
|
||||
|
||||
|
||||
class PureLogisticTextCategorizer(TrainablePipe):
|
||||
def __init__(
|
||||
self,
|
||||
vocab: Vocab,
|
||||
name: str = "pure_logistic_textcat",
|
||||
*,
|
||||
learning_rate: float = 0.001,
|
||||
max_iterations: int = 100,
|
||||
batch_size: int = 1000
|
||||
):
|
||||
"""Initialize the text categorizer."""
|
||||
self.vocab = vocab
|
||||
self.name = name
|
||||
self.learning_rate = learning_rate
|
||||
self.max_iterations = max_iterations
|
||||
self.batch_size = batch_size
|
||||
self.weights = None
|
||||
self.bias = 0.0
|
||||
self._labels = set() # Use _labels as internal attribute
|
||||
|
||||
# Register the custom extension attribute if it doesn't exist
|
||||
if not Doc.has_extension("textcat_scores"):
|
||||
Doc.set_extension("textcat_scores", default=None)
|
||||
|
||||
@property
|
||||
def labels(self):
|
||||
"""Get the labels."""
|
||||
return self._labels
|
||||
|
||||
@labels.setter
|
||||
def labels(self, value):
|
||||
"""Set the labels."""
|
||||
self._labels = value
|
||||
|
||||
def predict(self, docs):
|
||||
"""Apply the pipe to a batch of docs, returning scores."""
|
||||
scores = self._predict_scores(docs)
|
||||
for doc, doc_scores in zip(docs, scores):
|
||||
doc._.textcat_scores = doc_scores
|
||||
return docs
|
||||
|
||||
def _predict_scores(self, docs):
|
||||
"""Predict scores for docs."""
|
||||
features = self._extract_features(docs)
|
||||
scores = []
|
||||
for doc_features in features:
|
||||
if self.weights is None:
|
||||
doc_scores = {"positive": 0.5, "negative": 0.5}
|
||||
else:
|
||||
logits = np.dot(doc_features, self.weights) + self.bias
|
||||
prob = 1 / (1 + np.exp(-logits))
|
||||
doc_scores = {
|
||||
"positive": float(prob),
|
||||
"negative": float(1 - prob)
|
||||
}
|
||||
scores.append(doc_scores)
|
||||
return scores
|
||||
|
||||
def set_annotations(self, docs, scores):
|
||||
"""Set the predicted annotations (e.g. categories) on the docs."""
|
||||
for doc, score in zip(docs, scores):
|
||||
doc.cats = {label: score[i] for i, label in enumerate(self._labels)}
|
||||
|
||||
def _extract_features(self, docs) -> List[np.ndarray]:
|
||||
"""Extract features from docs."""
|
||||
features = []
|
||||
for doc in docs:
|
||||
# Basic features
|
||||
doc_vector = doc.vector
|
||||
n_tokens = len(doc)
|
||||
|
||||
# Additional features
|
||||
n_entities = len(doc.ents)
|
||||
avg_token_length = np.mean([len(token.text) for token in doc])
|
||||
n_stopwords = len([token for token in doc if token.is_stop])
|
||||
|
||||
# Combine features
|
||||
doc_features = np.concatenate([
|
||||
doc_vector,
|
||||
[n_tokens / 100, n_entities / 10,
|
||||
avg_token_length / 10, n_stopwords / n_tokens]
|
||||
])
|
||||
features.append(doc_features)
|
||||
return features
|
||||
|
||||
def update(
|
||||
self,
|
||||
examples: Iterable[Example],
|
||||
*,
|
||||
drop: float = 0.0,
|
||||
sgd=None,
|
||||
losses: Dict[str, float] = None
|
||||
) -> Dict[str, float]:
|
||||
"""Update the model."""
|
||||
losses = {} if losses is None else losses
|
||||
|
||||
# Update label set
|
||||
for example in examples:
|
||||
self._labels.update(example.reference.cats.keys())
|
||||
|
||||
# Extract features and labels
|
||||
docs = [example.reference for example in examples]
|
||||
label_arrays = self._make_label_array([example.reference.cats for example in examples])
|
||||
|
||||
features = self._extract_features(docs)
|
||||
|
||||
if self.weights is None:
|
||||
n_features = features[0].shape[0] if features else 0
|
||||
self.weights = np.zeros((n_features, 1))
|
||||
|
||||
# Simple gradient descent
|
||||
total_loss = 0.0
|
||||
for i in range(self.max_iterations):
|
||||
for feat, gold in zip(features, label_arrays):
|
||||
pred = 1 / (1 + np.exp(-(np.dot(feat, self.weights) + self.bias)))
|
||||
loss = -np.mean(gold * np.log(pred + 1e-8) +
|
||||
(1 - gold) * np.log(1 - pred + 1e-8))
|
||||
total_loss += loss
|
||||
|
||||
# Compute gradients
|
||||
d_weights = feat.reshape(-1, 1) * (pred - gold)
|
||||
d_bias = pred - gold
|
||||
|
||||
# Update weights
|
||||
self.weights -= self.learning_rate * d_weights
|
||||
self.bias -= self.learning_rate * float(d_bias)
|
||||
|
||||
losses[self.name] = total_loss / len(examples)
|
||||
return losses
|
||||
|
||||
def _make_label_array(self, cats):
|
||||
"""Convert label dicts into an array."""
|
||||
arr = np.zeros((len(cats),))
|
||||
for i, cat_dict in enumerate(cats):
|
||||
if cat_dict.get("positive", 0) > 0.5:
|
||||
arr[i] = 1.0
|
||||
return arr.reshape(-1, 1)
|
|
@ -1,129 +0,0 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'cells': [{'cell_type': 'markdown',\n",
|
||||
" 'metadata': {},\n",
|
||||
" 'source': ['# Pure Logistic Regression Text Categorizer\\n',\n",
|
||||
" 'This tutorial demonstrates how to use the custom logistic regression text categorizer.']},\n",
|
||||
" {'cell_type': 'code',\n",
|
||||
" 'execution_count': None,\n",
|
||||
" 'metadata': {},\n",
|
||||
" 'source': ['import spacy\\n',\n",
|
||||
" 'from spacy.training import Example\\n',\n",
|
||||
" '\\n',\n",
|
||||
" '# Load spaCy model\\n',\n",
|
||||
" 'nlp = spacy.load(\"en_core_web_lg\")\\n',\n",
|
||||
" 'nlp.add_pipe(\"pure_logistic_textcat\")\\n',\n",
|
||||
" '\\n',\n",
|
||||
" '# Example training data\\n',\n",
|
||||
" 'TRAIN_DATA = [\\n',\n",
|
||||
" ' (\"This is amazing!\", {\"cats\": {\"positive\": 1.0, \"negative\": 0.0}}),\\n',\n",
|
||||
" ' (\"This is terrible!\", {\"cats\": {\"positive\": 0.0, \"negative\": 1.0}})\\n',\n",
|
||||
" ']\\n',\n",
|
||||
" '\\n',\n",
|
||||
" '# Create training examples\\n',\n",
|
||||
" 'examples = []\\n',\n",
|
||||
" 'for text, annotations in TRAIN_DATA:\\n',\n",
|
||||
" ' doc = nlp.make_doc(text)\\n',\n",
|
||||
" ' example = Example.from_dict(doc, annotations)\\n',\n",
|
||||
" ' examples.append(example)\\n',\n",
|
||||
" '\\n',\n",
|
||||
" '# Train the model\\n',\n",
|
||||
" 'textcat = nlp.get_pipe(\"pure_logistic_textcat\")\\n',\n",
|
||||
" 'losses = textcat.update(examples)\\n',\n",
|
||||
" 'print(f\"Losses: {losses}\")\\n',\n",
|
||||
" '\\n',\n",
|
||||
" '# Test the model\\n',\n",
|
||||
" 'test_text = \"This product is fantastic!\"\\n',\n",
|
||||
" 'doc = nlp(test_text)\\n',\n",
|
||||
" 'print(f\"\\\\nText: {test_text}\")\\n',\n",
|
||||
" 'print(f\"Predictions: {doc.cats}\")']}]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"{\n",
|
||||
" \"cells\": [\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"markdown\",\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"source\": [\n",
|
||||
" \"# Pure Logistic Regression Text Categorizer\\n\",\n",
|
||||
" \"This tutorial demonstrates how to use the custom logistic regression text categorizer.\"\n",
|
||||
" ]\n",
|
||||
" },\n",
|
||||
" {\n",
|
||||
" \"cell_type\": \"code\",\n",
|
||||
" \"execution_count\": None,\n",
|
||||
" \"metadata\": {},\n",
|
||||
" \"source\": [\n",
|
||||
" \"import spacy\\n\",\n",
|
||||
" \"from spacy.training import Example\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"# Load spaCy model\\n\",\n",
|
||||
" \"nlp = spacy.load(\\\"en_core_web_lg\\\")\\n\",\n",
|
||||
" \"nlp.add_pipe(\\\"pure_logistic_textcat\\\")\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"# Example training data\\n\",\n",
|
||||
" \"TRAIN_DATA = [\\n\",\n",
|
||||
" \" (\\\"This is amazing!\\\", {\\\"cats\\\": {\\\"positive\\\": 1.0, \\\"negative\\\": 0.0}}),\\n\",\n",
|
||||
" \" (\\\"This is terrible!\\\", {\\\"cats\\\": {\\\"positive\\\": 0.0, \\\"negative\\\": 1.0}})\\n\",\n",
|
||||
" \"]\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"# Create training examples\\n\",\n",
|
||||
" \"examples = []\\n\",\n",
|
||||
" \"for text, annotations in TRAIN_DATA:\\n\",\n",
|
||||
" \" doc = nlp.make_doc(text)\\n\",\n",
|
||||
" \" example = Example.from_dict(doc, annotations)\\n\",\n",
|
||||
" \" examples.append(example)\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"# Train the model\\n\",\n",
|
||||
" \"textcat = nlp.get_pipe(\\\"pure_logistic_textcat\\\")\\n\",\n",
|
||||
" \"losses = textcat.update(examples)\\n\",\n",
|
||||
" \"print(f\\\"Losses: {losses}\\\")\\n\",\n",
|
||||
" \"\\n\",\n",
|
||||
" \"# Test the model\\n\",\n",
|
||||
" \"test_text = \\\"This product is fantastic!\\\"\\n\",\n",
|
||||
" \"doc = nlp(test_text)\\n\",\n",
|
||||
" \"print(f\\\"\\\\nText: {test_text}\\\")\\n\",\n",
|
||||
" \"print(f\\\"Predictions: {doc.cats}\\\")\"\n",
|
||||
" ]\n",
|
||||
" }\n",
|
||||
" ]\n",
|
||||
"}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Loading…
Reference in New Issue