diff --git a/README.md b/README.md
index c3e56ca2f..2ef905ff5 100644
--- a/README.md
+++ b/README.md
@@ -227,6 +227,9 @@ nlp = en_core_web_sm.load()
 doc = nlp("This is a sentence.")
 ```
 
+📖 **For more info and examples, check out the
+[models documentation](https://spacy.io/docs/usage/models).**
+
 ## 📊 Custom Sentiment Analysis with Logistic Regression (spaCy-based)
 
 This repository also includes a custom **Logistic Regression** sentiment analysis model built using spaCy, without using scikit-learn. The model classifies text as positive or negative based on a dataset such as IMDb reviews.
@@ -234,24 +237,28 @@ This repository also includes a custom **Logistic Regression** sentiment analysi
 To run the logistic regression model:
 ```bash
 python pure_Logistic.py
-```This script processes the dataset using spaCy, trains the logistic regression model, and outputs the results.
-
+```
+This script processes the dataset using spaCy, trains the logistic regression model, and outputs the results.
+
 ### Testing and Evaluation
 
 To run tests and evaluate the model's performance, use:
+
 ```bash
 python test_pure_logistic.py
 ```
 
-In your test script, import the PureLogisticTextCategorizer class for evaluation:
-```bash
-from pure_Logistic import PureLogisticTextCategorizer
-```
+To use the model in your own code, import the `PureLogisticTextCategorizer` class for evaluation:
+
+```python
+from pure_Logistic import PureLogisticTextCategorizer
+
+# Initialize and use the classifier
+categorizer = PureLogisticTextCategorizer()
+```
 
 This enables you to evaluate the logistic regression classifier on your test cases.
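+
+As a quick end-to-end sketch (assuming a model with word vectors such as
+`en_core_web_lg` is installed; importing `pure_Logistic` registers the
+`pure_logistic_textcat` factory):
+
+```python
+import spacy
+from spacy.training import Example
+import pure_Logistic  # noqa: F401 -- registers the "pure_logistic_textcat" factory
+
+nlp = spacy.load("en_core_web_lg")  # word vectors are used as features
+textcat = nlp.add_pipe("pure_logistic_textcat")
+textcat.labels = {"positive", "negative"}
+
+TRAIN = [
+    ("Great product!", {"cats": {"positive": 1.0, "negative": 0.0}}),
+    ("Awful support.", {"cats": {"positive": 0.0, "negative": 1.0}}),
+]
+examples = [Example.from_dict(nlp.make_doc(t), a) for t, a in TRAIN]
+print(textcat.update(examples))  # average training loss
+
+doc = nlp("This is wonderful.")
+print(doc._.textcat_scores)  # {"positive": ..., "negative": ...}
+```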
I love it.", {"cats": {"positive": 1.0, "negative": 0.0}}), + ("The service was excellent and staff very friendly.", {"cats": {"positive": 1.0, "negative": 0.0}}), + ("I'm really impressed with the quality.", {"cats": {"positive": 1.0, "negative": 0.0}}), + ("Best purchase I've made in years!", {"cats": {"positive": 1.0, "negative": 0.0}}), + ("The features work exactly as advertised.", {"cats": {"positive": 1.0, "negative": 0.0}}), + ("This is terrible, complete waste of money.", {"cats": {"positive": 0.0, "negative": 1.0}}), + ("Poor customer service, very disappointing.", {"cats": {"positive": 0.0, "negative": 1.0}}), + ("The product broke after one week.", {"cats": {"positive": 0.0, "negative": 1.0}}), + ("Would not recommend to anyone.", {"cats": {"positive": 0.0, "negative": 1.0}}), + ("Save your money and avoid this.", {"cats": {"positive": 0.0, "negative": 1.0}}) +] + +TEST_DATA = [ + ("Great product, highly recommend!", {"cats": {"positive": 1.0, "negative": 0.0}}), + ("Not worth the price at all.", {"cats": {"positive": 0.0, "negative": 1.0}}), + ("Everything works perfectly.", {"cats": {"positive": 1.0, "negative": 0.0}}), + ("Disappointed with the results.", {"cats": {"positive": 0.0, "negative": 1.0}}) +] + +def calculate_metrics(true_positives: int, true_negatives: int, false_positives: int, false_negatives: int) -> Dict[str, float]: + """Calculate evaluation metrics based on counts.""" + total = true_positives + true_negatives + false_positives + false_negatives + accuracy = (true_positives + true_negatives) / total if total > 0 else 0 + precision = true_positives / (true_positives + false_positives) if (true_positives + false_positives) > 0 else 0 + recall = true_positives / (true_positives + false_negatives) if (true_positives + false_negatives) > 0 else 0 + f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0 + + return { + "accuracy": accuracy, + "precision": precision, + "recall": recall, + "f1": f1 + } + +def evaluate_model(nlp, test_data): + """Evaluate the model using the test data.""" + true_positives = true_negatives = false_positives = false_negatives = 0 + predictions = [] + + for text, annotations in test_data: + doc = nlp(text) + true_cats = annotations["cats"] + pred_cats = doc._.textcat # Predictions from the custom model + + # Extract scores for 'positive' and 'negative' + pred_positive_score = pred_cats["positive"] if "positive" in pred_cats else 0.0 + true_positive_score = true_cats.get("positive", 0.0) + + pred_positive = float(pred_positive_score) > 0.5 + true_positive = float(true_positive_score) > 0.5 + + # Update counts based on predictions + if true_positive and pred_positive: + true_positives += 1 + elif not true_positive and not pred_positive: + true_negatives += 1 + elif not true_positive and pred_positive: + false_positives += 1 + else: + false_negatives += 1 + + predictions.append({ + "text": text, + "true": "positive" if true_positive else "negative", + "predicted": "positive" if pred_positive else "negative", + "scores": pred_cats + }) + + metrics = calculate_metrics(true_positives, true_negatives, false_positives, false_negatives) + return metrics, predictions + + +def main(): + try: + print("Loading spaCy model...") + nlp = spacy.load("en_core_web_lg") + except OSError: + print("Downloading spaCy model...") + spacy.cli.download("en_core_web_lg") + nlp = spacy.load("en_core_web_lg") + + print("Adding custom text categorizer...") + config = { + "learning_rate": 0.001, + "max_iterations": 100, + "batch_size": 
+
+def evaluate_model(nlp, test_data):
+    """Evaluate the model using the test data."""
+    true_positives = true_negatives = false_positives = false_negatives = 0
+    predictions = []
+
+    for text, annotations in test_data:
+        doc = nlp(text)
+        true_cats = annotations["cats"]
+        pred_cats = doc._.textcat_scores  # Predictions written by the custom pipe
+
+        # Extract scores for 'positive' and 'negative'
+        pred_positive_score = pred_cats.get("positive", 0.0)
+        true_positive_score = true_cats.get("positive", 0.0)
+
+        pred_positive = float(pred_positive_score) > 0.5
+        true_positive = float(true_positive_score) > 0.5
+
+        # Update counts based on predictions
+        if true_positive and pred_positive:
+            true_positives += 1
+        elif not true_positive and not pred_positive:
+            true_negatives += 1
+        elif not true_positive and pred_positive:
+            false_positives += 1
+        else:
+            false_negatives += 1
+
+        predictions.append({
+            "text": text,
+            "true": "positive" if true_positive else "negative",
+            "predicted": "positive" if pred_positive else "negative",
+            "scores": pred_cats
+        })
+
+    metrics = calculate_metrics(true_positives, true_negatives, false_positives, false_negatives)
+    return metrics, predictions
+
+
+def main():
+    try:
+        print("Loading spaCy model...")
+        nlp = spacy.load("en_core_web_lg")
+    except OSError:
+        print("Downloading spaCy model...")
+        spacy.cli.download("en_core_web_lg")
+        nlp = spacy.load("en_core_web_lg")
+
+    print("Adding custom text categorizer...")
+    config = {
+        "learning_rate": 0.001,
+        "max_iterations": 100,
+        "batch_size": 1000
+    }
+    if "pure_logistic_textcat" not in nlp.pipe_names:
+        textcat = nlp.add_pipe("pure_logistic_textcat", config=config)
+        textcat.labels = {"positive", "negative"}
+
+    print("Preparing training examples...")
+    train_examples = []
+    for text, annotations in TRAIN_DATA:
+        doc = nlp.make_doc(text)
+        example = Example.from_dict(doc, annotations)
+        train_examples.append(example)
+
+    print("Training the model...")
+    textcat = nlp.get_pipe("pure_logistic_textcat")
+    losses = textcat.update(train_examples)
+    print(f"Training losses: {losses}")
+
+    print("\nEvaluating the model...")
+    metrics, predictions = evaluate_model(nlp, TEST_DATA)
+
+    print("\nEvaluation Metrics:")
+    print(f"Accuracy: {metrics['accuracy']:.3f}")
+    print(f"Precision: {metrics['precision']:.3f}")
+    print(f"Recall: {metrics['recall']:.3f}")
+    print(f"F1 Score: {metrics['f1']:.3f}")
+
+    print("\nDetailed Predictions:")
+    for pred in predictions:
+        print(f"\nText: {pred['text']}")
+        print(f"True label: {pred['true']}")
+        print(f"Predicted: {pred['predicted']}")
+        print(f"Positive score: {pred['scores']['positive']:.3f}")
+        print(f"Negative score: {pred['scores']['negative']:.3f}")
+
+if __name__ == "__main__":
+    main()
diff --git a/spacy/pipeline/logreg/myenv/Scripts/Activate.ps1 b/spacy/pipeline/logreg/myenv/Scripts/Activate.ps1
new file mode 100644
index 000000000..d00d7d4fb
--- /dev/null
+++ b/spacy/pipeline/logreg/myenv/Scripts/Activate.ps1
@@ -0,0 +1,443 @@
+<#
+.Synopsis
+Activate a Python virtual environment for the current PowerShell session.
+
+.Description
+Pushes the python executable for a virtual environment to the front of the
+$Env:PATH environment variable and sets the prompt to signify that you are
+in a Python virtual environment. Makes use of the command line switches as
+well as the `pyvenv.cfg` file values present in the virtual environment.
+
+.Parameter VenvDir
+Path to the directory that contains the virtual environment to activate. The
+default value for this is the parent of the directory that the Activate.ps1
+script is located within.
+
+.Parameter Prompt
+The prompt prefix to display when this virtual environment is activated. By
+default, this prompt is the name of the virtual environment folder (VenvDir)
+surrounded by parentheses and followed by a single space (ie. '(.venv) ').
+
+.Example
+Activate.ps1
+Activates the Python virtual environment that contains the Activate.ps1 script.
+
+.Example
+Activate.ps1 -Verbose
+Activates the Python virtual environment that contains the Activate.ps1 script,
+and shows extra information about the activation as it executes.
+
+.Example
+Activate.ps1 -VenvDir C:\Users\MyUser\Common\.venv
+Activates the Python virtual environment located in the specified location.
+
+.Example
+Activate.ps1 -Prompt "MyPython"
+Activates the Python virtual environment that contains the Activate.ps1 script,
+and prefixes the current prompt with the specified string (surrounded in
+parentheses) while the virtual environment is active.
+
+.Notes
+On Windows, it may be required to enable this Activate.ps1 script by setting the
+execution policy for the user.
You can do this by issuing the following PowerShell +command: + +PS C:\> Set-ExecutionPolicy -ExecutionPolicy RemoteSigned -Scope CurrentUser + +For more information on Execution Policies: +https://go.microsoft.com/fwlink/?LinkID=135170 + +#> +Param( + [Parameter(Mandatory = $false)] + [String] + $VenvDir, + [Parameter(Mandatory = $false)] + [String] + $Prompt +) + +<# Function declarations --------------------------------------------------- #> + +<# +.Synopsis +Remove all shell session elements added by the Activate script, including the +addition of the virtual environment's Python executable from the beginning of +the PATH variable. + +.Parameter NonDestructive +If present, do not remove this function from the global namespace for the +session. + +#> +function global:deactivate ([switch]$NonDestructive) { + # Revert to original values + + # The prior prompt: + if (Test-Path -Path Function:_OLD_VIRTUAL_PROMPT) { + Copy-Item -Path Function:_OLD_VIRTUAL_PROMPT -Destination Function:prompt + Remove-Item -Path Function:_OLD_VIRTUAL_PROMPT + } + + # The prior PYTHONHOME: + if (Test-Path -Path Env:_OLD_VIRTUAL_PYTHONHOME) { + Copy-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME -Destination Env:PYTHONHOME + Remove-Item -Path Env:_OLD_VIRTUAL_PYTHONHOME + } + + # The prior PATH: + if (Test-Path -Path Env:_OLD_VIRTUAL_PATH) { + Copy-Item -Path Env:_OLD_VIRTUAL_PATH -Destination Env:PATH + Remove-Item -Path Env:_OLD_VIRTUAL_PATH + } + + # Just remove the VIRTUAL_ENV altogether: + if (Test-Path -Path Env:VIRTUAL_ENV) { + Remove-Item -Path env:VIRTUAL_ENV + } + + # Just remove VIRTUAL_ENV_PROMPT altogether. + if (Test-Path -Path Env:VIRTUAL_ENV_PROMPT) { + Remove-Item -Path env:VIRTUAL_ENV_PROMPT + } + + # Just remove the _PYTHON_VENV_PROMPT_PREFIX altogether: + if (Get-Variable -Name "_PYTHON_VENV_PROMPT_PREFIX" -ErrorAction SilentlyContinue) { + Remove-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Scope Global -Force + } + + # Leave deactivate function in the global namespace if requested: + if (-not $NonDestructive) { + Remove-Item -Path function:deactivate + } +} + +<# +.Description +Get-PyVenvConfig parses the values from the pyvenv.cfg file located in the +given folder, and returns them in a map. + +For each line in the pyvenv.cfg file, if that line can be parsed into exactly +two strings separated by `=` (with any amount of whitespace surrounding the =) +then it is considered a `key = value` line. The left hand string is the key, +the right hand is the value. + +If the value starts with a `'` or a `"` then the first and last character is +stripped from the value before being captured. + +.Parameter ConfigDir +Path to the directory that contains the `pyvenv.cfg` file. +#> +function Get-PyVenvConfig( + [String] + $ConfigDir +) { + Write-Verbose "Given ConfigDir=$ConfigDir, obtain values in pyvenv.cfg" + + # Ensure the file exists, and issue a warning if it doesn't (but still allow the function to continue). + $pyvenvConfigPath = Join-Path -Resolve -Path $ConfigDir -ChildPath 'pyvenv.cfg' -ErrorAction Continue + + # An empty map will be returned if no config file is found. + $pyvenvConfig = @{ } + + if ($pyvenvConfigPath) { + + Write-Verbose "File exists, parse `key = value` lines" + $pyvenvConfigContent = Get-Content -Path $pyvenvConfigPath + + $pyvenvConfigContent | ForEach-Object { + $keyval = $PSItem -split "\s*=\s*", 2 + if ($keyval[0] -and $keyval[1]) { + $val = $keyval[1] + + # Remove extraneous quotations around a string value. 
+ if ("'""".Contains($val.Substring(0, 1))) { + $val = $val.Substring(1, $val.Length - 2) + } + + $pyvenvConfig[$keyval[0]] = $val + Write-Verbose "Adding Key: '$($keyval[0])'='$val'" + } + } + } + return $pyvenvConfig +} + + +<# Begin Activate script --------------------------------------------------- #> + +# Determine the containing directory of this script +$VenvExecPath = Split-Path -Parent $MyInvocation.MyCommand.Definition +$VenvExecDir = Get-Item -Path $VenvExecPath + +Write-Verbose "Activation script is located in path: '$VenvExecPath'" +Write-Verbose "VenvExecDir Fullname: '$($VenvExecDir.FullName)" +Write-Verbose "VenvExecDir Name: '$($VenvExecDir.Name)" + +# Set values required in priority: CmdLine, ConfigFile, Default +# First, get the location of the virtual environment, it might not be +# VenvExecDir if specified on the command line. +if ($VenvDir) { + Write-Verbose "VenvDir given as parameter, using '$VenvDir' to determine values" +} +else { + Write-Verbose "VenvDir not given as a parameter, using parent directory name as VenvDir." + $VenvDir = $VenvExecDir.Parent.FullName.TrimEnd("\\/") + Write-Verbose "VenvDir=$VenvDir" +} + +# Next, read the `pyvenv.cfg` file to determine any required value such +# as `prompt`. +$pyvenvCfg = Get-PyVenvConfig -ConfigDir $VenvDir + +# Next, set the prompt from the command line, or the config file, or +# just use the name of the virtual environment folder. +if ($Prompt) { + Write-Verbose "Prompt specified as argument, using '$Prompt'" +} +else { + Write-Verbose "Prompt not specified as argument to script, checking pyvenv.cfg value" + if ($pyvenvCfg -and $pyvenvCfg['prompt']) { + Write-Verbose " Setting based on value in pyvenv.cfg='$($pyvenvCfg['prompt'])'" + $Prompt = $pyvenvCfg['prompt']; + } + else { + Write-Verbose " Setting prompt based on parent's directory's name. (Is the directory name passed to venv module when creating the virtual environment)" + Write-Verbose " Got leaf-name of $VenvDir='$(Split-Path -Path $venvDir -Leaf)'" + $Prompt = Split-Path -Path $venvDir -Leaf + } +} + +Write-Verbose "Prompt = '$Prompt'" +Write-Verbose "VenvDir='$VenvDir'" + +# Deactivate any currently active virtual environment, but leave the +# deactivate function in place. +deactivate -nondestructive + +# Now set the environment variable VIRTUAL_ENV, used by many tools to determine +# that there is an activated venv. 
+$env:VIRTUAL_ENV = $VenvDir + +if (-not $Env:VIRTUAL_ENV_DISABLE_PROMPT) { + + Write-Verbose "Setting prompt to '$Prompt'" + + # Set the prompt to include the env name + # Make sure _OLD_VIRTUAL_PROMPT is global + function global:_OLD_VIRTUAL_PROMPT { "" } + Copy-Item -Path function:prompt -Destination function:_OLD_VIRTUAL_PROMPT + New-Variable -Name _PYTHON_VENV_PROMPT_PREFIX -Description "Python virtual environment prompt prefix" -Scope Global -Option ReadOnly -Visibility Public -Value $Prompt + + function global:prompt { + Write-Host -NoNewline -ForegroundColor Green "($_PYTHON_VENV_PROMPT_PREFIX) " + _OLD_VIRTUAL_PROMPT + } + $env:VIRTUAL_ENV_PROMPT = $Prompt +} + +# Clear PYTHONHOME +if (Test-Path -Path Env:PYTHONHOME) { + Copy-Item -Path Env:PYTHONHOME -Destination Env:_OLD_VIRTUAL_PYTHONHOME + Remove-Item -Path Env:PYTHONHOME +} + +# Add the venv to the PATH +Copy-Item -Path Env:PATH -Destination Env:_OLD_VIRTUAL_PATH +$Env:PATH = "$VenvExecDir$([System.IO.Path]::PathSeparator)$Env:PATH" + +# SIG # Begin signature block +# MIIj/wYJKoZIhvcNAQcCoIIj8DCCI+wCAQExDzANBglghkgBZQMEAgEFADB5Bgor +# BgEEAYI3AgEEoGswaTA0BgorBgEEAYI3AgEeMCYCAwEAAAQQH8w7YFlLCE63JNLG +# KX7zUQIBAAIBAAIBAAIBAAIBADAxMA0GCWCGSAFlAwQCAQUABCBnL745ElCYk8vk +# dBtMuQhLeWJ3ZGfzKW4DHCYzAn+QB6CCDi8wggawMIIEmKADAgECAhAIrUCyYNKc +# TJ9ezam9k67ZMA0GCSqGSIb3DQEBDAUAMGIxCzAJBgNVBAYTAlVTMRUwEwYDVQQK +# EwxEaWdpQ2VydCBJbmMxGTAXBgNVBAsTEHd3dy5kaWdpY2VydC5jb20xITAfBgNV +# BAMTGERpZ2lDZXJ0IFRydXN0ZWQgUm9vdCBHNDAeFw0yMTA0MjkwMDAwMDBaFw0z +# NjA0MjgyMzU5NTlaMGkxCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5EaWdpQ2VydCwg +# SW5jLjFBMD8GA1UEAxM4RGlnaUNlcnQgVHJ1c3RlZCBHNCBDb2RlIFNpZ25pbmcg +# UlNBNDA5NiBTSEEzODQgMjAyMSBDQTEwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAw +# ggIKAoICAQDVtC9C0CiteLdd1TlZG7GIQvUzjOs9gZdwxbvEhSYwn6SOaNhc9es0 +# JAfhS0/TeEP0F9ce2vnS1WcaUk8OoVf8iJnBkcyBAz5NcCRks43iCH00fUyAVxJr +# Q5qZ8sU7H/Lvy0daE6ZMswEgJfMQ04uy+wjwiuCdCcBlp/qYgEk1hz1RGeiQIXhF +# LqGfLOEYwhrMxe6TSXBCMo/7xuoc82VokaJNTIIRSFJo3hC9FFdd6BgTZcV/sk+F +# LEikVoQ11vkunKoAFdE3/hoGlMJ8yOobMubKwvSnowMOdKWvObarYBLj6Na59zHh +# 3K3kGKDYwSNHR7OhD26jq22YBoMbt2pnLdK9RBqSEIGPsDsJ18ebMlrC/2pgVItJ +# wZPt4bRc4G/rJvmM1bL5OBDm6s6R9b7T+2+TYTRcvJNFKIM2KmYoX7BzzosmJQay +# g9Rc9hUZTO1i4F4z8ujo7AqnsAMrkbI2eb73rQgedaZlzLvjSFDzd5Ea/ttQokbI +# YViY9XwCFjyDKK05huzUtw1T0PhH5nUwjewwk3YUpltLXXRhTT8SkXbev1jLchAp +# QfDVxW0mdmgRQRNYmtwmKwH0iU1Z23jPgUo+QEdfyYFQc4UQIyFZYIpkVMHMIRro +# OBl8ZhzNeDhFMJlP/2NPTLuqDQhTQXxYPUez+rbsjDIJAsxsPAxWEQIDAQABo4IB +# WTCCAVUwEgYDVR0TAQH/BAgwBgEB/wIBADAdBgNVHQ4EFgQUaDfg67Y7+F8Rhvv+ +# YXsIiGX0TkIwHwYDVR0jBBgwFoAU7NfjgtJxXWRM3y5nP+e6mK4cD08wDgYDVR0P +# AQH/BAQDAgGGMBMGA1UdJQQMMAoGCCsGAQUFBwMDMHcGCCsGAQUFBwEBBGswaTAk +# BggrBgEFBQcwAYYYaHR0cDovL29jc3AuZGlnaWNlcnQuY29tMEEGCCsGAQUFBzAC +# hjVodHRwOi8vY2FjZXJ0cy5kaWdpY2VydC5jb20vRGlnaUNlcnRUcnVzdGVkUm9v +# dEc0LmNydDBDBgNVHR8EPDA6MDigNqA0hjJodHRwOi8vY3JsMy5kaWdpY2VydC5j +# b20vRGlnaUNlcnRUcnVzdGVkUm9vdEc0LmNybDAcBgNVHSAEFTATMAcGBWeBDAED +# MAgGBmeBDAEEATANBgkqhkiG9w0BAQwFAAOCAgEAOiNEPY0Idu6PvDqZ01bgAhql +# +Eg08yy25nRm95RysQDKr2wwJxMSnpBEn0v9nqN8JtU3vDpdSG2V1T9J9Ce7FoFF +# UP2cvbaF4HZ+N3HLIvdaqpDP9ZNq4+sg0dVQeYiaiorBtr2hSBh+3NiAGhEZGM1h +# mYFW9snjdufE5BtfQ/g+lP92OT2e1JnPSt0o618moZVYSNUa/tcnP/2Q0XaG3Ryw +# YFzzDaju4ImhvTnhOE7abrs2nfvlIVNaw8rpavGiPttDuDPITzgUkpn13c5Ubdld +# AhQfQDN8A+KVssIhdXNSy0bYxDQcoqVLjc1vdjcshT8azibpGL6QB7BDf5WIIIJw +# 8MzK7/0pNVwfiThV9zeKiwmhywvpMRr/LhlcOXHhvpynCgbWJme3kuZOX956rEnP +# LqR0kq3bPKSchh/jwVYbKyP/j7XqiHtwa+aguv06P0WmxOgWkVKLQcBIhEuWTatE +# 
QOON8BUozu3xGFYHKi8QxAwIZDwzj64ojDzLj4gLDb879M4ee47vtevLt/B3E+bn +# KD+sEq6lLyJsQfmCXBVmzGwOysWGw/YmMwwHS6DTBwJqakAwSEs0qFEgu60bhQji +# WQ1tygVQK+pKHJ6l/aCnHwZ05/LWUpD9r4VIIflXO7ScA+2GRfS0YW6/aOImYIbq +# yK+p/pQd52MbOoZWeE4wggd3MIIFX6ADAgECAhAHHxQbizANJfMU6yMM0NHdMA0G +# CSqGSIb3DQEBCwUAMGkxCzAJBgNVBAYTAlVTMRcwFQYDVQQKEw5EaWdpQ2VydCwg +# SW5jLjFBMD8GA1UEAxM4RGlnaUNlcnQgVHJ1c3RlZCBHNCBDb2RlIFNpZ25pbmcg +# UlNBNDA5NiBTSEEzODQgMjAyMSBDQTEwHhcNMjIwMTE3MDAwMDAwWhcNMjUwMTE1 +# MjM1OTU5WjB8MQswCQYDVQQGEwJVUzEPMA0GA1UECBMGT3JlZ29uMRIwEAYDVQQH +# EwlCZWF2ZXJ0b24xIzAhBgNVBAoTGlB5dGhvbiBTb2Z0d2FyZSBGb3VuZGF0aW9u +# MSMwIQYDVQQDExpQeXRob24gU29mdHdhcmUgRm91bmRhdGlvbjCCAiIwDQYJKoZI +# hvcNAQEBBQADggIPADCCAgoCggIBAKgc0BTT+iKbtK6f2mr9pNMUTcAJxKdsuOiS +# YgDFfwhjQy89koM7uP+QV/gwx8MzEt3c9tLJvDccVWQ8H7mVsk/K+X+IufBLCgUi +# 0GGAZUegEAeRlSXxxhYScr818ma8EvGIZdiSOhqjYc4KnfgfIS4RLtZSrDFG2tN1 +# 6yS8skFa3IHyvWdbD9PvZ4iYNAS4pjYDRjT/9uzPZ4Pan+53xZIcDgjiTwOh8VGu +# ppxcia6a7xCyKoOAGjvCyQsj5223v1/Ig7Dp9mGI+nh1E3IwmyTIIuVHyK6Lqu35 +# 2diDY+iCMpk9ZanmSjmB+GMVs+H/gOiofjjtf6oz0ki3rb7sQ8fTnonIL9dyGTJ0 +# ZFYKeb6BLA66d2GALwxZhLe5WH4Np9HcyXHACkppsE6ynYjTOd7+jN1PRJahN1oE +# RzTzEiV6nCO1M3U1HbPTGyq52IMFSBM2/07WTJSbOeXjvYR7aUxK9/ZkJiacl2iZ +# I7IWe7JKhHohqKuceQNyOzxTakLcRkzynvIrk33R9YVqtB4L6wtFxhUjvDnQg16x +# ot2KVPdfyPAWd81wtZADmrUtsZ9qG79x1hBdyOl4vUtVPECuyhCxaw+faVjumapP +# Unwo8ygflJJ74J+BYxf6UuD7m8yzsfXWkdv52DjL74TxzuFTLHPyARWCSCAbzn3Z +# Ily+qIqDAgMBAAGjggIGMIICAjAfBgNVHSMEGDAWgBRoN+Drtjv4XxGG+/5hewiI +# ZfROQjAdBgNVHQ4EFgQUt/1Teh2XDuUj2WW3siYWJgkZHA8wDgYDVR0PAQH/BAQD +# AgeAMBMGA1UdJQQMMAoGCCsGAQUFBwMDMIG1BgNVHR8Ega0wgaowU6BRoE+GTWh0 +# dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydFRydXN0ZWRHNENvZGVTaWdu +# aW5nUlNBNDA5NlNIQTM4NDIwMjFDQTEuY3JsMFOgUaBPhk1odHRwOi8vY3JsNC5k +# aWdpY2VydC5jb20vRGlnaUNlcnRUcnVzdGVkRzRDb2RlU2lnbmluZ1JTQTQwOTZT +# SEEzODQyMDIxQ0ExLmNybDA+BgNVHSAENzA1MDMGBmeBDAEEATApMCcGCCsGAQUF +# BwIBFhtodHRwOi8vd3d3LmRpZ2ljZXJ0LmNvbS9DUFMwgZQGCCsGAQUFBwEBBIGH +# MIGEMCQGCCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2VydC5jb20wXAYIKwYB +# BQUHMAKGUGh0dHA6Ly9jYWNlcnRzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydFRydXN0 +# ZWRHNENvZGVTaWduaW5nUlNBNDA5NlNIQTM4NDIwMjFDQTEuY3J0MAwGA1UdEwEB +# /wQCMAAwDQYJKoZIhvcNAQELBQADggIBABxv4AeV/5ltkELHSC63fXAFYS5tadcW +# TiNc2rskrNLrfH1Ns0vgSZFoQxYBFKI159E8oQQ1SKbTEubZ/B9kmHPhprHya08+ +# VVzxC88pOEvz68nA82oEM09584aILqYmj8Pj7h/kmZNzuEL7WiwFa/U1hX+XiWfL +# IJQsAHBla0i7QRF2de8/VSF0XXFa2kBQ6aiTsiLyKPNbaNtbcucaUdn6vVUS5izW +# OXM95BSkFSKdE45Oq3FForNJXjBvSCpwcP36WklaHL+aHu1upIhCTUkzTHMh8b86 +# WmjRUqbrnvdyR2ydI5l1OqcMBjkpPpIV6wcc+KY/RH2xvVuuoHjlUjwq2bHiNoX+ +# W1scCpnA8YTs2d50jDHUgwUo+ciwpffH0Riq132NFmrH3r67VaN3TuBxjI8SIZM5 +# 8WEDkbeoriDk3hxU8ZWV7b8AW6oyVBGfM06UgkfMb58h+tJPrFx8VI/WLq1dTqMf +# ZOm5cuclMnUHs2uqrRNtnV8UfidPBL4ZHkTcClQbCoz0UbLhkiDvIS00Dn+BBcxw +# /TKqVL4Oaz3bkMSsM46LciTeucHY9ExRVt3zy7i149sd+F4QozPqn7FrSVHXmem3 +# r7bjyHTxOgqxRCVa18Vtx7P/8bYSBeS+WHCKcliFCecspusCDSlnRUjZwyPdP0VH +# xaZg2unjHY3rMYIVJjCCFSICAQEwfTBpMQswCQYDVQQGEwJVUzEXMBUGA1UEChMO +# RGlnaUNlcnQsIEluYy4xQTA/BgNVBAMTOERpZ2lDZXJ0IFRydXN0ZWQgRzQgQ29k +# ZSBTaWduaW5nIFJTQTQwOTYgU0hBMzg0IDIwMjEgQ0ExAhAHHxQbizANJfMU6yMM +# 0NHdMA0GCWCGSAFlAwQCAQUAoIHEMBkGCSqGSIb3DQEJAzEMBgorBgEEAYI3AgEE +# MBwGCisGAQQBgjcCAQsxDjAMBgorBgEEAYI3AgEVMC8GCSqGSIb3DQEJBDEiBCBn +# AZ6P7YvTwq0fbF62o7E75R0LxsW5OtyYiFESQckLhjBYBgorBgEEAYI3AgEMMUow +# SKBGgEQAQgB1AGkAbAB0ADoAIABSAGUAbABlAGEAcwBlAF8AdgAzAC4AMQAwAC4A +# NQBfADIAMAAyADIAMAA2ADAANgAuADAAMTANBgkqhkiG9w0BAQEFAASCAgA5LMM8 +# 8+phW11oF/PTFxitR3oW7QHlGHA97n1MCieor042JtmqUyqqf7ykapKc/ND4pVDP +# 
DP8nhIeXuLd2/SHqqf6CLZX9yacAFPDCV/MtYhlw4yKwa2ECw9EDDwB670UwUW/j +# IUl+fSrWagwH2WC7T5iMiV7uEZU4koGuOS4SiDzRLwTcuRtY6N/FYerQhioHXzdX +# vO76qXnj4UIDWnWbSWLgPDo8g4xonm7BC0dFRn4WW8tgm/StxQ/TBS4L2O/LEjYy +# pSLEXOy0INrA5CqWd4J4dpOhkQng1UJoySCL9Q2ceyv1U3SrywLY4rLwmSrZYsbQ +# OpnL+P1DP/eHYPbcwQEhbaTj81ULMxNDnouXJMm6ErMgTRH6TTpDcuPI8qlqkT2E +# DGZ4pPdZSHxDYkocJ6REh1YKlpvdHaGQFkXuc3p2lG/siv2rtDefI4wChN4VOHZG +# ia6G3FZaIyqFW/0sFz5KOzxoxcjfzyO76SSJx9jYpuOmPrHihaOlFjzZGxnWwFdM +# l3uCD+QeJL2bkl7npoyW0RRznBUUj21psHdVN5vzK+Gsyr22A9lS1XaX3a2KJ6bl +# Krkj+PObW5dtxvso0bQss2FCFdOATk4AlFcmk6bWk8rZm+w4e9NugsCTI+IE45hL +# AEyzTjc21JqGt8l2Rn/eElRHgsjvNpO4H5FFo6GCEbMwghGvBgorBgEEAYI3AwMB +# MYIRnzCCEZsGCSqGSIb3DQEHAqCCEYwwghGIAgEDMQ8wDQYJYIZIAWUDBAIBBQAw +# eAYLKoZIhvcNAQkQAQSgaQRnMGUCAQEGCWCGSAGG/WwHATAxMA0GCWCGSAFlAwQC +# AQUABCDX6Ys0ehzU7Uygr+TZMXB4pMkJvCegnm5JrODTttrXZwIRAMaBOV1Pb1sY +# w0ypALrk6u8YDzIwMjIwNjA2MTYyMjEwWqCCDXwwggbGMIIErqADAgECAhAKekqI +# nsmZQpAGYzhNhpedMA0GCSqGSIb3DQEBCwUAMGMxCzAJBgNVBAYTAlVTMRcwFQYD +# VQQKEw5EaWdpQ2VydCwgSW5jLjE7MDkGA1UEAxMyRGlnaUNlcnQgVHJ1c3RlZCBH +# NCBSU0E0MDk2IFNIQTI1NiBUaW1lU3RhbXBpbmcgQ0EwHhcNMjIwMzI5MDAwMDAw +# WhcNMzMwMzE0MjM1OTU5WjBMMQswCQYDVQQGEwJVUzEXMBUGA1UEChMORGlnaUNl +# cnQsIEluYy4xJDAiBgNVBAMTG0RpZ2lDZXJ0IFRpbWVzdGFtcCAyMDIyIC0gMjCC +# AiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBALkqliOmXLxf1knwFYIY9DPu +# zFxs4+AlLtIx5DxArvurxON4XX5cNur1JY1Do4HrOGP5PIhp3jzSMFENMQe6Rm7p +# o0tI6IlBfw2y1vmE8Zg+C78KhBJxbKFiJgHTzsNs/aw7ftwqHKm9MMYW2Nq867Lx +# g9GfzQnFuUFqRUIjQVr4YNNlLD5+Xr2Wp/D8sfT0KM9CeR87x5MHaGjlRDRSXw9Q +# 3tRZLER0wDJHGVvimC6P0Mo//8ZnzzyTlU6E6XYYmJkRFMUrDKAz200kheiClOEv +# A+5/hQLJhuHVGBS3BEXz4Di9or16cZjsFef9LuzSmwCKrB2NO4Bo/tBZmCbO4O2u +# fyguwp7gC0vICNEyu4P6IzzZ/9KMu/dDI9/nw1oFYn5wLOUrsj1j6siugSBrQ4nI +# fl+wGt0ZvZ90QQqvuY4J03ShL7BUdsGQT5TshmH/2xEvkgMwzjC3iw9dRLNDHSNQ +# zZHXL537/M2xwafEDsTvQD4ZOgLUMalpoEn5deGb6GjkagyP6+SxIXuGZ1h+fx/o +# K+QUshbWgaHK2jCQa+5vdcCwNiayCDv/vb5/bBMY38ZtpHlJrYt/YYcFaPfUcONC +# leieu5tLsuK2QT3nr6caKMmtYbCgQRgZTu1Hm2GV7T4LYVrqPnqYklHNP8lE54CL +# KUJy93my3YTqJ+7+fXprAgMBAAGjggGLMIIBhzAOBgNVHQ8BAf8EBAMCB4AwDAYD +# VR0TAQH/BAIwADAWBgNVHSUBAf8EDDAKBggrBgEFBQcDCDAgBgNVHSAEGTAXMAgG +# BmeBDAEEAjALBglghkgBhv1sBwEwHwYDVR0jBBgwFoAUuhbZbU2FL3MpdpovdYxq +# II+eyG8wHQYDVR0OBBYEFI1kt4kh/lZYRIRhp+pvHDaP3a8NMFoGA1UdHwRTMFEw +# T6BNoEuGSWh0dHA6Ly9jcmwzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydFRydXN0ZWRH +# NFJTQTQwOTZTSEEyNTZUaW1lU3RhbXBpbmdDQS5jcmwwgZAGCCsGAQUFBwEBBIGD +# MIGAMCQGCCsGAQUFBzABhhhodHRwOi8vb2NzcC5kaWdpY2VydC5jb20wWAYIKwYB +# BQUHMAKGTGh0dHA6Ly9jYWNlcnRzLmRpZ2ljZXJ0LmNvbS9EaWdpQ2VydFRydXN0 +# ZWRHNFJTQTQwOTZTSEEyNTZUaW1lU3RhbXBpbmdDQS5jcnQwDQYJKoZIhvcNAQEL +# BQADggIBAA0tI3Sm0fX46kuZPwHk9gzkrxad2bOMl4IpnENvAS2rOLVwEb+EGYs/ +# XeWGT76TOt4qOVo5TtiEWaW8G5iq6Gzv0UhpGThbz4k5HXBw2U7fIyJs1d/2Wcuh +# wupMdsqh3KErlribVakaa33R9QIJT4LWpXOIxJiA3+5JlbezzMWn7g7h7x44ip/v +# EckxSli23zh8y/pc9+RTv24KfH7X3pjVKWWJD6KcwGX0ASJlx+pedKZbNZJQfPQX +# podkTz5GiRZjIGvL8nvQNeNKcEiptucdYL0EIhUlcAZyqUQ7aUcR0+7px6A+TxC5 +# MDbk86ppCaiLfmSiZZQR+24y8fW7OK3NwJMR1TJ4Sks3KkzzXNy2hcC7cDBVeNaY +# /lRtf3GpSBp43UZ3Lht6wDOK+EoojBKoc88t+dMj8p4Z4A2UKKDr2xpRoJWCjihr +# pM6ddt6pc6pIallDrl/q+A8GQp3fBmiW/iqgdFtjZt5rLLh4qk1wbfAs8QcVfjW0 +# 5rUMopml1xVrNQ6F1uAszOAMJLh8UgsemXzvyMjFjFhpr6s94c/MfRWuFL+Kcd/K +# l7HYR+ocheBFThIcFClYzG/Tf8u+wQ5KbyCcrtlzMlkI5y2SoRoR/jKYpl0rl+CL +# 05zMbbUNrkdjOEcXW28T2moQbh9Jt0RbtAgKh1pZBHYRoad3AhMcMIIGrjCCBJag +# AwIBAgIQBzY3tyRUfNhHrP0oZipeWzANBgkqhkiG9w0BAQsFADBiMQswCQYDVQQG +# EwJVUzEVMBMGA1UEChMMRGlnaUNlcnQgSW5jMRkwFwYDVQQLExB3d3cuZGlnaWNl +# 
cnQuY29tMSEwHwYDVQQDExhEaWdpQ2VydCBUcnVzdGVkIFJvb3QgRzQwHhcNMjIw +# MzIzMDAwMDAwWhcNMzcwMzIyMjM1OTU5WjBjMQswCQYDVQQGEwJVUzEXMBUGA1UE +# ChMORGlnaUNlcnQsIEluYy4xOzA5BgNVBAMTMkRpZ2lDZXJ0IFRydXN0ZWQgRzQg +# UlNBNDA5NiBTSEEyNTYgVGltZVN0YW1waW5nIENBMIICIjANBgkqhkiG9w0BAQEF +# AAOCAg8AMIICCgKCAgEAxoY1BkmzwT1ySVFVxyUDxPKRN6mXUaHW0oPRnkyibaCw +# zIP5WvYRoUQVQl+kiPNo+n3znIkLf50fng8zH1ATCyZzlm34V6gCff1DtITaEfFz +# sbPuK4CEiiIY3+vaPcQXf6sZKz5C3GeO6lE98NZW1OcoLevTsbV15x8GZY2UKdPZ +# 7Gnf2ZCHRgB720RBidx8ald68Dd5n12sy+iEZLRS8nZH92GDGd1ftFQLIWhuNyG7 +# QKxfst5Kfc71ORJn7w6lY2zkpsUdzTYNXNXmG6jBZHRAp8ByxbpOH7G1WE15/teP +# c5OsLDnipUjW8LAxE6lXKZYnLvWHpo9OdhVVJnCYJn+gGkcgQ+NDY4B7dW4nJZCY +# OjgRs/b2nuY7W+yB3iIU2YIqx5K/oN7jPqJz+ucfWmyU8lKVEStYdEAoq3NDzt9K +# oRxrOMUp88qqlnNCaJ+2RrOdOqPVA+C/8KI8ykLcGEh/FDTP0kyr75s9/g64ZCr6 +# dSgkQe1CvwWcZklSUPRR8zZJTYsg0ixXNXkrqPNFYLwjjVj33GHek/45wPmyMKVM +# 1+mYSlg+0wOI/rOP015LdhJRk8mMDDtbiiKowSYI+RQQEgN9XyO7ZONj4KbhPvbC +# dLI/Hgl27KtdRnXiYKNYCQEoAA6EVO7O6V3IXjASvUaetdN2udIOa5kM0jO0zbEC +# AwEAAaOCAV0wggFZMBIGA1UdEwEB/wQIMAYBAf8CAQAwHQYDVR0OBBYEFLoW2W1N +# hS9zKXaaL3WMaiCPnshvMB8GA1UdIwQYMBaAFOzX44LScV1kTN8uZz/nupiuHA9P +# MA4GA1UdDwEB/wQEAwIBhjATBgNVHSUEDDAKBggrBgEFBQcDCDB3BggrBgEFBQcB +# AQRrMGkwJAYIKwYBBQUHMAGGGGh0dHA6Ly9vY3NwLmRpZ2ljZXJ0LmNvbTBBBggr +# BgEFBQcwAoY1aHR0cDovL2NhY2VydHMuZGlnaWNlcnQuY29tL0RpZ2lDZXJ0VHJ1 +# c3RlZFJvb3RHNC5jcnQwQwYDVR0fBDwwOjA4oDagNIYyaHR0cDovL2NybDMuZGln +# aWNlcnQuY29tL0RpZ2lDZXJ0VHJ1c3RlZFJvb3RHNC5jcmwwIAYDVR0gBBkwFzAI +# BgZngQwBBAIwCwYJYIZIAYb9bAcBMA0GCSqGSIb3DQEBCwUAA4ICAQB9WY7Ak7Zv +# mKlEIgF+ZtbYIULhsBguEE0TzzBTzr8Y+8dQXeJLKftwig2qKWn8acHPHQfpPmDI +# 2AvlXFvXbYf6hCAlNDFnzbYSlm/EUExiHQwIgqgWvalWzxVzjQEiJc6VaT9Hd/ty +# dBTX/6tPiix6q4XNQ1/tYLaqT5Fmniye4Iqs5f2MvGQmh2ySvZ180HAKfO+ovHVP +# ulr3qRCyXen/KFSJ8NWKcXZl2szwcqMj+sAngkSumScbqyQeJsG33irr9p6xeZmB +# o1aGqwpFyd/EjaDnmPv7pp1yr8THwcFqcdnGE4AJxLafzYeHJLtPo0m5d2aR8XKc +# 6UsCUqc3fpNTrDsdCEkPlM05et3/JWOZJyw9P2un8WbDQc1PtkCbISFA0LcTJM3c +# HXg65J6t5TRxktcma+Q4c6umAU+9Pzt4rUyt+8SVe+0KXzM5h0F4ejjpnOHdI/0d +# KNPH+ejxmF/7K9h+8kaddSweJywm228Vex4Ziza4k9Tm8heZWcpw8De/mADfIBZP +# J/tgZxahZrrdVcA6KYawmKAr7ZVBtzrVFZgxtGIJDwq9gdkT/r+k0fNX2bwE+oLe +# Mt8EifAAzV3C+dAjfwAL5HYCJtnwZXZCpimHCUcr5n8apIUP/JiW9lVUKx+A+sDy +# Divl1vupL0QVSucTDh3bNzgaoSv27dZ8/DGCA3YwggNyAgEBMHcwYzELMAkGA1UE +# BhMCVVMxFzAVBgNVBAoTDkRpZ2lDZXJ0LCBJbmMuMTswOQYDVQQDEzJEaWdpQ2Vy +# dCBUcnVzdGVkIEc0IFJTQTQwOTYgU0hBMjU2IFRpbWVTdGFtcGluZyBDQQIQCnpK +# iJ7JmUKQBmM4TYaXnTANBglghkgBZQMEAgEFAKCB0TAaBgkqhkiG9w0BCQMxDQYL +# KoZIhvcNAQkQAQQwHAYJKoZIhvcNAQkFMQ8XDTIyMDYwNjE2MjIxMFowKwYLKoZI +# hvcNAQkQAgwxHDAaMBgwFgQUhQjzhlFcs9MHfba0t8B/G0peQd4wLwYJKoZIhvcN +# AQkEMSIEIOf/YoAGTg8y0pigG0kgexHa3asvnqD00Uf8JB3uQ5TUMDcGCyqGSIb3 +# DQEJEAIvMSgwJjAkMCIEIJ2mkBXDScbBiXhFujWCrXDIj6QpO9tqvpwr0lOSeeY7 +# MA0GCSqGSIb3DQEBAQUABIICALVOybzMu47x8CdSSeAuaV/YXzBq1oDqNnX+Fry/ +# 7C7TpHKVn58SKdFgeNmneBuqBqlZ2qyO9h02ZercH2d3GfALKuEmcUcp/Ik6RqQR +# INN76QLhzFeIiIdBGvcHI2hcx3OAgtenpe+4V2oWa05cJf5exXQ9ja59aNB0sf5j +# GyyHgmPhRK6itjp7xoSOw5zY4NN91viV2DX23b0SiL3oB5bAzgL77RLydmgg4XIW +# 9vxqyCK8XM4imdLfnI0J+Sw7QBLk5Pw1jp/x0YNbHlk5ojA06ehufF0smFdgjMBZ +# eefNH+lXfdVBeml8j3rNNbGsQ+d6+xXmUUVnNAGwK8QH5LpCqe+7H0r3yFsBCoxI +# XaAPC9EPQVMYyPFyzh8Omu5RHQaeIARZvTyzk3BzjyJmDypOcy3s1a4YG0lsO8+b +# cI925YMstRe3/gWSfZj8Q4OXFpeJxQ1b4w1slH116IrtjR9FC+N9OEWMggi4YQQf +# V6DPuNmv9d4JMR/vwxU4XmvHG/HnbFyFrpFmlRpSTExv3XNQWcdSn0FneKw1evvZ +# RRHow/HShcRnIPRqfhnqlQNxUKLt9bmWnRXLkaNCtiowSJ82v9XnTboZunXbMSb0 +# dM5FF5o4xTVoyp6P0O2qF2QtaXU03P8MDNOD1sWFSWhi64FWnmXuIaAuJKn05ZgC +# hIIC +# SIG # End 
signature block diff --git a/spacy/pipeline/logreg/myenv/Scripts/activate b/spacy/pipeline/logreg/myenv/Scripts/activate new file mode 100644 index 000000000..29e3594c7 --- /dev/null +++ b/spacy/pipeline/logreg/myenv/Scripts/activate @@ -0,0 +1,69 @@ +# This file must be used with "source bin/activate" *from bash* +# you cannot run it directly + +deactivate () { + # reset old environment variables + if [ -n "${_OLD_VIRTUAL_PATH:-}" ] ; then + PATH="${_OLD_VIRTUAL_PATH:-}" + export PATH + unset _OLD_VIRTUAL_PATH + fi + if [ -n "${_OLD_VIRTUAL_PYTHONHOME:-}" ] ; then + PYTHONHOME="${_OLD_VIRTUAL_PYTHONHOME:-}" + export PYTHONHOME + unset _OLD_VIRTUAL_PYTHONHOME + fi + + # This should detect bash and zsh, which have a hash command that must + # be called to get it to forget past commands. Without forgetting + # past commands the $PATH changes we made may not be respected + if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then + hash -r 2> /dev/null + fi + + if [ -n "${_OLD_VIRTUAL_PS1:-}" ] ; then + PS1="${_OLD_VIRTUAL_PS1:-}" + export PS1 + unset _OLD_VIRTUAL_PS1 + fi + + unset VIRTUAL_ENV + unset VIRTUAL_ENV_PROMPT + if [ ! "${1:-}" = "nondestructive" ] ; then + # Self destruct! + unset -f deactivate + fi +} + +# unset irrelevant variables +deactivate nondestructive + +VIRTUAL_ENV="C:\Users\samhi\spaCy\spacy\pipeline\logreg\myenv" +export VIRTUAL_ENV + +_OLD_VIRTUAL_PATH="$PATH" +PATH="$VIRTUAL_ENV/Scripts:$PATH" +export PATH + +# unset PYTHONHOME if set +# this will fail if PYTHONHOME is set to the empty string (which is bad anyway) +# could use `if (set -u; : $PYTHONHOME) ;` in bash +if [ -n "${PYTHONHOME:-}" ] ; then + _OLD_VIRTUAL_PYTHONHOME="${PYTHONHOME:-}" + unset PYTHONHOME +fi + +if [ -z "${VIRTUAL_ENV_DISABLE_PROMPT:-}" ] ; then + _OLD_VIRTUAL_PS1="${PS1:-}" + PS1="(myenv) ${PS1:-}" + export PS1 + VIRTUAL_ENV_PROMPT="(myenv) " + export VIRTUAL_ENV_PROMPT +fi + +# This should detect bash and zsh, which have a hash command that must +# be called to get it to forget past commands. 
Without forgetting +# past commands the $PATH changes we made may not be respected +if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then + hash -r 2> /dev/null +fi diff --git a/spacy/pipeline/logreg/myenv/Scripts/f2py.exe b/spacy/pipeline/logreg/myenv/Scripts/f2py.exe new file mode 100644 index 000000000..48e9d7f53 Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/f2py.exe differ diff --git a/spacy/pipeline/logreg/myenv/Scripts/markdown-it.exe b/spacy/pipeline/logreg/myenv/Scripts/markdown-it.exe new file mode 100644 index 000000000..421257081 Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/markdown-it.exe differ diff --git a/spacy/pipeline/logreg/myenv/Scripts/numpy-config.exe b/spacy/pipeline/logreg/myenv/Scripts/numpy-config.exe new file mode 100644 index 000000000..73920e031 Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/numpy-config.exe differ diff --git a/spacy/pipeline/logreg/myenv/Scripts/pip.exe b/spacy/pipeline/logreg/myenv/Scripts/pip.exe new file mode 100644 index 000000000..cf6b2901d Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/pip.exe differ diff --git a/spacy/pipeline/logreg/myenv/Scripts/pip3.10.exe b/spacy/pipeline/logreg/myenv/Scripts/pip3.10.exe new file mode 100644 index 000000000..cf6b2901d Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/pip3.10.exe differ diff --git a/spacy/pipeline/logreg/myenv/Scripts/pip3.exe b/spacy/pipeline/logreg/myenv/Scripts/pip3.exe new file mode 100644 index 000000000..cf6b2901d Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/pip3.exe differ diff --git a/spacy/pipeline/logreg/myenv/Scripts/pygmentize.exe b/spacy/pipeline/logreg/myenv/Scripts/pygmentize.exe new file mode 100644 index 000000000..cf46a5f32 Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/pygmentize.exe differ diff --git a/spacy/pipeline/logreg/myenv/Scripts/python.exe b/spacy/pipeline/logreg/myenv/Scripts/python.exe new file mode 100644 index 000000000..796e5ca8e Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/python.exe differ diff --git a/spacy/pipeline/logreg/myenv/Scripts/pythonw.exe b/spacy/pipeline/logreg/myenv/Scripts/pythonw.exe new file mode 100644 index 000000000..3a7a004a9 Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/pythonw.exe differ diff --git a/spacy/pipeline/logreg/myenv/Scripts/spacy.exe b/spacy/pipeline/logreg/myenv/Scripts/spacy.exe new file mode 100644 index 000000000..bb45a113d Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/spacy.exe differ diff --git a/spacy/pipeline/logreg/myenv/Scripts/tqdm.exe b/spacy/pipeline/logreg/myenv/Scripts/tqdm.exe new file mode 100644 index 000000000..90e724041 Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/tqdm.exe differ diff --git a/spacy/pipeline/logreg/myenv/Scripts/typer.exe b/spacy/pipeline/logreg/myenv/Scripts/typer.exe new file mode 100644 index 000000000..93e0d3d76 Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/typer.exe differ diff --git a/spacy/pipeline/logreg/myenv/Scripts/weasel.exe b/spacy/pipeline/logreg/myenv/Scripts/weasel.exe new file mode 100644 index 000000000..9a6a2f1d5 Binary files /dev/null and b/spacy/pipeline/logreg/myenv/Scripts/weasel.exe differ diff --git a/spacy/pipeline/logreg/myenv/pyvenv.cfg b/spacy/pipeline/logreg/myenv/pyvenv.cfg new file mode 100644 index 000000000..8695482a6 --- /dev/null +++ b/spacy/pipeline/logreg/myenv/pyvenv.cfg @@ -0,0 +1,3 @@ +home = C:\Python310 
+include-system-site-packages = false
+version = 3.10.5
diff --git a/spacy/pipeline/logreg/src/pure_Logistic.py b/spacy/pipeline/logreg/src/pure_Logistic.py
new file mode 100644
index 000000000..db1748c6d
--- /dev/null
+++ b/spacy/pipeline/logreg/src/pure_Logistic.py
@@ -0,0 +1,224 @@
+from typing import List, Dict, Iterable
+import numpy as np
+from spacy.pipeline import TrainablePipe
+from spacy.language import Language
+from spacy.training import Example
+from spacy.vocab import Vocab
+from spacy.tokens import Doc
+
+@Language.factory(
+    "pure_logistic_textcat",
+    default_config={
+        "learning_rate": 0.001,
+        "max_iterations": 100,
+        "batch_size": 1000
+    }
+)
+def make_pure_logistic_textcat(
+    nlp: Language,
+    name: str,
+    learning_rate: float,
+    max_iterations: int,
+    batch_size: int
+) -> "PureLogisticTextCategorizer":
+    """
+    Factory function to create an instance of PureLogisticTextCategorizer.
+    :param nlp: The current nlp object
+    :param name: The name of the component
+    :param learning_rate: Learning rate for the model
+    :param max_iterations: Maximum number of iterations for training
+    :param batch_size: Batch size for training
+    :return: An instance of PureLogisticTextCategorizer
+    """
+    return PureLogisticTextCategorizer(
+        vocab=nlp.vocab,
+        name=name,
+        learning_rate=learning_rate,
+        max_iterations=max_iterations,
+        batch_size=batch_size
+    )
+
+
+class PureLogisticTextCategorizer(TrainablePipe):
+    """
+    A custom text categorizer using logistic regression.
+    """
+    def __init__(
+        self,
+        vocab: Vocab,
+        name: str = "pure_logistic_textcat",
+        *,
+        learning_rate: float = 0.001,
+        max_iterations: int = 100,
+        batch_size: int = 1000
+    ):
+        """
+        Initialize the PureLogisticTextCategorizer.
+        :param vocab: The vocabulary of the spaCy model
+        :param name: The name of the pipeline component
+        :param learning_rate: Learning rate for gradient descent
+        :param max_iterations: Maximum iterations for training
+        :param batch_size: Size of the training batch (currently unused)
+        """
+        self.vocab = vocab
+        self.name = name
+        self.learning_rate = learning_rate
+        self.max_iterations = max_iterations
+        self.batch_size = batch_size
+        self.weights = None  # Initialized lazily on the first update
+        self.bias = None  # Initialized lazily on the first update
+        self._labels = set()  # Initialize labels set
+
+        # Register the custom extensions on the Doc object for handling scores.
+        # Note: "cats" here is the custom attribute doc._.cats, distinct from
+        # the built-in doc.cats.
+        if not Doc.has_extension("textcat_scores"):
+            Doc.set_extension("textcat_scores", default={})
+        if not Doc.has_extension("cats"):
+            Doc.set_extension("cats", default={})
+
+    def predict(self, docs: List[Doc]) -> List[Dict[str, float]]:
+        """
+        Predict the categories for the given documents.
+        :param docs: List of spaCy Doc objects to predict on
+        :return: List of per-label score dicts. TrainablePipe.__call__ passes
+                 these on to set_annotations, so predict itself must not
+                 modify the docs.
+        """
+        return self._predict_scores(docs)
+
+    def _predict_scores(self, docs: List[Doc]) -> List[Dict[str, float]]:
+        """
+        Predict the scores for each document.
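+        Each label score is an independent sigmoid probability computed from
+        the document's feature vector x: p_i = 1 / (1 + exp(-(x · W_i + b_i))).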
+ :param docs: List of spaCy Doc objects + :return: List of dictionaries with label scores for each doc + """ + features = self._extract_features(docs) # Extract features from the documents + scores = [] + for doc_features in features: + if self.weights is None: + # If weights are not initialized, assign 0.5 (neutral probability) to each label + doc_scores = {label: 0.5 for label in self.labels} + else: + # Calculate the logits and convert them to probabilities using the sigmoid function + logits = np.dot(doc_features, self.weights) + self.bias + probs = 1 / (1 + np.exp(-logits)) + # Store the scores for each label + doc_scores = { + label: float(probs[i]) for i, label in enumerate(sorted(self.labels)) + } + scores.append(doc_scores) + return scores + + def update( + self, + examples: Iterable[Example], + *, + drop: float = 0.0, + sgd=None, + losses=None + ) -> Dict[str, float]: + """ + Update the model using the provided training examples. + :param examples: Iterable of spaCy Example objects + :param drop: Dropout rate (currently not used) + :param sgd: Optional optimizer (currently not used) + :param losses: Dictionary to track the model's loss + :return: Updated loss dictionary + """ + losses = {} if losses is None else losses + docs = [eg.reference for eg in examples] + features = self._extract_features(docs) + sorted_labels = sorted(self.labels) + labels = np.array([ + [eg.reference.cats.get(label, 0.0) for label in sorted_labels] for eg in examples + ]) + + # Initialize weights and bias if not already set + if self.weights is None: + n_features = len(features[0]) + self.weights = np.zeros((n_features, len(self.labels))) + self.bias = np.zeros(len(self.labels)) + + # Training loop + total_loss = 0.0 + features = np.array(features) + + for _ in range(self.max_iterations): + # Forward pass: calculate logits and probabilities + logits = np.dot(features, self.weights) + self.bias + probs = 1 / (1 + np.exp(-logits)) + + # Calculate loss using binary cross-entropy + loss = -np.mean( + labels * np.log(probs + 1e-8) + + (1 - labels) * np.log(1 - probs + 1e-8) + ) + total_loss += loss + + # Backward pass: calculate gradients and update weights and bias + d_probs = (probs - labels) / len(features) + d_weights = np.dot(features.T, d_probs) + d_bias = np.sum(d_probs, axis=0) + + # Update the weights and bias using gradient descent + self.weights -= self.learning_rate * d_weights + self.bias -= self.learning_rate * d_bias + + # Average loss over the iterations + losses[self.name] = total_loss / self.max_iterations + return losses + + def _extract_features(self, docs: List[Doc]) -> List[np.ndarray]: + """ + Extract features from the documents. + :param docs: List of spaCy Doc objects + :return: List of feature arrays for each document + """ + features = [] + for doc in docs: + # Document vector as the main feature + doc_vector = doc.vector + + # Additional length-based features + n_tokens = len(doc) + avg_token_length = ( + np.mean([len(token.text) for token in doc]) if n_tokens > 0 else 0 + ) + + # Combine all features into a single feature vector + combined_features = np.concatenate([ + doc_vector, + [n_tokens / 100.0, avg_token_length / 10.0] # Scale the features + ]) + features.append(combined_features) + return features + + @property + def labels(self) -> set: + """ + Get the current set of labels. + :return: Set of labels + """ + return self._labels + + @labels.setter + def labels(self, value: Iterable[str]): + """ + Set the labels for the categorizer and reset weights. 
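+        Usage sketch: assigning ``textcat.labels = {"positive", "negative"}``
+        discards any learned weights, so set the labels before calling update().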
+ :param value: Iterable of label strings + """ + self._labels = set(value) + # Reset weights and bias when labels change + self.weights = None + self.bias = None + + def set_annotations(self, docs: List[Doc], scores: List[Dict[str, float]]): + """ + Set the scores on the documents. + :param docs: List of spaCy Doc objects + :param scores: List of score dictionaries for each document + """ + for doc, score in zip(docs, scores): + # Set the textcat_scores attribute + doc._.textcat_scores = score + # Set the cats attribute (for compatibility with binary classification) + doc._.cats = score diff --git a/spacy/pipeline/logreg/tests/test_pure_logistic.py b/spacy/pipeline/logreg/tests/test_pure_logistic.py new file mode 100644 index 000000000..44e98fa5e --- /dev/null +++ b/spacy/pipeline/logreg/tests/test_pure_logistic.py @@ -0,0 +1,225 @@ +import pytest +import numpy as np +import spacy +from spacy.language import Language +from spacy.tokens import Doc +from spacy.training import Example + +# Define the NLP fixture for testing +@pytest.fixture +def nlp(): + """ + Fixture to provide a blank spaCy English model for testing purposes. + """ + return spacy.blank("en") + + +@Language.component("pure_logistic_textcat") +def pure_logistic_textcat(doc): + """ + Custom spaCy pipeline component that assigns fixed text categorization scores + to the document. + + Args: + doc (Doc): The spaCy document to process. + + Returns: + Doc: The processed document with 'textcat_scores' attribute set. + """ + # Placeholder for text categorization scores + scores = {"positive": 0.5, "negative": 0.5} + + # Ensure the 'textcat_scores' extension exists + if not Doc.has_extension("textcat_scores"): + Doc.set_extension("textcat_scores", default=None) + + # Assign the scores to the document's custom attribute + doc._.textcat_scores = scores + return doc + + +# Register the custom extension attribute if not already registered +if not Doc.has_extension("textcat_scores"): + Doc.set_extension("textcat_scores", default=None) + + +def test_pure_logistic_textcat_empty_doc(nlp): + """ + Test that the text categorization component can handle an empty document. + """ + nlp.add_pipe("pure_logistic_textcat") + doc = nlp("") + assert doc._.textcat_scores is not None + assert isinstance(doc._.textcat_scores, dict) + + +def test_pure_logistic_textcat_single_word(nlp): + """ + Test that the component correctly handles a single-word document. + """ + nlp.add_pipe("pure_logistic_textcat") + doc = nlp("positive") + assert doc._.textcat_scores is not None + assert isinstance(doc._.textcat_scores, dict) + + +def test_pure_logistic_textcat_special_chars(nlp): + """ + Test that the component can process documents containing special characters. + """ + nlp.add_pipe("pure_logistic_textcat") + doc = nlp("!@#$%^&*()") + assert doc._.textcat_scores is not None + assert isinstance(doc._.textcat_scores, dict) + + +def test_pure_logistic_textcat_invalid_input_type(nlp): + """ + Test that the component raises a ValueError when given invalid input types. + """ + with pytest.raises(ValueError): + nlp.add_pipe("pure_logistic_textcat") + nlp(12345) # Invalid input: integer instead of string + + +def test_pure_logistic_textcat_reset(nlp): + """ + Test that the 'textcat_scores' attribute is reset between different documents. 
+ """ + nlp.add_pipe("pure_logistic_textcat") + + doc1 = nlp("This is a test document") + assert doc1._.textcat_scores is not None + + doc2 = nlp("Another test") + assert doc2._.textcat_scores is not None + assert doc1 is not doc2 # Ensure they are distinct documents + + +def test_pure_logistic_textcat_duplicate_component(nlp): + """ + Test that adding the same component twice to the pipeline raises a ValueError. + """ + nlp.add_pipe("pure_logistic_textcat") + with pytest.raises(ValueError): + nlp.add_pipe("pure_logistic_textcat") # Duplicate addition should fail + + +def test_pure_logistic_textcat_multiple_sentences(nlp): + """ + Test that the component correctly handles documents with multiple sentences. + """ + nlp.add_pipe("pure_logistic_textcat") + doc = nlp("This is the first sentence. This is the second.") + assert doc._.textcat_scores is not None + + +def test_pure_logistic_textcat_with_extension(nlp): + """ + Test that the component correctly handles the scenario where the custom + 'textcat_scores' extension is missing before processing. + """ + # Remove the extension if it exists + if Doc.has_extension("textcat_scores"): + Doc.remove_extension("textcat_scores") + + # Add the custom component + nlp.add_pipe("pure_logistic_textcat") + + # Process the document and verify the extension + doc = nlp("This is a test document") + assert hasattr(doc._, "textcat_scores"), "The 'textcat_scores' extension should be present" + assert isinstance(doc._.textcat_scores, dict), "The 'textcat_scores' extension should be a dictionary" + + +def test_pure_logistic_textcat_empty_train_data(nlp): + """ + Test that the update method handles empty training data gracefully. + """ + def mock_update(examples): + return {"pure_logistic_textcat": 0.0} + + textcat = nlp.add_pipe("pure_logistic_textcat") + textcat.update = mock_update + losses = textcat.update([]) + assert isinstance(losses, dict) + assert losses["pure_logistic_textcat"] == 0.0 + + +def test_pure_logistic_textcat_label_mismatch(nlp): + """ + Test that the component handles mismatched labels in the training data. + """ + textcat = nlp.add_pipe("pure_logistic_textcat") + + # Mismatched label in the training data + train_examples = [] + for text, annotations in TRAIN_DATA_MISMATCH: + doc = nlp.make_doc(text) + example = Example.from_dict(doc, annotations) + train_examples.append(example) + + # Mock update method + def mock_update(examples): + return {"pure_logistic_textcat": 1.0} # Arbitrary loss + + textcat.update = mock_update + losses = textcat.update(train_examples) + assert isinstance(losses, dict) + assert "pure_logistic_textcat" in losses + + +# Mock training data for testing +TRAIN_DATA = [ + ("This is positive", {"cats": {"positive": 1.0, "negative": 0.0}}), + ("This is negative", {"cats": {"positive": 0.0, "negative": 1.0}}) +] + +# Mismatched training data with incorrect labels +TRAIN_DATA_MISMATCH = [ + ("This is positive", {"cats": {"unknown_label": 1.0, "negative": 0.0}}), + ("This is negative", {"cats": {"positive": 0.0, "unknown_label": 1.0}}) +] + + +def test_pure_logistic_textcat_init(nlp): + """ + Test that the text categorization component initializes correctly. + """ + textcat = nlp.add_pipe("pure_logistic_textcat") + assert textcat is not None + + +def test_pure_logistic_textcat_predict(nlp): + """ + Test that the component's prediction works correctly. 
+ """ + nlp.add_pipe("pure_logistic_textcat") + doc = nlp("This is a test document") + assert doc._.textcat_scores is not None + assert isinstance(doc._.textcat_scores, dict) + assert "positive" in doc._.textcat_scores + assert "negative" in doc._.textcat_scores + + +def test_pure_logistic_textcat_update(nlp): + """ + Test that the component's update method works as expected. + """ + def mock_update(examples): + losses = {"pure_logistic_textcat": 0.5} # Dummy loss value + return losses + + textcat = nlp.add_pipe("pure_logistic_textcat") + textcat.update = mock_update + + train_examples = [] + for text, annotations in TRAIN_DATA: + doc = nlp.make_doc(text) + example = Example.from_dict(doc, annotations) + train_examples.append(example) + + losses = textcat.update(train_examples) + assert isinstance(losses, dict) + assert "pure_logistic_textcat" in losses + assert losses["pure_logistic_textcat"] == 0.5 # Ensure the loss is correct diff --git a/spacy/pipeline/test_textcat/test_pure_logistic.py b/spacy/pipeline/test_textcat/test_pure_logistic.py deleted file mode 100644 index 1497b5bdc..000000000 --- a/spacy/pipeline/test_textcat/test_pure_logistic.py +++ /dev/null @@ -1,72 +0,0 @@ -import pytest -from spacy.language import Language -from spacy.training import Example -import spacy -from spacy.tokens import Doc -import numpy as np - -# Define the nlp fixture -@pytest.fixture -def nlp(): - # Load the spaCy model - return spacy.blank("en") # Use a blank model for testing - -# Custom component definition -@Language.component("pure_logistic_textcat") -def pure_logistic_textcat(doc): - # Dummy implementation of text classification, replace with your model's logic - scores = {"positive": 0.5, "negative": 0.5} - - # Store the scores in a custom attribute on the doc - doc._.set("textcat_scores", scores) - return doc - -# Register the custom extension attribute -if not Doc.has_extension("textcat_scores"): - Doc.set_extension("textcat_scores", default=None) - -# Register the custom component to the spaCy pipeline -def test_pure_logistic_textcat_init(nlp): - # Add the component to the pipeline - textcat = nlp.add_pipe("pure_logistic_textcat") - assert textcat is not None - -def test_pure_logistic_textcat_predict(nlp): - # Add the component to the pipeline - nlp.add_pipe("pure_logistic_textcat") - doc = nlp("This is a test document") - - # Check if the textcat_scores attribute exists and is a dictionary - assert doc._.textcat_scores is not None - assert isinstance(doc._.textcat_scores, dict) - assert "positive" in doc._.textcat_scores - assert "negative" in doc._.textcat_scores - -def test_pure_logistic_textcat_update(nlp): - # Mock an update method for testing purposes - def mock_update(examples): - losses = {"pure_logistic_textcat": 0.5} # Dummy loss value - return losses - - # Add the component to the pipeline - textcat = nlp.add_pipe("pure_logistic_textcat") - - # Mock the update method for testing purposes - textcat.update = mock_update - - train_examples = [] - for text, annotations in TRAIN_DATA: - doc = nlp.make_doc(text) - example = Example.from_dict(doc, annotations) - train_examples.append(example) - - # Update the model - losses = textcat.update(train_examples) # Ensure update method exists - assert isinstance(losses, dict) - assert "pure_logistic_textcat" in losses - -# Mock training data for the test -TRAIN_DATA = [ - ("This is positive", {"cats": {"positive": 1.0, "negative": 0.0}}), - ("This is negative", {"cats": {"positive": 0.0, "negative": 1.0}}) -] diff --git 
a/spacy/pipeline/textcat/pure_Logistic.py b/spacy/pipeline/textcat/pure_Logistic.py deleted file mode 100644 index cb1cbc6e8..000000000 --- a/spacy/pipeline/textcat/pure_Logistic.py +++ /dev/null @@ -1,170 +0,0 @@ -from typing import List, Dict, Iterable -import numpy as np -from spacy.pipeline import TrainablePipe -from spacy.language import Language -from spacy.training import Example -from spacy.vocab import Vocab -from spacy.tokens import Doc - - -@Language.factory( - "pure_logistic_textcat", - default_config={ - "learning_rate": 0.001, - "max_iterations": 100, - "batch_size": 1000 - } -) -def make_pure_logistic_textcat( - nlp: Language, - name: str, - learning_rate: float, - max_iterations: int, - batch_size: int -) -> "PureLogisticTextCategorizer": - return PureLogisticTextCategorizer( - vocab=nlp.vocab, - name=name, - learning_rate=learning_rate, - max_iterations=max_iterations, - batch_size=batch_size - ) - - -class PureLogisticTextCategorizer(TrainablePipe): - def __init__( - self, - vocab: Vocab, - name: str = "pure_logistic_textcat", - *, - learning_rate: float = 0.001, - max_iterations: int = 100, - batch_size: int = 1000 - ): - """Initialize the text categorizer.""" - self.vocab = vocab - self.name = name - self.learning_rate = learning_rate - self.max_iterations = max_iterations - self.batch_size = batch_size - self.weights = None - self.bias = 0.0 - self._labels = set() # Use _labels as internal attribute - - # Register the custom extension attribute if it doesn't exist - if not Doc.has_extension("textcat_scores"): - Doc.set_extension("textcat_scores", default=None) - - @property - def labels(self): - """Get the labels.""" - return self._labels - - @labels.setter - def labels(self, value): - """Set the labels.""" - self._labels = value - - def predict(self, docs): - """Apply the pipe to a batch of docs, returning scores.""" - scores = self._predict_scores(docs) - for doc, doc_scores in zip(docs, scores): - doc._.textcat_scores = doc_scores - return docs - - def _predict_scores(self, docs): - """Predict scores for docs.""" - features = self._extract_features(docs) - scores = [] - for doc_features in features: - if self.weights is None: - doc_scores = {"positive": 0.5, "negative": 0.5} - else: - logits = np.dot(doc_features, self.weights) + self.bias - prob = 1 / (1 + np.exp(-logits)) - doc_scores = { - "positive": float(prob), - "negative": float(1 - prob) - } - scores.append(doc_scores) - return scores - - def set_annotations(self, docs, scores): - """Set the predicted annotations (e.g. 
categories) on the docs.""" - for doc, score in zip(docs, scores): - doc.cats = {label: score[i] for i, label in enumerate(self._labels)} - - def _extract_features(self, docs) -> List[np.ndarray]: - """Extract features from docs.""" - features = [] - for doc in docs: - # Basic features - doc_vector = doc.vector - n_tokens = len(doc) - - # Additional features - n_entities = len(doc.ents) - avg_token_length = np.mean([len(token.text) for token in doc]) - n_stopwords = len([token for token in doc if token.is_stop]) - - # Combine features - doc_features = np.concatenate([ - doc_vector, - [n_tokens / 100, n_entities / 10, - avg_token_length / 10, n_stopwords / n_tokens] - ]) - features.append(doc_features) - return features - - def update( - self, - examples: Iterable[Example], - *, - drop: float = 0.0, - sgd=None, - losses: Dict[str, float] = None - ) -> Dict[str, float]: - """Update the model.""" - losses = {} if losses is None else losses - - # Update label set - for example in examples: - self._labels.update(example.reference.cats.keys()) - - # Extract features and labels - docs = [example.reference for example in examples] - label_arrays = self._make_label_array([example.reference.cats for example in examples]) - - features = self._extract_features(docs) - - if self.weights is None: - n_features = features[0].shape[0] if features else 0 - self.weights = np.zeros((n_features, 1)) - - # Simple gradient descent - total_loss = 0.0 - for i in range(self.max_iterations): - for feat, gold in zip(features, label_arrays): - pred = 1 / (1 + np.exp(-(np.dot(feat, self.weights) + self.bias))) - loss = -np.mean(gold * np.log(pred + 1e-8) + - (1 - gold) * np.log(1 - pred + 1e-8)) - total_loss += loss - - # Compute gradients - d_weights = feat.reshape(-1, 1) * (pred - gold) - d_bias = pred - gold - - # Update weights - self.weights -= self.learning_rate * d_weights - self.bias -= self.learning_rate * float(d_bias) - - losses[self.name] = total_loss / len(examples) - return losses - - def _make_label_array(self, cats): - """Convert label dicts into an array.""" - arr = np.zeros((len(cats),)) - for i, cat_dict in enumerate(cats): - if cat_dict.get("positive", 0) > 0.5: - arr[i] = 1.0 - return arr.reshape(-1, 1) diff --git a/spacy/pipeline/textcat/pure_logistic_textcat.ipynb b/spacy/pipeline/textcat/pure_logistic_textcat.ipynb deleted file mode 100644 index b8d95a76d..000000000 --- a/spacy/pipeline/textcat/pure_logistic_textcat.ipynb +++ /dev/null @@ -1,129 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'cells': [{'cell_type': 'markdown',\n", - " 'metadata': {},\n", - " 'source': ['# Pure Logistic Regression Text Categorizer\\n',\n", - " 'This tutorial demonstrates how to use the custom logistic regression text categorizer.']},\n", - " {'cell_type': 'code',\n", - " 'execution_count': None,\n", - " 'metadata': {},\n", - " 'source': ['import spacy\\n',\n", - " 'from spacy.training import Example\\n',\n", - " '\\n',\n", - " '# Load spaCy model\\n',\n", - " 'nlp = spacy.load(\"en_core_web_lg\")\\n',\n", - " 'nlp.add_pipe(\"pure_logistic_textcat\")\\n',\n", - " '\\n',\n", - " '# Example training data\\n',\n", - " 'TRAIN_DATA = [\\n',\n", - " ' (\"This is amazing!\", {\"cats\": {\"positive\": 1.0, \"negative\": 0.0}}),\\n',\n", - " ' (\"This is terrible!\", {\"cats\": {\"positive\": 0.0, \"negative\": 1.0}})\\n',\n", - " ']\\n',\n", - " '\\n',\n", - " '# Create training examples\\n',\n", - " 'examples = []\\n',\n", 
- " 'for text, annotations in TRAIN_DATA:\\n',\n", - " ' doc = nlp.make_doc(text)\\n',\n", - " ' example = Example.from_dict(doc, annotations)\\n',\n", - " ' examples.append(example)\\n',\n", - " '\\n',\n", - " '# Train the model\\n',\n", - " 'textcat = nlp.get_pipe(\"pure_logistic_textcat\")\\n',\n", - " 'losses = textcat.update(examples)\\n',\n", - " 'print(f\"Losses: {losses}\")\\n',\n", - " '\\n',\n", - " '# Test the model\\n',\n", - " 'test_text = \"This product is fantastic!\"\\n',\n", - " 'doc = nlp(test_text)\\n',\n", - " 'print(f\"\\\\nText: {test_text}\")\\n',\n", - " 'print(f\"Predictions: {doc.cats}\")']}]}" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "{\n", - " \"cells\": [\n", - " {\n", - " \"cell_type\": \"markdown\",\n", - " \"metadata\": {},\n", - " \"source\": [\n", - " \"# Pure Logistic Regression Text Categorizer\\n\",\n", - " \"This tutorial demonstrates how to use the custom logistic regression text categorizer.\"\n", - " ]\n", - " },\n", - " {\n", - " \"cell_type\": \"code\",\n", - " \"execution_count\": None,\n", - " \"metadata\": {},\n", - " \"source\": [\n", - " \"import spacy\\n\",\n", - " \"from spacy.training import Example\\n\",\n", - " \"\\n\",\n", - " \"# Load spaCy model\\n\",\n", - " \"nlp = spacy.load(\\\"en_core_web_lg\\\")\\n\",\n", - " \"nlp.add_pipe(\\\"pure_logistic_textcat\\\")\\n\",\n", - " \"\\n\",\n", - " \"# Example training data\\n\",\n", - " \"TRAIN_DATA = [\\n\",\n", - " \" (\\\"This is amazing!\\\", {\\\"cats\\\": {\\\"positive\\\": 1.0, \\\"negative\\\": 0.0}}),\\n\",\n", - " \" (\\\"This is terrible!\\\", {\\\"cats\\\": {\\\"positive\\\": 0.0, \\\"negative\\\": 1.0}})\\n\",\n", - " \"]\\n\",\n", - " \"\\n\",\n", - " \"# Create training examples\\n\",\n", - " \"examples = []\\n\",\n", - " \"for text, annotations in TRAIN_DATA:\\n\",\n", - " \" doc = nlp.make_doc(text)\\n\",\n", - " \" example = Example.from_dict(doc, annotations)\\n\",\n", - " \" examples.append(example)\\n\",\n", - " \"\\n\",\n", - " \"# Train the model\\n\",\n", - " \"textcat = nlp.get_pipe(\\\"pure_logistic_textcat\\\")\\n\",\n", - " \"losses = textcat.update(examples)\\n\",\n", - " \"print(f\\\"Losses: {losses}\\\")\\n\",\n", - " \"\\n\",\n", - " \"# Test the model\\n\",\n", - " \"test_text = \\\"This product is fantastic!\\\"\\n\",\n", - " \"doc = nlp(test_text)\\n\",\n", - " \"print(f\\\"\\\\nText: {test_text}\\\")\\n\",\n", - " \"print(f\\\"Predictions: {doc.cats}\\\")\"\n", - " ]\n", - " }\n", - " ]\n", - "}" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.5" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}