mirror of https://github.com/explosion/spaCy.git
Update for numpy 2.0 deprecations (#13103)
- Replace `np.trapz` with vendored `trapezoid` from scipy - Replace `np.float_` with `np.float64`
This commit is contained in:
parent
92f1d0a195
commit
c096c5c0c9
|
@ -158,3 +158,45 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
SOFTWARE.
|
SOFTWARE.
|
||||||
|
|
||||||
|
|
||||||
|
SciPy
|
||||||
|
-----
|
||||||
|
|
||||||
|
* Files: scorer.py
|
||||||
|
|
||||||
|
The implementation of trapezoid() is adapted from SciPy, which is distributed
|
||||||
|
under the following license:
|
||||||
|
|
||||||
|
New BSD License
|
||||||
|
|
||||||
|
Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers.
|
||||||
|
All rights reserved.
|
||||||
|
|
||||||
|
Redistribution and use in source and binary forms, with or without
|
||||||
|
modification, are permitted provided that the following conditions
|
||||||
|
are met:
|
||||||
|
|
||||||
|
1. Redistributions of source code must retain the above copyright
|
||||||
|
notice, this list of conditions and the following disclaimer.
|
||||||
|
|
||||||
|
2. Redistributions in binary form must reproduce the above
|
||||||
|
copyright notice, this list of conditions and the following
|
||||||
|
disclaimer in the documentation and/or other materials provided
|
||||||
|
with the distribution.
|
||||||
|
|
||||||
|
3. Neither the name of the copyright holder nor the names of its
|
||||||
|
contributors may be used to endorse or promote products derived
|
||||||
|
from this software without specific prior written permission.
|
||||||
|
|
||||||
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
138
spacy/scorer.py
138
spacy/scorer.py
|
@ -802,6 +802,140 @@ def get_ner_prf(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# The following implementation of trapezoid() is adapted from SciPy,
|
||||||
|
# which is distributed under the New BSD License.
|
||||||
|
# Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers.
|
||||||
|
# See licenses/3rd_party_licenses.txt
|
||||||
|
def trapezoid(y, x=None, dx=1.0, axis=-1):
    r"""
    Integrate along the given axis using the composite trapezoidal rule.

    If `x` is provided, the integration happens in sequence along its
    elements - they are not sorted.

    Integrate `y` (`x`) along each 1d slice on the given axis, compute
    :math:`\int y(x) dx`.
    When `x` is specified, this integrates along the parametric curve,
    computing :math:`\int_t y(t) dt =
    \int_t y(t) \left.\frac{dx}{dt}\right|_{x=x(t)} dt`.

    Parameters
    ----------
    y : array_like
        Input array to integrate.
    x : array_like, optional
        The sample points corresponding to the `y` values. If `x` is None,
        the sample points are assumed to be evenly spaced `dx` apart. The
        default is None.
    dx : scalar, optional
        The spacing between sample points when `x` is None. The default is 1.
    axis : int, optional
        The axis along which to integrate.

    Returns
    -------
    trapezoid : float or ndarray
        Definite integral of `y` (an `n`-dimensional array) as approximated
        along a single axis by the trapezoidal rule. If `y` is a
        1-dimensional array, then the result is a float. If `n` is greater
        than 1, then the result is an `n`-1 dimensional array.

    See Also
    --------
    scipy.integrate.cumulative_trapezoid, scipy.integrate.simpson,
    scipy.integrate.romb

    Notes
    -----
    Image [2]_ illustrates trapezoidal rule -- y-axis locations of points
    will be taken from `y` array, by default x-axis distances between
    points will be 1.0, alternatively they can be provided with `x` array
    or with `dx` scalar.  Return value will be equal to combined area under
    the red lines.

    References
    ----------
    .. [1] Wikipedia page: https://en.wikipedia.org/wiki/Trapezoidal_rule

    .. [2] Illustration image:
           https://en.wikipedia.org/wiki/File:Composite_trapezoidal_rule_illustration.png

    Examples
    --------
    Use the trapezoidal rule on evenly spaced points:

    >>> import numpy as np
    >>> trapezoid([1, 2, 3])
    4.0

    The spacing between sample points can be selected by either the
    ``x`` or ``dx`` arguments:

    >>> trapezoid([1, 2, 3], x=[4, 6, 8])
    8.0
    >>> trapezoid([1, 2, 3], dx=2)
    8.0

    Using a decreasing ``x`` corresponds to integrating in reverse:

    >>> trapezoid([1, 2, 3], x=[8, 6, 4])
    -8.0

    More generally ``x`` is used to integrate along a parametric curve. We can
    estimate the integral :math:`\int_0^1 x^2 = 1/3` using:

    >>> x = np.linspace(0, 1, num=50)
    >>> y = x**2
    >>> trapezoid(y, x)
    0.33340274885464394

    Or estimate the area of a circle, noting we repeat the sample which closes
    the curve:

    >>> theta = np.linspace(0, 2 * np.pi, num=1000, endpoint=True)
    >>> trapezoid(np.cos(theta), x=np.sin(theta))
    3.141571941375841

    ``trapezoid`` can be applied along a specified axis to do multiple
    computations in one call:

    >>> a = np.arange(6).reshape(2, 3)
    >>> a
    array([[0, 1, 2],
           [3, 4, 5]])
    >>> trapezoid(a, axis=0)
    array([1.5, 2.5, 3.5])
    >>> trapezoid(a, axis=1)
    array([2., 8.])
    """
    # asanyarray keeps ndarray subclasses intact instead of converting them
    # to a base ndarray.
    y = np.asanyarray(y)
    if x is None:
        # Evenly spaced samples: the interval width is the scalar `dx`.
        d = dx
    else:
        x = np.asanyarray(x)
        if x.ndim == 1:
            d = np.diff(x)
            # reshape to correct shape: size 1 on every axis except `axis`,
            # so that `d` broadcasts against the slices of `y` below
            shape = [1] * y.ndim
            shape[axis] = d.shape[0]
            d = d.reshape(shape)
        else:
            d = np.diff(x, axis=axis)
    nd = y.ndim
    # slice1 selects y[1:] and slice2 selects y[:-1] along `axis`; their sum
    # is the pair of heights of every trapezoid.
    slice1 = [slice(None)] * nd
    slice2 = [slice(None)] * nd
    slice1[axis] = slice(1, None)
    slice2[axis] = slice(None, -1)
    try:
        ret = (d * (y[tuple(slice1)] + y[tuple(slice2)]) / 2.0).sum(axis)
    except ValueError:
        # Operations didn't work, cast to ndarray
        d = np.asarray(d)
        y = np.asarray(y)
        ret = np.add.reduce(d * (y[tuple(slice1)] + y[tuple(slice2)]) / 2.0, axis)
    return ret
|
||||||
|
|
||||||
|
|
||||||
# The following implementation of roc_auc_score() is adapted from
|
# The following implementation of roc_auc_score() is adapted from
|
||||||
# scikit-learn, which is distributed under the New BSD License.
|
# scikit-learn, which is distributed under the New BSD License.
|
||||||
# Copyright (c) 2007–2019 The scikit-learn developers.
|
# Copyright (c) 2007–2019 The scikit-learn developers.
|
||||||
|
@ -1024,9 +1158,9 @@ def _auc(x, y):
|
||||||
else:
|
else:
|
||||||
raise ValueError(Errors.E164.format(x=x))
|
raise ValueError(Errors.E164.format(x=x))
|
||||||
|
|
||||||
area = direction * np.trapz(y, x)
|
area = direction * trapezoid(y, x)
|
||||||
if isinstance(area, np.memmap):
|
if isinstance(area, np.memmap):
|
||||||
# Reductions such as .sum used internally in np.trapz do not return a
|
# Reductions such as .sum used internally in trapezoid do not return a
|
||||||
# scalar by default for numpy.memmap instances contrary to
|
# scalar by default for numpy.memmap instances contrary to
|
||||||
# regular numpy.ndarray instances.
|
# regular numpy.ndarray instances.
|
||||||
area = area.dtype.type(area)
|
area = area.dtype.type(area)
|
||||||
|
|
|
@ -42,7 +42,7 @@ class Doc:
|
||||||
user_hooks: Dict[str, Callable[..., Any]]
|
user_hooks: Dict[str, Callable[..., Any]]
|
||||||
user_token_hooks: Dict[str, Callable[..., Any]]
|
user_token_hooks: Dict[str, Callable[..., Any]]
|
||||||
user_span_hooks: Dict[str, Callable[..., Any]]
|
user_span_hooks: Dict[str, Callable[..., Any]]
|
||||||
tensor: np.ndarray[Any, np.dtype[np.float_]]
|
tensor: np.ndarray[Any, np.dtype[np.float64]]
|
||||||
user_data: Dict[str, Any]
|
user_data: Dict[str, Any]
|
||||||
has_unknown_spaces: bool
|
has_unknown_spaces: bool
|
||||||
_context: Any
|
_context: Any
|
||||||
|
@ -166,7 +166,7 @@ class Doc:
|
||||||
) -> Doc: ...
|
) -> Doc: ...
|
||||||
def to_array(
|
def to_array(
|
||||||
self, py_attr_ids: Union[int, str, List[Union[int, str]]]
|
self, py_attr_ids: Union[int, str, List[Union[int, str]]]
|
||||||
) -> np.ndarray[Any, np.dtype[np.float_]]: ...
|
) -> np.ndarray[Any, np.dtype[np.float64]]: ...
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def from_docs(
|
def from_docs(
|
||||||
docs: List[Doc],
|
docs: List[Doc],
|
||||||
|
|
Loading…
Reference in New Issue