Update for numpy 2.0 deprecations (#13103)

- Replace `np.trapz` with vendored `trapezoid` from scipy - Replace `np.float_` with `np.float64`
2023-11-06 08:47:53 +01:00 · 2023-11-06 08:47:53 +01:00 · c096c5c0c9
parent 92f1d0a195
commit c096c5c0c9
3 changed files with 180 additions and 4 deletions
--- a/licenses/3rd_party_licenses.txt
+++ b/licenses/3rd_party_licenses.txt
@ -158,3 +158,45 @@ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
 SciPy
 -----
 * Files: scorer.py
 The implementation of trapezoid() is adapted from SciPy, which is distributed
 under the following license:
 New BSD License
 Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers.
 All rights reserved.
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following
   disclaimer in the documentation and/or other materials provided
   with the distribution.
 3. Neither the name of the copyright holder nor the names of its
   contributors may be used to endorse or promote products derived
   from this software without specific prior written permission.
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@ -802,6 +802,140 @@ def get_ner_prf(examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
        }
 # The following implementation of trapezoid() is adapted from SciPy,
 # which is distributed under the New BSD License.
 # Copyright (c) 2001-2002 Enthought, Inc. 2003-2023, SciPy Developers.
 # See licenses/3rd_party_licenses.txt
 def trapezoid(y, x=None, dx=1.0, axis=-1):
    r"""
    Integrate along the given axis using the composite trapezoidal rule.
    If `x` is provided, the integration happens in sequence along its
    elements - they are not sorted.
    Integrate `y` (`x`) along each 1d slice on the given axis, compute
    :math:`\int y(x) dx`.
    When `x` is specified, this integrates along the parametric curve,
    computing :math:`\int_t y(t) dt =
    \int_t y(t) \left.\frac{dx}{dt}\right|_{x=x(t)} dt`.
    Parameters
    ----------
    y : array_like
        Input array to integrate.
    x : array_like, optional
        The sample points corresponding to the `y` values. If `x` is None,
        the sample points are assumed to be evenly spaced `dx` apart. The
        default is None.
    dx : scalar, optional
        The spacing between sample points when `x` is None. The default is 1.
    axis : int, optional
        The axis along which to integrate.
    Returns
    -------
    trapezoid : float or ndarray
        Definite integral of `y` = n-dimensional array as approximated along
        a single axis by the trapezoidal rule. If `y` is a 1-dimensional array,
        then the result is a float. If `n` is greater than 1, then the result
        is an `n`-1 dimensional array.
    See Also
    --------
    cumulative_trapezoid, simpson, romb
    Notes
    -----
    Image [2]_ illustrates trapezoidal rule -- y-axis locations of points
    will be taken from `y` array, by default x-axis distances between
    points will be 1.0, alternatively they can be provided with `x` array
    or with `dx` scalar.  Return value will be equal to combined area under
    the red lines.
    References
    ----------
    .. [1] Wikipedia page: https://en.wikipedia.org/wiki/Trapezoidal_rule
    .. [2] Illustration image:
           https://en.wikipedia.org/wiki/File:Composite_trapezoidal_rule_illustration.png
    Examples
    --------
    Use the trapezoidal rule on evenly spaced points:
    >>> import numpy as np
    >>> from scipy import integrate
    >>> integrate.trapezoid([1, 2, 3])
    4.0
    The spacing between sample points can be selected by either the
    ``x`` or ``dx`` arguments:
    >>> integrate.trapezoid([1, 2, 3], x=[4, 6, 8])
    8.0
    >>> integrate.trapezoid([1, 2, 3], dx=2)
    8.0
    Using a decreasing ``x`` corresponds to integrating in reverse:
    >>> integrate.trapezoid([1, 2, 3], x=[8, 6, 4])
    -8.0
    More generally ``x`` is used to integrate along a parametric curve. We can
    estimate the integral :math:`\int_0^1 x^2 = 1/3` using:
    >>> x = np.linspace(0, 1, num=50)
    >>> y = x**2
    >>> integrate.trapezoid(y, x)
    0.33340274885464394
    Or estimate the area of a circle, noting we repeat the sample which closes
    the curve:
    >>> theta = np.linspace(0, 2 * np.pi, num=1000, endpoint=True)
    >>> integrate.trapezoid(np.cos(theta), x=np.sin(theta))
    3.141571941375841
    ``trapezoid`` can be applied along a specified axis to do multiple
    computations in one call:
    >>> a = np.arange(6).reshape(2, 3)
    >>> a
    array([[0, 1, 2],
           [3, 4, 5]])
    >>> integrate.trapezoid(a, axis=0)
    array([1.5, 2.5, 3.5])
    >>> integrate.trapezoid(a, axis=1)
    array([2.,  8.])
    """
    y = np.asanyarray(y)
    if x is None:
        d = dx
    else:
        x = np.asanyarray(x)
        if x.ndim == 1:
            d = np.diff(x)
            # reshape to correct shape
            shape = [1] * y.ndim
            shape[axis] = d.shape[0]
            d = d.reshape(shape)
        else:
            d = np.diff(x, axis=axis)
    nd = y.ndim
    slice1 = [slice(None)] * nd
    slice2 = [slice(None)] * nd
    slice1[axis] = slice(1, None)
    slice2[axis] = slice(None, -1)
    try:
        ret = (d * (y[tuple(slice1)] + y[tuple(slice2)]) / 2.0).sum(axis)
    except ValueError:
        # Operations didn't work, cast to ndarray
        d = np.asarray(d)
        y = np.asarray(y)
        ret = np.add.reduce(d * (y[tuple(slice1)] + y[tuple(slice2)]) / 2.0, axis)
    return ret
 # The following implementation of roc_auc_score() is adapted from
 # scikit-learn, which is distributed under the New BSD License.
 # Copyright (c) 2007–2019 The scikit-learn developers.
@ -1024,9 +1158,9 @@ def _auc(x, y):
        else:
            raise ValueError(Errors.E164.format(x=x))
-    area = direction * np.trapz(y, x)
+    area = direction * trapezoid(y, x)
    if isinstance(area, np.memmap):
-        # Reductions such as .sum used internally in np.trapz do not return a
+        # Reductions such as .sum used internally in trapezoid do not return a
        # scalar by default for numpy.memmap instances contrary to
        # regular numpy.ndarray instances.
        area = area.dtype.type(area)
--- a/spacy/tokens/doc.pyi
+++ b/spacy/tokens/doc.pyi
@ -42,7 +42,7 @@ class Doc:
    user_hooks: Dict[str, Callable[..., Any]]
    user_token_hooks: Dict[str, Callable[..., Any]]
    user_span_hooks: Dict[str, Callable[..., Any]]
-    tensor: np.ndarray[Any, np.dtype[np.float_]]
+    tensor: np.ndarray[Any, np.dtype[np.float64]]
    user_data: Dict[str, Any]
    has_unknown_spaces: bool
    _context: Any
@ -166,7 +166,7 @@ class Doc:
    ) -> Doc: ...
    def to_array(
        self, py_attr_ids: Union[int, str, List[Union[int, str]]]
-    ) -> np.ndarray[Any, np.dtype[np.float_]]: ...
+    ) -> np.ndarray[Any, np.dtype[np.float64]]: ...
    @staticmethod
    def from_docs(
        docs: List[Doc],