hand

2026-05-06 19:47:31 +07:00
parent 94d8682530
commit 12dbb7731b
9963 changed files with 2747894 additions and 0 deletions
@@ -0,0 +1,634 @@
+"""
+Functions
+---------
+.. autosummary::
+   :toctree: generated/
+
+    fmin_l_bfgs_b
+
+"""
+
+## License for the Python wrapper
+## ==============================
+
+## Copyright (c) 2004 David M. Cooke <cookedm@physics.mcmaster.ca>
+
+## Permission is hereby granted, free of charge, to any person obtaining a
+## copy of this software and associated documentation files (the "Software"),
+## to deal in the Software without restriction, including without limitation
+## the rights to use, copy, modify, merge, publish, distribute, sublicense,
+## and/or sell copies of the Software, and to permit persons to whom the
+## Software is furnished to do so, subject to the following conditions:
+
+## The above copyright notice and this permission notice shall be included in
+## all copies or substantial portions of the Software.
+
+## THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+## IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+## FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+## AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+## LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+## FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+## DEALINGS IN THE SOFTWARE.
+
+## Modifications by Travis Oliphant and Enthought, Inc. for inclusion in SciPy
+
+import numpy as np
+from numpy import array, asarray, float64, zeros
+from . import _lbfgsb
+from ._optimize import (MemoizeJac, OptimizeResult, _call_callback_maybe_halt,
+                        _wrap_callback, _check_unknown_options,
+                        _prepare_scalar_function)
+from ._constraints import old_bound_to_new
+
+from scipy.sparse.linalg import LinearOperator
+from scipy._lib.deprecation import _NoValue
+import warnings
+
+__all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']
+
+
+status_messages = {
+    0 : "START",
+    1 : "NEW_X",
+    2 : "RESTART",
+    3 : "FG",
+    4 : "CONVERGENCE",
+    5 : "STOP",
+    6 : "WARNING",
+    7 : "ERROR",
+    8 : "ABNORMAL"
+}
+
+
+task_messages = {
+    0 : "",
+    301 : "",
+    302 : "",
+    401 : "NORM OF PROJECTED GRADIENT <= PGTOL",
+    402 : "RELATIVE REDUCTION OF F <= FACTR*EPSMCH",
+    501 : "CPU EXCEEDING THE TIME LIMIT",
+    502 : "TOTAL NO. OF F,G EVALUATIONS EXCEEDS LIMIT",
+    503 : "PROJECTED GRADIENT IS SUFFICIENTLY SMALL",
+    504 : "TOTAL NO. OF ITERATIONS REACHED LIMIT",
+    505 : "CALLBACK REQUESTED HALT",
+    601 : "ROUNDING ERRORS PREVENT PROGRESS",
+    602 : "STP = STPMAX",
+    603 : "STP = STPMIN",
+    604 : "XTOL TEST SATISFIED",
+    701 : "NO FEASIBLE SOLUTION",
+    702 : "FACTR < 0",
+    703 : "FTOL < 0",
+    704 : "GTOL < 0",
+    705 : "XTOL < 0",
+    706 : "STP < STPMIN",
+    707 : "STP > STPMAX",
+    708 : "STPMIN < 0",
+    709 : "STPMAX < STPMIN",
+    710 : "INITIAL G >= 0",
+    711 : "M <= 0",
+    712 : "N <= 0",
+    713 : "INVALID NBD",
+}
+
+def fmin_l_bfgs_b(func, x0, fprime=None, args=(),
+                  approx_grad=0,
+                  bounds=None, m=10, factr=1e7, pgtol=1e-5,
+                  epsilon=1e-8,
+                  iprint=_NoValue, maxfun=15000, maxiter=15000, disp=_NoValue,
+                  callback=None, maxls=20):
+    """
+    Minimize a function func using the L-BFGS-B algorithm.
+
+    Parameters
+    ----------
+    func : callable f(x,*args)
+        Function to minimize.
+    x0 : ndarray
+        Initial guess.
+    fprime : callable fprime(x,*args), optional
+        The gradient of `func`. If None, then `func` returns the function
+        value and the gradient (``f, g = func(x, *args)``), unless
+        `approx_grad` is True in which case `func` returns only ``f``.
+    args : sequence, optional
+        Arguments to pass to `func` and `fprime`.
+    approx_grad : bool, optional
+        Whether to approximate the gradient numerically (in which case
+        `func` returns only the function value).
+    bounds : list, optional
+        ``(min, max)`` pairs for each element in ``x``, defining
+        the bounds on that parameter. Use None or +-inf for one of ``min`` or
+        ``max`` when there is no bound in that direction.
+    m : int, optional
+        The maximum number of variable metric corrections
+        used to define the limited memory matrix. (The limited memory BFGS
+        method does not store the full hessian but uses this many terms in an
+        approximation to it.)
+    factr : float, optional
+        The iteration stops when
+        ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``,
+        where ``eps`` is the machine precision, which is automatically
+        generated by the code. Typical values for `factr` are: 1e12 for
+        low accuracy; 1e7 for moderate accuracy; 10.0 for extremely
+        high accuracy. See Notes for relationship to `ftol`, which is exposed
+        (instead of `factr`) by the `scipy.optimize.minimize` interface to
+        L-BFGS-B.
+    pgtol : float, optional
+        The iteration will stop when
+        ``max{|proj g_i | i = 1, ..., n} <= pgtol``
+        where ``proj g_i`` is the i-th component of the projected gradient.
+    epsilon : float, optional
+        Step size used when `approx_grad` is True, for numerically
+        calculating the gradient
+    iprint : int, optional
+        Deprecated option that previously controlled the text printed on the
+        screen during the problem solution. Now the code does not emit any
+        output and this keyword has no function.
+
+        .. deprecated:: 1.15.0
+            This keyword is deprecated and will be removed from SciPy 1.18.0.
+
+    disp : int, optional
+        Deprecated option that previously controlled the text printed on the
+        screen during the problem solution. Now the code does not emit any
+        output and this keyword has no function.
+
+        .. deprecated:: 1.15.0
+            This keyword is deprecated and will be removed from SciPy 1.18.0.
+
+    maxfun : int, optional
+        Maximum number of function evaluations. Note that this function
+        may violate the limit because of evaluating gradients by numerical
+        differentiation.
+    maxiter : int, optional
+        Maximum number of iterations.
+    callback : callable, optional
+        Called after each iteration, as ``callback(xk)``, where ``xk`` is the
+        current parameter vector.
+    maxls : int, optional
+        Maximum number of line search steps (per iteration). Default is 20.
+
+    Returns
+    -------
+    x : array_like
+        Estimated position of the minimum.
+    f : float
+        Value of `func` at the minimum.
+    d : dict
+        Information dictionary.
+
+        * d['warnflag'] is
+
+          - 0 if converged,
+          - 1 if too many function evaluations or too many iterations,
+          - 2 if stopped for another reason, given in d['task']
+
+        * d['grad'] is the gradient at the minimum (should be 0 ish)
+        * d['funcalls'] is the number of function calls made.
+        * d['nit'] is the number of iterations.
+
+    See also
+    --------
+    minimize: Interface to minimization algorithms for multivariate
+        functions. See the 'L-BFGS-B' `method` in particular. Note that the
+        `ftol` option is made available via that interface, while `factr` is
+        provided via this interface, where `factr` is the factor multiplying
+        the default machine floating-point precision to arrive at `ftol`:
+        ``ftol = factr * numpy.finfo(float).eps``.
+
+    Notes
+    -----
+    SciPy uses a C-translated and modified version of the Fortran code,
+    L-BFGS-B v3.0 (released April 25, 2011, BSD-3 licensed). Original Fortran
+    version was written by Ciyou Zhu, Richard Byrd, Jorge Nocedal and,
+    Jose Luis Morales.
+
+    References
+    ----------
+    * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound
+      Constrained Optimization, (1995), SIAM Journal on Scientific and
+      Statistical Computing, 16, 5, pp. 1190-1208.
+    * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B,
+      FORTRAN routines for large scale bound constrained optimization (1997),
+      ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560.
+    * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B,
+      FORTRAN routines for large scale bound constrained optimization (2011),
+      ACM Transactions on Mathematical Software, 38, 1.
+
+    Examples
+    --------
+    Solve a linear regression problem via `fmin_l_bfgs_b`. To do this, first we
+    define an objective function ``f(m, b) = (y - y_model)**2``, where `y`
+    describes the observations and `y_model` the prediction of the linear model
+    as ``y_model = m*x + b``. The bounds for the parameters, ``m`` and ``b``,
+    are arbitrarily chosen as ``(0,5)`` and ``(5,10)`` for this example.
+
+    >>> import numpy as np
+    >>> from scipy.optimize import fmin_l_bfgs_b
+    >>> X = np.arange(0, 10, 1)
+    >>> M = 2
+    >>> B = 3
+    >>> Y = M * X + B
+    >>> def func(parameters, *args):
+    ...     x = args[0]
+    ...     y = args[1]
+    ...     m, b = parameters
+    ...     y_model = m*x + b
+    ...     error = sum(np.power((y - y_model), 2))
+    ...     return error
+
+    >>> initial_values = np.array([0.0, 1.0])
+
+    >>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
+    ...                                    approx_grad=True)
+    >>> x_opt, f_opt
+    array([1.99999999, 3.00000006]), 1.7746231151323805e-14  # may vary
+
+    The optimized parameters in ``x_opt`` agree with the ground truth parameters
+    ``m`` and ``b``. Next, let us perform a bound constrained optimization using
+    the `bounds` parameter.
+
+    >>> bounds = [(0, 5), (5, 10)]
+    >>> x_opt, f_op, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
+    ...                                   approx_grad=True, bounds=bounds)
+    >>> x_opt, f_opt
+    array([1.65990508, 5.31649385]), 15.721334516453945  # may vary
+    """
+    # handle fprime/approx_grad
+    if approx_grad:
+        fun = func
+        jac = None
+    elif fprime is None:
+        fun = MemoizeJac(func)
+        jac = fun.derivative
+    else:
+        fun = func
+        jac = fprime
+
+    # build options
+    callback = _wrap_callback(callback)
+    opts = {'disp': disp,
+            'iprint': iprint,
+            'maxcor': m,
+            'ftol': factr * np.finfo(float).eps,
+            'gtol': pgtol,
+            'eps': epsilon,
+            'maxfun': maxfun,
+            'maxiter': maxiter,
+            'callback': callback,
+            'maxls': maxls}
+
+    res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
+                           **opts)
+    d = {'grad': res['jac'],
+         'task': res['message'],
+         'funcalls': res['nfev'],
+         'nit': res['nit'],
+         'warnflag': res['status']}
+    f = res['fun']
+    x = res['x']
+
+    return x, f, d
+
+
+def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
+                     disp=_NoValue, maxcor=10, ftol=2.2204460492503131e-09,
+                     gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000,
+                     iprint=_NoValue, callback=None, maxls=20,
+                     finite_diff_rel_step=None, workers=None,
+                     **unknown_options):
+    """
+    Minimize a scalar function of one or more variables using the L-BFGS-B
+    algorithm.
+
+    Options
+    -------
+    disp : None or int
+        Deprecated option that previously controlled the text printed on the
+        screen during the problem solution. Now the code does not emit any
+        output and this keyword has no function.
+
+        .. deprecated:: 1.15.0
+            This keyword is deprecated and will be removed from SciPy 1.18.0.
+
+    maxcor : int
+        The maximum number of variable metric corrections used to
+        define the limited memory matrix. (The limited memory BFGS
+        method does not store the full hessian but uses this many terms
+        in an approximation to it.)
+    ftol : float
+        The iteration stops when ``(f^k -
+        f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``.
+    gtol : float
+        The iteration will stop when ``max{|proj g_i | i = 1, ..., n}
+        <= gtol`` where ``proj g_i`` is the i-th component of the
+        projected gradient.
+    eps : float or ndarray
+        If `jac is None` the absolute step size used for numerical
+        approximation of the jacobian via forward differences.
+    maxfun : int
+        Maximum number of function evaluations before minimization terminates.
+        Note that this function may violate the limit if the gradients
+        are evaluated by numerical differentiation.
+    maxiter : int
+        Maximum number of algorithm iterations.
+    iprint : int, optional
+        Deprecated option that previously controlled the text printed on the
+        screen during the problem solution. Now the code does not emit any
+        output and this keyword has no function.
+
+        .. deprecated:: 1.15.0
+            This keyword is deprecated and will be removed from SciPy 1.18.0.
+
+    maxls : int, optional
+        Maximum number of line search steps (per iteration). Default is 20.
+    finite_diff_rel_step : None or array_like, optional
+        If ``jac in ['2-point', '3-point', 'cs']`` the relative step size to
+        use for numerical approximation of the jacobian. The absolute step
+        size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``,
+        possibly adjusted to fit into the bounds. For ``method='3-point'``
+        the sign of `h` is ignored. If None (default) then step is selected
+        automatically.
+    workers : int, map-like callable, optional
+        A map-like callable, such as `multiprocessing.Pool.map` for evaluating
+        any numerical differentiation in parallel.
+        This evaluation is carried out as ``workers(fun, iterable)``.
+
+        .. versionadded:: 1.16.0
+
+    Notes
+    -----
+    The option `ftol` is exposed via the `scipy.optimize.minimize` interface,
+    but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The
+    relationship between the two is ``ftol = factr * numpy.finfo(float).eps``.
+    I.e., `factr` multiplies the default machine floating-point precision to
+    arrive at `ftol`.
+    If the minimization is slow to converge the optimizer may halt if the
+    total number of function evaluations exceeds `maxfun`, or the number of
+    algorithm iterations has reached `maxiter` (whichever comes first). If
+    this is the case then ``result.success=False``, and an appropriate
+    error message is contained in ``result.message``.
+
+    """
+    _check_unknown_options(unknown_options)
+    m = maxcor
+    pgtol = gtol
+    factr = ftol / np.finfo(float).eps
+
+    x0 = asarray(x0).ravel()
+    n, = x0.shape
+    if disp is not _NoValue:
+        warnings.warn("scipy.optimize: The `disp` and `iprint` options of the "
+                      "L-BFGS-B solver are deprecated and will be removed in "
+                      "SciPy 1.18.0.",
+                      DeprecationWarning, stacklevel=3)
+
+    if iprint is not _NoValue:
+        warnings.warn("scipy.optimize: The `disp` and `iprint` options of the "
+                      "L-BFGS-B solver are deprecated and will be removed in "
+                      "SciPy 1.18.0.",
+                      DeprecationWarning, stacklevel=3)
+
+    # historically old-style bounds were/are expected by lbfgsb.
+    # That's still the case but we'll deal with new-style from here on,
+    # it's easier
+    if bounds is None:
+        pass
+    elif len(bounds) != n:
+        raise ValueError('length of x0 != length of bounds')
+    else:
+        bounds = np.array(old_bound_to_new(bounds))
+
+        # check bounds
+        if (bounds[0] > bounds[1]).any():
+            raise ValueError(
+                "LBFGSB - one of the lower bounds is greater than an upper bound."
+            )
+
+        # initial vector must lie within the bounds. Otherwise ScalarFunction and
+        # approx_derivative will cause problems
+        x0 = np.clip(x0, bounds[0], bounds[1])
+
+    # _prepare_scalar_function can use bounds=None to represent no bounds
+    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
+                                  bounds=bounds,
+                                  finite_diff_rel_step=finite_diff_rel_step,
+                                  workers=workers)
+
+    func_and_grad = sf.fun_and_grad
+
+    nbd = zeros(n, np.int32)
+    low_bnd = zeros(n, float64)
+    upper_bnd = zeros(n, float64)
+    bounds_map = {(-np.inf, np.inf): 0,
+                  (1, np.inf): 1,
+                  (1, 1): 2,
+                  (-np.inf, 1): 3}
+
+    if bounds is not None:
+        for i in range(0, n):
+            L, U = bounds[0, i], bounds[1, i]
+            if not np.isinf(L):
+                low_bnd[i] = L
+                L = 1
+            if not np.isinf(U):
+                upper_bnd[i] = U
+                U = 1
+            nbd[i] = bounds_map[L, U]
+
+    if not maxls > 0:
+        raise ValueError('maxls must be positive.')
+
+    x = array(x0, dtype=np.float64)
+    f = array(0.0, dtype=np.float64)
+    g = zeros((n,), dtype=np.float64)
+    wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
+    iwa = zeros(3*n, dtype=np.int32)
+    task = zeros(2, dtype=np.int32)
+    ln_task = zeros(2, dtype=np.int32)
+    lsave = zeros(4, dtype=np.int32)
+    isave = zeros(44, dtype=np.int32)
+    dsave = zeros(29, dtype=float64)
+
+    n_iterations = 0
+
+    while True:
+        # g may become float32 if a user provides a function that calculates
+        # the Jacobian in float32 (see gh-18730). The underlying code expects
+        # float64, so upcast it
+        g = g.astype(np.float64)
+        # x, f, g, wa, iwa, task, csave, lsave, isave, dsave = \
+        _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr, pgtol, wa,
+                       iwa, task, lsave, isave, dsave, maxls, ln_task)
+
+        if task[0] == 3:
+            # The minimization routine wants f and g at the current x.
+            # Note that interruptions due to maxfun are postponed
+            # until the completion of the current minimization iteration.
+            # Overwrite f and g:
+            f, g = func_and_grad(x)
+        elif task[0] == 1:
+            # new iteration
+            n_iterations += 1
+
+            intermediate_result = OptimizeResult(x=x, fun=f)
+            if _call_callback_maybe_halt(callback, intermediate_result):
+                task[0] = 5
+                task[1] = 505
+            if n_iterations >= maxiter:
+                task[0] = 5
+                task[1] = 504
+            elif sf.nfev > maxfun:
+                task[0] = 5
+                task[1] = 502
+        else:
+            break
+
+    if task[0] == 4:
+        warnflag = 0
+    elif sf.nfev > maxfun or n_iterations >= maxiter:
+        warnflag = 1
+    else:
+        warnflag = 2
+
+    # These two portions of the workspace are described in the mainlb
+    # function docstring in "__lbfgsb.c", ws and wy arguments.
+    s = wa[0: m*n].reshape(m, n)
+    y = wa[m*n: 2*m*n].reshape(m, n)
+
+    # isave(31) = the total number of BFGS updates prior the current iteration.
+    n_bfgs_updates = isave[30]
+
+    n_corrs = min(n_bfgs_updates, maxcor)
+    hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])
+
+    msg = status_messages[task[0]] + ": " + task_messages[task[1]]
+
+    return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
+                          njev=sf.ngev,
+                          nit=n_iterations, status=warnflag, message=msg,
+                          x=x, success=(warnflag == 0), hess_inv=hess_inv)
+
+
+class LbfgsInvHessProduct(LinearOperator):
+    """Linear operator for the L-BFGS approximate inverse Hessian.
+
+    This operator computes the product of a vector with the approximate inverse
+    of the Hessian of the objective function, using the L-BFGS limited
+    memory approximation to the inverse Hessian, accumulated during the
+    optimization.
+
+    Objects of this class implement the ``scipy.sparse.linalg.LinearOperator``
+    interface.
+
+    Parameters
+    ----------
+    sk : array_like, shape=(n_corr, n)
+        Array of `n_corr` most recent updates to the solution vector.
+        (See [1]).
+    yk : array_like, shape=(n_corr, n)
+        Array of `n_corr` most recent updates to the gradient. (See [1]).
+
+    References
+    ----------
+    .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited
+       storage." Mathematics of computation 35.151 (1980): 773-782.
+
+    """
+
+    def __init__(self, sk, yk):
+        """Construct the operator."""
+        if sk.shape != yk.shape or sk.ndim != 2:
+            raise ValueError('sk and yk must have matching shape, (n_corrs, n)')
+        n_corrs, n = sk.shape
+
+        super().__init__(dtype=np.float64, shape=(n, n))
+
+        self.sk = sk
+        self.yk = yk
+        self.n_corrs = n_corrs
+        self.rho = 1 / np.einsum('ij,ij->i', sk, yk)
+
+    def _matvec(self, x):
+        """Efficient matrix-vector multiply with the BFGS matrices.
+
+        This calculation is described in Section (4) of [1].
+
+        Parameters
+        ----------
+        x : ndarray
+            An array with shape (n,) or (n,1).
+
+        Returns
+        -------
+        y : ndarray
+            The matrix-vector product
+
+        """
+        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
+        q = np.array(x, dtype=self.dtype, copy=True)
+        if q.ndim == 2 and q.shape[1] == 1:
+            q = q.reshape(-1)
+
+        alpha = np.empty(n_corrs)
+
+        for i in range(n_corrs-1, -1, -1):
+            alpha[i] = rho[i] * np.dot(s[i], q)
+            q = q - alpha[i]*y[i]
+
+        r = q
+        for i in range(n_corrs):
+            beta = rho[i] * np.dot(y[i], r)
+            r = r + s[i] * (alpha[i] - beta)
+
+        return r
+
+    def _matmat(self, X):
+        """Efficient matrix-matrix multiply with the BFGS matrices.
+
+        This calculation is described in Section (4) of [1].
+
+        Parameters
+        ----------
+        X : ndarray
+            An array with shape (n,m)
+
+        Returns
+        -------
+        Y : ndarray
+            The matrix-matrix product
+
+        Notes
+        -----
+        This implementation is written starting from _matvec and broadcasting
+        all expressions along the second axis of X.
+
+        """
+        s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho
+        Q = np.array(X, dtype=self.dtype, copy=True)
+
+        alpha = np.empty((n_corrs, Q.shape[1]))
+
+        for i in range(n_corrs-1, -1, -1):
+            alpha[i] = rho[i] * np.dot(s[i], Q)
+            Q -= alpha[i]*y[i][:, np.newaxis]
+
+        R = Q
+        for i in range(n_corrs):
+            beta = rho[i] * np.dot(y[i], R)
+            R += s[i][:, np.newaxis] * (alpha[i] - beta)
+
+        return R
+
+    def todense(self):
+        """Return a dense array representation of this operator.
+
+        Returns
+        -------
+        arr : ndarray, shape=(n, n)
+            An array with the same shape and containing
+            the same data represented by this `LinearOperator`.
+
+        """
+        I_arr = np.eye(*self.shape, dtype=self.dtype)
+        return self._matmat(I_arr)