hand

2026-05-06 19:47:31 +07:00
parent 94d8682530
commit 12dbb7731b
9963 changed files with 2747894 additions and 0 deletions
@@ -0,0 +1,465 @@
+# Copyright 2020 The JAX Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from collections.abc import Callable, Sequence
+import enum
+from typing import Any
+
+import math
+import numpy as np
+
+from jax._src import api
+from jax._src import core
+from jax._src import dtypes
+from jax._src import numpy as jnp
+from jax._src.lax import lax
+from jax._src.numpy import einsum as jnp_einsum
+from jax._src.util import canonicalize_axis
+from jax._src.numpy.util import promote_dtypes_inexact
+
+
+def _fill_lanczos_kernel(radius, x):
+  """Evaluates the Lanczos kernel with the given radius elementwise at ``x``.
+
+  Args:
+    radius: the kernel radius; the result is zero wherever ``x > radius``.
+    x: the distances at which to evaluate the kernel. Only the non-negative
+      side is handled: any ``x <= 1e-3`` (including negative values) maps to 1.
+
+  Returns:
+    ``radius * sin(pi * x) * sin(pi * x / radius) / (pi**2 * x**2)`` where
+    ``1e-3 < x <= radius``, 1 where ``x <= 1e-3``, and 0 where ``x > radius``.
+  """
+  # Numerator of the kernel; the denominator is applied below.
+  y = radius * jnp.sin(np.pi * x) * jnp.sin(np.pi * x / radius)
+  #  out = y / (np.pi ** 2 * x ** 2) where x >1e-3, 1 otherwise
+  # The inner `where` swaps a zero denominator for 1 so the division is never
+  # by zero, even for elements that the outer `where` discards.
+  out = jnp.where(x > 1e-3, jnp.divide(y, jnp.where(x != 0, np.pi**2 * x**2, 1)), 1)
+  return jnp.where(x > radius, 0., out)
+
+def _fill_keys_cubic_kernel(x):
+  """Evaluates the Keys cubic convolution kernel (A = -0.5) elementwise.
+
+  Args:
+    x: the distances at which to evaluate the kernel. The polynomials below
+      are only selected by comparisons against 1 and 2, so ``x`` is expected
+      to be non-negative (NOTE(review): confirm all callers pass absolute
+      distances).
+
+  Returns:
+    ``1.5 * x**3 - 2.5 * x**2 + 1`` where ``x < 1``,
+    ``-0.5 * x**3 + 2.5 * x**2 - 4 * x + 2`` where ``1 <= x < 2``, and
+    0 where ``x >= 2``.
+  """
+  # http://ieeexplore.ieee.org/document/1163711/
+  # R. G. Keys. Cubic convolution interpolation for digital image processing.
+  # IEEE Transactions on Acoustics, Speech, and Signal Processing,
+  # 29(6):1153–1160, 1981.
+  #
+  # https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
+  # This is the Keys kernel with A=-0.5.
+  #
+  # This kernel matches Pillow, TensorFlow, and Pytorch when
+  # antialiasing is enabled.
+  # Inner piece (x < 1), written in Horner form.
+  out = ((1.5 * x - 2.5) * x) * x + 1.
+  # Outer piece (1 <= x < 2) replaces the inner piece where it applies.
+  out = jnp.where(x >= 1., ((-0.5 * x + 2.5) * x - 4.) * x + 2., out)
+  # The kernel has no support at or beyond a distance of 2.
+  return jnp.where(x >= 2., 0., out)
+
+
+def _fill_opencv_cubic_kernel(x):
+  """Evaluates the cubic convolution kernel with A = -0.75 elementwise.
+
+  Args:
+    x: the distances at which to evaluate the kernel. The polynomials below
+      are only selected by comparisons against 1 and 2, so ``x`` is expected
+      to be non-negative (NOTE(review): confirm all callers pass absolute
+      distances).
+
+  Returns:
+    With ``a = -0.75``: ``(a + 2) * x**3 - (a + 3) * x**2 + 1`` where
+    ``x < 1``, ``a * x**3 - 5 * a * x**2 + 8 * a * x - 4 * a`` where
+    ``1 <= x < 2``, and 0 where ``x >= 2``.
+  """
+  # See https://github.com/jax-ml/jax/issues/15768#issuecomment-1529939102 and
+  # https://en.wikipedia.org/wiki/Bicubic_interpolation#Bicubic_convolution_algorithm
+  #
+  # When antialiasing is disabled, PyTorch uses a cubic kernel with A = -0.75
+  # that matches OpenCV.
+  # At least some users consider this a bug (opencv/opencv#17720), and that set
+  # of parameters suffers from ringing artifacts.
+  a = -0.75
+  # Inner piece (x < 1).
+  out = ((a + 2.0) * x - (a + 3.0)) * x * x + 1.0
+  # Outer piece (1 <= x < 2) replaces the inner piece where it applies.
+  out = jnp.where(x >= 1.0, ((a * x - 5.0 * a) * x + 8.0 * a) * x - 4.0 * a,
+                  out)
+  # The kernel has no support at or beyond a distance of 2.
+  return jnp.where(x >= 2.0, 0.0, out)
+
+
+def _fill_triangle_kernel(x):
+  """Evaluates the triangle (linear) kernel ``max(0, 1 - |x|)`` elementwise."""
+  distance = jnp.abs(x)
+  return jnp.maximum(0, 1 - distance)
+
+
+def compute_weight_mat(input_size: core.DimSize,
+                       output_size: core.DimSize,
+                       scale,
+                       translation,
+                       kernel: Callable,
+                       antialias: bool,
+                       edge_padding: bool,
+                       radius: int | None):
+  """Builds the resampling weight matrix for one spatial dimension.
+
+  Each column holds the kernel weights that combine the input pixels into one
+  output pixel. Columns are normalized to sum to 1; a column whose weight sum
+  is at most ``1000 * float32 eps`` in magnitude is set to zero instead.
+
+  Args:
+    input_size: size of the input along this dimension.
+    output_size: size of the output along this dimension.
+    scale: scale factor applied along this dimension.
+    translation: translation applied along this dimension.
+    kernel: callable mapping an array of distances to kernel weights.
+    antialias: if True, distances are divided by ``max(1 / scale, 1)`` before
+      the kernel is evaluated, which widens the kernel when downsampling.
+    edge_padding: if True, the kernel is also evaluated at ``k`` positions
+      beyond each end of the input and those weights are accumulated onto the
+      nearest edge pixel. If False, output pixels whose sample location falls
+      outside the input receive all-zero weights.
+    radius: the kernel radius, used to choose ``k``. Must not be None when
+      ``edge_padding`` is True.
+
+  Returns:
+    The weight matrix, of shape ``[input_size, output_size]``.
+  """
+  dtype = dtypes.result_type(scale, translation)
+  inv_scale = 1. / scale
+  # When downsampling the kernel should be scaled since we want to low pass
+  # filter and interpolate, but when upsampling it should not be since we only
+  # want to interpolate.
+  kernel_scale = jnp.maximum(inv_scale, 1.) if antialias else 1.
+
+  # sample_f has shape [output_size] and is the floating-point index in the
+  # input image corresponding to the center of each output pixel.
+  sample_f = ((jnp.arange(output_size, dtype=dtype) + 0.5) * inv_scale -
+              translation * inv_scale - 0.5)
+
+  # Evaluate the kernel for all input/output coordinate pairs. If edge_padding
+  # is true, this includes k pixels outside the original image.
+  if edge_padding:
+    assert radius is not None
+    if antialias:
+      # This case isn't actually reachable from the public APIs at the time of
+      # writing, but we did figure it out, so we may as well leave the code.
+      # k is used below as a Python-level array extent, so the scale has to be
+      # a concrete value here.
+      concrete_scale = core.concrete_or_error(
+          None, scale,
+          context="Antialiasing with edge padding requires a static scale."
+      )
+      inv_scale_val = 1.0 / float(concrete_scale)
+      kernel_scale_val = max(inv_scale_val, 1.0)
+      k = math.ceil(radius * kernel_scale_val)
+    else:
+      k = radius
+  else:
+    k = 0
+
+  # expanded_indices covers the input positions -k, ..., input_size + k - 1;
+  # x[i, j] is the kernel-scaled distance from expanded position i to the
+  # sample location of output pixel j.
+  expanded_indices = jnp.arange(-k, input_size + k, dtype=dtype)
+  x = jnp.abs(sample_f[np.newaxis, :] - expanded_indices[:, np.newaxis])
+  x = x / kernel_scale
+  weights = kernel(x)
+
+  if edge_padding:
+    # Some of the weights are for indices outside the input image. We use a
+    # scatter-add to move their mass onto the relevant edge pixels.
+    clamped_indices = jnp.clip(
+      expanded_indices.astype(jnp.int32), 0, input_size - 1)
+    output_indices = jnp.arange(output_size)
+    weight_mat = jnp.zeros((input_size, output_size), dtype=dtype)
+    # Broadcast the output column indices to the shape of `weights` so that
+    # every weight is paired with a (clamped row, column) destination.
+    output_indices_expanded = lax.broadcast_in_dim(
+        output_indices, (expanded_indices.shape[0], output_size), (1,))
+    weight_mat = weight_mat.at[
+        clamped_indices[:, np.newaxis], output_indices_expanded
+    ].add(weights)
+    # Normalize the weights
+    # The inner `where` keeps the divisor non-zero for columns that the outer
+    # `where` replaces with 0.
+    total_weight_sum = jnp.sum(weight_mat, axis=0, keepdims=True)
+    weights = jnp.where(
+        jnp.abs(total_weight_sum) > 1000. * float(np.finfo(np.float32).eps),
+        jnp.divide(weight_mat,
+                   jnp.where(total_weight_sum != 0, total_weight_sum, 1)),
+        0)
+  else:
+    # Normalize the weights to account for the fact that some or all of the
+    # input coordinates might not be in the valid part of the input image.
+    total_weight_sum = jnp.sum(weights, axis=0, keepdims=True)
+    weights = jnp.where(
+        jnp.abs(total_weight_sum) > 1000. * float(np.finfo(np.float32).eps),
+        jnp.divide(weights,
+                   jnp.where(total_weight_sum != 0, total_weight_sum, 1)),
+        0)
+
+    # Zero out weights where the sample location is completely outside the input
+    # range. sample_f has already had the 0.5 removed, hence the weird range
+    # below.
+    input_size_minus_0_5 = core.dimension_as_value(input_size) - 0.5
+    weights = jnp.where(
+        jnp.logical_and(sample_f >= -0.5,
+                        sample_f <= input_size_minus_0_5)[np.newaxis, :],
+        weights, 0)
+
+  return weights
+
+
+def _scale_and_translate(x, output_shape: core.Shape,
+                         spatial_dims: Sequence[int], scale, translation,
+                         kernel, antialias: bool, precision,
+                         edge_padding: bool = False, radius: int | None = None):
+  """Resamples ``x`` along ``spatial_dims`` with one weight matrix per dimension.
+
+  A weight matrix is built for every spatial dimension with
+  ``compute_weight_mat`` and all of them are contracted against ``x`` in a
+  single einsum call.
+
+  Args:
+    x: the input array.
+    output_shape: the output shape; must have the same length as ``x.shape``.
+    spatial_dims: the dimensions of ``x`` to resample. Each entry is passed
+      through ``canonicalize_axis``.
+    scale: per-spatial-dimension scales; same length as ``spatial_dims``.
+    translation: per-spatial-dimension translations; same length as
+      ``spatial_dims``.
+    kernel: callable mapping an array of distances to kernel weights.
+    antialias: forwarded to ``compute_weight_mat``.
+    precision: forwarded to the einsum call.
+    edge_padding: if False, pixels that are off the edge of the input
+      image will receive zero weight. If True, the edges of the input image are
+      repeated.
+    radius: the radius of the kernel. May be None if edge_padding is False.
+
+  Returns:
+    The resampled array, or ``x`` itself when ``spatial_dims`` is empty.
+  """
+  input_shape = x.shape
+  assert len(input_shape) == len(output_shape)
+  assert len(spatial_dims) == len(scale)
+  assert len(spatial_dims) == len(translation)
+  if len(spatial_dims) == 0:
+    return x
+  # Operands for the index-list form of einsum: `contractions` alternates a
+  # weight matrix with its [input axis, output axis] labels and ends with the
+  # labels of the result.
+  contractions = []
+  in_indices = list(range(len(output_shape)))
+  out_indices = list(range(len(output_shape)))
+  for i, d in enumerate(spatial_dims):
+    d = canonicalize_axis(d, x.ndim)
+    m = input_shape[d]
+    n = output_shape[d]
+    w = compute_weight_mat(
+        m, n, scale[i], translation[i], kernel, antialias,
+        edge_padding=edge_padding, radius=radius,
+    ).astype(x.dtype)
+    contractions.append(w)
+    # Label d is contracted away; the resized axis gets a fresh label that
+    # cannot collide with the labels 0..ndim-1 used for x.
+    contractions.append([d, len(output_shape) + i])
+    out_indices[d] = len(output_shape) + i
+  contractions.append(out_indices)
+  return jnp_einsum.einsum(x, in_indices, *contractions, precision=precision)
+
+
+class ResizeMethod(enum.Enum):
+  """Image resize method.
+
+  Possible values are:
+
+  NEAREST:
+    Nearest-neighbor interpolation.
+
+  LINEAR:
+    `Linear interpolation`_.
+
+  LANCZOS3:
+    `Lanczos resampling`_, using a kernel of radius 3.
+
+  LANCZOS5:
+    `Lanczos resampling`_, using a kernel of radius 5.
+
+  CUBIC:
+    `Cubic interpolation`_, using the Keys cubic kernel.
+
+  CUBIC_PYTORCH:
+    `Cubic interpolation`_, matching PyTorch's bicubic resizing behavior.
+
+  .. _Linear interpolation: https://en.wikipedia.org/wiki/Bilinear_interpolation
+  .. _Cubic interpolation: https://en.wikipedia.org/wiki/Bicubic_interpolation
+  .. _Lanczos resampling: https://en.wikipedia.org/wiki/Lanczos_resampling
+  """
+
+  NEAREST = 0
+  LINEAR = 1
+  LANCZOS3 = 2
+  LANCZOS5 = 3
+  CUBIC = 4
+  CUBIC_PYTORCH = 5
+
+  # Caution: the resize implementation relies on every kernel being
+  # interpolating, i.e. under the identity warp the output equals the input.
+  # A Gaussian kernel, for example, does not have this property, so adding
+  # such a kernel requires changing the implementation.
+
+  @staticmethod
+  def from_string(s: str):
+    """Returns the ``ResizeMethod`` named by ``s``.
+
+    Raises:
+      ValueError: if ``s`` is not a recognized method name.
+    """
+    # Checked in order; the first group of aliases containing `s` wins.
+    aliases_by_method = (
+        (('nearest',), ResizeMethod.NEAREST),
+        (('linear', 'bilinear', 'trilinear', 'triangle'), ResizeMethod.LINEAR),
+        (('lanczos3',), ResizeMethod.LANCZOS3),
+        (('lanczos5',), ResizeMethod.LANCZOS5),
+        (('cubic', 'bicubic', 'tricubic'), ResizeMethod.CUBIC),
+        (('cubic-pytorch', 'bicubic-pytorch'), ResizeMethod.CUBIC_PYTORCH),
+    )
+    for aliases, resize_method in aliases_by_method:
+      if s in aliases:
+        return resize_method
+    raise ValueError(f'Unknown resize method "{s}"')
+
+# Maps each kernel-based resize method to a ``(radius, kernel)`` pair: the
+# kernel radius and a callable that evaluates the kernel at an array of
+# distances. ResizeMethod.NEAREST has no entry here.
+_kernels = {
+    ResizeMethod.LINEAR: (1, _fill_triangle_kernel),
+    ResizeMethod.LANCZOS3: (3, lambda x: _fill_lanczos_kernel(3., x)),
+    ResizeMethod.LANCZOS5: (5, lambda x: _fill_lanczos_kernel(5., x)),
+    ResizeMethod.CUBIC: (2, _fill_keys_cubic_kernel),
+    ResizeMethod.CUBIC_PYTORCH: (2, _fill_opencv_cubic_kernel),
+}
+
+
+# scale and translation here are scalar elements of an np.array, what is the
+# correct type annotation?
+def scale_and_translate(image, shape: core.Shape,
+                        spatial_dims: Sequence[int],
+                        scale, translation,
+                        method: str | ResizeMethod,
+                        antialias: bool = True,
+                        precision=lax.Precision.HIGHEST):
+  """Apply a scale and translation to an image.
+
+  Generates a new image of shape 'shape' by resampling from the input image
+  using the sampling method corresponding to method. For 2D images, this
+  operation transforms a location in the input images, (x, y), to a location
+  in the output image according to::
+
+    (x * scale[1] + translation[1], y * scale[0] + translation[0])
+
+  (Note the *inverse* warp is used to generate the sample locations.)
+  Assumes half-centered pixels, i.e. the pixel at integer location ``row, col``
+  has coordinates ``y, x = row + 0.5, col + 0.5``, and similarly for other input
+  image dimensions.
+
+  If an output location (pixel) maps to an input sample location that is outside
+  the input boundaries then the value for the output location will be set to
+  zero. The exception is ``ResizeMethod.CUBIC_PYTORCH`` with ``antialias``
+  disabled, which uses edge padding (the edge pixels of the input are repeated)
+  instead.
+
+  This function can be used to imitate the behavior of
+  ``torch.nn.functional.interpolate`` with ``align_corners=True`` by setting::
+
+      scale = (n - 1) / (m - 1)
+      translation = 0.5 * (1 - scale)
+
+  where ``m`` is the input size and ``n`` is the output size for a given
+  dimension.
+
+  The ``method`` argument expects one of the following resize methods:
+
+  ``ResizeMethod.LINEAR``, ``"linear"``, ``"bilinear"``, ``"trilinear"``,
+    ``"triangle"`` `Linear interpolation`_. If ``antialias`` is ``True``, uses a
+    triangular filter when downsampling.
+
+  ``ResizeMethod.CUBIC``, ``"cubic"``, ``"bicubic"``, ``"tricubic"``
+    `Cubic interpolation`_, using the Keys cubic kernel.
+
+  ``ResizeMethod.CUBIC_PYTORCH``, ``"cubic-pytorch"``, ``"bicubic-pytorch"``
+    `Cubic interpolation`_, matching PyTorch's bicubic resizing behavior.
+    Identical to ``ResizeMethod.CUBIC`` when antialiasing is enabled, but uses
+    a different kernel and enables edge padding when antialiasing is disabled.
+
+  ``ResizeMethod.LANCZOS3``, ``"lanczos3"``
+    `Lanczos resampling`_, using a kernel of radius 3.
+
+  ``ResizeMethod.LANCZOS5``, ``"lanczos5"``
+    `Lanczos resampling`_, using a kernel of radius 5.
+
+  .. _Linear interpolation: https://en.wikipedia.org/wiki/Bilinear_interpolation
+  .. _Cubic interpolation: https://en.wikipedia.org/wiki/Bicubic_interpolation
+  .. _Lanczos resampling: https://en.wikipedia.org/wiki/Lanczos_resampling
+
+  Args:
+    image: a JAX array.
+    shape: the output shape, as a sequence of integers with length equal to the
+      number of dimensions of `image`.
+    spatial_dims: A length K tuple specifying the spatial dimensions that the
+      passed scale and translation should be applied to.
+    scale: A [K] array containing the scale to apply in each of the K spatial
+      dimensions.
+    translation: A [K] array containing the translation to apply in each of
+      the K spatial dimensions.
+    method: the resizing method to use; either a ``ResizeMethod`` instance or a
+      string. Available methods are: LINEAR, LANCZOS3, LANCZOS5, CUBIC, CUBIC_PYTORCH.
+    antialias: Should an antialiasing filter be used when downsampling? Defaults
+      to ``True``. Has no effect when upsampling.
+    precision: the precision passed to the einsum that applies the resampling
+      weights. Defaults to ``lax.Precision.HIGHEST``.
+
+  Returns:
+    The scale and translated image.
+
+  Raises:
+    ValueError: if the length of ``shape`` differs from ``image.ndim``, if
+      ``method`` is an unrecognized string, or if ``method`` is nearest
+      neighbor, which this function does not support.
+  """
+  shape = core.canonicalize_shape(shape)
+  if len(shape) != image.ndim:
+    msg = ('shape must have length equal to the number of dimensions of x; '
+           f' {shape} vs {image.shape}')
+    raise ValueError(msg)
+  if isinstance(method, str):
+    method = ResizeMethod.from_string(method)
+  if method == ResizeMethod.NEAREST:
+    # Nearest neighbor is currently special-cased for straight resize, so skip
+    # for now.
+    raise ValueError('Nearest neighbor resampling is not currently supported '
+                     'for scale_and_translate.')
+  assert isinstance(method, ResizeMethod)
+
+  # With antialiasing enabled CUBIC_PYTORCH is defined to be the same as CUBIC,
+  # so after this remapping CUBIC_PYTORCH always implies edge padding.
+  if method == ResizeMethod.CUBIC_PYTORCH and antialias:
+    method = ResizeMethod.CUBIC
+  radius, kernel = _kernels[method]
+  edge_padding = (method == ResizeMethod.CUBIC_PYTORCH and not antialias)
+  image, = promote_dtypes_inexact(image)
+  scale, translation = promote_dtypes_inexact(scale, translation)
+  return _scale_and_translate(
+     image, shape, spatial_dims, scale, translation, kernel, antialias,
+     precision, edge_padding=edge_padding, radius=radius)
+
+
+def _resize_nearest(x, output_shape: core.Shape):
+  """Nearest-neighbor resize of ``x`` to ``output_shape``.
+
+  Every dimension whose input and output sizes are not definitely equal is
+  resampled, one dimension at a time, by gathering the input element whose
+  index is ``floor((j + 0.5) * input_size / output_size)`` for output index
+  ``j``.
+  """
+  input_shape = x.shape
+  rank = len(input_shape)
+  assert rank == len(output_shape)
+  for axis, (in_size, out_size) in enumerate(zip(input_shape, output_shape)):
+    if core.definitely_equal(in_size, out_size):
+      continue
+    centers = jnp.arange(out_size, dtype=np.float32) + 0.5
+    source = (centers * core.dimension_as_value(in_size)
+              / core.dimension_as_value(out_size))
+    # TODO(b/206898375): this computation produces the wrong result on
+    # CPU and GPU when using float64. Use float32 until the bug is fixed.
+    source_index = jnp.floor(source.astype(np.float32)).astype(np.int32)
+    # Take everything along the other axes and gather along `axis`.
+    selector = ((slice(None),) * axis + (source_index,)
+                + (slice(None),) * (rank - axis - 1))
+    x = x[selector]
+  return x
+
+
+@api.jit(static_argnums=(1, 2, 3, 4))
+def _resize(image, shape: core.Shape, method: str | ResizeMethod,
+            antialias: bool, precision):
+  """Jitted implementation behind ``resize``.
+
+  ``shape``, ``method``, ``antialias`` and ``precision`` are static arguments
+  of the jit (``static_argnums=(1, 2, 3, 4)``); only ``image`` is traced.
+
+  Args:
+    image: the array to resize.
+    shape: the output shape; must have one entry per dimension of ``image``.
+    method: a ``ResizeMethod`` or a string accepted by
+      ``ResizeMethod.from_string``.
+    antialias: whether to antialias when downsampling. Not used for
+      ``ResizeMethod.NEAREST``.
+    precision: forwarded to ``_scale_and_translate``. Not used for
+      ``ResizeMethod.NEAREST``.
+
+  Returns:
+    The resized array. For ``ResizeMethod.NEAREST`` the input dtype is kept;
+    otherwise ``image`` is first passed through ``promote_dtypes_inexact``.
+
+  Raises:
+    ValueError: if the length of ``shape`` differs from ``image.ndim`` or
+      ``method`` is an unrecognized string.
+  """
+  if len(shape) != image.ndim:
+    msg = ('shape must have length equal to the number of dimensions of x; '
+           f' {shape} vs {image.shape}')
+    raise ValueError(msg)
+  if isinstance(method, str):
+    method = ResizeMethod.from_string(method)
+  if method == ResizeMethod.NEAREST:
+    return _resize_nearest(image, shape)
+  assert isinstance(method, ResizeMethod)
+
+  image, = promote_dtypes_inexact(image)
+  # Skip dimensions that have scale=1 and translation=0, this is only possible
+  # since all of the current resize methods (kernels) are interpolating, so the
+  # output = input under an identity warp.
+  spatial_dims = tuple(i for i in range(len(shape))
+                       if not core.definitely_equal(image.shape[i], shape[i]))
+  # With antialiasing enabled CUBIC_PYTORCH is defined to be the same as CUBIC,
+  # so after this remapping CUBIC_PYTORCH always implies edge padding.
+  if method == ResizeMethod.CUBIC_PYTORCH and antialias:
+    method = ResizeMethod.CUBIC
+  radius, kernel = _kernels[method]
+  # The scale is output size / input size. An output size of 0 would give a
+  # scale of 0, so 1.0 is substituted for it.
+  scale = [1.0 if core.definitely_equal(shape[d], 0) else core.dimension_as_value(shape[d]) / core.dimension_as_value(image.shape[d])
+           for d in spatial_dims]
+  edge_padding = (method == ResizeMethod.CUBIC_PYTORCH and not antialias)
+  # A plain resize has no translation, hence the zeros.
+  return _scale_and_translate(image, shape, spatial_dims, scale,
+                              [0.] * len(spatial_dims), kernel, antialias,
+                              precision, edge_padding=edge_padding,
+                              radius=radius)
+
+
+def resize(image, shape: core.Shape, method: str | ResizeMethod,
+           antialias: bool = True,
+           precision = lax.Precision.HIGHEST):
+  """Image resize.
+
+  The ``method`` argument expects one of the following resize methods:
+
+  ``ResizeMethod.NEAREST``, ``"nearest"``
+    `Nearest neighbor interpolation`_. The values of ``antialias`` and
+    ``precision`` are ignored.
+
+  ``ResizeMethod.LINEAR``, ``"linear"``, ``"bilinear"``, ``"trilinear"``, ``"triangle"``
+    `Linear interpolation`_. If ``antialias`` is ``True``, uses a triangular
+    filter when downsampling.
+
+  ``ResizeMethod.CUBIC``, ``"cubic"``, ``"bicubic"``, ``"tricubic"``
+    `Cubic interpolation`_, using the Keys cubic kernel.
+
+  ``ResizeMethod.CUBIC_PYTORCH``, ``"cubic-pytorch"``, ``"bicubic-pytorch"``
+    `Cubic interpolation`_, matching PyTorch's bicubic resizing behavior.
+    Identical to ``ResizeMethod.CUBIC`` when antialiasing is enabled, but uses
+    a different kernel and enables edge padding when antialiasing is disabled.
+
+  ``ResizeMethod.LANCZOS3``, ``"lanczos3"``
+    `Lanczos resampling`_, using a kernel of radius 3.
+
+  ``ResizeMethod.LANCZOS5``, ``"lanczos5"``
+    `Lanczos resampling`_, using a kernel of radius 5.
+
+  .. _Nearest neighbor interpolation: https://en.wikipedia.org/wiki/Nearest-neighbor_interpolation
+  .. _Linear interpolation: https://en.wikipedia.org/wiki/Bilinear_interpolation
+  .. _Cubic interpolation: https://en.wikipedia.org/wiki/Bicubic_interpolation
+  .. _Lanczos resampling: https://en.wikipedia.org/wiki/Lanczos_resampling
+
+  This function does not support an ``align_corners`` argument like
+  ``torch.nn.functional.interpolate``. That behavior can be emulated using
+  :func:`scale_and_translate`.
+
+  Args:
+    image: a JAX array.
+    shape: the output shape, as a sequence of integers with length equal to
+      the number of dimensions of `image`. Note that :func:`resize` does not
+      distinguish spatial dimensions from batch or channel dimensions, so this
+      includes all dimensions of the image. To represent a batch or a channel
+      dimension, simply leave that element of the shape unchanged.
+    method: the resizing method to use; either a ``ResizeMethod`` instance or a
+      string. Available methods are: NEAREST, LINEAR, LANCZOS3, LANCZOS5, CUBIC,
+      CUBIC_PYTORCH.
+    antialias: should an antialiasing filter be used when downsampling? Defaults
+      to ``True``. Has no effect when upsampling.
+    precision: the precision passed to the einsum that applies the resampling
+      weights. Defaults to ``lax.Precision.HIGHEST``. Ignored for
+      ``ResizeMethod.NEAREST``.
+  Returns:
+    The resized image. The return type may differ from the input type depending
+    on the ``method``. For ``ResizeMethod.NEAREST``, the return type is the same
+    as the input type. For other methods, the output type will be promoted to a
+    floating point type.
+  """
+  # The shape is canonicalized here, outside the jitted helper, which takes it
+  # as a static argument.
+  return _resize(image, core.canonicalize_shape(shape), method, antialias,
+                 precision)