hand

2026-05-06 19:47:31 +07:00
parent 94d8682530
commit 12dbb7731b
9963 changed files with 2747894 additions and 0 deletions
@@ -0,0 +1,243 @@
+# Copyright 2019 The JAX Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from collections.abc import Sequence
+import enum
+import math
+
+import numpy as np
+
+from jax._src import dispatch
+from jax._src import dtypes
+from jax._src.api import jit
+from jax._src.core import Primitive, is_constant_shape
+from jax._src.interpreters import ad
+from jax._src.interpreters import batching
+from jax._src.interpreters import mlir
+from jax._src.lax import lax
+from jax._src.lax import slicing
+from jax._src.lib.mlir.dialects import hlo
+
+__all__ = [
+  "fft",
+  "fft_p",
+]
+
+class FftType(enum.IntEnum):
+  "Describes which FFT operation to perform."
+
+  FFT = 0
+  "Forward complex-to-complex FFT."
+
+  IFFT = 1
+  "Inverse complex-to-complex FFT."
+
+  RFFT = 2
+  "Forward real-to-complex FFT."
+
+  IRFFT = 3
+  "Inverse real-to-complex FFT."
+
+
+def _str_to_fft_type(s: str) -> FftType:
+  if s in ("fft", "FFT"):
+    return FftType.FFT
+  elif s in ("ifft", "IFFT"):
+    return FftType.IFFT
+  elif s in ("rfft", "RFFT"):
+    return FftType.RFFT
+  elif s in ("irfft", "IRFFT"):
+    return FftType.IRFFT
+  else:
+    raise ValueError(f"Unknown FFT type '{s}'")
+
+@jit(static_argnums=(1, 2))
+def fft(x, fft_type: FftType | str, fft_lengths: Sequence[int]):
+  if isinstance(fft_type, str):
+    typ = _str_to_fft_type(fft_type)
+  elif isinstance(fft_type, FftType):
+    typ = fft_type
+  else:
+    raise TypeError(f"Unknown FFT type value '{fft_type}'")
+
+  if typ == FftType.RFFT:
+    if np.iscomplexobj(x):
+      raise ValueError("only real valued inputs supported for rfft")
+    x = lax.convert_element_type(x, dtypes.to_inexact_dtype(dtypes.dtype(x)))
+  else:
+    x = lax.convert_element_type(x, dtypes.to_complex_dtype(dtypes.dtype(x)))
+  if len(fft_lengths) == 0:
+    # XLA FFT doesn't support 0-rank.
+    return x
+  fft_lengths = tuple(fft_lengths)
+  return fft_p.bind(x, fft_type=typ, fft_lengths=fft_lengths)
+
+def _fft_impl(x, fft_type, fft_lengths):
+  return dispatch.apply_primitive(fft_p, x, fft_type=fft_type, fft_lengths=fft_lengths)
+
+_complex_dtype = lambda dtype: (np.zeros((), dtype) + np.zeros((), np.complex64)).dtype
+_real_dtype = lambda dtype: np.finfo(dtype).dtype
+
+def fft_abstract_eval(x, fft_type, fft_lengths):
+  if len(fft_lengths) > x.ndim:
+    raise ValueError(f"FFT input shape {x.shape} must have at least as many "
+                    f"input dimensions as fft_lengths {fft_lengths}.")
+  if fft_type == FftType.RFFT:
+    if x.dtype not in (np.float32, np.float64):
+      raise ValueError(f"RFFT input must be float32 or float64, got {x.dtype}")
+    if x.shape[-len(fft_lengths):] != fft_lengths:
+      raise ValueError(f"RFFT input shape {x.shape} minor dimensions must "
+                      f"be equal to fft_lengths {fft_lengths}")
+    shape = (x.shape[:-len(fft_lengths)] + fft_lengths[:-1]
+             + (fft_lengths[-1] // 2 + 1,))
+    dtype = _complex_dtype(x.dtype)
+  elif fft_type == FftType.IRFFT:
+    if not np.issubdtype(x.dtype, np.complexfloating):
+      raise ValueError("IRFFT input must be complex64 or complex128, got "
+                       f"{x.dtype}")
+    if x.shape[-len(fft_lengths):-1] != fft_lengths[:-1]:
+      raise ValueError(f"IRFFT input shape {x.shape} minor dimensions must "
+                      "be equal to all except the last fft_length, got "
+                      f"{fft_lengths=}")
+    shape = x.shape[:-len(fft_lengths)] + fft_lengths
+    dtype = _real_dtype(x.dtype)
+  else:
+    if not np.issubdtype(x.dtype, np.complexfloating):
+      raise ValueError("FFT input must be complex64 or complex128, got "
+                       f"{x.dtype}")
+    if x.shape[-len(fft_lengths):] != fft_lengths:
+      raise ValueError(f"FFT input shape {x.shape} minor dimensions must "
+                      f"be equal to fft_lengths {fft_lengths}")
+    shape = x.shape
+    dtype = x.dtype
+  if x.mat.unreduced or x.mat.reduced:
+    raise NotImplementedError
+  return x.update(shape=shape, dtype=dtype)
+
+def _fft_lowering(ctx, x, *, fft_type, fft_lengths):
+  if not is_constant_shape(fft_lengths):
+    # TODO: https://github.com/openxla/stablehlo/issues/1366
+    raise NotImplementedError("Shape polymorphism for FFT with non-constant fft_length is not implemented for TPU and GPU")
+  return [
+      hlo.FftOp(x, hlo.FftTypeAttr.get(fft_type.name),
+                mlir.dense_int_array(fft_lengths)).result
+  ]
+
+
+def _fft_lowering_gpu(ctx, x, *, fft_type, fft_lengths):
+  # Decompose multi-dimensional IRFFT into a sequence of transforms.
+  # cuFFT assumes Hermitian symmetry on all dimensions of a multi-dimensional
+  # C2R transform, whereas our other implementations and NumPy only require
+  # symmetry on the final dimension.
+  if fft_type == FftType.IRFFT and len(fft_lengths) > 1:
+    rank = len(ctx.avals_in[0].shape)
+
+    # Move the final C2R axis (at index -1) to the start of the FFT block.
+    target_pos = rank - len(fft_lengths)
+
+    perm_out = list(range(rank))
+    perm_out.insert(target_pos, perm_out.pop(-1))
+    x = hlo.transpose(x, mlir.dense_int_array(perm_out))
+
+    # Apply multi-dimensional IFFT on the outer axes (which are now at the end).
+    outer_lengths = fft_lengths[:-1]
+    x = hlo.FftOp(x, hlo.FftTypeAttr.get(FftType.IFFT.name),
+                  mlir.dense_int_array(outer_lengths)).result
+
+    # Move the C2R axis back to the end.
+    perm_in = list(range(rank))
+    perm_in.append(perm_in.pop(target_pos))
+    x = hlo.transpose(x, mlir.dense_int_array(perm_in))
+
+    # Apply 1D IRFFT on the last axis.
+    x = hlo.FftOp(x, hlo.FftTypeAttr.get(FftType.IRFFT.name),
+                  mlir.dense_int_array((fft_lengths[-1],))).result
+
+    return [x]
+
+  return _fft_lowering(ctx, x, fft_type=fft_type, fft_lengths=fft_lengths)
+
+
+@jit(static_argnums=1)
+def _rfft_transpose(t, fft_lengths):
+  # The transpose can be computed directly using irfft with a mask to account
+  # for Hermitian redundancy. Mask values are 1 for DC and Nyquist, 2 for others.
+  n = fft_lengths[-1]
+  is_odd = n % 2
+  m = t.shape[-1]
+  mask = lax.full_like(t, 2.0, shape=(m,))
+  mask = slicing.dynamic_update_slice(
+      mask, lax.full_like(t, 1.0, shape=(1,)), (0,)
+  )
+  if not is_odd:
+    mask = slicing.dynamic_update_slice(
+        mask, lax.full_like(t, 1.0, shape=(1,)), (m - 1,)
+    )
+
+  N = math.prod(fft_lengths)
+  # The mask is along the last dimension.
+  mask = lax.expand_dims(mask, range(t.ndim - 1))
+  out = N * fft(lax.conj(t) / mask, FftType.IRFFT, fft_lengths)
+  assert out.dtype == _real_dtype(t.dtype), (out.dtype, t.dtype)
+  return out
+
+def _irfft_transpose(t, fft_lengths):
+  # The transpose of IRFFT is the RFFT of the cotangent times a scaling
+  # factor and a mask. The mask scales the cotangent for the Hermitian
+  # symmetric components of the RFFT by a factor of two, since these components
+  # are de-duplicated in the RFFT.
+  x = fft(t, FftType.RFFT, fft_lengths)
+  n = x.shape[-1]
+  is_odd = fft_lengths[-1] % 2
+  mask = lax.full_like(t, 2.0, shape=(n,), dtype=x.dtype)
+  mask = slicing.dynamic_update_slice(
+      mask, lax.full_like(t, 1.0, shape=(1,), dtype=x.dtype), (0,)
+  )
+  if not is_odd:
+    mask = slicing.dynamic_update_slice(
+        mask, lax.full_like(t, 1.0, shape=(1,), dtype=x.dtype), (n - 1,)
+    )
+
+  scale = 1 / math.prod(fft_lengths)
+  out = scale * lax.expand_dims(mask, range(x.ndim - 1)) * x
+  assert out.dtype == _complex_dtype(t.dtype), (out.dtype, t.dtype)
+  # Use JAX's convention for complex gradients
+  # https://github.com/jax-ml/jax/issues/6223#issuecomment-807740707
+  return lax.conj(out)
+
+def _fft_transpose_rule(t, operand, fft_type, fft_lengths):
+  if fft_type == FftType.RFFT:
+    result = _rfft_transpose(t, fft_lengths)
+  elif fft_type == FftType.IRFFT:
+    result = _irfft_transpose(t, fft_lengths)
+  else:
+    result = fft(t, fft_type, fft_lengths)
+  return result,
+
+def _fft_batching_rule(batched_args, batch_dims, fft_type, fft_lengths):
+  x, = batched_args
+  bd, = batch_dims
+  x = batching.moveaxis(x, bd, 0)
+  return fft(x, fft_type, fft_lengths), 0
+
+fft_p = Primitive('fft')
+fft_p.def_impl(_fft_impl)
+fft_p.def_abstract_eval(fft_abstract_eval)
+mlir.register_lowering(fft_p, _fft_lowering)
+mlir.register_lowering(fft_p, _fft_lowering_gpu, platform="cuda")
+mlir.register_lowering(fft_p, _fft_lowering_gpu, platform="rocm")
+ad.deflinear2(fft_p, _fft_transpose_rule)
+batching.primitive_batchers[fft_p] = _fft_batching_rule