Implement gcd, lcm (#40651)
Summary:
Resolves pytorch/pytorch#40018.

Pull Request resolved: pytorch/pytorch#40651

Reviewed By: ezyang

Differential Revision: D22511828

Pulled By: mruberry

fbshipit-source-id: 3ef251e45da4688b1b64c79f530fb6642feb63ab
aayn authored and facebook-github-bot committed Jul 16, 2020
1 parent e44f460 commit 200c343
Showing 14 changed files with 242 additions and 0 deletions.
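
The new ops follow NumPy's gcd/lcm conventions. A minimal usage sketch (assuming a build that includes this commit; the values follow from the kernels and tests below):

    import torch

    a = torch.tensor([6, 0, 9], dtype=torch.int64)
    b = torch.tensor([4, 5, 0], dtype=torch.int64)

    torch.gcd(a, b)  # tensor([2, 5, 9]); gcd(0, n) == n
    torch.lcm(a, b)  # tensor([12, 0, 0]); lcm with a zero operand is 0
    a.gcd_(b)        # in-place method variant, also added here
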
2 changes: 2 additions & 0 deletions aten/src/ATen/core/aten_interned_strings.h
@@ -346,6 +346,7 @@ _(aten, frobenius_norm) \
_(aten, full) \
_(aten, full_like) \
_(aten, gather) \
_(aten, gcd) \
_(aten, ge) \
_(aten, gelu) \
_(aten, geometric) \
@@ -407,6 +408,7 @@ _(aten, l1_loss) \
_(aten, l1_loss_backward) \
_(aten, l1_loss_forward) \
_(aten, layer_norm) \
_(aten, lcm) \
_(aten, le) \
_(aten, leaky_relu) \
_(aten, leaky_relu_backward) \
32 changes: 32 additions & 0 deletions aten/src/ATen/native/BinaryOps.cpp
@@ -43,6 +43,8 @@ DEFINE_DISPATCH(fmod_stub);
DEFINE_DISPATCH(fmod_scalar_stub);
DEFINE_DISPATCH(logaddexp_stub);
DEFINE_DISPATCH(logaddexp2_stub);
DEFINE_DISPATCH(gcd_stub);
DEFINE_DISPATCH(lcm_stub);

Tensor& add_out(Tensor& result, const Tensor& self, const Tensor& other, Scalar alpha) {
  auto iter = TensorIterator::binary_op(result, self, other,
@@ -852,6 +854,36 @@ Tensor logaddexp2(const Tensor& self, const Tensor& other) {
  return at::logaddexp2_out(result, self, other);
}

Tensor& gcd_out(Tensor& result, const Tensor& self, const Tensor& other) {
  auto iter = TensorIterator::binary_op(result, self, other, /*check_mem_overlap=*/ true);
  gcd_stub(iter.device_type(), iter);
  return result;
}

Tensor gcd(const Tensor& self, const Tensor& other) {
  Tensor result = at::empty({0}, self.options());
  return at::gcd_out(result, self, other);
}

Tensor& gcd_(Tensor& self, const Tensor& other) {
  return at::gcd_out(self, self, other);
}

Tensor& lcm_out(Tensor& result, const Tensor& self, const Tensor& other) {
  auto iter = TensorIterator::binary_op(result, self, other, /*check_mem_overlap=*/ true);
  lcm_stub(iter.device_type(), iter);
  return result;
}

Tensor lcm(const Tensor& self, const Tensor& other) {
  Tensor result = at::empty({0}, self.options());
  return at::lcm_out(result, self, other);
}

Tensor& lcm_(Tensor& self, const Tensor& other) {
  return at::lcm_out(self, self, other);
}

Tensor true_divide(const Tensor& self, Scalar divisor) {
  return self.true_divide(wrapped_scalar_tensor(divisor)); // redispatch!
}
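
The three entry points per op follow ATen's usual layering: the out= function builds a TensorIterator and invokes the dispatch stub, the functional variant allocates an empty result and forwards to the out= function, and the in-place variant forwards with self as the output. A sketch of the resulting equivalences (Python, assuming this commit):

    import torch

    a = torch.tensor([4, 6], dtype=torch.int32)
    b = torch.tensor([6, 15], dtype=torch.int32)

    out = torch.empty_like(a)
    torch.gcd(a, b, out=out)                  # gcd.out writes into out
    assert torch.equal(out, torch.gcd(a, b))  # functional forwards to gcd_out

    c = a.clone()
    c.gcd_(b)                                 # in-place: self is the output
    assert torch.equal(c, torch.gcd(a, b))
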
2 changes: 2 additions & 0 deletions aten/src/ATen/native/BinaryOps.h
@@ -61,5 +61,7 @@ DECLARE_DISPATCH(binary_fn, fmod_stub);
DECLARE_DISPATCH(binary_fn_alpha, fmod_scalar_stub);
DECLARE_DISPATCH(binary_fn, logaddexp_stub);
DECLARE_DISPATCH(binary_fn, logaddexp2_stub);
DECLARE_DISPATCH(binary_fn, gcd_stub);
DECLARE_DISPATCH(binary_fn, lcm_stub);

}} // namespace at::native
13 changes: 13 additions & 0 deletions aten/src/ATen/native/Math.h
@@ -267,3 +267,16 @@ static inline float calc_digamma(float x) {
}

inline c10::BFloat16 calc_erfinv(c10::BFloat16 a) { return calc_erfinv(float(a)); }

template <typename T>
static inline typename std::enable_if<std::is_integral<T>::value, T>::type
calc_gcd(T a, T b) {
  a = std::abs(a);
  b = std::abs(b);
  while (a != 0) {
    T c = a;
    a = b % a;
    b = c;
  }
  return b;
}
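
calc_gcd is the iterative Euclidean algorithm applied to absolute values. An equivalent Python sketch:

    def calc_gcd(a: int, b: int) -> int:
        # Iterative Euclidean algorithm, mirroring the C++ template above.
        a, b = abs(a), abs(b)
        while a != 0:
            a, b = b % a, a  # one Euclid step: (a, b) <- (b mod a, a)
        return b

    assert calc_gcd(-12, 18) == 6
    assert calc_gcd(0, 7) == 7  # gcd(0, n) == n, hence gcd(0, 0) == 0
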
24 changes: 24 additions & 0 deletions aten/src/ATen/native/cpu/BinaryOpsKernel.cpp
@@ -9,6 +9,7 @@
#include <ATen/cpu/vec256/vec256.h>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/cpu/Loops.h>
#include <ATen/native/Math.h>
#include <c10/macros/Macros.h>

namespace at {
@@ -715,6 +716,27 @@ void logaddexp2_kernel(TensorIterator& iter) {
  });
}

void gcd_kernel(TensorIterator& iter) {
  AT_DISPATCH_INTEGRAL_TYPES(iter.dtype(), "gcd_cpu", [&]() {
    cpu_kernel(
      iter,
      [](scalar_t a, scalar_t b) -> scalar_t {
        return calc_gcd(a, b);
      });
  });
}

void lcm_kernel(TensorIterator& iter) {
  AT_DISPATCH_INTEGRAL_TYPES(iter.dtype(), "lcm_cpu", [&]() {
    cpu_kernel(
      iter,
      [](scalar_t a, scalar_t b) -> scalar_t {
        scalar_t g = calc_gcd(a, b);
        return (g == 0) ? 0 : a / g * b;
      });
  });
}

} // namespace

REGISTER_DISPATCH(add_stub, &add_kernel);
@@ -749,6 +771,8 @@ REGISTER_DISPATCH(fmod_stub, &fmod_kernel);
REGISTER_DISPATCH(fmod_scalar_stub, &fmod_scalar_kernel);
REGISTER_DISPATCH(logaddexp_stub, &logaddexp_kernel);
REGISTER_DISPATCH(logaddexp2_stub, &logaddexp2_kernel);
REGISTER_DISPATCH(gcd_stub, &gcd_kernel);
REGISTER_DISPATCH(lcm_stub, &lcm_kernel);

} // namespace native
} // namespace at
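
Note the lcm lambda computes a / g * b rather than a * b / g: dividing first keeps the intermediate small and reduces overflow risk, and the g == 0 guard defines lcm(0, 0) == 0 instead of dividing by zero. The same computation in Python, for non-negative inputs:

    import math

    def lcm(a: int, b: int) -> int:
        # Mirrors the kernel: guard lcm(0, 0) == 0; divide before multiplying.
        g = math.gcd(a, b)
        return 0 if g == 0 else a // g * b

    assert lcm(4, 6) == 12
    assert lcm(10, 0) == 0
    assert lcm(0, 0) == 0
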
21 changes: 21 additions & 0 deletions aten/src/ATen/native/cuda/BinaryMiscOpsKernels.cu
@@ -1,6 +1,7 @@
#include <ATen/Dispatch.h>
#include <ATen/native/DispatchStub.h>
#include <ATen/native/cuda/Loops.cuh>
#include <ATen/native/cuda/Math.cuh>
#include <ATen/native/TensorIterator.h>
#include <ATen/native/BinaryOps.h>

@@ -67,10 +68,30 @@ void logaddexp2_kernel_cuda(TensorIterator& iter) {
  });
}

void gcd_kernel_cuda(TensorIterator& iter) {
  AT_DISPATCH_INTEGRAL_TYPES(iter.dtype(), "gcd_cuda", [&]() {
    gpu_kernel(iter, [] GPU_LAMBDA (scalar_t a, scalar_t b) -> scalar_t {
      return calc_gcd(a, b);
    });
  });
}

void lcm_kernel_cuda(TensorIterator& iter) {
  AT_DISPATCH_INTEGRAL_TYPES(iter.dtype(), "lcm_cuda", [&]() {
    gpu_kernel(iter, [] GPU_LAMBDA (scalar_t a, scalar_t b) -> scalar_t {
      scalar_t g = calc_gcd(a, b);
      return (g == 0) ? 0 : a / g * b;
    });
  });
}


REGISTER_DISPATCH(atan2_stub, &atan2_kernel_cuda);
REGISTER_DISPATCH(smooth_l1_stub, &smooth_l1_kernel_cuda);
REGISTER_DISPATCH(mse_stub, &mse_kernel_cuda);
REGISTER_DISPATCH(logaddexp_stub, &logaddexp_kernel_cuda);
REGISTER_DISPATCH(logaddexp2_stub, &logaddexp2_kernel_cuda);
REGISTER_DISPATCH(gcd_stub, &gcd_kernel_cuda);
REGISTER_DISPATCH(lcm_stub, &lcm_kernel_cuda);

}} // namespace at::native
13 changes: 13 additions & 0 deletions aten/src/ATen/native/cuda/Math.cuh
@@ -1,5 +1,6 @@
#pragma once
#include <ATen/AccumulateType.h>
#include <c10/macros/Macros.h>

namespace at {
namespace native {
@@ -88,5 +89,17 @@ static inline __host__ __device__ scalar_t calc_trigamma(scalar_t in) {
  return static_cast<scalar_t>(sign * result);
}

template <typename scalar_t>
static inline C10_HOST_DEVICE scalar_t calc_gcd(scalar_t a_in, scalar_t b_in) {
  scalar_t a = ::abs(a_in);
  scalar_t b = ::abs(b_in);
  while (a != 0) {
    scalar_t c = a;
    a = b % a;
    b = c;
  }
  return b;
}

}
}
18 changes: 18 additions & 0 deletions aten/src/ATen/native/native_functions.yaml
@@ -1314,6 +1314,24 @@
  dispatch:
    CPU: from_file

- func: gcd.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)

- func: gcd(Tensor self, Tensor other) -> Tensor
  use_c10_dispatcher: full
  variants: function, method

- func: gcd_(Tensor(a!) self, Tensor other) -> Tensor(a!)
  variants: function, method

- func: lcm.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)

- func: lcm(Tensor self, Tensor other) -> Tensor
  use_c10_dispatcher: full
  variants: function, method

- func: lcm_(Tensor(a!) self, Tensor other) -> Tensor(a!)
  variants: function, method

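In the schema above, variants: function, method generates both the torch.* function and the Tensor method, and the Tensor(a!) annotation marks an argument that is written in place. The six entries therefore yield all of the following spellings (sketch, assuming this commit):

    torch.gcd(a, b)           # gcd: function variant
    a.gcd(b)                  # gcd: method variant
    torch.gcd(a, b, out=out)  # gcd.out: out is annotated Tensor(a!)
    a.gcd_(b)                 # gcd_: self is annotated Tensor(a!)
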
# NOTE [ grid_sampler Native Functions ]
# `grid_sampler` does all the shape checking and then dispatches to one of
# `cudnn_grid_sampler`, `grid_sampler_2d`, or `grid_sampler_3d`, each of which
4 changes: 4 additions & 0 deletions docs/source/tensors.rst
@@ -308,6 +308,8 @@ view of a storage and defines numeric operations on it.
   .. automethod:: frac
   .. automethod:: frac_
   .. automethod:: gather
   .. automethod:: gcd
   .. automethod:: gcd_
   .. automethod:: ge
   .. automethod:: ge_
   .. automethod:: geometric_
@@ -351,6 +353,8 @@ view of a storage and defines numeric operations on it.
   .. automethod:: istft
   .. automethod:: item
   .. automethod:: kthvalue
   .. automethod:: lcm
   .. automethod:: lcm_
   .. automethod:: le
   .. automethod:: le_
   .. automethod:: lerp
2 changes: 2 additions & 0 deletions docs/source/torch.rst
@@ -409,8 +409,10 @@ Other Operations
    fliplr
    flipud
    rot90
    gcd
    histc
    meshgrid
    lcm
    logcumsumexp
    renorm
    repeat_interleave
26 changes: 26 additions & 0 deletions test/test_torch.py
@@ -16922,6 +16922,26 @@ def test_remainder_edge_cases(self, device, dtype):
        r = a.remainder(b)
        self.assertEqual(r.dtype, a.dtype)

    @onlyOnCPUAndCUDA
    @dtypes(torch.int16, torch.int32, torch.int64)
    @unittest.skipIf(not TEST_NUMPY, "NumPy not found")
    def test_gcd_edge_cases(self, device, dtype):
        t1 = torch.tensor([0, 10, 0], dtype=dtype, device=device)
        t2 = torch.tensor([0, 0, 10], dtype=dtype, device=device)
        actual = torch.gcd(t1, t2)
        expected = np.gcd([0, 10, 0], [0, 0, 10])
        self.assertEqual(actual, expected)

    @onlyOnCPUAndCUDA
    @dtypes(torch.int16, torch.int32, torch.int64)
    @unittest.skipIf(not TEST_NUMPY, "NumPy not found")
    def test_lcm_edge_cases(self, device, dtype):
        t1 = torch.tensor([0, 10, 0], dtype=dtype, device=device)
        t2 = torch.tensor([0, 0, 10], dtype=dtype, device=device)
        actual = torch.lcm(t1, t2)
        expected = np.lcm([0, 10, 0], [0, 0, 10])
        self.assertEqual(actual, expected)

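Both tests pin the zero-operand conventions to NumPy's: gcd(0, 0) == 0, gcd(0, n) == n, and lcm with any zero operand is 0. Concretely, the expected reference values are:

    import numpy as np

    assert list(np.gcd([0, 10, 0], [0, 0, 10])) == [0, 10, 10]
    assert list(np.lcm([0, 10, 0], [0, 0, 10])) == [0, 0, 0]
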
    @slowTest
    @onlyOnCPUAndCUDA
    @dtypes(torch.float32, torch.float64, torch.bfloat16, torch.int32, torch.int64, torch.cfloat, torch.cdouble)
@@ -19207,6 +19227,12 @@ def inner(self, device, dtype):
    ('expand_as', '', _new_t((_M, 1, _M)), lambda t, d: [_new_t((_M, 4, _M))(t, d)],
        1e-5, 1e-5, 1e-5, _types, _cpu_types, False),
    ('fill_', '', _medium_2d, lambda t, d: [_number(3.14, 3, t)], 1e-3, 1e-5, 1e-5, _types, _cpu_types, False),
    ('gcd', '', _small_3d, lambda t, d: [_small_3d(t, d)], 0, 0, 0,
        [torch.int16, torch.int32, torch.int64],
        [torch.int16, torch.int32, torch.int64], True, [onlyOnCPUAndCUDA]),
    ('lcm', '', _small_3d, lambda t, d: [_small_3d(t, d)], 0, 0, 0,
        [torch.int16, torch.int32, torch.int64],
        [torch.int16, torch.int32, torch.int64], True, [onlyOnCPUAndCUDA]),
    ('ge', '', _medium_2d, lambda t, d: [_medium_2d(t, d)], 1e-5, 1e-5, 1e-5, _types2),
    ('le', '', _medium_2d, lambda t, d: [_medium_2d(t, d)], 1e-5, 1e-5, 1e-5, _types2),
    ('gt', '', _medium_2d, lambda t, d: [_medium_2d(t, d)], 1e-5, 1e-5, 1e-5, _types2),
2 changes: 2 additions & 0 deletions torch/_overrides.py
@@ -310,6 +310,7 @@ def get_testing_overrides():
        torch.full_like: lambda input, fill_value, out=None, dtype=None, layout=torch.strided, device=None, requires_grad=False: -1,
        torch.functional.lu_unpack: lambda LU_data, LU_pivots, unpack_data=True, unpack_pivots=True: -1,
        torch.gather: lambda input, dim, index, out=None, sparse_grad=False: -1,
        torch.gcd: lambda input, other, out=None: -1,
        torch.ge: lambda input, other, out=None: -1,
        torch.geqrf: lambda input, out=None: -1,
        torch.ger: lambda input, vec2, out=None: -1,
@@ -351,6 +352,7 @@
        torch.kl_div: lambda input, target, size_average=None, reduce=None, reduction='mean', log_target=False: -1,
        torch.kthvalue: lambda input, k, dim=None, keepdim=False, out=None: -1,
        torch.layer_norm: lambda input, normalized_shape, weight=None, bias=None, esp=1e-05, cudnn_enabled=True: -1,
        torch.lcm: lambda input, other, out=None: -1,
        torch.le: lambda input, other, out=None: -1,
        torch.lerp: lambda input, end, weight, out=None: -1,
        torch.lgamma: lambda input, out=None: -1,
28 changes: 28 additions & 0 deletions torch/_tensor_docs.py
@@ -1320,6 +1320,20 @@ def add_docstr_all(method, docstr):
See :func:`torch.gather`
""")

add_docstr_all('gcd',
               r"""
gcd(other) -> Tensor

See :func:`torch.gcd`
""")

add_docstr_all('gcd_',
               r"""
gcd_(other) -> Tensor

In-place version of :meth:`~Tensor.gcd`
""")

add_docstr_all('ge',
               r"""
ge(other) -> Tensor
@@ -1706,6 +1720,20 @@ def add_docstr_all(method, docstr):
See :func:`torch.kthvalue`
""")

add_docstr_all('lcm',
               r"""
lcm(other) -> Tensor

See :func:`torch.lcm`
""")

add_docstr_all('lcm_',
               r"""
lcm_(other) -> Tensor

In-place version of :meth:`~Tensor.lcm`
""")

add_docstr_all('le',
               r"""
le(other) -> Tensor