diff --git a/oneflow/python/nn/modules/masked_fill.py b/oneflow/python/nn/modules/masked_fill.py
index 28b00766ac7c97a45b84aef091611017f8f6e0a2..19701c42a0f4ac4aad83186772a2332224238f61 100644
--- a/oneflow/python/nn/modules/masked_fill.py
+++ b/oneflow/python/nn/modules/masked_fill.py
@@ -34,7 +34,7 @@ class MaskedFill(Module):
 @oneflow_export("masked_fill")
 @register_tensor_op("masked_fill")
 @experimental_api
-def masked_fill_op(tensor, mask, value):
+def masked_fill_op(input, mask, value):
     r"""
     Fills elements of :attr:`self` tensor with :attr:`value` where :attr:`mask` is True.
     The shape of :attr:`mask` must be broadcastable with the shape of the underlying tensor.
@@ -72,7 +72,7 @@ def masked_fill_op(tensor, mask, value):
         #         [-1.9009,  8.7654,  8.7654,  8.7654]]], dtype=oneflow.float32)

     """
-    return MaskedFill(value)(tensor, mask)
+    return MaskedFill(value)(input, mask)


 if __name__ == "__main__":
diff --git a/oneflow/python/nn/modules/math_ops.py b/oneflow/python/nn/modules/math_ops.py
index 0d8296a199247e67dbaa0f2080528f5ae97ef9ad..b7ad7edeb4539c9a1f013ee6ef42094080d05b0b 100644
--- a/oneflow/python/nn/modules/math_ops.py
+++ b/oneflow/python/nn/modules/math_ops.py
@@ -66,13 +66,13 @@ class BroadcastMul(Module):
 @oneflow_export("mul")
 @register_tensor_op("mul")
 @experimental_api
-def _mul(x, y):
-    r"""Computes the multiplication of x by y for each element, scalar and broadcast promotation are supported.
+def _mul(input, other):
+    r"""Computes the multiplication of input by other for each element; scalar and broadcast promotion are supported.

     The formula is:

     .. math::
-        out = x \times y
+        out = input \times other

     For example:
@@ -83,40 +83,40 @@ def _mul(x, y):
     .. code-block:: python

         >>> import oneflow.experimental as flow
         >>> import numpy as np
         >>> flow.enable_eager_execution()

         # element-wise multiply
-        >>> x = flow.Tensor(np.random.randn(2,3))
-        >>> y = flow.Tensor(np.random.randn(2,3))
-        >>> out = flow.mul(x,y).numpy()
+        >>> input = flow.Tensor(np.random.randn(2,3))
+        >>> other = flow.Tensor(np.random.randn(2,3))
+        >>> out = flow.mul(input,other).numpy()
         >>> out.shape
         (2, 3)

         # scalar mutiply
-        >>> x = 5
-        >>> y = flow.Tensor(np.random.randn(2,3))
-        >>> out = flow.mul(x,y).numpy()
+        >>> input = 5
+        >>> other = flow.Tensor(np.random.randn(2,3))
+        >>> out = flow.mul(input,other).numpy()
         >>> out.shape
         (2, 3)

         # broadcast mutiply
-        >>> x = flow.Tensor(np.random.randn(1,1))
-        >>> y = flow.Tensor(np.random.randn(2,3))
-        >>> out = flow.mul(x,y).numpy()
+        >>> input = flow.Tensor(np.random.randn(1,1))
+        >>> other = flow.Tensor(np.random.randn(2,3))
+        >>> out = flow.mul(input,other).numpy()
         >>> out.shape
         (2, 3)

     """
-    if isinstance(x, (int, float)):
-        return ScalarMul(x)(y)
-    elif isinstance(y, (int, float)):
-        return ScalarMul(y)(x)
-    elif x.shape == y.shape:
-        return ElementwiseMul()(x, y)
-    elif x.shape == (1,):
-        return ScalarMulByTensor()(y, x)
-    elif y.shape == (1,):
-        return ScalarMulByTensor()(x, y)
+    if isinstance(input, (int, float)):
+        return ScalarMul(input)(other)
+    elif isinstance(other, (int, float)):
+        return ScalarMul(other)(input)
+    elif input.shape == other.shape:
+        return ElementwiseMul()(input, other)
+    elif input.shape == (1,):
+        return ScalarMulByTensor()(other, input)
+    elif other.shape == (1,):
+        return ScalarMulByTensor()(input, other)
     else:
-        return BroadcastMul()(x, y)
+        return BroadcastMul()(input, other)


 class Variance(Module):
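The dispatch chain in `_mul` above resolves operands in a fixed order: Python scalars take the `ScalarMul` path, equal shapes the `ElementwiseMul` path, shape-`(1,)` tensors the `ScalarMulByTensor` path, and anything else falls through to `BroadcastMul`. A minimal NumPy sketch of that decision order (illustrative only; `mul_dispatch` is a hypothetical name, and NumPy's `*` stands in for all of the OneFlow modules):

    import numpy as np

    def mul_dispatch(input, other):
        if isinstance(input, (int, float)):
            return np.multiply(other, input)   # ScalarMul path
        elif isinstance(other, (int, float)):
            return np.multiply(input, other)   # ScalarMul path
        elif input.shape == other.shape:
            return input * other               # ElementwiseMul path
        elif input.shape == (1,) or other.shape == (1,):
            return input * other               # ScalarMulByTensor path
        else:
            return input * other               # BroadcastMul path

    a = np.random.randn(2, 3)
    assert np.allclose(mul_dispatch(5, a), 5 * a)
    assert np.allclose(mul_dispatch(a, a), a * a)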
@@ -207,12 +207,12 @@ class ScalarAdd(Module):
 @oneflow_export("sub")
 @register_tensor_op("sub")
 @experimental_api
-def _sub(x, y):
-    r"""Computes the subtraction of x by y for each element, scalar and broadcast promotation are supported.
+def _sub(input, other):
+    r"""Computes the subtraction of input by other for each element; scalar and broadcast promotion are supported.

     The formula is:

     .. math::
-        out = x - y
+        out = input - other

     For example:
@@ -223,39 +223,39 @@ def _sub(x, y):
     .. code-block:: python

         >>> import oneflow.experimental as flow
         >>> import numpy as np
         >>> flow.enable_eager_execution()

         # element-wise subtract
-        >>> x = flow.Tensor(np.random.randn(2,3))
-        >>> y = flow.Tensor(np.random.randn(2,3))
-        >>> out = flow.sub(x,y).numpy()
+        >>> input = flow.Tensor(np.random.randn(2,3))
+        >>> other = flow.Tensor(np.random.randn(2,3))
+        >>> out = flow.sub(input,other).numpy()
         >>> out.shape
         (2, 3)

         # scalar subtract
-        >>> x = 5
-        >>> y = flow.Tensor(np.random.randn(2,3))
-        >>> out = flow.sub(x,y).numpy()
+        >>> input = 5
+        >>> other = flow.Tensor(np.random.randn(2,3))
+        >>> out = flow.sub(input,other).numpy()
         >>> out.shape
         (2, 3)

         # broadcast subtract
-        >>> x = flow.Tensor(np.random.randn(1,1))
-        >>> y = flow.Tensor(np.random.randn(2,3))
-        >>> out = flow.sub(x,y).numpy()
+        >>> input = flow.Tensor(np.random.randn(1,1))
+        >>> other = flow.Tensor(np.random.randn(2,3))
+        >>> out = flow.sub(input,other).numpy()
         >>> out.shape
         (2, 3)

     """
-    if isinstance(x, (int, float)):
-        return ScalarAdd(x)(ScalarMul(-1)(y))
-    elif isinstance(y, (int, float)):
-        return ScalarAdd(-1 * y)(x)
-    elif x.shape == y.shape:
+    if isinstance(input, (int, float)):
+        return ScalarAdd(input)(ScalarMul(-1)(other))
+    elif isinstance(other, (int, float)):
+        return ScalarAdd(-1 * other)(input)
+    elif input.shape == other.shape:
         # TODO: add element-wise op
-        return BroadcastSub()(x, y)
-    elif y.shape == (1,):
-        return ScalarSubByTensor()(x, y)
+        return BroadcastSub()(input, other)
+    elif other.shape == (1,):
+        return ScalarSubByTensor()(input, other)
     else:
-        return BroadcastSub()(x, y)
+        return BroadcastSub()(input, other)


 class BroadcastDiv(Module):
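Note that `_sub` has no dedicated scalar-subtract kernel; the scalar branches are rewritten in terms of `ScalarAdd` and `ScalarMul`: `scalar - tensor` becomes `scalar + (-1) * tensor`, and `tensor - scalar` becomes `tensor + (-scalar)`. A quick NumPy check of both identities (a sketch, not part of the patch):

    import numpy as np

    t = np.random.randn(2, 3)
    s = 5.0

    # scalar - tensor == ScalarAdd(s)(ScalarMul(-1)(t))
    assert np.allclose(s - t, s + (-1) * t)
    # tensor - scalar == ScalarAdd(-1 * s)(t)
    assert np.allclose(t - s, t + (-1 * s))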
@@ -277,16 +277,16 @@ class ScalarDivByTensor(Module):
 @oneflow_export("div")
 @register_tensor_op("div")
 @experimental_api
-def _div(x, y):
-    r"""Computes the division of x by y for each element, scalar and broadcast promotation are supported.
+def _div(input, other):
+    r"""Computes the division of input by other for each element; scalar and broadcast promotion are supported.

     The formula is:

     .. math::
-        out = \frac{X}{Y}
+        out = \frac{input}{other}

     Args:
-        x (Union[int, float, flow.Tensor]): X.
-        y (Union[int, float, flow.Tensor]): Y.
+        input (Union[int, float, flow.Tensor]): input.
+        other (Union[int, float, flow.Tensor]): other.

     For example:
@@ -297,42 +297,42 @@ def _div(x, y):
     .. code-block:: python

         >>> import oneflow.experimental as flow
         >>> import numpy as np
         >>> flow.enable_eager_execution()

         # element-wise divide
-        >>> x = flow.Tensor(np.random.randn(2,3))
-        >>> y = flow.Tensor(np.random.randn(2,3))
-        >>> out = flow.div(x,y).numpy()
+        >>> input = flow.Tensor(np.random.randn(2,3))
+        >>> other = flow.Tensor(np.random.randn(2,3))
+        >>> out = flow.div(input,other).numpy()
         >>> out.shape
         (2, 3)

         # scalar divide
-        >>> x = 5
-        >>> y = flow.Tensor(np.random.randn(2,3))
-        >>> out = flow.div(x,y).numpy()
+        >>> input = 5
+        >>> other = flow.Tensor(np.random.randn(2,3))
+        >>> out = flow.div(input,other).numpy()
         >>> out.shape
         (2, 3)

         # broadcast divide
-        >>> x = flow.Tensor(np.random.randn(1,1))
-        >>> y = flow.Tensor(np.random.randn(2,3))
-        >>> out = flow.div(x,y).numpy()
+        >>> input = flow.Tensor(np.random.randn(1,1))
+        >>> other = flow.Tensor(np.random.randn(2,3))
+        >>> out = flow.div(input,other).numpy()
         >>> out.shape
         (2, 3)

     """
-    if isinstance(x, (int, float)):
-        return ScalarMul(x)(flow.experimental.reciprocal(y))
-    elif isinstance(y, (int, float)):
-        if y == 0 or y == 0.0:
-            y = 0.0
+    if isinstance(input, (int, float)):
+        return ScalarMul(input)(flow.experimental.reciprocal(other))
+    elif isinstance(other, (int, float)):
+        if other == 0 or other == 0.0:
+            other = 0.0
         else:
-            y = 1.0 / (float(y))
-        return ScalarMul(y)(x)
-    elif x.shape == y.shape:
-        return BroadcastDiv()(x, y)
-    elif y.shape == (1,):
-        return ScalarDivByTensor()(x, y)
+            other = 1.0 / (float(other))
+        return ScalarMul(other)(input)
+    elif input.shape == other.shape:
+        return BroadcastDiv()(input, other)
+    elif other.shape == (1,):
+        return ScalarDivByTensor()(input, other)
     else:
-        return BroadcastDiv()(x, y)
+        return BroadcastDiv()(input, other)


 class Reciprocal(Module):
diff --git a/oneflow/python/test/modules/test_div.py b/oneflow/python/test/modules/test_div.py
index ac39f0e196b1f5172faa4e121f4ec5310a6f3b81..078ddc9a780018d76f3f0af36cc6833bc7e4e7cd 100644
--- a/oneflow/python/test/modules/test_div.py
+++ b/oneflow/python/test/modules/test_div.py
@@ -20,6 +20,7 @@ import numpy as np
 import oneflow.experimental as flow
 from test_util import GenArgList
+from automated_test_util import *


 def _test_div_impl(test_case, shape, device):
@@ -78,6 +79,34 @@ class TestDiv(flow.unittest.TestCase):
         for arg in GenArgList(arg_dict):
             _test_div_impl(test_case, *arg)

+    def test_div_against_pytorch(test_case):
+        arg_dict = OrderedDict()
+        arg_dict["test_type"] = [test_flow_against_pytorch, test_tensor_against_pytorch]
+        arg_dict["device"] = ["cpu", "cuda"]
+        arg_dict["op"] = ["div"]
+        for arg in GenArgList(arg_dict):
+            arg[0](
+                test_case,
+                arg[2],
+                extra_annotations={"other": flow.Tensor},
+                extra_generators={
+                    "input": random_tensor(ndim=2, dim0=2, dim1=3),
+                    "other": random_tensor(ndim=2, dim0=2, dim1=3),
+                },
+                device=arg[1],
+            )
+
+            arg[0](
+                test_case,
+                arg[2],
+                extra_annotations={"other": float},
+                extra_generators={
+                    "input": random_tensor(ndim=2, dim0=2, dim1=3),
+                    "other": random(0, 5),
+                },
+                device=arg[1],
+            )
+

 if __name__ == "__main__":
     unittest.main()
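`_div` follows the same reduction strategy: scalar-over-tensor is `ScalarMul` of the tensor's reciprocal, and tensor-over-scalar multiplies by `1.0 / other`, with a divisor of exactly zero mapped to a `0.0` multiplier (so the result is all zeros rather than inf/nan). A NumPy sketch of the tensor-over-scalar branch (`div_by_scalar` is a hypothetical name):

    import numpy as np

    def div_by_scalar(input, other):
        # A zero divisor becomes a 0.0 multiplier, so the result is all
        # zeros instead of inf/nan.
        factor = 0.0 if other == 0 else 1.0 / float(other)
        return input * factor

    t = np.random.randn(2, 3)
    assert np.allclose(div_by_scalar(t, 4), t / 4)
    assert np.allclose(div_by_scalar(t, 0), np.zeros_like(t))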
""" import unittest -from collections import OrderedDict import numpy as np import oneflow.experimental as flow -from test_util import GenArgList - - -def _test_masked_fill(test_case, device): - input_arr = np.array( - [ - [ - [-0.13169311, 0.97277078, 1.23305363, 1.56752789], - [-1.51954275, 1.87629473, -0.53301206, 0.53006478], - [-1.38244183, -2.63448052, 1.30845795, -0.67144869], - ], - [ - [0.41502161, 0.14452418, 0.38968, -1.76905653], - [0.34675095, -0.7050969, -0.7647731, -0.73233418], - [-1.90089858, 0.01262963, 0.74693893, 0.57132389], - ], - ] - ) - - output = np.array( - [ - [ - [-0.1316931, 8.7654321, 8.7654321, 8.7654321], - [-1.5195428, 8.7654321, -0.5330121, 8.7654321], - [-1.3824418, -2.6344805, 8.7654321, -0.6714487], - ], - [ - [8.7654321, 8.7654321, 8.7654321, -1.7690565], - [8.7654321, -0.7050969, -0.7647731, -0.7323342], - [-1.9008986, 8.7654321, 8.7654321, 8.7654321], - ], - ] - ) - - fill_value = 8.7654321 # random value e.g. -1e9 3.14 - - input = flow.Tensor(input_arr, dtype=flow.float32, device=flow.device(device)) - mask = flow.Tensor( - (input_arr > 0).astype(np.int8), dtype=flow.int, device=flow.device(device) - ) - of_out = flow.masked_fill(input, mask, value=fill_value) - test_case.assertTrue(np.allclose(of_out.numpy(), output)) - - -def _test_masked_fill_backward(test_case, device): - input_arr = np.array( - [ - [ - [-0.13169311, 0.97277078, 1.23305363, 1.56752789], - [-1.51954275, 1.87629473, -0.53301206, 0.53006478], - [-1.38244183, -2.63448052, 1.30845795, -0.67144869], - ], - [ - [0.41502161, 0.14452418, 0.38968, -1.76905653], - [0.34675095, -0.7050969, -0.7647731, -0.73233418], - [-1.90089858, 0.01262963, 0.74693893, 0.57132389], - ], - ] - ) - - fill_value = -3.1415 # random value e.g. -1e9 3.14 - - x = flow.Tensor( - input_arr, dtype=flow.float32, device=flow.device(device), requires_grad=True - ) - mask = flow.Tensor( - (input_arr > 0).astype(np.int8), dtype=flow.int, device=flow.device(device) - ) - y = flow.masked_fill(x, mask, value=fill_value) - z = y.sum() - z.backward() - test_case.assertTrue( - np.array_equal(x.grad.numpy(), (input_arr < 0).astype(np.float32)) - ) +from automated_test_util import * @unittest.skipIf( @@ -100,12 +26,45 @@ def _test_masked_fill_backward(test_case, device): ".numpy() doesn't work in lazy mode", ) class TestMaskedFill(flow.unittest.TestCase): - def test_masked_fill(test_case): - arg_dict = OrderedDict() - arg_dict["test_fun"] = [_test_masked_fill, _test_masked_fill_backward] - arg_dict["device"] = ["cpu", "cuda"] - for arg in GenArgList(arg_dict): - arg[0](test_case, *arg[1:]) + def test_masked_fill_aginst_pytorch(test_case): + import numpy as np + import torch + + def mask_tensor(shape): + def generator(_): + rng = np.random.default_rng() + np_arr = rng.integers(low=0, high=2, size=shape) + return ( + flow.Tensor(np_arr, dtype=flow.int8), + torch.tensor(np_arr, dtype=torch.bool), + ) + + return generator + + for device in ["cpu", "cuda"]: + test_flow_against_pytorch( + test_case, + "masked_fill", + extra_annotations={"mask": flow.Tensor, "value": float}, + extra_generators={ + "input": random_tensor(ndim=2, dim0=4, dim1=5), + "mask": mask_tensor((4, 5)), + "value": constant(3.14), + }, + device=device, + ) + + test_tensor_against_pytorch( + test_case, + "masked_fill", + extra_annotations={"mask": flow.Tensor, "value": float}, + extra_generators={ + "input": random_tensor(ndim=2, dim0=4, dim1=5), + "mask": mask_tensor((4, 5)), + "value": constant(3.14), + }, + device=device, + ) if __name__ == "__main__": diff 
diff --git a/oneflow/python/test/modules/test_mean.py b/oneflow/python/test/modules/test_mean.py
index 0d5e2f7c3537698dbb8983e578000f1f21f16cd6..7b8fe7e05122383644be2acdbc61838208532996 100644
--- a/oneflow/python/test/modules/test_mean.py
+++ b/oneflow/python/test/modules/test_mean.py
@@ -20,6 +20,7 @@ import numpy as np
 import oneflow.experimental as flow
 from test_util import GenArgList
+from automated_test_util import *


 def _test_mean(test_case, shape, device):
@@ -79,6 +80,13 @@ class TestMean(flow.unittest.TestCase):
         for arg in GenArgList(arg_dict):
             arg[0](test_case, *arg[1:])

+    def test_mean_against_pytorch(test_case):
+        arg_dict = OrderedDict()
+        arg_dict["test_type"] = [test_flow_against_pytorch, test_tensor_against_pytorch]
+        arg_dict["device"] = ["cpu", "cuda"]
+        for arg in GenArgList(arg_dict):
+            arg[0](test_case, "mean", device=arg[1])
+

 if __name__ == "__main__":
     unittest.main()
diff --git a/oneflow/python/test/modules/test_mul.py b/oneflow/python/test/modules/test_mul.py
index 3f85b4d709a55475308504b5f6a5df7969534fe8..8fd3372b49f6cc0fd32bb9a1e4f992440ddcfe1e 100644
--- a/oneflow/python/test/modules/test_mul.py
+++ b/oneflow/python/test/modules/test_mul.py
@@ -20,6 +20,7 @@ import numpy as np
 import oneflow.experimental as flow
 from test_util import GenArgList
+from automated_test_util import *


 def _test_mul_impl(test_case, device):
@@ -111,6 +112,34 @@ class TestMulModule(flow.unittest.TestCase):
         for arg in GenArgList(arg_dict):
             arg[0](test_case, *arg[1:])

+    def test_mul_against_pytorch(test_case):
+        arg_dict = OrderedDict()
+        arg_dict["test_type"] = [test_flow_against_pytorch, test_tensor_against_pytorch]
+        arg_dict["device"] = ["cpu", "cuda"]
+        arg_dict["op"] = ["mul"]
+        for arg in GenArgList(arg_dict):
+            arg[0](
+                test_case,
+                arg[2],
+                extra_annotations={"other": flow.Tensor},
+                extra_generators={
+                    "input": random_tensor(ndim=2, dim0=2, dim1=3),
+                    "other": random_tensor(ndim=2, dim0=2, dim1=3),
+                },
+                device=arg[1],
+            )
+
+            arg[0](
+                test_case,
+                arg[2],
+                extra_annotations={"other": float},
+                extra_generators={
+                    "input": random_tensor(ndim=2, dim0=2, dim1=3),
+                    "other": random(0, 5),
+                },
+                device=arg[1],
+            )
+

 if __name__ == "__main__":
     unittest.main()
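Each of these `*_against_pytorch` tests expands its `OrderedDict` through `GenArgList`, so every combination of test type, device, and (for the binary ops) op name runs exactly once. `GenArgList` behaves roughly like the cartesian product below (a sketch, not `test_util`'s actual code):

    import itertools
    from collections import OrderedDict

    def gen_arg_list(arg_dict):
        # Cartesian product of the dict's value lists, in insertion order.
        return [list(combo) for combo in itertools.product(*arg_dict.values())]

    arg_dict = OrderedDict()
    arg_dict["device"] = ["cpu", "cuda"]
    arg_dict["op"] = ["mul", "sub", "div"]
    assert len(gen_arg_list(arg_dict)) == 6  # 2 devices x 3 ops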
diff --git a/oneflow/python/test/modules/test_reduce_ops.py b/oneflow/python/test/modules/test_reduce_ops.py
index ee3088aa0fc72c435f774dd53babd76c58f1b420..6bf965d212697623011449243b9cd7d823403fff 100644
--- a/oneflow/python/test/modules/test_reduce_ops.py
+++ b/oneflow/python/test/modules/test_reduce_ops.py
@@ -20,6 +20,7 @@ import numpy as np
 import oneflow.experimental as flow
 from test_util import GenArgList
+from automated_test_util import *


 def _test_min(test_case, device, shape, dim, keepdims):
@@ -71,7 +72,7 @@ def _test_min_tensor_function(test_case, device, shape, dim, keepdims):
     ".numpy() doesn't work in lazy mode",
 )
 class TestMinModule(flow.unittest.TestCase):
-    def test_max(test_case):
+    def test_min(test_case):
         arg_dict = OrderedDict()
         arg_dict["test_fun"] = [_test_min, _test_min_tensor_function]
         arg_dict["device"] = ["cpu", "cuda"]
@@ -81,6 +82,13 @@ class TestMinModule(flow.unittest.TestCase):
         for arg in GenArgList(arg_dict):
             arg[0](test_case, *arg[1:])

+    def test_min_against_pytorch(test_case):
+        arg_dict = OrderedDict()
+        arg_dict["test_type"] = [test_flow_against_pytorch, test_tensor_against_pytorch]
+        arg_dict["device"] = ["cpu", "cuda"]
+        for arg in GenArgList(arg_dict):
+            arg[0](test_case, "min", device=arg[1])
+

 def _test_max(test_case, device, shape, dim, keepdims):
     input_arr = np.random.randn(*shape)
@@ -141,6 +149,13 @@ class TestMaxModule(flow.unittest.TestCase):
         for arg in GenArgList(arg_dict):
             arg[0](test_case, *arg[1:])

+    def test_max_against_pytorch(test_case):
+        arg_dict = OrderedDict()
+        arg_dict["test_type"] = [test_flow_against_pytorch, test_tensor_against_pytorch]
+        arg_dict["device"] = ["cpu", "cuda"]
+        for arg in GenArgList(arg_dict):
+            arg[0](test_case, "max", device=arg[1])
+

 if __name__ == "__main__":
     unittest.main()
diff --git a/oneflow/python/test/modules/test_sub.py b/oneflow/python/test/modules/test_sub.py
index 8792e266b2890309ce3fa721ecc05534d71793ad..f2c3ecd6445413affb8e25218502485261d9f26c 100644
--- a/oneflow/python/test/modules/test_sub.py
+++ b/oneflow/python/test/modules/test_sub.py
@@ -20,6 +20,7 @@ import numpy as np
 import oneflow.experimental as flow
 from test_util import GenArgList
+from automated_test_util import *


 def _test_sub_impl(test_case, shape, device):
@@ -88,6 +89,34 @@ class TestSubModule(flow.unittest.TestCase):
         for arg in GenArgList(arg_dict):
             _test_sub_impl(test_case, *arg)

+    def test_sub_against_pytorch(test_case):
+        arg_dict = OrderedDict()
+        arg_dict["test_type"] = [test_flow_against_pytorch, test_tensor_against_pytorch]
+        arg_dict["device"] = ["cpu", "cuda"]
+        arg_dict["op"] = ["sub"]
+        for arg in GenArgList(arg_dict):
+            arg[0](
+                test_case,
+                arg[2],
+                extra_annotations={"other": flow.Tensor},
+                extra_generators={
+                    "input": random_tensor(ndim=2, dim0=2, dim1=3),
+                    "other": random_tensor(ndim=2, dim0=2, dim1=3),
+                },
+                device=arg[1],
+            )
+
+            arg[0](
+                test_case,
+                arg[2],
+                extra_annotations={"other": float},
+                extra_generators={
+                    "input": random_tensor(ndim=2, dim0=2, dim1=3),
+                    "other": random(0, 5),
+                },
+                device=arg[1],
+            )
+

 if __name__ == "__main__":
     unittest.main()
diff --git a/oneflow/python/test/modules/test_sum.py b/oneflow/python/test/modules/test_sum.py
index d8786add97d236200dd6540f3b49ec6bffb6aec1..a82efe65331f52268dcdbc28a08b822943098616 100644
--- a/oneflow/python/test/modules/test_sum.py
+++ b/oneflow/python/test/modules/test_sum.py
@@ -20,6 +20,7 @@ import numpy as np
 import oneflow.experimental as flow
 from test_util import GenArgList
+from automated_test_util import *


 def _test_sum_impl(test_case, device):
@@ -72,6 +73,13 @@ class TestSumModule(flow.unittest.TestCase):
         for arg in GenArgList(arg_dict):
             _test_sum_impl(test_case, *arg)

+    def test_sum_against_pytorch(test_case):
+        arg_dict = OrderedDict()
+        arg_dict["test_type"] = [test_flow_against_pytorch, test_tensor_against_pytorch]
+        arg_dict["device"] = ["cpu", "cuda"]
+        for arg in GenArgList(arg_dict):
+            arg[0](test_case, "sum", device=arg[1])
+

 if __name__ == "__main__":
     unittest.main()
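The comparison helpers imported from `automated_test_util` are used as a black box throughout. As a rough mental model only — assuming the harness draws matched inputs from the generators, invokes the same-named op in both frameworks, and compares the outputs numerically — the flow-level check looks something like this (`check_flow_against_pytorch` is a hypothetical stand-in, not the real helper):

    import numpy as np
    import torch
    import oneflow.experimental as flow

    def check_flow_against_pytorch(op_name, flow_kwargs, torch_kwargs, rtol=1e-4, atol=1e-5):
        # Hypothetical stand-in: run the same-named op in both frameworks on
        # matched inputs and compare the results numerically.
        flow_out = getattr(flow, op_name)(**flow_kwargs)
        torch_out = getattr(torch, op_name)(**torch_kwargs)
        assert np.allclose(flow_out.numpy(), torch_out.numpy(), rtol=rtol, atol=atol)

    # The input/other renames in this patch are what let the same keyword
    # arguments be passed to both frameworks.
    np_a, np_b = np.random.randn(2, 3), np.random.randn(2, 3)
    check_flow_against_pytorch(
        "mul",
        {"input": flow.Tensor(np_a), "other": flow.Tensor(np_b)},
        {"input": torch.tensor(np_a), "other": torch.tensor(np_b)},
    )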