From f76c167addb2f93c0b0b654bf5d2b0ee00c87be8 Mon Sep 17 00:00:00 2001
From: Peihong Liu <mosout@qq.com>
Date: Fri, 30 Jul 2021 19:17:49 +0800
Subject: [PATCH] add autotest for some math ops (#5646)

* add autotest for some math ops

* add autotest for some activation ops

* clean activation tests

* clean math op tests

* fix a bug

Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
---
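Note for reviewers (not part of the commit message): every converted test in the hunks
below follows the same autotest pattern. A minimal sketch of that pattern, assuming the
random(), random_device(), and random_pytorch_tensor() helpers and the dual "torch"
wrapper from oneflow's automated_test_util exactly as they appear in the hunks below;
the test name here is made up for illustration:

    @autotest()
    def test_example_module_with_random_data(test_case):
        m = torch.nn.ReLU()                      # module exercised against PyTorch
        m.train(random())                        # randomly toggle train/eval mode
        device = random_device()                 # randomly pick "cpu" or "cuda"
        m.to(device)
        x = random_pytorch_tensor().to(device)   # tensor with random shape and values
        y = m(x)
        return y                                 # returned tensor is checked by autotest
                                                 # against PyTorch (values and gradients)
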
 python/oneflow/nn/modules/math_ops.py         |   6 +-
 python/oneflow/test/modules/test_abs.py       |   2 +-
 .../oneflow/test/modules/test_activation.py   | 436 ++----------------
 python/oneflow/test/modules/test_math_ops.py  | 107 ++---
 4 files changed, 92 insertions(+), 459 deletions(-)

diff --git a/python/oneflow/nn/modules/math_ops.py b/python/oneflow/nn/modules/math_ops.py
index 936d0ebd3..fd664de6b 100644
--- a/python/oneflow/nn/modules/math_ops.py
+++ b/python/oneflow/nn/modules/math_ops.py
@@ -927,9 +927,9 @@ def square_op(input):
 
 
 class Std(Module):
-    def __init__(self, dim=None, unbiased=True, keepdim=False) -> None:
+    def __init__(self, dim=None, unbiased=False, keepdim=False) -> None:
         super().__init__()
-        assert unbiased == True, "Only support 'unbiased=True' for now!"
+        assert unbiased == False, "Only support 'unbiased=False' for now!"
         self.unbiased = unbiased
         self.keepdim = keepdim
         self.dim = dim
@@ -960,7 +960,7 @@ class Std(Module):
 
 
 @register_tensor_op("std")
-def std_op(tensor, dim, unbiased=True, keepdim=False):
+def std_op(tensor, dim, unbiased=False, keepdim=False):
     """
     Returns the standard-deviation of each row of the :attr:`input` tensor in the
     dimension :attr:`dim`. If :attr:`dim` is a list of dimensions,
diff --git a/python/oneflow/test/modules/test_abs.py b/python/oneflow/test/modules/test_abs.py
index 43ec34ae3..0b8e8fd2d 100644
--- a/python/oneflow/test/modules/test_abs.py
+++ b/python/oneflow/test/modules/test_abs.py
@@ -61,7 +61,7 @@ def _test_abs_tensor_function_backward(test_case, device):
 
 @flow.unittest.skip_unless_1n1d()
 class TestAbs(flow.unittest.TestCase):
-    def test_cosh(test_case):
+    def test_abs(test_case):
         arg_dict = OrderedDict()
         arg_dict["test_fun"] = [
             _test_abs_forward,
diff --git a/python/oneflow/test/modules/test_activation.py b/python/oneflow/test/modules/test_activation.py
index 27bf0df0d..40996c845 100644
--- a/python/oneflow/test/modules/test_activation.py
+++ b/python/oneflow/test/modules/test_activation.py
@@ -26,40 +26,8 @@ import oneflow as flow
 import oneflow.unittest
 
 
-def _test_relu_impl(test_case, shape, device):
-    np_input = np.random.randn(*shape)
-    of_input = flow.Tensor(
-        np_input, dtype=flow.float32, device=flow.device(device), requires_grad=True
-    )
-    m = flow.nn.ReLU()
-    of_out = m(of_input)
-    np_out = np.maximum(0, np_input)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
-    of_out = of_out.sum()
-    of_out.backward()
-    test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out > 0, 1e-05, 1e-05))
-    inplace_m = flow.nn.ReLU(inplace=True)
-    of_input = flow.Tensor(
-        np_input, dtype=flow.float32, device=flow.device(device), requires_grad=True
-    )
-    of_input_inplace = of_input + 1
-    inplace_m(of_input_inplace)
-    np_out = np.maximum(0, np_input + 1)
-    test_case.assertTrue(np.allclose(of_input_inplace.numpy(), np_out, 1e-05, 1e-05))
-    of_out_inplace = of_input_inplace.sum()
-    of_out_inplace.backward()
-    test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out > 0, 1e-05, 1e-05))
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestReLUModule(flow.unittest.TestCase):
-    def test_relu(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 4, 5, 6)]
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            _test_relu_impl(test_case, *arg)
-
     @autotest()
     def test_relu_module_with_random_data(test_case):
         m = torch.nn.ReLU()
@@ -71,36 +39,8 @@ class TestReLUModule(flow.unittest.TestCase):
         return y
 
 
-def _test_relu6_impl(test_case, shape, device):
-    np_input = np.random.randn(*shape)
-    of_input = flow.Tensor(
-        np_input, dtype=flow.float32, device=flow.device(device), requires_grad=True
-    )
-    m = flow.nn.ReLU6()
-    of_out = m(of_input)
-    np_out = np.minimum(np.maximum(0, np_input), 6.0)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
-    of_out = of_out.sum()
-    of_out.backward()
-    test_case.assertTrue(
-        np.allclose(
-            of_input.grad.numpy(),
-            np.where(np_input > 6, 0, np.where(np_input < 0, 0, 1)),
-            1e-05,
-            1e-05,
-        )
-    )
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestReLU6Module(flow.unittest.TestCase):
-    def test_relu6(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 4, 5, 6)]
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            _test_relu6_impl(test_case, *arg)
-
     @autotest()
     def test_relu6_module_with_random_data(test_case):
         m = torch.nn.ReLU6()
@@ -112,47 +52,8 @@ class TestReLU6Module(flow.unittest.TestCase):
         return y
 
 
-def _test_tanh_nn_impl(test_case, shape, device):
-    np_input = np.random.randn(*shape)
-    of_input = flow.Tensor(
-        np_input, dtype=flow.float32, device=flow.device(device), requires_grad=True
-    )
-    tanh = flow.nn.Tanh()
-    of_out = tanh(of_input)
-    np_out = np.tanh(np_input)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
-    of_out = of_out.sum()
-    of_out.backward()
-    test_case.assertTrue(
-        np.allclose(of_input.grad.numpy(), 1.0 - np_out * np_out, 1e-05, 1e-05)
-    )
-
-
-def _test_tanh_function_impl(test_case, shape, device):
-    np_input = np.random.randn(*shape)
-    of_input = flow.Tensor(
-        np_input, dtype=flow.float32, device=flow.device(device), requires_grad=True
-    )
-    of_out = flow.tanh(of_input)
-    np_out = np.tanh(np_input)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
-    of_out = of_out.sum()
-    of_out.backward()
-    test_case.assertTrue(
-        np.allclose(of_input.grad.numpy(), 1.0 - np_out * np_out, 1e-05, 1e-05)
-    )
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestTanh(flow.unittest.TestCase):
-    def test_tanh(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 4, 5, 6)]
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            _test_tanh_nn_impl(test_case, *arg)
-            _test_tanh_function_impl(test_case, *arg)
-
     @autotest()
     def test_tanh_module_with_random_data(test_case):
         m = torch.nn.Tanh()
@@ -171,34 +72,8 @@ class TestTanh(flow.unittest.TestCase):
         return y
 
 
-def _test_elu_function_impl(test_case, shape, device):
-    m = flow.nn.ELU()
-    arr = np.random.randn(*shape)
-    np_out = np.where(arr > 0, arr, 1.0 * (np.exp(arr) - 1))
-    x = flow.Tensor(arr, device=flow.device(device), requires_grad=True)
-    of_out = m(x)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, rtol=1e-05, atol=1e-05))
-    m = flow.nn.ELU(alpha=1.2)
-    arr = np.random.randn(*shape)
-    np_out = np.where(arr > 0, arr, 1.2 * (np.exp(arr) - 1))
-    x = flow.Tensor(arr, device=flow.device(device), requires_grad=True)
-    of_out = m(x)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, rtol=1e-05, atol=1e-05))
-    of_out = of_out.sum()
-    of_out.backward()
-    np_grad = np.where(arr > 0, 1, 1.2 * np.exp(arr))
-    test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-05, 1e-05))
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestELUModule(flow.unittest.TestCase):
-    def test_elu(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 4, 5, 6)]
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            _test_elu_function_impl(test_case, *arg)
-
     @autotest()
     def test_elu_module_with_random_data(test_case):
         m = torch.nn.ELU(alpha=random() | nothing())
@@ -210,46 +85,17 @@ class TestELUModule(flow.unittest.TestCase):
         return y
 
 
-def _np_gelu(x):
-    return 0.5 * x * (1 + special.erf(x / np.sqrt(2)))
-
-
-def _test_gelu_impl(test_case, device):
-    np_input = np.array([1.0, -1.0, 2.3]).astype(np.float32)
-    of_input = flow.Tensor(
-        np_input, dtype=flow.float32, device=flow.device(device), requires_grad=True
-    )
-    gelu = flow.nn.GELU()
-    of_out = gelu(of_input)
-    np_out = _np_gelu(np_input)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
-    of_out = of_out.sum()
-    of_out.backward()
-    np_grad = [1.0833154916763306, -0.08331547677516937, 1.0544281005859375]
-    test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_grad, 1e-05, 1e-05))
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestGelu(flow.unittest.TestCase):
-    def test_gelu(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            _test_gelu_impl(test_case, *arg)
-
+    @autotest()
     def test_gelu_module_with_random_data(test_case):
-        for device in ["cpu", "cuda"]:
-            test_module_against_pytorch(test_case, "nn.GELU", device=device, n=2)
-
-
-def numpy_sigmoid(x):
-    return 1.0 / (1 + np.exp(-x))
-
-
-def numpy_sigmoid_grad(inputs, grads):
-    x = np.exp(-inputs)
-    delta = x / (1 + x) ** 2
-    return delta * grads
+        m = torch.nn.GELU()
+        m.train(random())
+        device = random_device()
+        m.to(device)
+        x = random_pytorch_tensor().to(device)
+        y = m(x)
+        return y
 
 
 def numpy_softmax(x, axis):
@@ -277,49 +123,31 @@ def numpy_mish_grad(x):
     return y_grad
 
 
-def _test_sigmoid(test_case, device):
-    m = flow.nn.Sigmoid()
-    input_arr = np.random.randn(2, 3, 4, 5)
-    x = flow.Tensor(input_arr, device=flow.device(device))
-    y = m(x)
-    y2 = flow.sigmoid(x)
-    y3 = x.sigmoid()
-    output = numpy_sigmoid(input_arr)
-    test_case.assertTrue(np.allclose(y.numpy(), output, 1e-05, 1e-05))
-    test_case.assertTrue(np.allclose(y2.numpy(), output, 1e-05, 1e-05))
-    test_case.assertTrue(np.allclose(y3.numpy(), output, 1e-05, 1e-05))
-
-
-def _test_sigmoid_backward(test_case, device):
-    input_arr = np.random.randn(2, 3, 4, 5)
-    x = flow.Tensor(input_arr, device=flow.device(device), requires_grad=True)
-    x_grad = numpy_sigmoid_grad(input_arr, np.ones(input_arr.shape))
-    m = flow.nn.Sigmoid()
-    y = m(x).sum()
-    y.backward()
-    test_case.assertTrue(np.allclose(x.grad.numpy(), x_grad, 1e-05, 1e-05))
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestSigmoid(flow.unittest.TestCase):
-    def test_sigmoid(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["fun"] = [_test_sigmoid, _test_sigmoid_backward]
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            arg[0](test_case, *arg[1:])
-
+    @autotest()
     def test_sigmoid_module_with_random_data(test_case):
-        for device in ["cpu", "cuda"]:
-            test_module_against_pytorch(test_case, "nn.Sigmoid", device=device, n=2)
+        m = torch.nn.Sigmoid()
+        m.train(random())
+        device = random_device()
+        m.to(device)
+        x = random_pytorch_tensor().to(device)
+        y = m(x)
+        return y
 
+    @autotest()
     def test_sigmoid_flow_with_random_data(test_case):
-        for device in ["cpu", "cuda"]:
-            test_flow_against_pytorch(test_case, "sigmoid", device=device, n=2)
+        device = random_device()
+        x = random_pytorch_tensor().to(device)
+        y = torch.sigmoid(x)
+        return y
 
+    @autotest()
     def test_sigmoid_tensor_with_random_data(test_case):
-        for device in ["cpu", "cuda"]:
-            test_tensor_against_pytorch(test_case, "sigmoid", device=device, n=2)
+        device = random_device()
+        x = random_pytorch_tensor().to(device)
+        y = x.sigmoid()
+        return y
 
 
 def _test_softmax(test_case, device):
@@ -414,36 +242,17 @@ class TestSoftmax(flow.unittest.TestCase):
             arg[0](test_case, *arg[1:])
 
 
-def _np_hardsigmoid_grad(x):
-    return np.where(x > 0, np.where(x >= 1, 0, 1.0 / 6), 0)
-
-
-def _test_hardsigmoid_impl(test_case, shape, device):
-    m = flow.nn.Hardsigmoid()
-    arr = np.random.randn(*shape)
-    np_out = np.maximum(0, np.minimum(1, (arr + 3) / 6))
-    x = flow.Tensor(arr, device=flow.device(device), requires_grad=True)
-    of_out = m(x)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
-    of_out = of_out.sum()
-    of_out.backward()
-    test_case.assertTrue(
-        np.allclose(x.grad.numpy(), _np_hardsigmoid_grad(np_out), 1e-05, 1e-05)
-    )
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestHardsigmoidModule(flow.unittest.TestCase):
-    def test_hardsigmoid(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 4, 5, 6)]
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            _test_hardsigmoid_impl(test_case, *arg)
-
+    @autotest()
     def test_hardsigmoid_module_with_random_data(test_case):
-        for device in ["cpu", "cuda"]:
-            test_module_against_pytorch(test_case, "nn.Hardsigmoid", device=device, n=2)
+        m = torch.nn.Hardsigmoid()
+        m.train(random())
+        device = random_device()
+        m.to(device)
+        x = random_pytorch_tensor().to(device)
+        y = m(x)
+        return y
 
 
 def _test_logsoftmax(test_case, device):
@@ -590,37 +399,17 @@ class TestLogSoftmax(flow.unittest.TestCase):
             arg[0](test_case, *arg[1:])
 
 
-def _test_logsigmoid(test_case, device):
-    m = flow.nn.LogSigmoid()
-    arr = np.array([1.0, 2.0, 3.0, 10.2, 7.6])
-    np_out = np.log(1.0 / (1.0 + np.exp(-arr)))
-    x = flow.Tensor(arr, device=flow.device(device), requires_grad=True)
-    of_out = m(x)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
-    of_out = of_out.sum()
-    of_out.backward()
-    np_grad = [
-        0.2689414213699951,
-        0.11920292202211764,
-        0.04742587317756669,
-        3.716893710287265e-05,
-        0.0005002011070795276,
-    ]
-    test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-05, 1e-05))
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestLogSigmoidModule(flow.unittest.TestCase):
-    def test_logsigmoid(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["fun"] = [_test_logsigmoid]
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            arg[0](test_case, *arg[1:])
-
+    @autotest()
     def test_logsigmoid_module_with_random_data(test_case):
-        for device in ["cpu", "cuda"]:
-            test_module_against_pytorch(test_case, "nn.LogSigmoid", device=device, n=2)
+        m = torch.nn.LogSigmoid()
+        m.train(random())
+        device = random_device()
+        m.to(device)
+        x = random_pytorch_tensor().to(device)
+        y = m(x)
+        return y
 
 
 def _test_softplus(test_case, device):
@@ -690,31 +479,8 @@ class TestSoftplusModule(flow.unittest.TestCase):
         return y
 
 
-def _test_hardswish_impl(test_case, shape, device):
-    m = flow.nn.Hardswish()
-    arr = np.random.randn(*shape)
-    f = arr + 3
-    relu6 = np.where(np.where(f < 0, 0, f) > 6, 6, np.where(f < 0, 0, f))
-    relu6_grad = np.where(f > 6, 0, np.where(f < 0, 0, 1))
-    np_out = arr * relu6 / 6
-    x = flow.Tensor(arr, device=flow.device(device), requires_grad=True)
-    of_out = m(x)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
-    of_out = of_out.sum()
-    of_out.backward()
-    np_grad = relu6 / 6 + arr * relu6_grad / 6
-    test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-05, 1e-05))
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestHardswishModule(flow.unittest.TestCase):
-    def test_hardswish(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 4, 5, 6)]
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            _test_hardswish_impl(test_case, *arg)
-
     @autotest()
     def test_hardswish_module_with_random_data(test_case):
         m = torch.nn.Hardswish()
@@ -760,29 +526,8 @@ class TestHardtanhModule(flow.unittest.TestCase):
             _test_hardtanh_impl(test_case, *arg)
 
 
-def _test_leakyrelu_impl(test_case, shape, device):
-    negative_slope = 0.2
-    m = flow.nn.LeakyReLU(negative_slope=negative_slope)
-    arr = np.random.randn(*shape)
-    np_out = np.maximum(0, arr) + negative_slope * np.minimum(0, arr)
-    x = flow.Tensor(arr, device=flow.device(device), requires_grad=True)
-    of_out = m(x)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
-    np_grad = np.where(arr < 0, 1.0 * negative_slope, 1.0)
-    of_out = of_out.sum()
-    of_out.backward()
-    test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-05, 1e-05))
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestLeakyReLUModule(flow.unittest.TestCase):
-    def test_leaky_relu(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 4, 5, 6)]
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            _test_leakyrelu_impl(test_case, *arg)
-
     @autotest()
     def test_leakyrelu_module_with_random_data(test_case):
         m = torch.nn.LeakyReLU(negative_slope=random() | nothing())
@@ -794,36 +539,8 @@ class TestLeakyReLUModule(flow.unittest.TestCase):
         return y
 
 
-def _test_mish(test_case, shape, device):
-    np_input = np.random.randn(*shape)
-    of_input = flow.Tensor(np_input, dtype=flow.float32, device=flow.device(device))
-    m = flow.nn.Mish()
-    of_out = m(of_input)
-    np_out = np_input * np.tanh(numpy_softplus(np_input, 1.0, 20))
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
-
-
-def _test_mish_backward(test_case, shape, device):
-    m = flow.nn.Mish()
-    arr = np.random.randn(*shape)
-    x = flow.Tensor(arr, device=flow.device(device), requires_grad=True)
-    of_out = m(x)
-    of_out = of_out.sum()
-    of_out.backward()
-    np_grad = numpy_mish_grad(arr)
-    test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-05, 1e-05))
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestMishModule(flow.unittest.TestCase):
-    def test_mish(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["test_fun"] = [_test_mish, _test_mish_backward]
-        arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 4, 5, 6)]
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            arg[0](test_case, *arg[1:])
-
     @autotest(n=5)
     def test_mish_module_with_random_data(test_case):
         m = torch.nn.Mish()
@@ -835,39 +552,8 @@ class TestMishModule(flow.unittest.TestCase):
         return y
 
 
-def _np_silu_grad(x):
-    _sig = 1 / (1 + np.exp(-x))
-    return _sig * (1 + x * (1 - _sig))
-
-
-def _test_silu_impl(test_case, shape, device):
-    m = flow.nn.SiLU()
-    np_input = np.random.randn(*shape)
-    np_out = np_input / (1 + np.exp(-np_input))
-    of_input = flow.Tensor(
-        np_input, dtype=flow.float32, device=flow.device(device), requires_grad=True
-    )
-    of_out = m(of_input)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5))
-
-    of_out = of_out.sum()
-    of_out.backward()
-    test_case.assertTrue(
-        np.allclose(of_input.grad.numpy(), _np_silu_grad(np_input), 1e-5, 1e-5)
-    )
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestSiluModule(flow.unittest.TestCase):
-    def test_silu(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["test_fun"] = [_test_silu_impl]
-        arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 4, 5, 6)]
-
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            arg[0](test_case, *arg[1:])
-
     @autotest(n=5)
     def test_silu_module_with_random_data(test_case):
         m = torch.nn.SiLU()
@@ -879,46 +565,8 @@ class TestSiluModule(flow.unittest.TestCase):
         return y
 
 
-def _np_selu(x):
-    scale = 1.0507009873554804934193349852946
-    alpha = 1.6732632423543772848170429916717
-    return np.where(x < 0, scale * alpha * (np.exp(x) - 1), scale * x)
-
-
-def _np_selu_grad(x):
-    scale = 1.0507009873554804934193349852946
-    alpha = 1.6732632423543772848170429916717
-    return np.where(x < 0, scale * alpha * np.exp(x), scale)
-
-
-def _test_selu_impl(test_case, shape, device):
-    m = flow.nn.SELU()
-    np_input = np.random.randn(*shape)
-    np_out = _np_selu(np_input)
-    of_input = flow.Tensor(
-        np_input, dtype=flow.float32, device=flow.device(device), requires_grad=True
-    )
-    of_out = m(of_input)
-    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5))
-
-    of_out = of_out.sum()
-    of_out.backward()
-    test_case.assertTrue(
-        np.allclose(of_input.grad.numpy(), _np_selu_grad(np_input), 1e-5, 1e-5)
-    )
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestSeluModule(flow.unittest.TestCase):
-    def test_selu(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["test_fun"] = [_test_selu_impl]
-        arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 4, 5, 6)]
-
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            arg[0](test_case, *arg[1:])
-
     @autotest(n=5)
     def test_selu_module_with_random_data(test_case):
         m = torch.nn.SELU()
diff --git a/python/oneflow/test/modules/test_math_ops.py b/python/oneflow/test/modules/test_math_ops.py
index 8612a150c..b22b189cc 100644
--- a/python/oneflow/test/modules/test_math_ops.py
+++ b/python/oneflow/test/modules/test_math_ops.py
@@ -74,7 +74,7 @@ class TestVariance(flow.unittest.TestCase):
 
 
 @flow.unittest.skip_unless_1n1d()
-class Testsinh(flow.unittest.TestCase):
+class TestSinh(flow.unittest.TestCase):
     @autotest()
     def test_flow_sinh_with_random_data(test_case):
         device = random_device()
@@ -208,40 +208,42 @@ class TestStd(flow.unittest.TestCase):
         for arg in GenArgList(arg_dict):
             arg[0](test_case, *arg[1:])
 
+    @unittest.skip("std has a bug")
+    @autotest()
+    def test_std_flow_with_random_data(test_case):
+        device = random_device()
+        all_dim = random().to(int)
+        dim = random(low=0, high=all_dim).to(int)
+        x = random_pytorch_tensor(ndim=all_dim).to(device)
+        z = torch.std(x, dim=dim)
+        return z
 
-def _test_sqrt(test_case, shape, device):
-    np_arr = np.random.randn(*shape)
-    np_arr = np.abs(np_arr)
-    np_out = np.sqrt(np_arr)
-    x = flow.Tensor(np_arr, device=flow.device(device))
-    of_out = flow.sqrt(input=x)
-    test_case.assertTrue(
-        np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05, equal_nan=True)
-    )
-
-
-def _test_sqrt_backward(test_case, shape, device):
-    np_arr = np.random.randn(*shape)
-    np_arr = np.abs(np_arr)
-    x = flow.Tensor(np_arr, device=flow.device(device), requires_grad=True)
-    y = flow.sqrt(input=x)
-    z = y.sum()
-    z.backward()
-    np_grad = 0.5 * 1 / np.sqrt(x.numpy())
-    test_case.assertTrue(
-        np.allclose(x.grad.numpy(), np_grad, 1e-05, 1e-05, equal_nan=True)
-    )
+    @unittest.skip("std has a bug")
+    @autotest()
+    def test_std_tensor_with_random_data(test_case):
+        device = random_device()
+        all_dim = random().to(int)
+        dim = random(low=0, high=all_dim).to(int)
+        x = random_pytorch_tensor(ndim=all_dim).to(device)
+        z = x.std(dim=dim)
+        return z
 
 
 @flow.unittest.skip_unless_1n1d()
 class TestSqrt(flow.unittest.TestCase):
-    def test_sqrt(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["test_fun"] = [_test_sqrt, _test_sqrt_backward]
-        arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 3, 4, 5)]
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            arg[0](test_case, *arg[1:])
+    @autotest()
+    def test_sqrt_flow_with_random_data(test_case):
+        device = random_device()
+        x = random_pytorch_tensor().to(device)
+        z = torch.sqrt(x)
+        return z
+
+    @autotest()
+    def test_sqrt_tensor_with_random_data(test_case):
+        device = random_device()
+        x = random_pytorch_tensor().to(device)
+        z = x.sqrt()
+        return z
 
 
 def _test_rsqrt(test_case, shape, device):
@@ -279,38 +281,21 @@ class TestRsqrt(flow.unittest.TestCase):
             arg[0](test_case, *arg[1:])
 
 
-def _test_square(test_case, shape, device):
-    np_arr = np.random.randn(*shape)
-    np_out = np.square(np_arr)
-    x = flow.Tensor(np_arr, device=flow.device(device))
-    of_out = flow.square(x)
-    test_case.assertTrue(
-        np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05, equal_nan=True)
-    )
-
-
-def _test_square_backward(test_case, shape, device):
-    np_arr = np.random.randn(*shape)
-    np_out = np.square(np_arr)
-    x = flow.Tensor(np_arr, device=flow.device(device), requires_grad=True)
-    y = flow.square(x)
-    z = y.sum()
-    z.backward()
-    np_grad = 2 * np_arr
-    test_case.assertTrue(
-        np.allclose(x.grad.numpy(), np_grad, 1e-05, 1e-05, equal_nan=True)
-    )
-
-
 @flow.unittest.skip_unless_1n1d()
 class TestSquare(flow.unittest.TestCase):
-    def test_square(test_case):
-        arg_dict = OrderedDict()
-        arg_dict["test_fun"] = [_test_square, _test_square_backward]
-        arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 3, 4, 5)]
-        arg_dict["device"] = ["cpu", "cuda"]
-        for arg in GenArgList(arg_dict):
-            arg[0](test_case, *arg[1:])
+    @autotest()
+    def test_square_flow_with_random_data(test_case):
+        device = random_device()
+        x = random_pytorch_tensor().to(device)
+        z = torch.square(x)
+        return z
+
+    @autotest()
+    def test_square_tensor_with_random_data(test_case):
+        device = random_device()
+        x = random_pytorch_tensor().to(device)
+        z = x.square()
+        return z
 
 
 @flow.unittest.skip_unless_1n1d()
@@ -597,7 +582,7 @@ class TestAcosh(flow.unittest.TestCase):
     @autotest()
     def test_acosh_flow_with_random_data(test_case):
         device = random_device()
-        x = random_pytorch_tensor(2.0, 3.0).to(device)
+        x = random_pytorch_tensor(low=2, high=3).to(device)
         y = torch.acosh(x)
         return y
 
-- 
GitLab