Skip to content
Snippets Groups Projects
Unverified commit b092835f, authored by daquexian and committed by GitHub
Browse files

multi client launcher test (#5488)


* Multi-Client LogicalRun degenerate to PhysicalRun

* Add Logical->Physical in Multi-Client for IdGenerator and InstructionsBuilder

* multi client test

Signed-off-by: daquexian <daquexian566@gmail.com>

* auto format by CI

* fix wrong std::shared_ptr

Signed-off-by: default avatardaquexian <daquexian566@gmail.com>

* run module api test by test_multi_client/generic_test.sh

Signed-off-by: default avatardaquexian <daquexian566@gmail.com>

* fix test script

Signed-off-by: default avatardaquexian <daquexian566@gmail.com>

* use oneflow.distributed.launch -m unittest discover instead of bash for loop to speed up the test

Signed-off-by: default avatardaquexian <daquexian566@gmail.com>

* update ci/test_multi_client/generic_test.sh

Signed-off-by: default avatardaquexian <daquexian566@gmail.com>

Co-authored-by: chengtbf <472491134@qq.com>
Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
Co-authored-by: oneflow-ci-bot <ci-bot@oneflow.org>
parent 1e351cf8
No related branches found
No related tags found
No related merge requests found
Showing
with 113 additions and 1 deletion
......@@ -523,7 +523,7 @@ jobs:
${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \
-e ONEFLOW_TEST_DIR=$PWD/oneflow/python/test/modules \
${{ env.image_tag }} \
bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/generic_test.sh"
bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test_multi_client/generic_test.sh"
- name: Dataloader API test
timeout-minutes: 45
if: matrix.test_suite == 'cuda_new_interface'
......
#!/bin/bash
# Multi-client generic test runner: copies the module test suite into a scratch
# directory and runs it at 1, 2, and 4 devices.
set -xe
export PYTHONUNBUFFERED=1

# Source/test/tmp locations, overridable via environment variables.
src_dir=${ONEFLOW_SRC_DIR:-"$PWD"}
test_dir=${ONEFLOW_TEST_DIR:-"$PWD/oneflow/python/test/modules"}
test_tmp_dir=${ONEFLOW_TEST_TMP_DIR:-"./test_tmp_dir"}

# Work on a fresh copy of the test directory so test artifacts never pollute
# the source tree. Expansions are quoted to survive paths with spaces (SC2086).
rm -rf "$test_tmp_dir"
mkdir -p "$test_tmp_dir"
cp -r "$test_dir" "$test_tmp_dir"
cd "${test_tmp_dir}/$(basename "$test_dir")"

# Count visible GPUs; parallel_run.py shards single-device tests across them.
gpu_num=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)

# 1-device tests: run in parallel, one test per chunk.
export ONEFLOW_TEST_DEVICE_NUM=1
python3 "$src_dir/ci/test/parallel_run.py" \
    --gpu_num="${gpu_num}" \
    --dir="${PWD}" \
    --timeout=1 \
    --verbose \
    --chunk=1

# 2- and 4-device tests: launched once via oneflow.distributed.launch with
# unittest discovery (faster than a per-file bash loop).
export ONEFLOW_TEST_DEVICE_NUM=2
python3 -m oneflow.distributed.launch --nproc_per_node 2 -m unittest discover "${PWD}" --failfast --verbose

export ONEFLOW_TEST_DEVICE_NUM=4
python3 -m oneflow.distributed.launch --nproc_per_node 4 -m unittest discover "${PWD}" --failfast --verbose
......@@ -124,6 +124,8 @@ def node_size():
@oneflow_export("unittest.env.has_world_size")
def has_world_size():
if oneflow.distributed.is_multi_client():
return True
if os.getenv("ONEFLOW_TEST_WORLD_SIZE"):
assert os.getenv(
"ONEFLOW_TEST_WORLD_SIZE"
......@@ -135,6 +137,8 @@ def has_world_size():
@oneflow_export("unittest.env.world_size")
def world_size():
if oneflow.distributed.is_multi_client():
return oneflow.distributed.get_world_size()
return int(os.getenv("ONEFLOW_TEST_WORLD_SIZE"))
......
......@@ -59,6 +59,7 @@ def _test_abs_tensor_function_backward(test_case, device):
test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestAbs(flow.unittest.TestCase):
def test_cosh(test_case):
arg_dict = OrderedDict()
......
......@@ -39,6 +39,7 @@ def _test_acos_impl(test_case, shape, device):
)
@flow.unittest.skip_unless_1n1d()
class TestAcos(flow.unittest.TestCase):
def test_acos(test_case):
arg_dict = OrderedDict()
......
......@@ -41,6 +41,7 @@ def _test_acosh_impl(test_case, shape, device):
)
@flow.unittest.skip_unless_1n1d()
class TestAcosh(flow.unittest.TestCase):
def test_acosh(test_case):
arg_dict = OrderedDict()
......
......@@ -53,6 +53,7 @@ def _test_relu_impl(test_case, shape, device):
test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out > 0, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestReLUModule(flow.unittest.TestCase):
def test_relu(test_case):
arg_dict = OrderedDict()
......@@ -91,6 +92,7 @@ def _test_relu6_impl(test_case, shape, device):
)
@flow.unittest.skip_unless_1n1d()
class TestReLU6Module(flow.unittest.TestCase):
def test_relu6(test_case):
arg_dict = OrderedDict()
......@@ -141,6 +143,7 @@ def _test_tanh_function_impl(test_case, shape, device):
)
@flow.unittest.skip_unless_1n1d()
class TestTanh(flow.unittest.TestCase):
def test_tanh(test_case):
arg_dict = OrderedDict()
......@@ -190,6 +193,7 @@ def _test_elu_function_impl(test_case, shape, device):
test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestELUModule(flow.unittest.TestCase):
def test_elu(test_case):
arg_dict = OrderedDict()
......@@ -230,6 +234,7 @@ def _test_gelu_impl(test_case, device):
test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestGelu(flow.unittest.TestCase):
def test_gelu(test_case):
arg_dict = OrderedDict()
......@@ -302,6 +307,7 @@ def _test_sigmoid_backward(test_case, device):
test_case.assertTrue(np.allclose(x.grad.numpy(), x_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestSigmoid(flow.unittest.TestCase):
def test_sigmoid(test_case):
arg_dict = OrderedDict()
......@@ -405,6 +411,7 @@ def _test_softmax_backward_1_dim(test_case, device):
test_case.assertTrue(np.allclose(a.grad.numpy(), a_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestSoftmax(flow.unittest.TestCase):
def test_softmax(test_case):
arg_dict = OrderedDict()
......@@ -439,6 +446,7 @@ def _test_hardsigmoid_impl(test_case, shape, device):
)
@flow.unittest.skip_unless_1n1d()
class TestHardsigmoidModule(flow.unittest.TestCase):
def test_hardsigmoid(test_case):
arg_dict = OrderedDict()
......@@ -581,6 +589,7 @@ def _test_logsoftmax_backward(test_case, device):
test_case.assertTrue(np.allclose(x.grad.numpy(), x_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestLogSoftmax(flow.unittest.TestCase):
def test_log_softmax(test_case):
arg_dict = OrderedDict()
......@@ -614,6 +623,7 @@ def _test_logsigmoid(test_case, device):
test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestLogSigmoidModule(flow.unittest.TestCase):
def test_logsigmoid(test_case):
arg_dict = OrderedDict()
......@@ -670,6 +680,7 @@ def _test_softplus_backward(test_case, device):
test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestSoftplusModule(flow.unittest.TestCase):
def test_softplus(test_case):
arg_dict = OrderedDict()
......@@ -711,6 +722,7 @@ def _test_hardswish_impl(test_case, shape, device):
test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestHardswishModule(flow.unittest.TestCase):
def test_hardswish(test_case):
arg_dict = OrderedDict()
......@@ -750,6 +762,7 @@ def _test_hardtanh_impl(test_case, shape, device):
)
@flow.unittest.skip_unless_1n1d()
class TestHardtanhModule(flow.unittest.TestCase):
def test_hardtanh(test_case):
arg_dict = OrderedDict()
......@@ -775,6 +788,7 @@ def _test_leakyrelu_impl(test_case, shape, device):
test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestLeakyReLUModule(flow.unittest.TestCase):
def test_leaky_relu(test_case):
arg_dict = OrderedDict()
......@@ -816,6 +830,7 @@ def _test_mish_backward(test_case, shape, device):
test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestMishModule(flow.unittest.TestCase):
def test_mish(test_case):
arg_dict = OrderedDict()
......
......@@ -188,6 +188,7 @@ def _test_adaptive_avgpool2d_hw_backward(test_case, device):
test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestAdaptiveAvgPool2d(flow.unittest.TestCase):
def test_adaptive_avgpool2d(test_case):
arg_dict = OrderedDict()
......
......@@ -152,6 +152,7 @@ def _test_inplace_add(test_case, shape, device):
test_case.assertTrue(np.allclose(of_x.grad.numpy(), np.ones(shape), 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestAddModule(flow.unittest.TestCase):
def test_add(test_case):
arg_dict = OrderedDict()
......
......@@ -51,6 +51,7 @@ def _test_addmm_backward(test_case, shape, alpha, beta, device):
)
@flow.unittest.skip_unless_1n1d()
class TestAddmm(flow.unittest.TestCase):
def test_addmm(test_case):
arg_dict = OrderedDict()
......
"""
Copyright 2020 The OneFlow Authors. All rights reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import unittest
import oneflow.experimental as flow
import numpy as np
@flow.unittest.skip_unless_1n2d()
class TestAllReduce(flow.unittest.TestCase):
    """Eager NCCL all-reduce smoke test: two ranks each contribute a vector
    and both must observe the elementwise sum."""

    def test_all_reduce(test_case):
        # Per-rank inputs; the all-reduced result is their elementwise sum.
        contribution_of_rank0 = np.array([1, 2])
        contribution_of_rank1 = np.array([3, 4])

        rank = flow.distributed.get_rank()
        if rank not in (0, 1):
            # This test is only meaningful on exactly two ranks.
            raise ValueError

        local_input = flow.Tensor([1, 2] if rank == 0 else [3, 4])
        # Place the tensor on this rank's local GPU.
        local_input = local_input.to(f"cuda:{flow.distributed.get_local_rank()}")

        # Build the eager NCCL all-reduce op spanning both devices (0:0-1).
        all_reduce = (
            flow.builtin_op("eager_nccl_all_reduce")
            .Input("in")
            .Output("out")
            .Attr("parallel_conf", 'device_tag: "gpu", device_name: "0:0-1"')
            .Build()
        )

        reduced = all_reduce(local_input)[0]
        expected = contribution_of_rank0 + contribution_of_rank1
        test_case.assertTrue(np.allclose(reduced.numpy(), expected))
# Allow running this test file directly (outside of unittest discovery).
if __name__ == "__main__":
    unittest.main()
......@@ -49,6 +49,7 @@ def _test_arange_backward(test_case, device):
test_case.assertTrue(np.allclose(x.grad.numpy(), np.ones(13), 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestArange(flow.unittest.TestCase):
def test_transpose(test_case):
arg_dict = OrderedDict()
......
......@@ -75,6 +75,7 @@ def _test_argmax_dim_equal_none(test_case, device):
test_case.assertTrue(np.array_equal(of_out.numpy().flatten(), np_out.flatten()))
@flow.unittest.skip_unless_1n1d()
class TestArgmax(flow.unittest.TestCase):
def test_argmax(test_case):
arg_dict = OrderedDict()
......
......@@ -47,6 +47,7 @@ def _test_tensor_argsort(test_case, data_shape, axis, descending, data_type, dev
test_case.assertTrue(np.array_equal(of_out.numpy().flatten(), np_out.flatten()))
@flow.unittest.skip_unless_1n1d()
class TestArgsort(flow.unittest.TestCase):
def test_argsort(test_case):
arg_dict = OrderedDict()
......
......@@ -31,6 +31,7 @@ def _test_argwhere(test_case, shape, device):
test_case.assertTrue(np.array_equal(of_out.numpy().shape, np_out.shape))
@flow.unittest.skip_unless_1n1d()
class TestArgwhere(flow.unittest.TestCase):
def test_argwhere(test_case):
arg_dict = OrderedDict()
......
......@@ -64,6 +64,7 @@ def _test_arctan(test_case, shape, device):
)
@flow.unittest.skip_unless_1n1d()
class TestAtan(flow.unittest.TestCase):
def test_atan(test_case):
arg_dict = OrderedDict()
......
......@@ -102,6 +102,7 @@ def _test_atan2_backward(test_case, device):
test_y_grad()
@flow.unittest.skip_unless_1n1d()
class TestAtan2(flow.unittest.TestCase):
def test_atan2_forward(test_case):
arg_dict = OrderedDict()
......
......@@ -62,6 +62,7 @@ def _test_arctanh_impl(test_case, shape, device):
)
@flow.unittest.skip_unless_1n1d()
class TestAtanh(flow.unittest.TestCase):
def test_atanh(test_case):
arg_dict = OrderedDict()
......
......@@ -80,6 +80,7 @@ def _test_autograd_grad(test_case, shape, device):
# TODO(wyg): create_graph
@flow.unittest.skip_unless_1n1d()
class TestAutograd(flow.unittest.TestCase):
def test_autograd_interface(test_case):
arg_dict = OrderedDict()
......
......@@ -592,6 +592,7 @@ def _test_avgpool3d_special_kernel_size_backward(test_case, device):
test_case.assertTrue(np.allclose(x.grad.numpy(), numpy_grad, 1e-5, 1e-5))
@flow.unittest.skip_unless_1n1d()
class TestPoolingModule(flow.unittest.TestCase):
def test_avgpool3d(test_case):
arg_dict = OrderedDict()
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment