diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7d8fb8ab4713743923e1154fda53165e27c1400b..58239052892d064602f14e2a27683e055d67f827 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -263,7 +263,7 @@ jobs: cuda_version: ${{ matrix.cuda_version }} extra_docker_args: $extra_docker_args python_version: ${{ matrix.python_version }} - - name: Custom Op test (run by oneflow build docker) + - name: Single client custom Op test (run by oneflow build docker) timeout-minutes: 45 if: matrix.test_suite == 'cpu' && env.is_built != '1' run: | @@ -406,9 +406,6 @@ jobs: extra_docker_args+=" --env ONEFLOW_TEST_CPU_ONLY=1" extra_docker_args+=" --env CUDA_VISIBLE_DEVICES=-1" fi - if [ "$test_suite" == "cuda_new_interface" ]; then - extra_docker_args+=" --env ONEFLOW_TEST_ENABLE_EAGER=1" - fi # set container_name container_name=pr-${{ github.event.pull_request.number }}-run-id-${{ github.run_id }}-${test_suite}-test extra_docker_args+=" --name ${container_name}" @@ -443,21 +440,21 @@ jobs: docker run ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \ ${image_name} \ bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/build_docs.sh" - - name: Op test (distributed, 1st try) + - name: Single client op test (distributed, 1st try) timeout-minutes: 45 if: matrix.test_suite == 'cuda' continue-on-error: true id: distributed_try_1 run: | python3 ci/test/distributed_run.py --bash_script=ci/test/2node_op_test.sh --custom_img_tag=${{ env.image_name }} --oneflow_wheel_path=${{ env.wheelhouse_dir }} --oneflow_wheel_python_version=3.6 - - name: Op test (distributed, 2nd try) + - name: Single client op test (distributed, 2nd try) timeout-minutes: 45 if: matrix.test_suite == 'cuda' && steps.distributed_try_1.outcome=='failure' continue-on-error: true id: distributed_try_2 run: | python3 ci/test/distributed_run.py --bash_script=ci/test/2node_op_test.sh --custom_img_tag=${{ env.image_name }} --oneflow_wheel_path=${{ env.wheelhouse_dir }} --oneflow_wheel_python_version=3.6 - - name: Op test (distributed, 3rd try) + - name: Single client op test (distributed, 3rd try) timeout-minutes: 45 if: matrix.test_suite == 'cuda' && steps.distributed_try_2.outcome=='failure' continue-on-error: false @@ -489,7 +486,7 @@ jobs: ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \ ${image_name} \ bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/doctest.sh" - - name: Dry run test (run without runtime) + - name: Single client dry run test (run without runtime) timeout-minutes: 45 if: matrix.test_suite == 'cuda' run: | @@ -553,7 +550,7 @@ jobs: -e ONEFLOW_TEST_DIR=$PWD/oneflow/python/test/graph \ ${{ env.image_tag }} \ bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/generic_test.sh" - - name: Op test + - name: Single client op test timeout-minutes: 45 if: matrix.test_suite == 'cpu' || matrix.test_suite == 'cuda_op' run: | @@ -562,7 +559,7 @@ jobs: ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \ ${image_name} \ bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/1node_op_test.sh" - - name: Model test + - name: Single client model test timeout-minutes: 45 if: matrix.test_suite == 'cpu' || matrix.test_suite == 'cuda' run: | @@ -571,7 +568,7 @@ jobs: ${{ 
env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \ ${image_name} \ bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/1node_model_test.sh" - - name: Model serve test + - name: Single client model serve test timeout-minutes: 45 id: model_serve_test if: matrix.test_suite == 'cuda' @@ -587,7 +584,7 @@ jobs: set -x docker run ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \ ${image_name} bash ci/test/print_stack_from_core.sh python3 serving-tmp - - name: Benchmark (mainly for backward compatibility) + - name: Single client benchmark (mainly for backward compatibility) timeout-minutes: 45 if: matrix.test_suite == 'cuda' run: | @@ -595,7 +592,7 @@ jobs: docker run ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \ ${image_name} \ bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/1node_benchmark_test.sh" - - name: Benchmark FP16 (mainly for backward compatibility) + - name: Single client benchmark FP16 (mainly for backward compatibility) timeout-minutes: 45 if: matrix.test_suite == 'cuda' run: | @@ -603,7 +600,7 @@ jobs: docker run ${{ env.extra_docker_args }} ${{ env.pip_cache_docker_args }} \ ${image_name} \ bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/1node_benchmark_test_fp16.sh" - - name: XLA Test + - name: Single client XLA Test timeout-minutes: 45 if: contains(fromJson('["xla", "xla_cpu"]'), matrix.test_suite) && env.is_built != '1' run: | diff --git a/ci/test/1node_benchmark_test.sh b/ci/test/1node_benchmark_test.sh index a906bbd9621f5f1f39ab335a81268f894717de8e..e1fbe28a1faf32379209d05aed8564437eed0bb2 100644 --- a/ci/test/1node_benchmark_test.sh +++ b/ci/test/1node_benchmark_test.sh @@ -1,7 +1,7 @@ set -xe rm -rf /benchmarks -cp -r oneflow/python/benchmarks /benchmarks +cp -r oneflow/compatible_single_client_python/benchmarks /benchmarks cd /benchmarks python3 cnn_benchmark/of_cnn_benchmarks.py \ diff --git a/ci/test/1node_benchmark_test_fp16.sh b/ci/test/1node_benchmark_test_fp16.sh index 1dab4feb3b1280a601cd9c6ca40a4fdfd404203e..2d6abd6a1067c55b0bedbd79044ea274875611eb 100644 --- a/ci/test/1node_benchmark_test_fp16.sh +++ b/ci/test/1node_benchmark_test_fp16.sh @@ -1,7 +1,7 @@ set -ex rm -rf /benchmarks -cp -r oneflow/python/benchmarks /benchmarks +cp -r oneflow/compatible_single_client_python/benchmarks /benchmarks cd /benchmarks python3 cnn_benchmark/of_cnn_benchmarks.py \ diff --git a/ci/test/1node_custom_op_test.sh b/ci/test/1node_custom_op_test.sh index 8fdf7fedfae9c5bd71e6bb8de79e1ab704776b98..db8c2de3838696674cdf102d3fd41e6cda654088 100644 --- a/ci/test/1node_custom_op_test.sh +++ b/ci/test/1node_custom_op_test.sh @@ -7,7 +7,7 @@ test_tmp_dir=${ONEFLOW_TEST_TMP_DIR:-"/test_tmp_dir"} rm -rf $test_tmp_dir mkdir -p $test_tmp_dir -cp -r $src_dir/oneflow/python/test/custom_ops $test_tmp_dir +cp -r $src_dir/oneflow/compatible_single_client_python/test/custom_ops $test_tmp_dir cd $test_tmp_dir export ONEFLOW_TEST_DEVICE_NUM=1 diff --git a/ci/test/1node_model_serve_test.sh b/ci/test/1node_model_serve_test.sh index 2a5c7454d0d73491a099fc48b0a56cd269404db9..1942f0eba9a62376146ab09b39b9d9fdc7d3d1e1 100644 --- a/ci/test/1node_model_serve_test.sh +++ b/ci/test/1node_model_serve_test.sh @@ -6,7 +6,7 @@ test_tmp_dir=${ONEFLOW_TEST_TMP_DIR:-"/test_tmp_dir"} rm -rf $test_tmp_dir mkdir -p $test_tmp_dir -cp -r 
$src_dir/oneflow/python/test $test_tmp_dir +cp -r $src_dir/oneflow/compatible_single_client_python/test $test_tmp_dir cd $test_tmp_dir export ONEFLOW_TEST_DEVICE_NUM=1 diff --git a/ci/test/1node_model_test.sh b/ci/test/1node_model_test.sh index 52e3caf88ac91a66e3e2c6d5b8cadf2a2e957128..589557e69358369c4d5e0140cc40f8734b23a676 100644 --- a/ci/test/1node_model_test.sh +++ b/ci/test/1node_model_test.sh @@ -1,7 +1,7 @@ #!/bin/bash set -xe -cp -r oneflow/python/test /test_dir +cp -r oneflow/compatible_single_client_python/test /test_dir cd /test_dir python3 models/1node_test.py diff --git a/ci/test/1node_op_test.sh b/ci/test/1node_op_test.sh index 1d28f1e31ecc0aa319e5c4e92d9c894132a506f8..28943e01ad39a0a300afb97b7e238c607a1432e2 100644 --- a/ci/test/1node_op_test.sh +++ b/ci/test/1node_op_test.sh @@ -10,7 +10,7 @@ test_tmp_dir=${ONEFLOW_TEST_TMP_DIR:-"./test_tmp_dir"} rm -rf $test_tmp_dir mkdir -p $test_tmp_dir -cp -r $src_dir/oneflow/python/test $test_tmp_dir +cp -r $src_dir/oneflow/compatible_single_client_python/test $test_tmp_dir cd $test_tmp_dir python3 -m oneflow --doctor diff --git a/ci/test/2node_op_test.sh b/ci/test/2node_op_test.sh index a914e90218ec8654ca8f6a24525cf9dd3e767928..892bd94f1c7b798765eeec3f6ef8d0bb30ad83da 100644 --- a/ci/test/2node_op_test.sh +++ b/ci/test/2node_op_test.sh @@ -10,7 +10,7 @@ test_tmp_dir=${ONEFLOW_TEST_TMP_DIR:-"/test_tmp_dir"} rm -rf $test_tmp_dir mkdir -p $test_tmp_dir chmod -R o+w $test_tmp_dir -cp -r $src_dir/oneflow/python/test $test_tmp_dir +cp -r $src_dir/oneflow/compatible_single_client_python/test $test_tmp_dir cd $test_tmp_dir ONEFLOW_TEST_DEVICE_NUM=1 python3 test/ops/test_assign.py --failfast --verbose diff --git a/ci/test/distributed_run.py b/ci/test/distributed_run.py index 3cb7654e916cf03d7e2e9e99bc83a335ef3ba476..0ba1482d5e422220a786cc7d662cecab789bf8f1 100644 --- a/ci/test/distributed_run.py +++ b/ci/test/distributed_run.py @@ -161,7 +161,7 @@ def wait_for_env_proto_and_launch_workers( shell=True, ) run_docker_cmd = f"ssh {remote_host} docker exec {container_name}" - run_docker_cmd += f" python3 -m oneflow --start_worker --env_proto={workspace_dir}/env.prototxt" + run_docker_cmd += f" python3 -m oneflow.compatible.single_client --start_worker --env_proto={workspace_dir}/env.prototxt" print("[docker agent]", run_docker_cmd) remote_docker_proc[remote_host] = subprocess.Popen( run_docker_cmd, shell=True diff --git a/ci/test/doctest.sh b/ci/test/doctest.sh index d7a83c840756cf78cad6eab32d5153c500f6b1fd..4989c62ce06066d4f038e78e56f3edb1ebdd7a1b 100644 --- a/ci/test/doctest.sh +++ b/ci/test/doctest.sh @@ -11,7 +11,7 @@ python3 -c 'import oneflow; f=open("oneflow_path.txt", "w"); f.write(oneflow.__p gpu_num=$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l) python3 $src_dir/ci/test/parallel_run.py \ --gpu_num=${gpu_num} \ - --dir=$(cat oneflow_path.txt) \ + --dir=$(cat oneflow_path.txt)/python \ --timeout=1 \ --verbose \ --chunk=1 \ diff --git a/ci/test/dry_run_test.sh b/ci/test/dry_run_test.sh index 970b6a35f001ea9d2935ee7019db4998f559f742..f1f3dbff16f152c57aa5848fc5b4be16f641826c 100644 --- a/ci/test/dry_run_test.sh +++ b/ci/test/dry_run_test.sh @@ -9,7 +9,7 @@ test_tmp_dir=${ONEFLOW_TEST_TMP_DIR:-"./test_tmp_dir"} rm -rf $test_tmp_dir mkdir -p $test_tmp_dir -cp -r $src_dir/oneflow/python/benchmarks $test_tmp_dir +cp -r $src_dir/oneflow/compatible_single_client_python/benchmarks $test_tmp_dir cd $test_tmp_dir/benchmarks export ONEFLOW_DRY_RUN=1 diff --git a/ci/test/test_xla.sh b/ci/test/test_xla.sh index 
85755e810a9581d01e7f5ffcce412712082e8f2c..de0f6697eccc08a70f48ee08b4b8ad647a7de29f 100644 --- a/ci/test/test_xla.sh +++ b/ci/test/test_xla.sh @@ -5,7 +5,7 @@ test_tmp_dir=${ONEFLOW_TEST_TMP_DIR:-"/test_tmp_dir"} rm -rf $test_tmp_dir mkdir -p $test_tmp_dir -cp -r $src_dir/oneflow/python/test/xrt $test_tmp_dir +cp -r $src_dir/oneflow/compatible_single_client_python/test/xrt $test_tmp_dir cd $test_tmp_dir -python3 -c "import oneflow; assert oneflow.sysconfig.with_xla()" -for f in $src_dir/oneflow/python/test/xrt/*.py; do python3 "$f"; done +python3 -c "import oneflow.compatible.single_client as flow; assert flow.sysconfig.with_xla()" +for f in $src_dir/oneflow/compatible_single_client_python/test/xrt/*.py; do python3 "$f"; done diff --git a/cmake/oneflow.cmake b/cmake/oneflow.cmake index 7ac7db5bf60364379888cdcf3306ac44bd4c33a8..15bc67304fffb753f2335739644f7c4136504736 100644 --- a/cmake/oneflow.cmake +++ b/cmake/oneflow.cmake @@ -296,20 +296,36 @@ add_custom_target(of_pyscript_copy ALL COMMAND "${CMAKE_COMMAND}" -E copy "${PROJECT_SOURCE_DIR}/oneflow/__main__.py" "${of_pyscript_dir}/oneflow/__main__.py" COMMAND rm -rf ${of_pyscript_dir}/oneflow/python + COMMAND rm -rf ${of_pyscript_dir}/oneflow/compatible COMMAND ${CMAKE_COMMAND} -E create_symlink "${PROJECT_SOURCE_DIR}/oneflow/python" "${of_pyscript_dir}/oneflow/python" + COMMAND ${CMAKE_COMMAND} -E copy_directory "${PROJECT_SOURCE_DIR}/oneflow/compatible_single_client_python" "${of_pyscript_dir}/oneflow/compatible/single_client/python" + COMMAND "${CMAKE_COMMAND}" -E copy + "${PROJECT_SOURCE_DIR}/oneflow/single_client_init.py" "${of_pyscript_dir}/oneflow/compatible/single_client/__init__.py" + COMMAND ${Python_EXECUTABLE} "${PROJECT_SOURCE_DIR}/tools/conver_single_client_name_space.py" "${of_pyscript_dir}/oneflow/compatible" + COMMAND ${CMAKE_COMMAND} -E touch "${of_pyscript_dir}/oneflow/compatible/__init__.py" + COMMAND "${CMAKE_COMMAND}" -E copy + "${PROJECT_SOURCE_DIR}/oneflow/single_client_main.py" "${of_pyscript_dir}/oneflow/compatible/single_client/__main__.py" COMMAND ${CMAKE_COMMAND} -E make_directory "${of_pyscript_dir}/oneflow/distributed" COMMAND ${CMAKE_COMMAND} -E create_symlink "${PROJECT_SOURCE_DIR}/oneflow/python/distributed/launch.py" "${of_pyscript_dir}/oneflow/distributed/launch.py" COMMAND ${CMAKE_COMMAND} -E copy_directory "${of_proto_python_dir}/oneflow/core" "${of_pyscript_dir}/oneflow/core" COMMAND ${CMAKE_COMMAND} -E touch "${of_pyscript_dir}/oneflow/core/__init__.py" COMMAND ${CMAKE_COMMAND} -E make_directory "${of_pyscript_dir}/oneflow/F" COMMAND ${CMAKE_COMMAND} -E touch "${of_pyscript_dir}/oneflow/F/__init__.py" + COMMAND ${CMAKE_COMMAND} -E make_directory "${of_pyscript_dir}/oneflow/compatible/single_client/F" + COMMAND ${CMAKE_COMMAND} -E touch "${of_pyscript_dir}/oneflow/compatible/single_client/F/__init__.py" COMMAND ${CMAKE_COMMAND} -E make_directory "${of_pyscript_dir}/oneflow/experimental/F" COMMAND ${CMAKE_COMMAND} -E touch "${of_pyscript_dir}/oneflow/experimental/F/__init__.py" + COMMAND ${CMAKE_COMMAND} -E make_directory "${of_pyscript_dir}/oneflow/compatible/single_client/experimental/F" + COMMAND ${CMAKE_COMMAND} -E touch "${of_pyscript_dir}/oneflow/compatible/single_client/experimental/F/__init__.py" COMMAND ${CMAKE_COMMAND} -E make_directory "${of_pyscript_dir}/oneflow/python_gen" COMMAND ${CMAKE_COMMAND} -E touch "${of_pyscript_dir}/oneflow/python_gen/__init__.py" + COMMAND ${CMAKE_COMMAND} -E make_directory "${of_pyscript_dir}/oneflow/compatible/single_client/python_gen" + COMMAND 
${CMAKE_COMMAND} -E touch "${of_pyscript_dir}/oneflow/compatible/single_client/python_gen/__init__.py" COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/tools/generate_pip_version.py ${gen_pip_args} --src=${PROJECT_SOURCE_DIR} COMMAND ${Python_EXECUTABLE} "${PROJECT_SOURCE_DIR}/tools/generate_oneflow_symbols_export_file.py" - "${PROJECT_SOURCE_DIR}" "${of_pyscript_dir}/oneflow/python_gen/__export_symbols__.py") + "${PROJECT_SOURCE_DIR}/oneflow/python" "${of_pyscript_dir}/oneflow/python_gen/__export_symbols__.py" "python" + COMMAND ${Python_EXECUTABLE} "${PROJECT_SOURCE_DIR}/tools/generate_oneflow_symbols_export_file.py" + "${of_pyscript_dir}/oneflow/compatible" "${of_pyscript_dir}/oneflow/compatible/single_client/python_gen/__export_symbols__.py" "compatible") # source this file to add oneflow in PYTHONPATH file(WRITE "${PROJECT_BINARY_DIR}/source.sh" "export PYTHONPATH=${of_pyscript_dir}:$PYTHONPATH") diff --git a/oneflow/compatible_single_client_python/__init__.py b/oneflow/compatible_single_client_python/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/advanced/__init__.py b/oneflow/compatible_single_client_python/advanced/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/advanced/distribute_ops.py b/oneflow/compatible_single_client_python/advanced/distribute_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0c2e7fcd3509c82670748d66d70b35e6a7f62ad5 --- /dev/null +++ b/oneflow/compatible_single_client_python/advanced/distribute_ops.py @@ -0,0 +1,237 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from typing import Union, Tuple, List, Optional, Sequence, Callable +import oneflow._oneflow_internal + + +@oneflow_export("advanced.distribute_clone") +def api_distribute_clone( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> Tuple[oneflow._oneflow_internal.BlobDesc]: + func = enable_if.unique([distribute_clone]) + return func(x, name=name) + + +@enable_if.condition(hob.in_global_mode) +def distribute_clone(x, name=None): + if name is None: + name = id_util.UniqueStr("DistributeClone_") + op_conf = op_conf_util.OperatorConf() + op_conf.name = name + setattr(op_conf.distribute_clone_conf, "in", x.unique_name) + parallel_size = flow.current_scope().device_parallel_desc_symbol.parallel_num + op_conf.distribute_clone_conf.out.extend( + ["out_%d" % i for i in range(parallel_size)] + ) + interpret_util.ConsistentForward(op_conf) + ret = [] + for i in range(parallel_size): + out = "out_%d" % i + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = out + ret.append(remote_blob_util.RemoteBlob(lbi)) + return tuple(ret) + + +@oneflow_export("advanced.distribute_add") +def api_distribute_add( + xs: Sequence[oneflow._oneflow_internal.BlobDesc], name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + func = enable_if.unique([distribute_add]) + return func(xs, name=name) + + +@enable_if.condition(hob.in_global_mode) +def distribute_add(xs, name=None): + assert flow.current_scope().device_parallel_desc_symbol.parallel_num == len(xs) + if name is None: + name = id_util.UniqueStr("DistributeAdd_") + op_conf = op_conf_util.OperatorConf() + op_conf.name = name + getattr(op_conf.distribute_add_conf, "in").extend( + [_SoleConsistentLbn(x) for x in xs] + ) + op_conf.distribute_add_conf.out = "out" + interpret_util.ConsistentForward(op_conf) + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = "out" + return remote_blob_util.RemoteBlob(lbi) + + +@oneflow_export("advanced.distribute_split") +def api_distribute_split( + x: oneflow._oneflow_internal.BlobDesc, axis: int = 0, name: Optional[str] = None +) -> Tuple[oneflow._oneflow_internal.BlobDesc]: + func = enable_if.unique([distribute_split]) + return func(x, axis=axis, name=name) + + +@enable_if.condition(hob.in_global_mode) +def distribute_split(x, axis=0, name=None): + if name is None: + name = id_util.UniqueStr("DistributeSplit_") + op_conf = op_conf_util.OperatorConf() + op_conf.name = name + setattr(op_conf.distribute_split_conf, "in", x.unique_name) + op_conf.distribute_split_conf.axis = axis + parallel_size = flow.current_scope().device_parallel_desc_symbol.parallel_num + op_conf.distribute_split_conf.out.extend( + ["out_%d" % i for i in range(parallel_size)] + ) + 
interpret_util.ConsistentForward(op_conf) + ret = [] + for i in range(parallel_size): + out = "out_%d" % i + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = out + ret.append(remote_blob_util.RemoteBlob(lbi)) + return tuple(ret) + + +@oneflow_export("advanced.distribute_concat") +def api_distribute_concat( + xs: Sequence[oneflow._oneflow_internal.BlobDesc], + axis: int = 0, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + func = enable_if.unique([distribute_concat]) + return func(xs, axis=axis, name=name) + + +@enable_if.condition(hob.in_global_mode) +def distribute_concat(xs, axis=0, name=None): + assert flow.current_scope().device_parallel_desc_symbol.parallel_num == len(xs) + if name is None: + name = id_util.UniqueStr("DistributeConcat_") + op_conf = op_conf_util.OperatorConf() + op_conf.name = name + getattr(op_conf.distribute_concat_conf, "in").extend( + [_SoleConsistentLbn(x) for x in xs] + ) + op_conf.distribute_concat_conf.axis = axis + op_conf.distribute_concat_conf.out = "out" + interpret_util.ConsistentForward(op_conf) + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = "out" + return remote_blob_util.RemoteBlob(lbi) + + +@oneflow_export("advanced.distribute_map") +def api_distribute_map( + xs: Union[ + Sequence[oneflow._oneflow_internal.BlobDesc], oneflow._oneflow_internal.BlobDesc + ], + f: Callable[ + [oneflow._oneflow_internal.BlobDesc, oneflow._oneflow_internal.BlobDesc], + oneflow._oneflow_internal.BlobDesc, + ], + axis: int = 0, +) -> Tuple[oneflow._oneflow_internal.BlobDesc]: + func = enable_if.unique([distribute_map]) + return func(xs, f, axis=axis) + + +@enable_if.condition(hob.in_global_mode) +def distribute_map(xs, f, axis=0): + _AssertInputOrOutput(xs) + if isinstance(xs, (list, tuple)) == False: + xs = [xs] + splitted_xs = [flow.advanced.distribute_split(x, axis=axis) for x in xs] + results = [_UnderSingleDevicePlacementScope(f, *x) for x in zip(*splitted_xs)] + output_is_not_container = all( + [isinstance(x, oneflow._oneflow_internal.ConsistentBlob) for x in results] + ) + results = [_TryWrapTuple(x) for x in results] + result = [flow.advanced.distribute_concat(x, axis=axis) for x in zip(*results)] + if output_is_not_container: + return result[0] + return tuple(result) + + +@oneflow_export("cast_to_current_logical_view") +def cast_to_current_logical_view( + x: oneflow._oneflow_internal.BlobDesc, +) -> oneflow._oneflow_internal.BlobDesc: + if ( + isinstance(x, oneflow._oneflow_internal.ConsistentBlob) + and flow.scope.mirrored_view_enabled() + ) or ( + isinstance(x, oneflow._oneflow_internal.MirroredBlob) + and flow.scope.consistent_view_enabled() + ): + x = flow.identity(x) + return x + + +def _SoleConsistentLbn(blob): + assert blob.parallel_size == 1 + if isinstance(blob, oneflow._oneflow_internal.ConsistentBlob): + return blob.unique_name + if isinstance(blob, oneflow._oneflow_internal.MirroredBlob): + return blob.sub_consistent_blob_list[0].unique_name + raise NotImplementedError + + +def _AssertInputOrOutput(xs): + assert isinstance(xs, (list, tuple, oneflow._oneflow_internal.ConsistentBlob)) + if isinstance(xs, (list, tuple)): + assert len(xs) > 0 + assert all( + [isinstance(x, oneflow._oneflow_internal.ConsistentBlob) for x in xs] + ) + + +def _TryWrapTuple(ys): + _AssertInputOrOutput(ys) + if isinstance(ys, (list, tuple)) == False: + ys = (ys,) + return ys + + +def _UnderSingleDevicePlacementScope(f, *args): + parallel_desc_symbol = 
flow.current_scope().device_parallel_desc_symbol + for machine_id, device_id in _EachMachineIdAndDeviceId(parallel_desc_symbol): + mch_dev_str = "@%d:%d" % (machine_id, device_id) + with flow.scope.placement(parallel_desc_symbol.device_tag, mch_dev_str): + return f(*args) + + +def _EachMachineIdAndDeviceId(parallel_desc_symbol): + for ( + machine_id, + device_id_list, + ) in parallel_desc_symbol.machine_id2device_id_list.items(): + for device_id in device_id_list: + yield machine_id, device_id diff --git a/oneflow/compatible_single_client_python/autograd/__init__.py b/oneflow/compatible_single_client_python/autograd/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/autograd/autograd.py b/oneflow/compatible_single_client_python/autograd/autograd.py new file mode 100644 index 0000000000000000000000000000000000000000..c095a9e801cbc8f8f5850be287f625317db1f4ea --- /dev/null +++ b/oneflow/compatible_single_client_python/autograd/autograd.py @@ -0,0 +1,59 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from typing import Union, Sequence, Tuple + +from oneflow.compatible_single_client_python.framework.tensor import Tensor +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.framework.tensor_tuple_util import ( + convert_to_tensor_tuple, +) +from oneflow._oneflow_internal import TensorTuple +from oneflow._oneflow_internal.autograd import grad as grad_api +from oneflow._oneflow_internal.autograd import backward as backward_api + + +@oneflow_export("autograd.grad") +def grad( + outputs: Union[Tensor, Sequence[Tensor]], + inputs: Union[Tensor, Sequence[Tensor]], + out_grads: Union[Tensor, Sequence[Tensor], None] = None, + retain_graph: bool = False, + create_graph: bool = False, +) -> Tuple[Tensor]: + in_grads = grad_api( + convert_to_tensor_tuple(outputs), + convert_to_tensor_tuple(inputs), + convert_to_tensor_tuple(out_grads), + retain_graph, + create_graph, + ) + return tuple([Tensor(x) for x in in_grads]) + + +@oneflow_export("autograd.backward") +def backward( + outputs: Union[Tensor, Sequence[Tensor]], + out_grads: Union[Tensor, Sequence[Tensor], None], + retain_graph: bool = False, + create_graph: bool = False, +) -> None: + backward_api( + convert_to_tensor_tuple(outputs), + convert_to_tensor_tuple(out_grads), + retain_graph, + create_graph, + ) diff --git a/oneflow/compatible_single_client_python/benchmarks/bert_benchmark/benchmark_util.py b/oneflow/compatible_single_client_python/benchmarks/bert_benchmark/benchmark_util.py new file mode 100755 index 0000000000000000000000000000000000000000..396887cff75d908c0e048494843e1c40948115f6 --- /dev/null +++ b/oneflow/compatible_single_client_python/benchmarks/bert_benchmark/benchmark_util.py @@ -0,0 +1,115 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import time + +import numpy as np + + +class StopWatch: + def __init__(self): + pass + + def start(self): + self.start_time = time.time() + self.last_split = self.start_time + + def set_start(self, val): + self.start_time = val + self.last_split = self.start_time + + def split(self): + now = time.time() + duration = now - self.last_split + self.last_split = now + return duration + + def stop(self): + self.stop_time = time.time() + + def duration(self): + return self.stop_time - self.start_time + + +class BERTSpeedometer: + def __init__(self): + self.watch = StopWatch() + self.throughput_list = [] + + def speedometer_cb( + self, + step, + start_time, + total_batch_size, + skip_iter_num, + iter_num, + loss_print_every_n_iter, + ): + def callback(train_loss): + assert skip_iter_num >= 0 + if skip_iter_num == 0 and step == 0: + self.watch.set_start(start_time) + print("Start training without skipping any iterations.") + + if step < skip_iter_num: + if step == 0: + print( + "Skipping {} iterations for benchmark purposes.".format( + skip_iter_num + ) + ) + if (step + 1) == skip_iter_num: + self.watch.start() + print("Start training.") + else: + train_step = step - skip_iter_num + + if (train_step + 1) % loss_print_every_n_iter == 0: + total_loss = train_loss[0].mean() + mlm_loss = train_loss[1].mean() + nsp_loss = train_loss[2].mean() + + avg_elapse_time_per_iter = ( + self.watch.split() / loss_print_every_n_iter + ) + sentences_per_sec = total_batch_size / avg_elapse_time_per_iter + print( + "iter {}, total_loss: {:.3f}, mlm_loss: {:.3f}, nsp_loss: {:.3f}, speed: {:.3f}(sec/batch), {:.3f}(sentences/sec)".format( + train_step, + total_loss, + mlm_loss, + nsp_loss, + avg_elapse_time_per_iter, + sentences_per_sec, + ) + ) + self.throughput_list.append(sentences_per_sec) + + if (train_step + 1) == iter_num: + self.watch.stop() + total_duration = self.watch.duration() + avg_sentences_per_sec = ( + total_batch_size * iter_num / total_duration + ) + + print("-".ljust(66, "-")) + print( + "average speed: {:.3f}(sentences/sec), new_cal_method: {:.3f}(sentences/sec)".format( + avg_sentences_per_sec, np.mean(self.throughput_list) + ) + ) + print("-".ljust(66, "-")) + + return callback diff --git a/oneflow/compatible_single_client_python/benchmarks/bert_benchmark/bert.py b/oneflow/compatible_single_client_python/benchmarks/bert_benchmark/bert.py new file mode 100755 index 0000000000000000000000000000000000000000..5b7503f6b2586e9188b6e730e2cfb3a92011e419 --- /dev/null +++ b/oneflow/compatible_single_client_python/benchmarks/bert_benchmark/bert.py @@ -0,0 +1,413 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import math + +from oneflow.compatible import single_client as flow +from oneflow.core.common import data_type_pb2 as data_type_util +from oneflow.core.operator import op_conf_pb2 as op_conf_util + + +class BertBackbone(object): + def __init__( + self, + input_ids_blob, + input_mask_blob, + token_type_ids_blob, + vocab_size, + seq_length=512, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + initializer_range=0.02, + ): + + with flow.scope.namespace("bert"): + with flow.scope.namespace("embeddings"): + (self.embedding_output_, self.embedding_table_) = _EmbeddingLookup( + input_ids_blob=input_ids_blob, + vocab_size=vocab_size, + embedding_size=hidden_size, + initializer_range=initializer_range, + word_embedding_name="word_embeddings", + ) + self.embedding_output_ = _EmbeddingPostprocessor( + input_blob=self.embedding_output_, + seq_length=seq_length, + embedding_size=hidden_size, + use_token_type=True, + token_type_ids_blob=token_type_ids_blob, + token_type_vocab_size=type_vocab_size, + token_type_embedding_name="token_type_embeddings", + use_position_embeddings=True, + position_embedding_name="position_embeddings", + initializer_range=initializer_range, + max_position_embeddings=max_position_embeddings, + dropout_prob=hidden_dropout_prob, + ) + with flow.scope.namespace("encoder"): + attention_mask_blob = _CreateAttentionMaskFromInputMask( + input_mask_blob, + from_seq_length=seq_length, + to_seq_length=seq_length, + ) + self.all_encoder_layers_ = _TransformerModel( + input_blob=self.embedding_output_, + attention_mask_blob=attention_mask_blob, + seq_length=seq_length, + hidden_size=hidden_size, + num_hidden_layers=num_hidden_layers, + num_attention_heads=num_attention_heads, + intermediate_size=intermediate_size, + intermediate_act_fn=GetActivation(hidden_act), + hidden_dropout_prob=hidden_dropout_prob, + attention_probs_dropout_prob=attention_probs_dropout_prob, + initializer_range=initializer_range, + do_return_all_layers=False, + ) + self.sequence_output_ = self.all_encoder_layers_[-1] + + def embedding_output(self): + return self.embedding_output_ + + def all_encoder_layers(self): + return self.all_encoder_layers_ + + def sequence_output(self): + return self.sequence_output_ + + def embedding_table(self): + return self.embedding_table_ + + +def CreateInitializer(std): + return flow.truncated_normal(std) + + +def _Gelu(in_blob): + return flow.math.gelu(in_blob) + + +def _TransformerModel( + input_blob, + attention_mask_blob, + seq_length, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + intermediate_act_fn=_Gelu, + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + initializer_range=0.02, + do_return_all_layers=False, +): + + assert hidden_size % num_attention_heads == 0 + attention_head_size = int(hidden_size / num_attention_heads) + input_width = hidden_size + prev_output_blob = flow.reshape(input_blob, (-1, input_width)) + 
all_layer_output_blobs = [] + for layer_idx in range(num_hidden_layers): + with flow.scope.namespace("layer_%d" % layer_idx): + layer_input_blob = prev_output_blob + with flow.scope.namespace("attention"): + with flow.scope.namespace("self"): + attention_output_blob = _AttentionLayer( + from_blob=layer_input_blob, + to_blob=layer_input_blob, + attention_mask_blob=attention_mask_blob, + num_attention_heads=num_attention_heads, + size_per_head=attention_head_size, + attention_probs_dropout_prob=attention_probs_dropout_prob, + initializer_range=initializer_range, + do_return_2d_tensor=True, + from_seq_length=seq_length, + to_seq_length=seq_length, + ) + with flow.scope.namespace("output"): + attention_output_blob = _FullyConnected( + attention_output_blob, + input_size=num_attention_heads * attention_head_size, + units=hidden_size, + weight_initializer=CreateInitializer(initializer_range), + name="dense", + ) + attention_output_blob = _Dropout( + attention_output_blob, hidden_dropout_prob + ) + attention_output_blob = attention_output_blob + layer_input_blob + attention_output_blob = _LayerNorm( + attention_output_blob, hidden_size + ) + with flow.scope.namespace("intermediate"): + if callable(intermediate_act_fn): + act_fn = op_conf_util.kNone + else: + act_fn = intermediate_act_fn + intermediate_output_blob = _FullyConnected( + attention_output_blob, + input_size=num_attention_heads * attention_head_size, + units=intermediate_size, + activation=act_fn, + weight_initializer=CreateInitializer(initializer_range), + name="dense", + ) + if callable(intermediate_act_fn): + intermediate_output_blob = intermediate_act_fn( + intermediate_output_blob + ) + with flow.scope.namespace("output"): + layer_output_blob = _FullyConnected( + intermediate_output_blob, + input_size=intermediate_size, + units=hidden_size, + weight_initializer=CreateInitializer(initializer_range), + name="dense", + ) + layer_output_blob = _Dropout(layer_output_blob, hidden_dropout_prob) + layer_output_blob = layer_output_blob + attention_output_blob + layer_output_blob = _LayerNorm(layer_output_blob, hidden_size) + prev_output_blob = layer_output_blob + all_layer_output_blobs.append(layer_output_blob) + + input_shape = (-1, seq_length, hidden_size) + if do_return_all_layers: + final_output_blobs = [] + for layer_output_blob in all_layer_output_blobs: + final_output_blob = flow.reshape(layer_output_blob, input_shape) + final_output_blobs.append(final_output_blob) + return final_output_blobs + else: + final_output_blob = flow.reshape(prev_output_blob, input_shape) + return [final_output_blob] + + +def _AttentionLayer( + from_blob, + to_blob, + attention_mask_blob, + num_attention_heads=1, + size_per_head=512, + query_act=op_conf_util.kNone, + key_act=op_conf_util.kNone, + value_act=op_conf_util.kNone, + attention_probs_dropout_prob=0.0, + initializer_range=0.02, + do_return_2d_tensor=False, + batch_size=None, + from_seq_length=None, + to_seq_length=None, +): + def TransposeForScores(input_blob, num_attention_heads, seq_length, width): + output_blob = flow.reshape( + input_blob, [-1, seq_length, num_attention_heads, width] + ) + output_blob = flow.transpose(output_blob, perm=[0, 2, 1, 3]) + return output_blob + + from_blob_2d = flow.reshape(from_blob, [-1, num_attention_heads * size_per_head]) + to_blob_2d = flow.reshape(to_blob, [-1, num_attention_heads * size_per_head]) + + query_blob = _FullyConnected( + from_blob_2d, + input_size=num_attention_heads * size_per_head, + units=num_attention_heads * size_per_head, + 
activation=query_act, + name="query", + weight_initializer=CreateInitializer(initializer_range), + ) + + key_blob = _FullyConnected( + to_blob_2d, + input_size=num_attention_heads * size_per_head, + units=num_attention_heads * size_per_head, + activation=key_act, + name="key", + weight_initializer=CreateInitializer(initializer_range), + ) + + value_blob = _FullyConnected( + to_blob_2d, + input_size=num_attention_heads * size_per_head, + units=num_attention_heads * size_per_head, + activation=value_act, + name="value", + weight_initializer=CreateInitializer(initializer_range), + ) + + query_blob = TransposeForScores( + query_blob, num_attention_heads, from_seq_length, size_per_head + ) + key_blob = TransposeForScores( + key_blob, num_attention_heads, to_seq_length, size_per_head + ) + + attention_scores_blob = flow.matmul(query_blob, key_blob, transpose_b=True) + attention_scores_blob = attention_scores_blob * ( + 1.0 / math.sqrt(float(size_per_head)) + ) + + attention_mask_blob = flow.reshape( + attention_mask_blob, [-1, 1, from_seq_length, to_seq_length] + ) + attention_mask_blob = flow.cast(attention_mask_blob, dtype=flow.float) + addr_blob = (attention_mask_blob - 1.0) * 10000.0 + + attention_scores_blob = attention_scores_blob + addr_blob + attention_probs_blob = flow.nn.softmax(attention_scores_blob) + attention_probs_blob = _Dropout(attention_probs_blob, attention_probs_dropout_prob) + + value_blob = flow.reshape( + value_blob, [-1, to_seq_length, num_attention_heads, size_per_head] + ) + value_blob = flow.transpose(value_blob, perm=[0, 2, 1, 3]) + context_blob = flow.matmul(attention_probs_blob, value_blob) + context_blob = flow.transpose(context_blob, perm=[0, 2, 1, 3]) + + if do_return_2d_tensor: + context_blob = flow.reshape( + context_blob, [-1, num_attention_heads * size_per_head] + ) + else: + context_blob = flow.reshape( + context_blob, [-1, from_seq_length, num_attention_heads * size_per_head] + ) + return context_blob + + +def _FullyConnected( + input_blob, input_size, units, activation=None, name=None, weight_initializer=None +): + weight_blob = flow.get_variable( + name=name + "-weight", + shape=[input_size, units], + dtype=input_blob.dtype, + initializer=weight_initializer, + ) + bias_blob = flow.get_variable( + name=name + "-bias", + shape=[units], + dtype=input_blob.dtype, + initializer=flow.constant_initializer(0.0), + ) + output_blob = flow.matmul(input_blob, weight_blob) + output_blob = flow.nn.bias_add(output_blob, bias_blob) + return output_blob + + +def _Dropout(input_blob, dropout_prob): + if dropout_prob == 0.0: + return input_blob + return flow.nn.dropout(input_blob, rate=dropout_prob) + + +def _LayerNorm(input_blob, hidden_size): + return flow.layers.layer_norm( + input_blob, name="LayerNorm", begin_norm_axis=-1, begin_params_axis=-1 + ) + + +def _CreateAttentionMaskFromInputMask(to_mask_blob, from_seq_length, to_seq_length): + output = flow.cast(to_mask_blob, dtype=flow.float) + output = flow.reshape(output, [-1, 1, to_seq_length]) + zeros = flow.constant(0.0, dtype=flow.float, shape=[from_seq_length, to_seq_length]) + output = zeros + output + return output + + +def _EmbeddingPostprocessor( + input_blob, + seq_length, + embedding_size, + use_token_type=False, + token_type_ids_blob=None, + token_type_vocab_size=16, + token_type_embedding_name="token_type_embeddings", + use_position_embeddings=True, + position_embedding_name="position_embeddings", + initializer_range=0.02, + max_position_embeddings=512, + dropout_prob=0.1, +): + output = input_blob + + if 
use_token_type: + assert token_type_ids_blob is not None + token_type_table = flow.get_variable( + name=token_type_embedding_name, + shape=[token_type_vocab_size, embedding_size], + dtype=input_blob.dtype, + initializer=CreateInitializer(initializer_range), + ) + token_type_embeddings = flow.gather( + params=token_type_table, indices=token_type_ids_blob, axis=0 + ) + output = output + token_type_embeddings + + if use_position_embeddings: + position_table = flow.get_variable( + name=position_embedding_name, + shape=[1, max_position_embeddings, embedding_size], + dtype=input_blob.dtype, + initializer=CreateInitializer(initializer_range), + ) + assert seq_length <= max_position_embeddings + if seq_length != max_position_embeddings: + position_table = flow.slice( + position_table, begin=[None, 0, 0], size=[None, seq_length, -1] + ) + output = output + position_table + + output = _LayerNorm(output, embedding_size) + output = _Dropout(output, dropout_prob) + + return output + + +def _EmbeddingLookup( + input_ids_blob, + vocab_size, + embedding_size=128, + initializer_range=0.02, + word_embedding_name="word_embeddings", +): + embedding_table = flow.get_variable( + name=word_embedding_name, + shape=[vocab_size, embedding_size], + dtype=flow.float, + initializer=CreateInitializer(initializer_range), + ) + output = flow.gather(params=embedding_table, indices=input_ids_blob, axis=0) + return output, embedding_table + + +def GetActivation(name): + if name == "linear": + return None + elif name == "relu": + return flow.math.relu + elif name == "tanh": + return flow.math.tanh + elif name == "gelu": + return flow.math.gelu + else: + raise Exception("unsupported activation") diff --git a/oneflow/compatible_single_client_python/benchmarks/bert_benchmark/pretrain.py b/oneflow/compatible_single_client_python/benchmarks/bert_benchmark/pretrain.py new file mode 100755 index 0000000000000000000000000000000000000000..235e4d55393bcfd61e11983c92342fdc4d8715fd --- /dev/null +++ b/oneflow/compatible_single_client_python/benchmarks/bert_benchmark/pretrain.py @@ -0,0 +1,189 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import bert as bert_util +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util + + +def PreTrain( + input_ids_blob, + input_mask_blob, + token_type_ids_blob, + masked_lm_positions_blob, + masked_lm_ids_blob, + masked_lm_weights_blob, + next_sentence_label_blob, + vocab_size, + seq_length=512, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + max_predictions_per_seq=20, + initializer_range=0.02, +): + backbone = bert_util.BertBackbone( + input_ids_blob=input_ids_blob, + input_mask_blob=input_mask_blob, + token_type_ids_blob=token_type_ids_blob, + vocab_size=vocab_size, + seq_length=seq_length, + hidden_size=hidden_size, + num_hidden_layers=num_hidden_layers, + num_attention_heads=num_attention_heads, + intermediate_size=intermediate_size, + hidden_act=hidden_act, + hidden_dropout_prob=hidden_dropout_prob, + attention_probs_dropout_prob=attention_probs_dropout_prob, + max_position_embeddings=max_position_embeddings, + type_vocab_size=type_vocab_size, + initializer_range=initializer_range, + ) + + (lm_loss, _, _) = _AddMaskedLanguageModelLoss( + input_blob=backbone.sequence_output(), + output_weights_blob=backbone.embedding_table(), + positions_blob=masked_lm_positions_blob, + label_id_blob=masked_lm_ids_blob, + label_weight_blob=masked_lm_weights_blob, + seq_length=seq_length, + hidden_size=hidden_size, + vocab_size=vocab_size, + max_predictions_per_seq=max_predictions_per_seq, + hidden_act=bert_util.GetActivation(hidden_act), + initializer_range=initializer_range, + ) + pooled_output = PooledOutput( + backbone.sequence_output(), hidden_size, initializer_range + ) + (ns_loss, _, _) = _AddNextSentenceOutput( + input_blob=pooled_output, + label_blob=next_sentence_label_blob, + hidden_size=hidden_size, + initializer_range=initializer_range, + ) + with flow.scope.namespace("cls-loss"): + total_loss = lm_loss + ns_loss + return total_loss, lm_loss, ns_loss + + +def PooledOutput(sequence_output, hidden_size, initializer_range): + with flow.scope.namespace("bert-pooler"): + first_token_tensor = flow.slice(sequence_output, [None, 0, 0], [None, 1, -1]) + first_token_tensor = flow.reshape(first_token_tensor, [-1, hidden_size]) + pooled_output = bert_util._FullyConnected( + first_token_tensor, + input_size=hidden_size, + units=hidden_size, + weight_initializer=bert_util.CreateInitializer(initializer_range), + name="dense", + ) + pooled_output = flow.math.tanh(pooled_output) + return pooled_output + + +def _AddMaskedLanguageModelLoss( + input_blob, + output_weights_blob, + positions_blob, + label_id_blob, + label_weight_blob, + seq_length, + hidden_size, + vocab_size, + max_predictions_per_seq, + hidden_act, + initializer_range, +): + with flow.scope.namespace("other"): + sum_label_weight_blob = flow.math.reduce_sum(label_weight_blob, axis=[-1]) + ones = sum_label_weight_blob * 0.0 + 1.0 + sum_label_weight_blob = flow.math.reduce_sum(sum_label_weight_blob) + batch_size = flow.math.reduce_sum(ones) + sum_label_weight_blob = sum_label_weight_blob / batch_size + with flow.scope.namespace("cls-predictions"): + input_blob = _GatherIndexes(input_blob, positions_blob, seq_length, hidden_size) + with flow.scope.namespace("transform"): + if callable(hidden_act): + act_fn = op_conf_util.kNone + else: + act_fn = hidden_act + input_blob = bert_util._FullyConnected( + 
input_blob, + input_size=hidden_size, + units=hidden_size, + activation=act_fn, + weight_initializer=bert_util.CreateInitializer(initializer_range), + name="dense", + ) + if callable(hidden_act): + input_blob = hidden_act(input_blob) + input_blob = bert_util._LayerNorm(input_blob, hidden_size) + output_bias = flow.get_variable( + name="output_bias", + shape=[vocab_size], + dtype=input_blob.dtype, + initializer=flow.constant_initializer(1.0), + ) + logit_blob = flow.matmul(input_blob, output_weights_blob, transpose_b=True) + logit_blob = flow.nn.bias_add(logit_blob, output_bias) + label_id_blob = flow.reshape(label_id_blob, [-1]) + pre_example_loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + logits=logit_blob, labels=label_id_blob + ) + pre_example_loss = flow.reshape(pre_example_loss, [-1, max_predictions_per_seq]) + numerator = pre_example_loss * label_weight_blob + with flow.scope.namespace("loss"): + numerator = flow.math.reduce_sum(numerator, axis=[-1]) + denominator = sum_label_weight_blob + 1e-5 + loss = numerator / denominator + return loss, pre_example_loss, logit_blob + + +def _GatherIndexes(sequence_blob, positions_blob, seq_length, hidden_size): + output = flow.gather( + params=sequence_blob, indices=positions_blob, axis=2, batch_dims=2 + ) + output = flow.reshape(output, [-1, hidden_size]) + return output + + +def _AddNextSentenceOutput(input_blob, label_blob, hidden_size, initializer_range): + with flow.scope.namespace("cls-seq_relationship"): + output_weight_blob = flow.get_variable( + name="output_weights", + shape=[2, hidden_size], + dtype=input_blob.dtype, + initializer=bert_util.CreateInitializer(initializer_range), + ) + output_bias_blob = flow.get_variable( + name="output_bias", + shape=[2], + dtype=input_blob.dtype, + initializer=flow.constant_initializer(0.0), + ) + logit_blob = flow.matmul(input_blob, output_weight_blob, transpose_b=True) + logit_blob = flow.nn.bias_add(logit_blob, output_bias_blob) + pre_example_loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + logits=logit_blob, labels=label_blob + ) + loss = pre_example_loss + return loss, pre_example_loss, logit_blob diff --git a/oneflow/compatible_single_client_python/benchmarks/bert_benchmark/run_pretraining.py b/oneflow/compatible_single_client_python/benchmarks/bert_benchmark/run_pretraining.py new file mode 100755 index 0000000000000000000000000000000000000000..0b1b5ba06376e7c2dfbc85258d8e64322035b30e --- /dev/null +++ b/oneflow/compatible_single_client_python/benchmarks/bert_benchmark/run_pretraining.py @@ -0,0 +1,337 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import, division, print_function + +import argparse +import os +import random +import time +from collections import OrderedDict +from datetime import datetime + +import benchmark_util +from oneflow.compatible import single_client as flow +from pretrain import PreTrain + +parser = argparse.ArgumentParser(description="flags for bert") + +# resouce +parser.add_argument("--gpu_num_per_node", type=int, default=1) +parser.add_argument("--node_num", type=int, default=1) +parser.add_argument("--node_list", type=str, default=None) + +# train +parser.add_argument("--learning_rate", type=float, default=1e-4, help="Learning rate") +parser.add_argument( + "--weight_decay_rate", type=float, default=0.01, help="weight decay rate" +) +parser.add_argument("--batch_size_per_device", type=int, default=24) +parser.add_argument("--iter_num", type=int, default=10, help="total iterations to run") +parser.add_argument( + "--skip_iter_num", + type=int, + default=10, + help="number of skipping iterations for benchmark purpose.", +) +parser.add_argument( + "--log_every_n_iter", type=int, default=1, help="print loss every n iteration" +) +parser.add_argument("--data_dir", type=str, default=None) +parser.add_argument( + "--data_part_num", type=int, default=32, help="data part number in dataset" +) +parser.add_argument( + "--enable_auto_mixed_precision", + default=False, + type=lambda x: (str(x).lower() == "true"), +) + +# log and resore/save +parser.add_argument( + "--loss_print_every_n_iter", + type=int, + default=1, + required=False, + help="print loss every n iteration", +) +parser.add_argument( + "--model_save_every_n_iter", + type=int, + default=200, + required=False, + help="save model every n iteration", +) +parser.add_argument( + "--model_save_dir", + type=str, + default="./output/model_save-{}".format( + str(datetime.now().strftime("%Y-%m-%d-%H:%M:%S")) + ), + required=False, + help="model save directory", +) +parser.add_argument( + "--save_last_snapshot", + type=bool, + default=False, + required=False, + help="save model snapshot for last iteration", +) +parser.add_argument( + "--model_load_dir", + type=str, + default=None, + required=False, + help="model load directory", +) +parser.add_argument( + "--log_dir", + type=str, + default="./output", + required=False, + help="log info save directory", +) + +# bert +parser.add_argument("--seq_length", type=int, default=512) +parser.add_argument("--max_predictions_per_seq", type=int, default=80) +parser.add_argument("--num_hidden_layers", type=int, default=24) +parser.add_argument("--num_attention_heads", type=int, default=16) +parser.add_argument("--max_position_embeddings", type=int, default=512) +parser.add_argument("--type_vocab_size", type=int, default=2) +parser.add_argument("--vocab_size", type=int, default=30522) +parser.add_argument("--attention_probs_dropout_prob", type=float, default=0.1) +parser.add_argument("--hidden_dropout_prob", type=float, default=0.1) +parser.add_argument("--hidden_size_per_head", type=int, default=64) + +parser.add_argument("--warmup_batches", type=int, default=1000) +parser.add_argument("--lr_decay_num", type=int, default=100000) +parser.add_argument( + "--lr_decay_num_same_as_iter_num", + default=False, + type=(lambda x: str(x).lower() == "true"), +) + +args = parser.parse_args() + + +def _blob_conf(name, shape, dtype=flow.int32): + return flow.data.BlobConf( + name=name, shape=shape, dtype=dtype, codec=flow.data.RawCodec() + ) + + +def BertDecoder( + data_dir, batch_size, data_part_num, 
seq_length, max_predictions_per_seq +): + config_ordered_dict = OrderedDict() + config_ordered_dict["input_ids"] = seq_length + config_ordered_dict["next_sentence_labels"] = 1 + config_ordered_dict["input_mask"] = seq_length + config_ordered_dict["segment_ids"] = seq_length + config_ordered_dict["masked_lm_ids"] = max_predictions_per_seq + config_ordered_dict["masked_lm_positions"] = max_predictions_per_seq + config_ordered_dict["masked_lm_weights"] = max_predictions_per_seq + + ofrecord = flow.data.ofrecord_reader( + data_dir, batch_size=batch_size, data_part_num=data_part_num, name="decode", + ) + ret = {} + for k, v in config_ordered_dict.items(): + ret[k] = flow.data.ofrecord_raw_decoder( + ofrecord, + k, + shape=(v,), + dtype=flow.float if k == "masked_lm_weights" else flow.int32, + ) + return ret + + +def BuildPreTrainNet( + batch_size, + data_part_num, + seq_length=128, + max_position_embeddings=512, + num_hidden_layers=12, + num_attention_heads=12, + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + vocab_size=30522, + type_vocab_size=2, + max_predictions_per_seq=20, +): + hidden_size = 64 * num_attention_heads # , H = 64, size per head + intermediate_size = hidden_size * 4 + + decoders = BertDecoder( + args.data_dir, batch_size, data_part_num, seq_length, max_predictions_per_seq + ) + + input_ids = decoders["input_ids"] + next_sentence_labels = decoders["next_sentence_labels"] + input_mask = decoders["input_mask"] + token_type_ids = decoders["segment_ids"] # note: segment_ids = token_type_ids + masked_lm_ids = decoders["masked_lm_ids"] + masked_lm_positions = decoders["masked_lm_positions"] + masked_lm_weights = decoders["masked_lm_weights"] + return PreTrain( + input_ids, + input_mask, + token_type_ids, + masked_lm_positions, + masked_lm_ids, + masked_lm_weights, + next_sentence_labels, + vocab_size, + seq_length=seq_length, + hidden_size=hidden_size, + num_hidden_layers=num_hidden_layers, + num_attention_heads=num_attention_heads, + intermediate_size=intermediate_size, + hidden_act="gelu", + hidden_dropout_prob=hidden_dropout_prob, + attention_probs_dropout_prob=attention_probs_dropout_prob, + max_position_embeddings=max_position_embeddings, + type_vocab_size=type_vocab_size, + max_predictions_per_seq=max_predictions_per_seq, + initializer_range=0.02, + ) + + +_BERT_MODEL_UPDATE_CONF = dict( + learning_rate_decay=dict( + polynomial_conf=dict( + decay_batches=args.iter_num + if args.lr_decay_num_same_as_iter_num + else args.lr_decay_num, + end_learning_rate=0.0, + ) + ), + warmup_conf=dict( + linear_conf=dict(warmup_batches=args.warmup_batches, start_multiplier=0,) + ), + clip_conf=dict(clip_by_global_norm=dict(clip_norm=1.0,)), + adam_conf=dict(epsilon=1e-6), + weight_decay_conf=dict( + weight_decay_rate=args.weight_decay_rate, + excludes=dict(pattern=["bias", "LayerNorm", "layer_norm"]), + ), +) + +func_config = flow.FunctionConfig() +func_config.default_distribute_strategy(flow.scope.consistent_view()) +func_config.train.primary_lr(args.learning_rate) +func_config.default_data_type(flow.float) +func_config.train.model_update_conf(_BERT_MODEL_UPDATE_CONF) +func_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision) + + +flow.config.gpu_device_num(args.gpu_num_per_node) + + +@flow.global_function(func_config) +def PretrainJob(): + total_device_num = args.node_num * args.gpu_num_per_node + batch_size = total_device_num * args.batch_size_per_device + + total_loss, mlm_loss, nsp_loss = BuildPreTrainNet( + batch_size, + args.data_part_num, + 
seq_length=args.seq_length, + max_position_embeddings=args.max_position_embeddings, + num_hidden_layers=args.num_hidden_layers, + num_attention_heads=args.num_attention_heads, + hidden_dropout_prob=args.hidden_dropout_prob, + attention_probs_dropout_prob=args.attention_probs_dropout_prob, + vocab_size=args.vocab_size, + type_vocab_size=args.type_vocab_size, + max_predictions_per_seq=args.max_predictions_per_seq, + ) + flow.losses.add_loss(total_loss) + return total_loss, mlm_loss, nsp_loss + + +def main(): + print("=".ljust(66, "=")) + print( + "Running bert: num_gpu_per_node = {}, num_nodes = {}.".format( + args.gpu_num_per_node, args.node_num + ) + ) + print("=".ljust(66, "=")) + for arg in vars(args): + print("{} = {}".format(arg, getattr(args, arg))) + print("-".ljust(66, "-")) + print("Time stamp: {}".format(str(datetime.now().strftime("%Y-%m-%d-%H:%M:%S")))) + + flow.env.log_dir(args.log_dir) + + if args.node_num > 1: + nodes = [] + for n in args.node_list.strip().split(","): + addr_dict = {} + addr_dict["addr"] = n + nodes.append(addr_dict) + + flow.env.machine(nodes) + # TODO: make ctrl_port optional in EnvProto, here we must set it otherwise proto serialization will fail + if os.getenv("ONEFLOW_DRY_RUN"): + flow.env.ctrl_port(9788) + check_point = flow.train.CheckPoint() + if args.model_load_dir: + assert os.path.isdir(args.model_load_dir) + check_point.load(args.model_load_dir) + print("Restoring model from {}.".format(args.model_load_dir)) + else: + check_point.init() + print("Init model on demand") + + total_batch_size = ( + args.node_num * args.gpu_num_per_node * args.batch_size_per_device + ) + speedometer = benchmark_util.BERTSpeedometer() + start_time = time.time() + + for step in range(args.skip_iter_num + args.iter_num): + cb = speedometer.speedometer_cb( + step, + start_time, + total_batch_size, + args.skip_iter_num, + args.iter_num, + args.loss_print_every_n_iter, + ) + PretrainJob().async_get(cb) + + if (step + 1) % args.model_save_every_n_iter == 0: + if not os.path.exists(args.model_save_dir): + os.makedirs(args.model_save_dir) + snapshot_save_path = os.path.join( + args.model_save_dir, "snapshot_%d" % (step + 1) + ) + print("Saving model to {}.".format(snapshot_save_path)) + check_point.save(snapshot_save_path) + + if args.save_last_snapshot: + snapshot_save_path = os.path.join(args.model_save_dir, "last_snapshot") + if not os.path.exists(snapshot_save_path): + os.makedirs(snapshot_save_path) + print("Saving model to {}.".format(snapshot_save_path)) + check_point.save(snapshot_save_path) + + +if __name__ == "__main__": + main() diff --git a/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/__init__.py b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/alexnet.py b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/alexnet.py new file mode 100755 index 0000000000000000000000000000000000000000..dfe37d87194ec7b47fc6bd386d1ebcd0f08ff475 --- /dev/null +++ b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/alexnet.py @@ -0,0 +1,251 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+# ###################################################################
+# alexnet.py
+# Usage:
+#     Single Node: python alexnet.py -g 1
+#         -g gpu number
+#     Multi Nodes: python alexnet.py -g 8 -m -n "192.168.1.15,192.168.1.16"
+#         -g gpu number
+#         -m run on multi nodes
+#         -n IP addresses of nodes, separated by comma
+# ###################################################################
+
+import argparse
+
+from oneflow.compatible import single_client as flow
+
+DATA_DIR = "/dataset/imagenet_1k/oneflow/30/train"
+parser = argparse.ArgumentParser(description="flags for multi-node and resource")
+parser.add_argument("-i", "--iter_num", type=int, default=10, required=False)
+parser.add_argument("-g", "--gpu_num_per_node", type=int, default=1, required=False)
+parser.add_argument(
+    "-m", "--multinode", default=False, action="store_true", required=False
+)
+parser.add_argument("-n", "--node_list", type=str, default=None, required=False)
+parser.add_argument("-e", "--eval_dir", type=str, default=DATA_DIR, required=False)
+parser.add_argument("-t", "--train_dir", type=str, default=DATA_DIR, required=False)
+parser.add_argument("-load", "--model_load_dir", type=str, default="", required=False)
+parser.add_argument(
+    "-save", "--model_save_dir", type=str, default="./checkpoints", required=False
+)
+args = parser.parse_args()
+
+
+def _data_load_layer(data_dir):
+    rgb_mean = [123.68, 116.78, 103.94]
+    ofrecord = flow.data.ofrecord_reader(
+        data_dir, batch_size=12, data_part_num=8, name="decode",
+    )
+    image = flow.data.ofrecord_image_decoder(ofrecord, "encoded", color_space="RGB")
+    label = flow.data.ofrecord_raw_decoder(
+        ofrecord, "class/label", shape=(), dtype=flow.int32
+    )
+    rsz = flow.image.resize(image, resize_x=227, resize_y=227, color_space="RGB")
+    normal = flow.image.crop_mirror_normalize(
+        rsz,
+        color_space="RGB",
+        output_layout="NCHW",
+        mean=rgb_mean,
+        output_dtype=flow.float,
+    )
+    return label, normal
+
+
+def _conv2d_layer(
+    name,
+    input,
+    filters,
+    kernel_size=3,
+    strides=1,
+    padding="SAME",
+    data_format="NCHW",
+    dilation_rate=1,
+    activation="Relu",
+    use_bias=False,
+    weight_initializer=flow.random_uniform_initializer(),
+    bias_initializer=None,
+):
+    weight_shape = (filters, input.shape[1], kernel_size, kernel_size)
+    weight = flow.get_variable(
+        name + "-weight",
+        shape=weight_shape,
+        dtype=input.dtype,
+        initializer=weight_initializer,
+    )
+    output = flow.nn.conv2d(
+        input, weight, strides, padding, None, data_format, dilation_rate, name=name
+    )
+    if use_bias:
+        bias = flow.get_variable(
+            name + "-bias",
+            shape=(filters,),
+            dtype=input.dtype,
+            initializer=bias_initializer,
+        )
+        output = flow.nn.bias_add(output, bias, data_format)
+
+    if activation is not None:
+        if activation == "Relu":
+            output = flow.math.relu(output)
+        else:
+            raise NotImplementedError
+
+    return output
+
+
+def alexnet(images, labels):
+    conv1 = _conv2d_layer(
+        "conv1", images, filters=64, kernel_size=11, strides=4, padding="VALID"
+    )
+
+    pool1 = flow.nn.avg_pool2d(conv1, 3, 2, "VALID", "NCHW", name="pool1")
+
+    conv2 = _conv2d_layer("conv2", pool1, filters=192, kernel_size=5)
+
+
pool2 = flow.nn.avg_pool2d(conv2, 3, 2, "VALID", "NCHW", name="pool2") + + conv3 = _conv2d_layer("conv3", pool2, filters=384) + + conv4 = _conv2d_layer("conv4", conv3, filters=384) + + conv5 = _conv2d_layer("conv5", conv4, filters=256) + + pool5 = flow.nn.avg_pool2d(conv5, 3, 2, "VALID", "NCHW", name="pool5") + + if len(pool5.shape) > 2: + pool5 = flow.reshape(pool5, shape=(pool5.shape[0], -1)) + + fc1 = flow.layers.dense( + inputs=pool5, + units=4096, + activation=flow.math.relu, + use_bias=False, + kernel_initializer=flow.random_uniform_initializer(), + bias_initializer=False, + trainable=True, + name="fc1", + ) + + dropout1 = flow.nn.dropout(fc1, rate=0.5) + + fc2 = flow.layers.dense( + inputs=dropout1, + units=4096, + activation=flow.math.relu, + use_bias=False, + kernel_initializer=flow.random_uniform_initializer(), + bias_initializer=False, + trainable=True, + name="fc2", + ) + + dropout2 = flow.nn.dropout(fc2, rate=0.5) + + fc3 = flow.layers.dense( + inputs=dropout2, + units=1001, + activation=None, + use_bias=False, + kernel_initializer=flow.random_uniform_initializer(), + bias_initializer=False, + trainable=True, + name="fc3", + ) + + # loss function + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, fc3, name="softmax_loss" + ) + + return loss + + +# train job +@flow.global_function +def alexnet_train_job(): + # set hyper parameter + flow.config.train.primary_lr(0.00001) + flow.config.train.model_update_conf(dict(naive_conf={})) + + # load data + (labels, images) = _data_load_layer(args.train_dir) + + # construct network + loss = alexnet(images, labels) + + # set loss + flow.losses.add_loss(loss) + + return loss + + +# inference job +@flow.global_function +def alexnet_eval_job(): + # load data + (labels, images) = _data_load_layer(args.eval_dir) + + # construct inference network + loss = alexnet(images, labels) + + return loss + + +def main(): + # set running mode + flow.config.gpu_device_num(args.gpu_num_per_node) + flow.config.ctrl_port(9788) + flow.config.default_data_type(flow.float) + + # set multi nodes mode port + if args.multinode: + flow.config.ctrl_port(12138) + nodes = [] + for n in args.node_list.strip().split(","): + addr_dict = {} + addr_dict["addr"] = n + nodes.append(addr_dict) + flow.config.machine(nodes) + + # load/initialize model + check_point = flow.train.CheckPoint() + if not args.model_load_dir: + check_point.init() + else: + check_point.load(args.model_load_dir) + + # training iter + print("{:>12} {:>12} {:>12}".format("iter", "loss type", "loss value")) + for i in range(args.iter_num): + fmt_str = "{:>12} {:>12} {:>12.10f}" + + # print training log + train_loss = alexnet_train_job().get().mean() + print(fmt_str.format(i, "train loss:", train_loss)) + + # print inference log + if (i + 1) % 10 == 0: + eval_loss = alexnet_eval_job().get().mean() + print(fmt_str.format(i, "eval loss:", eval_loss)) + + # save model + if (i + 1) % 100 == 0: + check_point.save(args.model_save_dir + str(i)) + + +if __name__ == "__main__": + main() diff --git a/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/alexnet_model.py b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/alexnet_model.py new file mode 100755 index 0000000000000000000000000000000000000000..2092ffc1fae5ebc91c7314aa4797320deee6009f --- /dev/null +++ b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/alexnet_model.py @@ -0,0 +1,82 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. 
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import, division, print_function
+
+from oneflow.compatible import single_client as flow
+from model_util import conv2d_layer
+
+
+def alexnet(images, trainable=True):
+
+    conv1 = conv2d_layer(
+        "conv1", images, filters=64, kernel_size=11, strides=4, padding="VALID"
+    )
+
+    pool1 = flow.nn.avg_pool2d(conv1, 3, 2, "VALID", "NCHW", name="pool1")
+
+    conv2 = conv2d_layer("conv2", pool1, filters=192, kernel_size=5)
+
+    pool2 = flow.nn.avg_pool2d(conv2, 3, 2, "VALID", "NCHW", name="pool2")
+
+    conv3 = conv2d_layer("conv3", pool2, filters=384)
+
+    conv4 = conv2d_layer("conv4", conv3, filters=384)
+
+    conv5 = conv2d_layer("conv5", conv4, filters=256)
+
+    pool5 = flow.nn.avg_pool2d(conv5, 3, 2, "VALID", "NCHW", name="pool5")
+
+    if len(pool5.shape) > 2:
+        pool5 = flow.reshape(pool5, shape=(pool5.shape[0], -1))
+
+    fc1 = flow.layers.dense(
+        inputs=pool5,
+        units=4096,
+        activation=flow.math.relu,
+        use_bias=False,
+        kernel_initializer=flow.random_uniform_initializer(),
+        bias_initializer=False,
+        trainable=trainable,
+        name="fc1",
+    )
+
+    dropout1 = flow.nn.dropout(fc1, rate=0.5)
+
+    fc2 = flow.layers.dense(
+        inputs=dropout1,
+        units=4096,
+        activation=flow.math.relu,
+        use_bias=False,
+        kernel_initializer=flow.random_uniform_initializer(),
+        bias_initializer=False,
+        trainable=trainable,
+        name="fc2",
+    )
+
+    dropout2 = flow.nn.dropout(fc2, rate=0.5)
+
+    fc3 = flow.layers.dense(
+        inputs=dropout2,
+        units=1001,
+        activation=None,
+        use_bias=False,
+        kernel_initializer=flow.random_uniform_initializer(),
+        bias_initializer=False,
+        trainable=trainable,
+        name="fc3",
+    )
+
+    return fc3
diff --git a/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/benchmark.md b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/benchmark.md
new file mode 100644
index 0000000000000000000000000000000000000000..654ffc068ca2e04ba2da39846065d99d3175ef97
--- /dev/null
+++ b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/benchmark.md
@@ -0,0 +1,51 @@
+## Inference
+
+Test platform: single NVIDIA GTX 2080Ti GPU.
+CUDA version: 10.0
+cuDNN version: 7.5.0
+TensorRT version: 6.0.1
+
+Oneflow-Benchmark
+branch: of_dev_python_py3
+commit: 985dd3f03887d266e66573db0b31a4cf3051ff31
+
+Oneflow:
+branch: of_xrt_tensorrt
+commit: 726c3a12b9d97b57f9fb7e3d212b63564e20e755
+
+### CV
+
+#### Speed
+
+Input image size is 224 (299 for inception-v3). Each model is warmed up for 5 batches; the reported average throughput (img/s) is the mean over 500 batches (a measurement sketch follows the tables below).
+
+1. batch size = 8
+
+>| -            | Oneflow(fp32) | Oneflow(fp16) | TensorRT(fp32) | TensorRT(fp16) | TensorRT(int8) |
+>| ------------ | ------------- | ------------- | -------------- | -------------- | -------------- |
+>| alexnet      | 2637          | 1550          | 2540           | 2759           |                |
+>| vgg16        | 371           | 332           | 377            | 1124           |                |
+>| resnet50     | 657           | 541           | 729            | 940            |                |
+>| inception-v3 | 433           | 434           | 489            | 999            |                |
+
+2. batch size = 50
+
+>| -            | Oneflow(fp32) | Oneflow(fp16) | TensorRT(fp32) | TensorRT(fp16) | TensorRT(int8) |
+>| ------------ | ------------- | ------------- | -------------- | -------------- | -------------- |
+>| alexnet      | 6999          | 3219          | 4306           | 7704           |                |
+>| vgg16        | 497           | 476           | 404            | 1482           |                |
+>| resnet50     | 810           | 619           | 830            | 1285           |                |
+>| inception-v3 | 544           | 531           | 717            | 1839           |                |
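+
+A minimal sketch of the measurement loop described above; `run_batch` is a
+hypothetical stand-in for a single inference batch and is not part of this
+repo:
+
+```python
+import time
+
+def measure_throughput(run_batch, batch_size, warmup=5, iters=500):
+    for _ in range(warmup):  # warm-up batches are excluded from timing
+        run_batch()
+    start = time.perf_counter()
+    for _ in range(iters):
+        run_batch()
+    elapsed = time.perf_counter() - start
+    return batch_size * iters / elapsed  # average throughput in img/s
+```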
+
+
+#### Precision
+
+50,000 images in total; we report Top-1 accuracy and the number of classification mismatches relative to OneFlow fp32.
+
+>| -            | Oneflow(fp32) | Oneflow(fp16) | TensorRT(fp32) | TensorRT(fp16) | TensorRT(int8) |
+>| ------------ | ------------- | ------------- | -------------- | -------------- | -------------- |
+>| vgg16        | 0.495 / 0     | 0.495 / 61    | 0.495 / 0      | 0.495 / 101    |                |
+>| alexnet      |               |               |                |                |                |
+>| resnet50     | 0.613 / 0     | 0.613 / 59    | 0.613 / 0      | 0.613 / 130    |                |
+>| inception-v3 |               |               |                |                |                |
+
diff --git a/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/benchmark_util.py b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/benchmark_util.py
new file mode 100755
index 0000000000000000000000000000000000000000..b1e2c4e8ea850ba1b5d908e61bcaa9331bf5d8d5
--- /dev/null
+++ b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/benchmark_util.py
@@ -0,0 +1,106 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
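+# Timing helpers shared by the CNN benchmark driver: StopWatch tracks
+# wall-clock splits, and CNNSpeedometer.speedometer_cb builds the async
+# callback that turns per-iteration latency into images/sec throughput.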
+""" +import time + +import numpy as np + + +class StopWatch: + def __init__(self): + pass + + def start(self): + self.start_time = time.time() + self.last_split = self.start_time + + def set_start(self, val): + self.start_time = val + self.last_split = self.start_time + + def split(self): + now = time.time() + duration = now - self.last_split + self.last_split = now + return duration + + def stop(self): + self.stop_time = time.time() + + def duration(self): + return self.stop_time - self.start_time + + +class CNNSpeedometer: + def __init__(self): + self.watch = StopWatch() + self.throughoutput_list = [] + + def speedometer_cb( + self, + step, + start_time, + total_batch_size, + skip_iter_num, + iter_num, + loss_print_every_n_iter, + ): + def callback(train_loss): + assert skip_iter_num >= 0 + if skip_iter_num == 0 and step == 0: + self.watch.set_start(start_time) + print("Start trainning without any skipping iteration.") + + if step < skip_iter_num: + if step == 0: + print( + "Skipping {} iterations for benchmark purpose.".format( + skip_iter_num + ) + ) + if (step + 1) == skip_iter_num: + self.watch.start() + print("Start trainning.") + else: + train_step = step - skip_iter_num + + if (train_step + 1) % loss_print_every_n_iter == 0: + loss = train_loss.mean() + + avg_elapse_time_per_iter = ( + self.watch.split() / loss_print_every_n_iter + ) + samples_per_sec = total_batch_size / avg_elapse_time_per_iter + print( + "iter {}, loss: {:.3f}, speed: {:.3f}(sec/batch), {:.3f}(images/sec)".format( + train_step, loss, avg_elapse_time_per_iter, samples_per_sec + ) + ) + self.throughoutput_list.append(samples_per_sec) + + if (train_step + 1) == iter_num: + self.watch.stop() + totoal_duration = self.watch.duration() + avg_samples_per_sec = total_batch_size * iter_num / totoal_duration + + print("-".ljust(66, "-")) + print( + "average speed: {:.3f}(images/sec), new_cal_method: {:.3f}(images/sec)".format( + avg_samples_per_sec, np.mean(self.throughoutput_list) + ) + ) + print("-".ljust(66, "-")) + + return callback diff --git a/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/data_loader.py b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..8c3be82644da3e7d9f76c5cfc46ad43029b480ec --- /dev/null +++ b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/data_loader.py @@ -0,0 +1,67 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import, division, print_function + +from oneflow.compatible import single_client as flow + + +def load_imagenet( + data_dir, image_size, batch_size, data_part_num, gpu_image_decoder=False +): + rgb_mean = [123.68, 116.78, 103.94] + rgb_std = [255.0, 255.0, 255.0] + ofrecord = flow.data.ofrecord_reader( + data_dir, batch_size=batch_size, data_part_num=data_part_num, name="decode", + ) + label = flow.data.ofrecord_raw_decoder( + ofrecord, "class/label", shape=(), dtype=flow.int32 + ) + + if gpu_image_decoder: + encoded = flow.data.OFRecordBytesDecoder(ofrecord, "encoded") + rsz = flow.data.ImageDecoderRandomCropResize( + encoded, target_width=image_size, target_height=image_size, num_workers=3 + ) + else: + image = flow.data.ofrecord_image_decoder(ofrecord, "encoded", color_space="RGB") + rsz = flow.image.resize( + image, resize_x=image_size, resize_y=image_size, color_space="RGB" + ) + + normal = flow.image.crop_mirror_normalize( + rsz, + color_space="RGB", + output_layout="NCHW", + mean=rgb_mean, + std=rgb_std, + output_dtype=flow.float, + ) + return label, normal + + +def load_synthetic(image_size, batch_size): + label = flow.data.decode_random( + shape=(), + dtype=flow.int32, + batch_size=batch_size, + initializer=flow.zeros_initializer(flow.int32), + ) + + image = flow.data.decode_random( + shape=(3, image_size, image_size), dtype=flow.float, batch_size=batch_size + ) + + return label, image diff --git a/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/inceptionv3_model.py b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/inceptionv3_model.py new file mode 100644 index 0000000000000000000000000000000000000000..8ba267c6c0cce6cc67fb82433111620dd9dc4651 --- /dev/null +++ b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/inceptionv3_model.py @@ -0,0 +1,519 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import, division, print_function + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util + + +def _conv2d_layer( + name, + input, + filters, + kernel_size=3, + strides=1, + padding="SAME", + data_format="NCHW", + dilation_rate=1, + activation=op_conf_util.kSigmoid, + use_bias=True, + trainable=True, + weight_initializer=flow.random_uniform_initializer(), + bias_initializer=flow.constant_initializer(), +): + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size) + else: + kernel_size = tuple(kernel_size) + weight_shape = (filters, input.shape[1]) + kernel_size + weight = flow.get_variable( + name + "-weight", + shape=weight_shape, + dtype=input.dtype, + initializer=weight_initializer, + ) + output = flow.nn.conv2d( + input, weight, strides, padding, None, data_format, dilation_rate, name=name + ) + if use_bias: + bias = flow.get_variable( + name + "-bias", + shape=(filters,), + dtype=input.dtype, + initializer=bias_initializer, + ) + output = flow.nn.bias_add(output, bias, data_format) + + if activation is not None: + if activation == op_conf_util.kRelu: + output = flow.math.relu(output) + elif activation == op_conf_util.kSigmoid: + output = flow.math.sigmoid(output) + else: + raise NotImplementedError + + return output + + +def InceptionA(in_blob, index): + with flow.scope.namespace("mixed_{}".format(index)): + with flow.scope.namespace("branch1x1"): + branch1x1 = _conv2d_layer( + "conv0", in_blob, filters=64, kernel_size=1, strides=1, padding="SAME" + ) + with flow.scope.namespace("branch5x5"): + branch5x5_1 = _conv2d_layer( + "conv0", in_blob, filters=48, kernel_size=1, strides=1, padding="SAME" + ) + branch5x5_2 = _conv2d_layer( + "conv1", + branch5x5_1, + filters=64, + kernel_size=5, + strides=1, + padding="SAME", + ) + with flow.scope.namespace("branch3x3dbl"): + branch3x3dbl_1 = _conv2d_layer( + "conv0", in_blob, filters=64, kernel_size=1, strides=1, padding="SAME" + ) + branch3x3dbl_2 = _conv2d_layer( + "conv1", + branch3x3dbl_1, + filters=96, + kernel_size=3, + strides=1, + padding="SAME", + ) + branch3x3dbl_3 = _conv2d_layer( + "conv2", + branch3x3dbl_2, + filters=96, + kernel_size=3, + strides=1, + padding="SAME", + ) + with flow.scope.namespace("branch_pool"): + branch_pool_1 = flow.nn.avg_pool2d( + in_blob, + ksize=3, + strides=1, + padding="SAME", + data_format="NCHW", + name="pool", + ) + branch_pool_2 = _conv2d_layer( + "conv", + branch_pool_1, + filters=32 if index == 0 else 64, + kernel_size=1, + strides=1, + padding="SAME", + ) + + inceptionA_bn = [] + inceptionA_bn.append(branch1x1) + inceptionA_bn.append(branch5x5_2) + inceptionA_bn.append(branch3x3dbl_3) + inceptionA_bn.append(branch_pool_2) + + mixed_concat = flow.concat(values=inceptionA_bn, axis=1, name="concat") + + return mixed_concat + + +def InceptionB(in_blob, index): + with flow.scope.namespace("mixed_{}".format(index)): + with flow.scope.namespace("branch3x3"): + branch3x3 = _conv2d_layer( + "conv0", in_blob, filters=384, kernel_size=3, strides=2, padding="VALID" + ) + with flow.scope.namespace("branch3x3dbl"): + branch3x3dbl_1 = _conv2d_layer( + "conv0", in_blob, filters=64, kernel_size=1, strides=1, padding="SAME" + ) + branch3x3dbl_2 = _conv2d_layer( + "conv1", + branch3x3dbl_1, + filters=96, + kernel_size=3, + strides=1, + padding="SAME", + ) + branch3x3dbl_3 = _conv2d_layer( + "conv2", + branch3x3dbl_2, + filters=96, + kernel_size=3, + strides=2, + padding="VALID", + ) + with 
flow.scope.namespace("branch_pool"): + branch_pool = flow.nn.max_pool2d( + in_blob, + ksize=3, + strides=2, + padding="VALID", + data_format="NCHW", + name="pool0", + ) + + inceptionB_bn = [] + inceptionB_bn.append(branch3x3) + inceptionB_bn.append(branch3x3dbl_3) + inceptionB_bn.append(branch_pool) + mixed_concat = flow.concat(values=inceptionB_bn, axis=1, name="concat") + + return mixed_concat + + +def InceptionC(in_blob, index, filters): + with flow.scope.namespace("mixed_{}".format(index)): + with flow.scope.namespace("branch1x1"): + branch1x1 = _conv2d_layer( + "conv0", in_blob, filters=192, kernel_size=1, strides=1, padding="SAME" + ) + with flow.scope.namespace("branch7x7"): + branch7x7_1 = _conv2d_layer( + "conv0", + in_blob, + filters=filters, + kernel_size=1, + strides=1, + padding="SAME", + ) + branch7x7_2 = _conv2d_layer( + "conv1", + branch7x7_1, + filters=filters, + kernel_size=[1, 7], + strides=1, + padding="SAME", + ) + branch7x7_3 = _conv2d_layer( + "conv2", + branch7x7_2, + filters=192, + kernel_size=[7, 1], + strides=[1, 1], + padding="SAME", + ) + with flow.scope.namespace("branch7x7dbl"): + branch7x7dbl_1 = _conv2d_layer( + "conv0", + in_blob, + filters=filters, + kernel_size=1, + strides=1, + padding="SAME", + ) + branch7x7dbl_2 = _conv2d_layer( + "conv1", + branch7x7dbl_1, + filters=filters, + kernel_size=[7, 1], + strides=1, + padding="SAME", + ) + branch7x7dbl_3 = _conv2d_layer( + "conv2", + branch7x7dbl_2, + filters=filters, + kernel_size=[1, 7], + strides=1, + padding="SAME", + ) + branch7x7dbl_4 = _conv2d_layer( + "conv3", + branch7x7dbl_3, + filters=filters, + kernel_size=[7, 1], + strides=1, + padding="SAME", + ) + branch7x7dbl_5 = _conv2d_layer( + "conv4", + branch7x7dbl_4, + filters=192, + kernel_size=[1, 7], + strides=1, + padding="SAME", + ) + with flow.scope.namespace("branch_pool"): + branch_pool_1 = flow.nn.avg_pool2d( + in_blob, + ksize=3, + strides=1, + padding="SAME", + data_format="NCHW", + name="pool", + ) + branch_pool_2 = _conv2d_layer( + "conv", + branch_pool_1, + filters=192, + kernel_size=[1, 1], + strides=1, + padding="SAME", + ) + + inceptionC_bn = [] + inceptionC_bn.append(branch1x1) + inceptionC_bn.append(branch7x7_3) + inceptionC_bn.append(branch7x7dbl_5) + inceptionC_bn.append(branch_pool_2) + mixed_concat = flow.concat(values=inceptionC_bn, axis=1, name="concat") + + return mixed_concat + + +def InceptionD(in_blob, index): + with flow.scope.namespace("mixed_{}".format(index)): + with flow.scope.namespace("branch3x3"): + branch3x3_1 = _conv2d_layer( + "conv0", in_blob, filters=192, kernel_size=1, strides=1, padding="SAME" + ) + branch3x3_2 = _conv2d_layer( + "conv1", + branch3x3_1, + filters=320, + kernel_size=3, + strides=2, + padding="VALID", + ) + with flow.scope.namespace("branch7x7x3"): + branch7x7x3_1 = _conv2d_layer( + "conv0", in_blob, filters=192, kernel_size=1, strides=1, padding="SAME" + ) + branch7x7x3_2 = _conv2d_layer( + "conv1", + branch7x7x3_1, + filters=192, + kernel_size=[1, 7], + strides=1, + padding="SAME", + ) + branch7x7x3_3 = _conv2d_layer( + "conv2", + branch7x7x3_2, + filters=192, + kernel_size=[7, 1], + strides=1, + padding="SAME", + ) + branch7x7x3_4 = _conv2d_layer( + "conv3", + branch7x7x3_3, + filters=192, + kernel_size=3, + strides=2, + padding="VALID", + ) + with flow.scope.namespace("branch_pool"): + branch_pool = flow.nn.max_pool2d( + in_blob, + ksize=3, + strides=2, + padding="VALID", + data_format="NCHW", + name="pool", + ) + + inceptionD_bn = [] + inceptionD_bn.append(branch3x3_2) + 
inceptionD_bn.append(branch7x7x3_4) + inceptionD_bn.append(branch_pool) + + mixed_concat = flow.concat(values=inceptionD_bn, axis=1, name="concat") + + return mixed_concat + + +def InceptionE(in_blob, index): + with flow.scope.namespace("mixed_{}".format(index)): + with flow.scope.namespace("branch1x1"): + branch1x1 = _conv2d_layer( + "conv0", in_blob, filters=320, kernel_size=1, strides=1, padding="SAME" + ) + with flow.scope.namespace("branch3x3"): + branch3x3_1 = _conv2d_layer( + "conv0", in_blob, filters=384, kernel_size=1, strides=1, padding="SAME" + ) + branch3x3_2 = _conv2d_layer( + "conv1", + branch3x3_1, + filters=384, + kernel_size=[1, 3], + strides=1, + padding="SAME", + ) + branch3x3_3 = _conv2d_layer( + "conv2", + branch3x3_1, + filters=384, + kernel_size=[3, 1], + strides=[1, 1], + padding="SAME", + ) + inceptionE_1_bn = [] + inceptionE_1_bn.append(branch3x3_2) + inceptionE_1_bn.append(branch3x3_3) + concat_branch3x3 = flow.concat( + values=inceptionE_1_bn, axis=1, name="concat" + ) + with flow.scope.namespace("branch3x3dbl"): + branch3x3dbl_1 = _conv2d_layer( + "conv0", in_blob, filters=448, kernel_size=1, strides=1, padding="SAME" + ) + branch3x3dbl_2 = _conv2d_layer( + "conv1", + branch3x3dbl_1, + filters=384, + kernel_size=3, + strides=1, + padding="SAME", + ) + branch3x3dbl_3 = _conv2d_layer( + "conv2", + branch3x3dbl_2, + filters=384, + kernel_size=[1, 3], + strides=1, + padding="SAME", + ) + branch3x3dbl_4 = _conv2d_layer( + "conv3", + branch3x3dbl_2, + filters=384, + kernel_size=[3, 1], + strides=1, + padding="SAME", + ) + inceptionE_2_bn = [] + inceptionE_2_bn.append(branch3x3dbl_3) + inceptionE_2_bn.append(branch3x3dbl_4) + concat_branch3x3dbl = flow.concat( + values=inceptionE_2_bn, axis=1, name="concat" + ) + with flow.scope.namespace("branch_pool"): + branch_pool_1 = flow.nn.avg_pool2d( + in_blob, + ksize=3, + strides=1, + padding="SAME", + data_format="NCHW", + name="pool", + ) + branch_pool_2 = _conv2d_layer( + "conv", + branch_pool_1, + filters=192, + kernel_size=[1, 1], + strides=1, + padding="SAME", + ) + + inceptionE_total_bn = [] + inceptionE_total_bn.append(branch1x1) + inceptionE_total_bn.append(concat_branch3x3) + inceptionE_total_bn.append(concat_branch3x3dbl) + inceptionE_total_bn.append(branch_pool_2) + + concat_total = flow.concat(values=inceptionE_total_bn, axis=1, name="concat") + + return concat_total + + +def inceptionv3(images, labels, trainable=True): + conv0 = _conv2d_layer( + "conv0", images, filters=32, kernel_size=3, strides=2, padding="VALID" + ) + conv1 = _conv2d_layer( + "conv1", conv0, filters=32, kernel_size=3, strides=1, padding="VALID" + ) + conv2 = _conv2d_layer( + "conv2", conv1, filters=64, kernel_size=3, strides=1, padding="SAME" + ) + pool1 = flow.nn.max_pool2d( + conv2, ksize=3, strides=2, padding="VALID", data_format="NCHW", name="pool1" + ) + conv3 = _conv2d_layer( + "conv3", pool1, filters=80, kernel_size=1, strides=1, padding="VALID" + ) + conv4 = _conv2d_layer( + "conv4", conv3, filters=192, kernel_size=3, strides=1, padding="VALID" + ) + pool2 = flow.nn.max_pool2d( + conv4, ksize=3, strides=2, padding="VALID", data_format="NCHW", name="pool2" + ) + + # mixed_0 ~ mixed_2 + mixed_0 = InceptionA(pool2, 0) + mixed_1 = InceptionA(mixed_0, 1) + mixed_2 = InceptionA(mixed_1, 2) + + # mixed_3 + mixed_3 = InceptionB(mixed_2, 3) + + # mixed_4 ~ mixed_7 + mixed_4 = InceptionC(mixed_3, 4, 128) + mixed_5 = InceptionC(mixed_4, 5, 160) + mixed_6 = InceptionC(mixed_5, 6, 160) + mixed_7 = InceptionC(mixed_6, 7, 192) + + # mixed_8 + 
mixed_8 = InceptionD(mixed_7, 8) + + # mixed_9 ~ mixed_10 + mixed_9 = InceptionE(mixed_8, 9) + mixed_10 = InceptionE(mixed_9, 10) + + # pool3 + pool3 = flow.nn.avg_pool2d( + mixed_10, ksize=8, strides=1, padding="VALID", data_format="NCHW", name="pool3" + ) + + with flow.scope.namespace("logits"): + pool3 = flow.reshape(pool3, [pool3.shape[0], -1]) + # TODO: Need to transpose weight when converting model from TF to OF if + # you want to use layers.dense interface. + # fc1 = flow.layers.dense( + # pool3, + # 1001, + # activation=None, + # use_bias=False, + # kernel_initializer=flow.truncated_normal(0.816496580927726), + # bias_initializer=flow.constant_initializer(), + # name="fc1", + # ) + weight = flow.get_variable( + "fc1-weight", + shape=(pool3.shape[1], 1001), + dtype=flow.float, + initializer=flow.truncated_normal(0.816496580927726), + model_name="weight", + ) + bias = flow.get_variable( + "fc1-bias", + shape=(1001,), + dtype=flow.float, + initializer=flow.constant_initializer(), + model_name="bias", + ) + fc1 = flow.matmul(pool3, weight) + fc1 = flow.nn.bias_add(fc1, bias) + + return fc1 diff --git a/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/model_util.py b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/model_util.py new file mode 100644 index 0000000000000000000000000000000000000000..722832555a1e355e40e4387f038c25e09a863a6e --- /dev/null +++ b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/model_util.py @@ -0,0 +1,60 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from oneflow.compatible import single_client as flow + + +def conv2d_layer( + name, + input, + filters, + kernel_size=3, + strides=1, + padding="SAME", + data_format="NCHW", + dilation_rate=1, + activation="Relu", + use_bias=True, + weight_initializer=flow.random_uniform_initializer(), + bias_initializer=flow.constant_initializer(), +): + weight_shape = (filters, input.shape[1], kernel_size, kernel_size) + weight = flow.get_variable( + name + "-weight", + shape=weight_shape, + dtype=input.dtype, + initializer=weight_initializer, + ) + output = flow.nn.conv2d( + input, weight, strides, padding, None, data_format, dilation_rate, name=name + ) + if use_bias: + bias = flow.get_variable( + name + "-bias", + shape=(filters,), + dtype=input.dtype, + initializer=bias_initializer, + ) + output = flow.nn.bias_add(output, bias, data_format) + + if activation is not None: + if activation == "Relu": + output = flow.math.relu(output) + else: + raise NotImplementedError + + return output diff --git a/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/of_cnn_benchmarks.py b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/of_cnn_benchmarks.py new file mode 100755 index 0000000000000000000000000000000000000000..526d887de4e8320806f75fc0288b8cf5b920317f --- /dev/null +++ b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/of_cnn_benchmarks.py @@ -0,0 +1,289 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import, division, print_function + +import argparse +import os +import time +from datetime import datetime + +import alexnet_model +import benchmark_util +import data_loader +from oneflow.compatible import single_client as flow +import resnet_model +import vgg_model + +parser = argparse.ArgumentParser(description="flags for cnn benchmark") + +# resouce +parser.add_argument("--gpu_num_per_node", type=int, default=1, required=False) +parser.add_argument("--node_num", type=int, default=1) +parser.add_argument( + "--node_list", + type=str, + default=None, + required=False, + help="nodes' IP address, split by comma", +) + +# train +parser.add_argument( + "--model", type=str, default="vgg16", required=False, help="vgg16 or resnet50" +) +parser.add_argument("--batch_size_per_device", type=int, default=8, required=False) +parser.add_argument("--learning_rate", type=float, default=1e-4, required=False) +parser.add_argument( + "--optimizer", type=str, default="sgd", required=False, help="sgd, adam, momentum" +) +parser.add_argument( + "--weight_l2", + type=float, + default=None, + required=False, + help="weight decay parameter", +) +parser.add_argument( + "--iter_num", type=int, default=10, required=False, help="total iterations to run" +) +parser.add_argument( + "--skip_iter_num", + type=int, + default=0, + required=False, + help="number of skipping iterations for benchmark purpose.", +) +parser.add_argument( + "--data_dir", type=str, default=None, required=False, help="dataset directory" +) +parser.add_argument( + "--data_part_num", + type=int, + default=32, + required=False, + help="data part number in dataset", +) +parser.add_argument( + "--gpu_image_decoder", + type=bool, + default=False, + required=False, + help="Whether to use use ImageDecoderRandomCropResize.", +) +parser.add_argument( + "--image_size", type=int, default=228, required=False, help="image size" +) + +# log and resore/save +parser.add_argument( + "--loss_print_every_n_iter", + type=int, + default=1, + required=False, + help="print loss every n iteration", +) +parser.add_argument( + "--model_save_every_n_iter", + type=int, + default=200, + required=False, + help="save model every n iteration", +) +parser.add_argument( + "--model_save_dir", + type=str, + default="./output/model_save-{}".format( + str(datetime.now().strftime("%Y-%m-%d-%H:%M:%S")) + ), + required=False, + help="model save directory", +) +parser.add_argument( + "--save_last_snapshot", + type=bool, + default=False, + required=False, + help="save model snapshot for last iteration", +) +parser.add_argument( + "--model_load_dir", + type=str, + default=None, + required=False, + help="model load directory", +) +parser.add_argument( + "--log_dir", + type=str, + default="./output", + required=False, + help="log info save directory", +) +parser.add_argument( + "--enable_auto_mixed_precision", + type=bool, + default=False, + required=False, + help="automatically change the float net into mixed precision net", +) + +args = parser.parse_args() + + +model_dict = { + "resnet50": resnet_model.resnet50, + "vgg16": vgg_model.vgg16, + "alexnet": alexnet_model.alexnet, +} + +# "warmup_conf": {"linear_conf": {"warmup_batches":10000, "start_multiplier":0}}, + +func_config = flow.FunctionConfig() +func_config.default_distribute_strategy(flow.scope.consistent_view()) +func_config.default_data_type(flow.float) +func_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision) + +if args.weight_l2: + func_config.train.weight_l2(args.weight_l2) + 
+flow.config.gpu_device_num(args.gpu_num_per_node) + + +def set_up_optimizer(loss, args): + # set up optimizer + loss_scale_policy = None + if args.enable_auto_mixed_precision: + loss_scale_policy = flow.optimizer.loss_scale.dynamic_loss_scale( + increment_period=2000 + ) + if args.optimizer == "sgd": + print("Optimizer: SGD") + flow.optimizer.SGD( + flow.optimizer.PiecewiseConstantScheduler([], [args.learning_rate]), + loss_scale_policy=loss_scale_policy, + ).minimize(loss) + elif args.optimizer == "momentum": + print("Optimizer: Momentum") + flow.optimizer.SGD( + flow.optimizer.PiecewiseConstantScheduler([], [args.learning_rate]), + momentum=0.9, + loss_scale_policy=loss_scale_policy, + ).minimize(loss) + elif args.optimizer == "adam": + print("Optimizer: Adam") + flow.optimizer.Adam( + flow.optimizer.PiecewiseConstantScheduler([], [args.learning_rate]), + beta1=0.9, + loss_scale_policy=loss_scale_policy, + ).minimize(loss) + + +@flow.global_function(func_config) +def TrainNet(): + + total_device_num = args.node_num * args.gpu_num_per_node + batch_size = total_device_num * args.batch_size_per_device + + if args.data_dir: + assert os.path.exists(args.data_dir) + print("Loading data from {}".format(args.data_dir)) + (labels, images) = data_loader.load_imagenet( + args.data_dir, + args.image_size, + batch_size, + args.data_part_num, + args.gpu_image_decoder, + ) + else: + print("Loading synthetic data.") + (labels, images) = data_loader.load_synthetic(args.image_size, batch_size) + + logits = model_dict[args.model](images) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + set_up_optimizer(loss, args) + return loss + + +def main(): + print("=".ljust(66, "=")) + print( + "Running {}: num_gpu_per_node = {}, num_nodes = {}.".format( + args.model, args.gpu_num_per_node, args.node_num + ) + ) + print("=".ljust(66, "=")) + for arg in vars(args): + print("{} = {}".format(arg, getattr(args, arg))) + print("-".ljust(66, "-")) + print("Time stamp: {}".format(str(datetime.now().strftime("%Y-%m-%d-%H:%M:%S")))) + flow.env.log_dir(args.log_dir) + + if args.node_num > 1: + nodes = [] + for n in args.node_list.strip().split(","): + addr_dict = {} + addr_dict["addr"] = n + nodes.append(addr_dict) + + flow.env.machine(nodes) + + check_point = flow.train.CheckPoint() + if args.model_load_dir: + assert os.path.isdir(args.model_load_dir) + print("Restoring model from {}.".format(args.model_load_dir)) + check_point.load(args.model_load_dir) + else: + print("Init model on demand.") + check_point.init() + + total_batch_size = ( + args.node_num * args.gpu_num_per_node * args.batch_size_per_device + ) + speedometer = benchmark_util.CNNSpeedometer() + start_time = time.time() + + for step in range(args.skip_iter_num + args.iter_num): + cb = speedometer.speedometer_cb( + step, + start_time, + total_batch_size, + args.skip_iter_num, + args.iter_num, + args.loss_print_every_n_iter, + ) + TrainNet().async_get(cb) + + if (step + 1) % args.model_save_every_n_iter == 0: + if not os.path.exists(args.model_save_dir): + os.makedirs(args.model_save_dir) + snapshot_save_path = os.path.join( + args.model_save_dir, "snapshot_%d" % (step + 1) + ) + print("Saving model to {}.".format(snapshot_save_path)) + check_point.save(snapshot_save_path) + + if args.save_last_snapshot: + snapshot_save_path = os.path.join(args.model_save_dir, "last_snapshot") + if not os.path.exists(snapshot_save_path): + os.makedirs(snapshot_save_path) + print("Saving model to {}.".format(snapshot_save_path)) 
+            check_point.save(snapshot_save_path)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/of_cnn_infer_benchmarks.py b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/of_cnn_infer_benchmarks.py
new file mode 100755
index 0000000000000000000000000000000000000000..9d5b7839d3634f1b004cb7ba4a42538b89342dcc
--- /dev/null
+++ b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/of_cnn_infer_benchmarks.py
@@ -0,0 +1,237 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import, division, print_function
+
+import argparse
+import os
+import time
+from datetime import datetime
+
+import alexnet_model
+import data_loader
+import inceptionv3_model
+from oneflow.compatible import single_client as flow
+import resnet_model
+import vgg_model
+
+parser = argparse.ArgumentParser(description="flags for cnn benchmark")
+
+# resource
+parser.add_argument("--gpu_num_per_node", type=int, default=1, required=False)
+parser.add_argument("--node_num", type=int, default=1)
+parser.add_argument(
+    "--node_list",
+    type=str,
+    default=None,
+    required=False,
+    help="nodes' IP addresses, separated by comma",
+)
+
+# train
+parser.add_argument(
+    "--model",
+    type=str,
+    default="vgg16",
+    required=False,
+    help="vgg16, resnet50, inceptionv3 or alexnet",
+)
+parser.add_argument("--batch_size_per_device", type=int, default=8, required=False)
+parser.add_argument(
+    "--iter_num", type=int, default=10, required=False, help="total iterations to run"
+)
+parser.add_argument(
+    "--warmup_iter_num",
+    type=int,
+    default=0,
+    required=False,
+    help="number of warm-up iterations to run",
+)
+parser.add_argument(
+    "--data_dir", type=str, default=None, required=False, help="dataset directory"
+)
+parser.add_argument(
+    "--data_part_num",
+    type=int,
+    default=32,
+    required=False,
+    help="data part number in dataset",
+)
+parser.add_argument(
+    "--image_size", type=int, default=228, required=False, help="image size"
+)
+
+parser.add_argument(
+    "--use_tensorrt",
+    dest="use_tensorrt",
+    action="store_true",
+    default=False,
+    required=False,
+    help="inference with tensorrt",
+)
+parser.add_argument(
+    "--use_xla_jit",
+    dest="use_xla_jit",
+    action="store_true",
+    default=False,
+    required=False,
+    help="inference with xla jit",
+)
+
+parser.add_argument(
+    "--precision",
+    type=str,
+    default="float32",
+    required=False,
+    help="inference with low precision",
+)
+
+# log and restore/save
+parser.add_argument(
+    "--print_every_n_iter",
+    type=int,
+    default=1,
+    required=False,
+    help="print log every n iterations",
+)
+parser.add_argument(
+    "--model_load_dir",
+    type=str,
+    default=None,
+    required=False,
+    help="model load directory",
+)
+parser.add_argument(
+    "--log_dir",
+    type=str,
+    default="./output",
+    required=False,
+    help="log info save directory",
+)
+
+args = parser.parse_args()
+
+model_dict = {
+    "resnet50": resnet_model.resnet50,
+    "inceptionv3": inceptionv3_model.inceptionv3,
+    "vgg16": vgg_model.vgg16,
+    "alexnet": alexnet_model.alexnet,
+}
+
+func_config = flow.FunctionConfig()
+func_config.default_data_type(flow.float)
+
+flow.config.gpu_device_num(args.gpu_num_per_node)
+if args.use_tensorrt:
+    func_config.use_tensorrt()
+if args.use_xla_jit:
+    func_config.use_xla_jit()
+
+if args.precision == "float16":
+    if not args.use_tensorrt:
+        func_config.enable_auto_mixed_precision()
+    else:
+        func_config.tensorrt.use_fp16()
+
+
+@flow.global_function(func_config)
+def InferenceNet():
+
+    total_device_num = args.node_num * args.gpu_num_per_node
+    batch_size = total_device_num * args.batch_size_per_device
+
+    if args.data_dir:
+        assert os.path.exists(args.data_dir)
+        print("Loading data from {}".format(args.data_dir))
+        (labels, images) = data_loader.load_imagenet(
+            args.data_dir, args.image_size, batch_size, args.data_part_num
+        )
+    else:
+        print("Loading synthetic data.")
+        (labels, images) = data_loader.load_synthetic(args.image_size, batch_size)
+
+    logits = model_dict[args.model](images)
+    softmax = flow.nn.softmax(logits)
+    return softmax
+
+
+def main():
+    print("=".ljust(66, "="))
+    print(
+        "Running {}: num_gpu_per_node = {}, num_nodes = {}.".format(
+            args.model, args.gpu_num_per_node, args.node_num
+        )
+    )
+    print("=".ljust(66, "="))
+    for arg in vars(args):
+        print("{} = {}".format(arg, getattr(args, arg)))
+    print("-".ljust(66, "-"))
+    print("Time stamp: {}".format(str(datetime.now().strftime("%Y-%m-%d-%H:%M:%S"))))
+
+    flow.env.log_dir(args.log_dir)
+
+    if args.node_num > 1:
+        nodes = []
+        for n in args.node_list.strip().split(","):
+            addr_dict = {}
+            addr_dict["addr"] = n
+            nodes.append(addr_dict)
+
+        flow.env.machine(nodes)
+
+    check_point = flow.train.CheckPoint()
+    if args.model_load_dir:
+        assert os.path.isdir(args.model_load_dir)
+        print("Restoring model from {}.".format(args.model_load_dir))
+        check_point.load(args.model_load_dir)
+    else:
+        print("Init model on demand.")
+        check_point.init()
+
+    # warm up
+    print("Running warm-up for {} iterations.".format(args.warmup_iter_num))
+    for step in range(args.warmup_iter_num):
+        predictions = InferenceNet().get()
+
+    main.total_time = 0.0
+    main.batch_size = args.node_num * args.gpu_num_per_node * args.batch_size_per_device
+    main.start_time = time.time()
+
+    def create_callback(step):
+        def callback(predictions):
+            if step % args.print_every_n_iter == 0:
+                cur_time = time.time()
+                duration = cur_time - main.start_time
+                main.total_time += duration
+                main.start_time = cur_time
+                images_per_sec = main.batch_size / duration
+                print(
+                    "iter {}, speed: {:.3f}(sec/batch), {:.3f}(images/sec)".format(
+                        step, duration, images_per_sec
+                    )
+                )
+            if step == args.iter_num - 1:
+                avg_img_per_sec = main.batch_size * args.iter_num / main.total_time
+                print("-".ljust(66, "-"))
+                print("average speed: {:.3f}(images/sec)".format(avg_img_per_sec))
+                print("-".ljust(66, "-"))
+
+        return callback
+
+    for step in range(args.iter_num):
+        InferenceNet().async_get(create_callback(step))
+        # predictions = InferenceNet().get()
+        # create_callback(step)(predictions)
+        # print(predictions)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/resnet_model.py b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/resnet_model.py
new file mode 100755
index 0000000000000000000000000000000000000000..01e6069c7689f3eccc96810d2b2eb6024c111c4b
--- /dev/null
+++ b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/resnet_model.py
@@ -0,0 +1,155 @@
+""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import, division, print_function + +from oneflow.compatible import single_client as flow + +BLOCK_COUNTS = [3, 4, 6, 3] +BLOCK_FILTERS = [256, 512, 1024, 2048] +BLOCK_FILTERS_INNER = [64, 128, 256, 512] + + +def _conv2d( + name, + input, + filters, + kernel_size, + strides=1, + padding="SAME", + data_format="NCHW", + dilations=1, + trainable=True, + weight_initializer=flow.variance_scaling_initializer(data_format="NCHW"), +): + weight = flow.get_variable( + name + "-weight", + shape=(filters, input.shape[1], kernel_size, kernel_size), + dtype=input.dtype, + initializer=weight_initializer, + trainable=trainable, + ) + return flow.nn.conv2d( + input, weight, strides, padding, None, data_format, dilations, name=name + ) + + +def _batch_norm(inputs, name=None, trainable=True): + return flow.layers.batch_normalization( + inputs=inputs, + axis=1, + momentum=0.997, + epsilon=1.001e-5, + center=True, + scale=True, + trainable=trainable, + name=name, + ) + + +def conv2d_affine(input, name, filters, kernel_size, strides, activation=None): + # input data_format must be NCHW, cannot check now + padding = "SAME" if strides > 1 or kernel_size > 1 else "VALID" + output = _conv2d(name, input, filters, kernel_size, strides, padding) + output = _batch_norm(output, name + "_bn") + if activation == "Relu": + output = flow.math.relu(output) + + return output + + +def bottleneck_transformation(input, block_name, filters, filters_inner, strides): + a = conv2d_affine( + input, block_name + "_branch2a", filters_inner, 1, 1, activation="Relu", + ) + + b = conv2d_affine( + a, block_name + "_branch2b", filters_inner, 3, strides, activation="Relu", + ) + + c = conv2d_affine(b, block_name + "_branch2c", filters, 1, 1) + + return c + + +def residual_block(input, block_name, filters, filters_inner, strides_init): + if strides_init != 1 or block_name == "res2_0": + shortcut = conv2d_affine( + input, block_name + "_branch1", filters, 1, strides_init + ) + else: + shortcut = input + + bottleneck = bottleneck_transformation( + input, block_name, filters, filters_inner, strides_init + ) + + return flow.math.relu(bottleneck + shortcut) + + +def residual_stage(input, stage_name, counts, filters, filters_inner, stride_init=2): + output = input + for i in range(counts): + block_name = "%s_%d" % (stage_name, i) + output = residual_block( + output, block_name, filters, filters_inner, stride_init if i == 0 else 1, + ) + + return output + + +def resnet_conv_x_body(input, on_stage_end=lambda x: x): + output = input + for i, (counts, filters, filters_inner) in enumerate( + zip(BLOCK_COUNTS, BLOCK_FILTERS, BLOCK_FILTERS_INNER) + ): + stage_name = "res%d" % (i + 2) + output = residual_stage( + output, stage_name, counts, filters, filters_inner, 1 if i == 0 else 2, + ) + on_stage_end(output) + + return output + + +def resnet_stem(input): + conv1 = _conv2d("conv1", input, 64, 7, 2) + conv1_bn = 
flow.math.relu(_batch_norm(conv1, "conv1_bn")) + pool1 = flow.nn.max_pool2d( + conv1_bn, ksize=3, strides=2, padding="VALID", data_format="NCHW", name="pool1", + ) + return pool1 + + +def resnet50(images, trainable=True): + + with flow.scope.namespace("Resnet"): + stem = resnet_stem(images) + body = resnet_conv_x_body(stem, lambda x: x) + pool5 = flow.nn.avg_pool2d( + body, ksize=7, strides=1, padding="VALID", data_format="NCHW", name="pool5", + ) + + fc1001 = flow.layers.dense( + flow.reshape(pool5, (pool5.shape[0], -1)), + units=1001, + use_bias=True, + kernel_initializer=flow.xavier_uniform_initializer(), + bias_initializer=flow.zeros_initializer(), + trainable=trainable, + name="fc1001", + ) + + return fc1001 diff --git a/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/vgg_model.py b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/vgg_model.py new file mode 100755 index 0000000000000000000000000000000000000000..257c30ad5555d01fed1215f72a5f20aca4b1a636 --- /dev/null +++ b/oneflow/compatible_single_client_python/benchmarks/cnn_benchmark/vgg_model.py @@ -0,0 +1,104 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import, division, print_function + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.job import initializer_conf_pb2 as initializer_conf_util +from model_util import conv2d_layer + + +def _conv_block(in_blob, index, filters, conv_times): + conv_block = [] + conv_block.insert(0, in_blob) + for i in range(conv_times): + conv_i = conv2d_layer( + name="conv{}".format(index), + input=conv_block[i], + filters=filters, + kernel_size=3, + strides=1, + ) + conv_block.append(conv_i) + index += 1 + + return conv_block + + +def vgg16(images, trainable=True): + conv1 = _conv_block(images, 0, 64, 2) + pool1 = flow.nn.max_pool2d(conv1[-1], 2, 2, "VALID", "NCHW", name="pool1") + + conv2 = _conv_block(pool1, 2, 128, 2) + pool2 = flow.nn.max_pool2d(conv2[-1], 2, 2, "VALID", "NCHW", name="pool2") + + conv3 = _conv_block(pool2, 4, 256, 3) + pool3 = flow.nn.max_pool2d(conv3[-1], 2, 2, "VALID", "NCHW", name="pool3") + + conv4 = _conv_block(pool3, 7, 512, 3) + pool4 = flow.nn.max_pool2d(conv4[-1], 2, 2, "VALID", "NCHW", name="pool4") + + conv5 = _conv_block(pool4, 10, 512, 3) + pool5 = flow.nn.max_pool2d(conv5[-1], 2, 2, "VALID", "NCHW", name="pool5") + + def _get_kernel_initializer(): + kernel_initializer = initializer_conf_util.InitializerConf() + kernel_initializer.truncated_normal_conf.std = 0.816496580927726 + return kernel_initializer + + def _get_bias_initializer(): + bias_initializer = initializer_conf_util.InitializerConf() + bias_initializer.constant_conf.value = 0.0 + return bias_initializer + + pool5 = flow.reshape(pool5, [pool5.shape[0], -1]) + + fc6 = flow.layers.dense( + inputs=pool5, + units=4096, + activation=flow.math.relu, + use_bias=True, + kernel_initializer=_get_kernel_initializer(), + 
bias_initializer=_get_bias_initializer(), + trainable=trainable, + name="fc1", + ) + + fc6 = flow.nn.dropout(fc6, rate=0.5) + + fc7 = flow.layers.dense( + inputs=fc6, + units=4096, + activation=flow.math.relu, + use_bias=True, + kernel_initializer=_get_kernel_initializer(), + bias_initializer=_get_bias_initializer(), + trainable=trainable, + name="fc2", + ) + fc7 = flow.nn.dropout(fc7, rate=0.5) + + fc8 = flow.layers.dense( + inputs=fc7, + units=1001, + use_bias=True, + kernel_initializer=_get_kernel_initializer(), + bias_initializer=_get_bias_initializer(), + trainable=trainable, + name="fc_final", + ) + + return fc8 diff --git a/oneflow/compatible_single_client_python/benchmarks/coco_data_load/coco_data_loader.py b/oneflow/compatible_single_client_python/benchmarks/coco_data_load/coco_data_loader.py new file mode 100644 index 0000000000000000000000000000000000000000..3497a2cfdd1772b85eb928cc3d65240b69130584 --- /dev/null +++ b/oneflow/compatible_single_client_python/benchmarks/coco_data_load/coco_data_loader.py @@ -0,0 +1,168 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +import math +import pandas as pd +import time + + +class COCODataLoadConfig(object): + def __init__(self): + self.annotation_file = ( + "/dataset/mscoco_2017/annotations/instances_train2017.json" + ) + self.image_dir = "/dataset/mscoco_2017/train2017" + # self.annotation_file = "/dataset/mscoco_2017/annotations/instances_val2017.json" + # self.image_dir = "/dataset/mscoco_2017/val2017" + self.shuffle_after_epoch = True + self.stride_partition = False + self.batch_size = 2 + self.target_size = 800 + self.max_size = 1333 + self.image_align_size = 32 + self.image_normal_std = (1.0, 1.0, 1.0) + self.image_normal_mean = (102.9801, 115.9465, 122.7717) + self.max_num_objs = 512 + + +def roundup(x, align): + return int(math.ceil(x / float(align)) * align) + + +def coco_data_load(cfg, machine_id, nrank): + with flow.scope.placement("cpu", "{}:0-{}".format(machine_id, nrank - 1)): + ( + image, + image_id, + image_size, + bbox, + label, + segm_poly, + segm_poly_index, + ) = flow.data.coco_reader( + annotation_file=cfg.annotation_file, + image_dir=cfg.image_dir, + batch_size=cfg.batch_size, + shuffle=cfg.shuffle_after_epoch, + stride_partition=cfg.stride_partition, + name="coco_reader", + ) + # image decode + image = flow.image.decode(image, dtype=flow.float) + # image target resize + aligned_target_size = roundup(cfg.target_size, cfg.image_align_size) + aligned_max_size = roundup(cfg.max_size, cfg.image_align_size) + image, new_size, scale = flow.image.target_resize( + image, target_size=aligned_target_size, max_size=aligned_max_size + ) + bbox = flow.detection.object_bbox_scale(bbox, scale) + segm_poly = flow.detection.object_segmentation_polygon_scale(segm_poly, scale) + # random flip + flip_code = flow.random.coin_flip(cfg.batch_size) + image = flow.image.flip(image, flip_code) + bbox = flow.detection.object_bbox_flip(bbox, 
new_size, flip_code) + segm_poly = flow.detection.object_segmentation_polygon_flip( + segm_poly, new_size, flip_code + ) + # image normalize + image = flow.image.normalize(image, cfg.image_normal_std, cfg.image_normal_mean) + # batch collate + image = flow.image.batch_align( + image, + shape=(aligned_target_size, aligned_max_size, 3), + dtype=flow.float, + alignment=cfg.image_align_size, + ) + gt_bbox = flow.tensor_buffer_to_list_of_tensors( + bbox, (cfg.max_num_objs, 4), flow.float, True + ) + gt_label = flow.tensor_buffer_to_list_of_tensors( + label, (cfg.max_num_objs,), flow.int32, True + ) + segm_mask = flow.detection.object_segmentation_polygon_to_mask( + segm_poly, segm_poly_index, new_size + ) + gt_mask = flow.tensor_buffer_to_list_of_tensors( + segm_mask, + (cfg.max_num_objs, aligned_target_size, aligned_max_size), + flow.int8, + True, + ) + + return { + "image": image, + "image_size": new_size, + "gt_bbox": list(gt_bbox), + "gt_label": list(gt_label), + "gt_mask": list(gt_mask), + } + + +def _make_data_load_fn(): + flow.clear_default_session() + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + func_config.default_distribute_strategy(flow.scope.consistent_view()) + + cfg = COCODataLoadConfig() + + @flow.global_function(func_config) + def data_load_fn(): + return coco_data_load(cfg, 0, 1) + + return data_load_fn + + +def _benchmark(iter_num, drop_first_iters, verbose=False): + flow.env.init() + data_loader = _make_data_load_fn() + s = pd.Series([], name="time_elapsed", dtype="float32") + timestamp = time.perf_counter() + for i in range(iter_num): + batch = data_loader().get() + image = batch["image"] + image_size = batch["image_size"] + gt_bbox = batch["gt_bbox"] + gt_label = batch["gt_label"] + gt_mask = batch["gt_mask"] + + cur = time.perf_counter() + s[i] = cur - timestamp + timestamp = cur + + if verbose: + print("==== iter {} ====".format(i)) + print( + "image: {}\n".format(image.numpy_list()[0].shape), + image.numpy_list()[0], + ) + print( + "image_size: {}\n".format(image_size.numpy().shape), image_size.numpy(), + ) + print("gt_bbox:\n", [x.numpy_list()[0] for x in gt_bbox]) + print("gt_label:\n", [x.numpy_list()[0] for x in gt_label]) + print("gt_mask:\n", [x.numpy_list()[0] for x in gt_mask]) + + print( + "mean time elapsed over {} iters (first {} iters dropped): {}".format( + iter_num, drop_first_iters, s[drop_first_iters:].mean() + ) + ) + s.to_csv("coco_data_benchmark.csv", header=True) + + +if __name__ == "__main__": + _benchmark(500, 10) diff --git a/oneflow/compatible_single_client_python/contrib/__init__.py b/oneflow/compatible_single_client_python/contrib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..fb5daf782a0d43a4e7d17d9e2462194ac5658caa --- /dev/null +++ b/oneflow/compatible_single_client_python/contrib/__init__.py @@ -0,0 +1,16 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from .tensorrt import * diff --git a/oneflow/compatible_single_client_python/contrib/tensorrt/__init__.py b/oneflow/compatible_single_client_python/contrib/tensorrt/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/contrib/tensorrt/tensorrt_api.py b/oneflow/compatible_single_client_python/contrib/tensorrt/tensorrt_api.py new file mode 100644 index 0000000000000000000000000000000000000000..2394aeadf108e682667f0c30e4c7b873ab94714d --- /dev/null +++ b/oneflow/compatible_single_client_python/contrib/tensorrt/tensorrt_api.py @@ -0,0 +1,36 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import traceback +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal + + +@oneflow_export("tensorrt.write_int8_calibration") +def write_int8_calibration(path): + try: + oneflow._oneflow_internal.WriteInt8Calibration(path) + except oneflow._oneflow_internal.exception.CompileOptionWrongException: + traceback.print_exc() + + +@oneflow_export("tensorrt.cache_int8_calibration") +def cache_int8_calibration(): + try: + oneflow._oneflow_internal.CacheInt8Calibration() + except oneflow._oneflow_internal.exception.CompileOptionWrongException: + traceback.print_exc() diff --git a/oneflow/compatible_single_client_python/deprecated/__init__.py b/oneflow/compatible_single_client_python/deprecated/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/deprecated/init_cluster_env.py b/oneflow/compatible_single_client_python/deprecated/init_cluster_env.py new file mode 100644 index 0000000000000000000000000000000000000000..9bcb94e217d39534a12fe526daa314e246bae300 --- /dev/null +++ b/oneflow/compatible_single_client_python/deprecated/init_cluster_env.py @@ -0,0 +1,67 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +import getpass +import os +import sys +import uuid +from tempfile import NamedTemporaryFile + +from google.protobuf import text_format as pbtxt +from oneflow.compatible_single_client_python.framework import env_util as env_util +from oneflow.core.job.env_pb2 import EnvProto +from oneflow.core.control.ctrl_bootstrap_pb2 import BootstrapConf +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import subprocess + + +@oneflow_export("deprecated.delete_worker_by_bootstrap") +def delete_worker_by_bootstrap(ssh_port=22) -> None: + ssh_port_arg = " -p {} ".format(ssh_port) + bootstrap_conf_list = env_util.global_ctrl_bootstrap_confs + assert isinstance(bootstrap_conf_list, list) + global _temp_run_dir + assert _temp_run_dir != "" + for bootstrap_conf in bootstrap_conf_list: + assert isinstance(bootstrap_conf, BootstrapConf) + if bootstrap_conf.rank == 0: + continue + ssh_prefix = ( + "ssh {} ".format(ssh_port_arg) + + getpass.getuser() + + "@" + + bootstrap_conf.host + + " " + ) + if os.getenv("ONEFLOW_WORKER_KEEP_LOG"): + print("worker log kept at: {}".format(bootstrap_conf.host), flush=True) + else: + _SystemCall(ssh_prefix + '"rm -r ' + _temp_run_dir + '"') + print("temp run dir removed at: {}".format(bootstrap_conf.host), flush=True) + + +@oneflow_export("deprecated.delete_worker_of_multi_process") +def delete_worker_of_multi_process(run_dir) -> None: + assert run_dir != "" + if os.getenv("ONEFLOW_WORKER_KEEP_LOG"): + print("worker log kept at localhost:" + run_dir, flush=True) + else: + os.system("rm -r " + run_dir) + print("temp run dir removed at localhost:" + run_dir, flush=True) + + +_temp_run_dir = "" diff --git a/oneflow/compatible_single_client_python/deprecated/initializer_util.py b/oneflow/compatible_single_client_python/deprecated/initializer_util.py new file mode 100644 index 0000000000000000000000000000000000000000..e33384f1b3832b830fd531bb31a83b991f434bee --- /dev/null +++ b/oneflow/compatible_single_client_python/deprecated/initializer_util.py @@ -0,0 +1,31 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from oneflow.core.common import data_type_pb2 as data_type_conf_util +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.job import initializer_conf_pb2 as initializer_conf_util +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + + +@oneflow_export("truncated_normal") +def truncated_normal_initializer( + stddev: float = 1.0, +) -> initializer_conf_util.InitializerConf: + initializer = initializer_conf_util.InitializerConf() + setattr(initializer.truncated_normal_conf, "std", float(stddev)) + + return initializer diff --git a/oneflow/compatible_single_client_python/eager/__init__.py b/oneflow/compatible_single_client_python/eager/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/eager/blob_register.py b/oneflow/compatible_single_client_python/eager/blob_register.py new file mode 100644 index 0000000000000000000000000000000000000000..169d15c660a5318fee8775c0b42c8482cc2af420 --- /dev/null +++ b/oneflow/compatible_single_client_python/eager/blob_register.py @@ -0,0 +1,36 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from contextlib import contextmanager + + +@contextmanager +def BnInOp2BlobObjectScope(blob_register, op_attribute): + bn_in_op2blob_object = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + for ibn in op_attribute.input_bns: + lbi = op_attribute.arg_signature.bn_in_op2lbi[ibn] + bn_in_op2blob_object[ibn] = blob_register.GetObject4BlobName( + "%s/%s" % (lbi.op_name, lbi.blob_name) + ) + yield bn_in_op2blob_object + for obn in op_attribute.output_bns: + lbi = op_attribute.arg_signature.bn_in_op2lbi[obn] + blob_register.SetObject4BlobName( + "%s/%s" % (lbi.op_name, lbi.blob_name), bn_in_op2blob_object[obn] + ) diff --git a/oneflow/compatible_single_client_python/eager/boxing_hob.py b/oneflow/compatible_single_client_python/eager/boxing_hob.py new file mode 100644 index 0000000000000000000000000000000000000000..f56534880f4db397aa58d5e9a4964efe679de17b --- /dev/null +++ b/oneflow/compatible_single_client_python/eager/boxing_hob.py @@ -0,0 +1,173 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.lib.core.high_order_bool import ( + bool_functor, +) +from oneflow.compatible_single_client_python.lib.core.high_order_bool import ( + hob_context_attr, +) +from oneflow.compatible_single_client_python.lib.core.high_order_bool import BoolFunctor +import oneflow._oneflow_internal + + +class BoxingHobContext(object): + def __init__(self, produced_blob_object, consumer_op_arg_parallel_attr): + self.produced_blob_object_ = produced_blob_object + self.consumer_op_arg_parallel_attr_ = consumer_op_arg_parallel_attr + self.composer2lhs_context = {} + self.composer2rhs_context = {} + self.composer2middle_op_arg_parallel_attr = {} + + @property + def produced_blob_object(self): + return self.produced_blob_object_ + + @property + def consumer_op_arg_parallel_attr(self): + return self.consumer_op_arg_parallel_attr_ + + +class ComposeHob(BoolFunctor): + def __init__( + self, lhs_hob, rhs_hob, get_middle_op_arg_parallel_attr, middle_verbose_str=None + ): + self.get_middle_op_arg_parallel_attr_ = get_middle_op_arg_parallel_attr + self.lhs_hob_ = lhs_hob + self.rhs_hob_ = rhs_hob + self.ctx_id2middle_op_arg_parallel_attr_ = {} + self.middle_verbose_str_ = middle_verbose_str + + def verbose_debug_str(self, ctx, display_result=True): + left_display = self.lhs_hob_.debug_str(self._GetLhsContext(ctx), display_result) + display_result = display_result and self.lhs_hob_(self._GetLhsContext(ctx)) + right_display = self.rhs_hob_.debug_str( + self._GetRhsContext(ctx), display_result + ) + return "%s -> %s" % (left_display, right_display) + + def __call__(self, ctx): + return self.lhs_hob_(self._GetLhsContext(ctx)) and self.rhs_hob_( + self._GetRhsContext(ctx) + ) + + def _GetLhsContext(self, ctx): + if self not in ctx.composer2lhs_context: + blob_object = oneflow._oneflow_internal.BlobObject( + ctx.produced_blob_object.object_id, + ctx.produced_blob_object.op_arg_parallel_attr, + ctx.produced_blob_object.op_arg_blob_attr, + ) + value = BoxingHobContext( + blob_object, self._GetMiddleOpArgParallelAttr(ctx), + ) + ctx.composer2lhs_context[self] = value + return ctx.composer2lhs_context[self] + + def _GetRhsContext(self, ctx): + if self not in ctx.composer2rhs_context: + middle_blob_object = oneflow._oneflow_internal.BlobObject( + ctx.produced_blob_object.object_id, + self._GetMiddleOpArgParallelAttr(ctx), + ctx.produced_blob_object.op_arg_blob_attr, + ) + value = BoxingHobContext( + middle_blob_object, ctx.consumer_op_arg_parallel_attr, + ) + ctx.composer2rhs_context[self] = value + return ctx.composer2rhs_context[self] + + def _GetMiddleOpArgParallelAttr(self, ctx): + if self not in ctx.composer2middle_op_arg_parallel_attr: + value = self.get_middle_op_arg_parallel_attr_( + None, ctx.produced_blob_object, ctx.consumer_op_arg_parallel_attr + ) + if self.middle_verbose_str_ is not None: + print("=== %s ===" % self.middle_verbose_str_) + print(value) + ctx.composer2middle_op_arg_parallel_attr[self] = value + return ctx.composer2middle_op_arg_parallel_attr[self] + + +@bool_functor("SingleMachine") +def SingleMachine(ctx): + blob_device_ids = dict( + ctx.produced_blob_object.parallel_desc_symbol.machine_id2device_id_list + ) + arg_parallel_desc_symbol = ctx.consumer_op_arg_parallel_attr.parallel_desc_symbol + op_arg_device_ids = dict(arg_parallel_desc_symbol.machine_id2device_id_list) + return list(blob_device_ids.keys()) == [0] and list(op_arg_device_ids.keys()) == [0] + + +@bool_functor("MatchDeviceOneToOnePerMachine") +def 
MatchDeviceOneToOnePerMachine(ctx): + blob_device_ids = dict( + ctx.produced_blob_object.parallel_desc_symbol.machine_id2device_id_list + ) + arg_parallel_desc_symbol = ctx.consumer_op_arg_parallel_attr.parallel_desc_symbol + op_arg_device_ids = dict(arg_parallel_desc_symbol.machine_id2device_id_list) + if blob_device_ids.keys() != op_arg_device_ids.keys(): + return False + for key in blob_device_ids.keys(): + if len(blob_device_ids[key]) != len(op_arg_device_ids[key]): + return False + return True + + +@bool_functor("Verbose") +def Verbose(ctx): + print("============[producer]============") + print(ctx.produced_blob_object.op_arg_parallel_attr.parallel_desc_symbol) + print(ctx.produced_blob_object.op_arg_parallel_attr.sbp_parallel) + print("============[consumer]============") + print(ctx.consumer_op_arg_parallel_attr.parallel_desc_symbol) + print(ctx.consumer_op_arg_parallel_attr.sbp_parallel) + return True + + +@bool_functor("producer's devices contained in consumer's devices") +def ProducerDevicesContainedInConsumerDevices(ctx): + return ctx.consumer_op_arg_parallel_attr.parallel_desc_symbol.Containing( + ctx.produced_blob_object.parallel_desc_symbol + ) + + +@bool_functor("consumer's devices contained in producer's devices") +def ConsumerDevicesContainedInProducerDevices(ctx): + return ctx.produced_blob_object.parallel_desc_symbol.Containing( + ctx.consumer_op_arg_parallel_attr.parallel_desc_symbol + ) + + +@hob_context_attr("consumer_sbp_parallel") +def consumer_sbp_parallel(ctx): + return ctx.consumer_op_arg_parallel_attr.sbp_parallel + + +@hob_context_attr("producer_sbp_parallel") +def producer_sbp_parallel(ctx): + return ctx.produced_blob_object.op_arg_parallel_attr.sbp_parallel + + +@hob_context_attr("producer_parallel_desc") +def producer_parallel_desc(ctx): + return ctx.produced_blob_object.op_arg_parallel_attr.parallel_desc_symbol + + +@hob_context_attr("consumer_parallel_desc") +def consumer_parallel_desc(ctx): + return ctx.consumer_op_arg_parallel_attr.parallel_desc_symbol diff --git a/oneflow/compatible_single_client_python/eager/boxing_middle.py b/oneflow/compatible_single_client_python/eager/boxing_middle.py new file mode 100644 index 0000000000000000000000000000000000000000..2ddf5c82387d8f213e0758a1ff2be8556705e58b --- /dev/null +++ b/oneflow/compatible_single_client_python/eager/boxing_middle.py @@ -0,0 +1,174 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.eager import symbol as symbol_util +from oneflow.core.job import sbp_parallel_pb2 as sbp_parallel_pb +from oneflow._oneflow_internal.oneflow.core.job import placement as placement_cfg +from oneflow._oneflow_internal.oneflow.core.common import shape as shape_proto_cfg +import oneflow._oneflow_internal +import random + + +class BoxingToMiddle(object): + def __init__( + self, + boxing_method, + get_middle_parallel_desc_symbol, + get_middle_sbp_parallel, + verbose=False, + ): + self.boxing_method_ = boxing_method + self.get_middle_op_arg_parallel_attr_ = MiddleOpArgParallelAttr( + get_middle_parallel_desc_symbol, get_middle_sbp_parallel, + ) + self.verbose_ = verbose + + @property + def boxing_method(self): + return self.boxing_method_ + + @property + def get_middle_op_arg_parallel_attr(self): + return self.get_middle_op_arg_parallel_attr_ + + @property + def verbose(self): + return self.verbose_ + + +def MiddleOpArgParallelAttr(get_parallel_desc_symbol, get_sbp_parallel): + def GetOpArgParallelAttr( + builder, produced_blob_object, consumer_op_arg_parallel_attr + ): + return oneflow._oneflow_internal.OpArgParallelAttribute( + get_parallel_desc_symbol( + builder, produced_blob_object, consumer_op_arg_parallel_attr + ), + str( + get_sbp_parallel( + builder, produced_blob_object, consumer_op_arg_parallel_attr + ) + ), + str(produced_blob_object.op_arg_parallel_attr.opt_mirrored_parallel), + ) + + return GetOpArgParallelAttr + + +def ReplaceProducerDeviceTag(new_device_tag): + def Getter(builder, produced_blob_object, consumer_op_arg_parallel_attr): + x_parallel_attr = produced_blob_object.op_arg_parallel_attr + return TryReplaceDeviceTag( + builder, x_parallel_attr.parallel_desc_symbol, new_device_tag + ) + + return Getter + + +def ProducerRandomParallelIdPerMachine(device_tag=None): + def Getter(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return RandomParallelIdPerMachine( + produced_blob_object.parallel_desc_symbol, + device_tag=device_tag, + builder=builder, + ) + + return Getter + + +def ConsumerRandomParallelIdPerMachine(device_tag=None): + def Getter(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return RandomParallelIdPerMachine( + consumer_op_arg_parallel_attr.parallel_desc_symbol, + device_tag=device_tag, + builder=builder, + ) + + return Getter + + +def ProducerParallelDesc(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return produced_blob_object.parallel_desc_symbol + + +def ConsumerParallelDesc(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return consumer_op_arg_parallel_attr.parallel_desc_symbol + + +def ReplaceConsumerDeviceTag(new_device_tag): + def Getter(builder, produced_blob_object, consumer_op_arg_parallel_attr): + parallel_desc_sym = consumer_op_arg_parallel_attr.parallel_desc_symbol + return TryReplaceDeviceTag(builder, parallel_desc_sym, new_device_tag) + + return Getter + + +def BroadcastParallel(builder, produced_blob_object, consumer_op_arg_parallel_attr): + sbp_parallel = sbp_parallel_pb.SbpParallel() + sbp_parallel.broadcast_parallel.SetInParent() + return sbp_parallel + + +def ProducerSbpParallel(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return produced_blob_object.op_arg_parallel_attr.sbp_parallel + + +def ConsumerSbpParallel(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return consumer_op_arg_parallel_attr.sbp_parallel + + +def TryReplaceDeviceTag(builder, 
parallel_desc_symbol, device_tag): + if parallel_desc_symbol.device_tag == device_tag: + return parallel_desc_symbol + else: + return ReplaceDeviceTag(parallel_desc_symbol, device_tag, builder=builder) + + +def ReplaceDeviceTag(parallel_desc_symbol, device_tag, builder=None): + assert parallel_desc_symbol.device_tag != device_tag + parallel_conf = placement_cfg.ParallelConf() + parallel_conf.set_device_tag(device_tag) + for device_name in parallel_desc_symbol.parallel_conf.device_name(): + parallel_conf.add_device_name(device_name) + hierarchy = shape_proto_cfg.ShapeProto() + for dim in parallel_desc_symbol.hierarchy: + hierarchy.add_dim(dim) + assert hierarchy.dim_size() > 0 + parallel_conf.mutable_hierarchy().CopyFrom(hierarchy) + if builder is None: + return oneflow._oneflow_internal.PlacementSymbol( + parallel_desc_symbol.symbol_id, parallel_conf + ) + else: + return builder.GetParallelDescSymbol(parallel_conf) + + +def RandomParallelIdPerMachine(parallel_desc_symbol, device_tag=None, builder=None): + if device_tag is None: + device_tag = parallel_desc_symbol.parallel_conf.device_tag() + assert device_tag is not None + parallel_conf = placement_cfg.ParallelConf() + parallel_conf.set_device_tag(device_tag) + for machine_id, dev_ids in parallel_desc_symbol.machine_id2device_id_list.items(): + dev_id = dev_ids[random.randint(0, len(dev_ids) - 1)] + parallel_conf.add_device_name("@%s:%s" % (machine_id, dev_id)) + if builder is None: + return oneflow._oneflow_internal.PlacementSymbol( + parallel_desc_symbol.symbol_id, parallel_conf + ) + else: + return builder.GetParallelDescSymbol(parallel_conf) diff --git a/oneflow/compatible_single_client_python/eager/boxing_util.py b/oneflow/compatible_single_client_python/eager/boxing_util.py new file mode 100644 index 0000000000000000000000000000000000000000..a62e020ee922ab9faba98c07b68d89f717d22a14 --- /dev/null +++ b/oneflow/compatible_single_client_python/eager/boxing_util.py @@ -0,0 +1,1012 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from contextlib import contextmanager +from oneflow.compatible_single_client_python.eager import symbol as symbol_util +from oneflow.core.operator import op_conf_pb2 as op_conf_pb +from oneflow.core.operator import op_attribute_pb2 as op_attribute_pb +from oneflow.core.job import sbp_parallel_pb2 as sbp_parallel_pb +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import ( + balanced_splitter as balanced_splitter, +) +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.lib.core import ( + high_order_bool as high_order_bool, +) +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.eager import boxing_hob as boxing_hob +from oneflow.compatible_single_client_python.eager import op_infer_util as op_infer_util +from oneflow.compatible_single_client_python.eager.boxing_hob import BoxingHobContext +from oneflow.compatible_single_client_python.eager import boxing_middle as boxing_middle +import random +from oneflow.compatible import single_client as flow +from oneflow._oneflow_internal.oneflow.core.job import placement as placement_cfg +from oneflow._oneflow_internal.oneflow.core.common import shape as shape_proto_cfg +import oneflow._oneflow_internal + + +def BoxingTo(builder, produced_blob_object, consumer_op_arg_parallel_attr): + hob_context = BoxingHobContext(produced_blob_object, consumer_op_arg_parallel_attr) + if enable_if.get_condition_hob(NoBoxing)(hob_context): + return produced_blob_object + + producer_opt_mirrored_parallel = ( + produced_blob_object.op_arg_parallel_attr.opt_mirrored_parallel + ) + consumer_opt_mirrored_parallel = consumer_op_arg_parallel_attr.opt_mirrored_parallel + assert producer_opt_mirrored_parallel == consumer_opt_mirrored_parallel, ( + "\nproducer_op_arg_parallel_attr: %s\nconsumer_op_arg_parallel_attr: %s" + % (produced_blob_object.op_arg_parallel_attr, consumer_op_arg_parallel_attr) + ) + + def default(get_failed_info, *args, **kwargs): + raise NotImplementedError( + "%s\n" + "no boxing method found.\n" + "logical_blob_name: %s\n" + "x_arg_attribute: %s\n" + "consumer_op_arg_parallel_attr: %s\n" + % ( + get_failed_info(), + produced_blob_object.op_arg_blob_attr.logical_blob_name, + produced_blob_object.op_arg_parallel_attr, + consumer_op_arg_parallel_attr, + ) + ) + + global conditional_function_table + function = enable_if.unique( + conditional_function_table, + context=BoxingHobContext(produced_blob_object, consumer_op_arg_parallel_attr), + default=default, + ) + return function(builder, produced_blob_object, consumer_op_arg_parallel_attr) + + +def boxing_condition(hob_expr, verbose=False): + def Decorator(func): + func.__oneflow_condition_hob__ = hob_expr + if not verbose: + hob_expr.__debug_str__ = GetBoxingDebugString(func) + return func + + return Decorator + + +def FirstMatchedBoxing(*boxing_methods): + hob_expr = enable_if.get_condition_hob(boxing_methods[0]) + for boxing_method in boxing_methods[1:]: + hob_expr = hob_expr | enable_if.get_condition_hob(boxing_method) + + @enable_if.condition(hob_expr) + def FirstMatched(builder, produced_blob_object, consumer_op_arg_parallel_attr): + ctx = BoxingHobContext(produced_blob_object, consumer_op_arg_parallel_attr) + for boxing_method in 
boxing_methods: + hob_expr = enable_if.get_condition_hob(boxing_method) + if not hob_expr(ctx): + continue + return boxing_method( + builder, produced_blob_object, consumer_op_arg_parallel_attr + ) + + boxing_methods_names = [GetBoxingDebugString(m) for m in boxing_methods] + FirstMatched.__debug_str__ = "(%s)" % (" | ".join(boxing_methods_names)) + return FirstMatched + + +def OptionalBoxing(boxing_method): + opt_boxing_method = FirstMatchedBoxing(boxing_method, NoBoxing) + debug_str = "Optional(%s)" % GetBoxingDebugString(boxing_method) + opt_boxing_method.__debug_str__ = debug_str + return opt_boxing_method + + +def ComposeBoxing( + lhs_boxing, rhs_boxing, get_middle_op_arg_parallel_attr, middle_verbose_str=None +): + composed_hob = boxing_hob.ComposeHob( + enable_if.get_condition_hob(lhs_boxing), + enable_if.get_condition_hob(rhs_boxing), + get_middle_op_arg_parallel_attr=get_middle_op_arg_parallel_attr, + middle_verbose_str=middle_verbose_str, + ) + + @enable_if.condition(composed_hob) + def Composed(builder, produced_blob_object, consumer_op_arg_parallel_attr): + tmp_op_arg_parallel_attr = get_middle_op_arg_parallel_attr( + builder, produced_blob_object, consumer_op_arg_parallel_attr + ) + tmp = lhs_boxing(builder, produced_blob_object, tmp_op_arg_parallel_attr) + return rhs_boxing(builder, tmp, consumer_op_arg_parallel_attr) + + Composed.__debug_str__ = "%s->%s" % ( + GetBoxingDebugString(lhs_boxing), + GetBoxingDebugString(rhs_boxing), + ) + Composed.__left_debug_str__ = GetBoxingLeftDebugString(lhs_boxing) + Composed.__right_debug_str__ = GetBoxingRightDebugString(rhs_boxing) + return Composed + + +def GetBoxingDebugString(boxing_method): + if hasattr(boxing_method, "__debug_str__"): + return boxing_method.__debug_str__ + else: + return boxing_method.__name__ + + +def GetBoxingLeftDebugString(boxing_method): + if hasattr(boxing_method, "__left_debug_str__"): + return boxing_method.__left_debug_str__ + else: + return GetBoxingDebugString(boxing_method) + + +def GetBoxingRightDebugString(boxing_method): + if hasattr(boxing_method, "__right_debug_str__"): + return boxing_method.__right_debug_str__ + else: + return GetBoxingDebugString(boxing_method) + + +def Sequential(*boxing_methods, exclude=tuple(), middle_verbose=False): + assert not isinstance(boxing_methods[-1], boxing_middle.BoxingToMiddle) + composed = boxing_methods[-1] + for boxing_to_middle in boxing_methods[-2::-1]: + assert isinstance(boxing_to_middle, boxing_middle.BoxingToMiddle) + if middle_verbose: + middle_verbose_str = "middle op_arg_parallel_attr of %s->%s:" % ( + GetBoxingDebugString(boxing_to_middle.boxing_method), + GetBoxingLeftDebugString(composed), + ) + else: + middle_verbose_str = None + composed = ComposeBoxing( + boxing_to_middle.boxing_method, + composed, + boxing_to_middle.get_middle_op_arg_parallel_attr, + middle_verbose_str=middle_verbose_str, + ) + if len(exclude) > 0: + exclude_hob = enable_if.get_condition_hob(exclude[0]) + for method in exclude[1:]: + exclude_hob = exclude_hob | enable_if.get_condition_hob(method) + old_hob = enable_if.get_condition_hob(composed) + enable_if.set_condition_hob(composed, old_hob & ~exclude_hob) + return composed + + +MatchCopyH2D = ( + ( + boxing_hob.producer_parallel_desc.machine_id2device_id_list + == boxing_hob.consumer_parallel_desc.machine_id2device_id_list + ) + & ( + (boxing_hob.producer_sbp_parallel == boxing_hob.consumer_sbp_parallel) + | (boxing_hob.producer_parallel_desc.parallel_num == 1) + ) + & (boxing_hob.producer_parallel_desc.device_tag == "cpu") + 
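# i.e. a host-to-device copy: identical machine/device layout, cpu producer feeding the gpu consumer below +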
& (boxing_hob.consumer_parallel_desc.device_tag == "gpu") +) + + +@boxing_condition(MatchCopyH2D) +def CopyH2D(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return CopyHD(builder, produced_blob_object, consumer_op_arg_parallel_attr) + + +MatchCopyD2H = ( + ( + boxing_hob.producer_parallel_desc.machine_id2device_id_list + == boxing_hob.consumer_parallel_desc.machine_id2device_id_list + ) + & ( + (boxing_hob.producer_sbp_parallel == boxing_hob.consumer_sbp_parallel) + | (boxing_hob.producer_parallel_desc.parallel_num == 1) + ) + & (boxing_hob.producer_parallel_desc.device_tag == "gpu") + & (boxing_hob.consumer_parallel_desc.device_tag == "cpu") +) + + +@boxing_condition(MatchCopyD2H) +def CopyD2H(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return CopyHD(builder, produced_blob_object, consumer_op_arg_parallel_attr) + + +def CopyHD(builder, produced_blob_object, consumer_op_arg_parallel_attr): + arg_parallel_desc_symbol = consumer_op_arg_parallel_attr.parallel_desc_symbol + op_device_tag = arg_parallel_desc_symbol.device_tag + return BuildCopyHdInstruction(builder, produced_blob_object, op_device_tag) + + +BlobIsPartialSum = boxing_hob.producer_sbp_parallel.HasField("partial_sum_parallel") +OpArgIsBroadcast = boxing_hob.consumer_sbp_parallel.HasField("broadcast_parallel") + + +MatchInterNodeOneToMany = ( + ~boxing_hob.SingleMachine + & (boxing_hob.producer_parallel_desc.device_tag == "cpu") + & (boxing_hob.consumer_parallel_desc.device_tag == "cpu") + & (boxing_hob.producer_parallel_desc.parallel_num == 1) + & (boxing_hob.consumer_parallel_desc.parallel_num > 1) + & OpArgIsBroadcast +) + + +@boxing_condition(MatchInterNodeOneToMany) +def InterNodeOneToMany(builder, produced_blob_object, consumer_op_arg_parallel_attr): + out_blobs = [] + consumer_dev_ids = ( + consumer_op_arg_parallel_attr.parallel_desc_symbol.machine_id2device_id_list + ) + for machine_id, device_ids in consumer_dev_ids.items(): + for device_id in device_ids: + parallel_conf = placement_cfg.ParallelConf() + parallel_conf.set_device_tag("cpu") + parallel_conf.add_device_name("@%s:%s" % (machine_id, device_id)) + parallel_desc_symbol = builder.GetParallelDescSymbol(parallel_conf) + out_blob = builder.Build121To(produced_blob_object, parallel_desc_symbol) + out_blobs.append(out_blob) + + return PackPhysicalBoxingBlobObjectsToLogical( + builder, + out_blobs, + consumer_op_arg_parallel_attr, + produced_blob_object.op_arg_blob_attr, + ) + + +MatchInterNodeOneToOne = ( + (boxing_hob.producer_parallel_desc.device_tag == "cpu") + & (boxing_hob.consumer_parallel_desc.device_tag == "cpu") + & (boxing_hob.producer_parallel_desc != boxing_hob.consumer_parallel_desc) + & ( + boxing_hob.producer_parallel_desc.parallel_num + == boxing_hob.consumer_parallel_desc.parallel_num + ) + & ~boxing_hob.MatchDeviceOneToOnePerMachine + & ( + (boxing_hob.producer_sbp_parallel == boxing_hob.consumer_sbp_parallel) + | (boxing_hob.producer_parallel_desc.parallel_num == 1) + ) +) + + +@boxing_condition(MatchInterNodeOneToOne) +def InterNodeOneToOne(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return builder.Build121To( + produced_blob_object, consumer_op_arg_parallel_attr.parallel_desc_symbol + ) + + +MatchCpuBroadcastOneToOne = ( + (boxing_hob.producer_parallel_desc.device_tag == "cpu") + & (boxing_hob.consumer_parallel_desc.device_tag == "cpu") + & (boxing_hob.producer_parallel_desc != boxing_hob.consumer_parallel_desc) + & boxing_hob.MatchDeviceOneToOnePerMachine + & ( + 
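# SBP must agree, or the producer's SBP is irrelevant because it has parallel_num == 1 +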
(boxing_hob.producer_sbp_parallel == boxing_hob.consumer_sbp_parallel) + | (boxing_hob.producer_parallel_desc.parallel_num == 1) + ) +) + + +@boxing_condition(MatchCpuBroadcastOneToOne) +def CpuBroadcastOneToOne(builder, produced_blob_object, consumer_op_arg_parallel_attr): + def get_identity_physical_in_blob_objects( + builder, + produced_blob_object, + consumer_op_arg_parallel_attr, + physical_in_blob_objects, + boxing_parallel_desc_symbol, + out_parallel_num, + ): + return physical_in_blob_objects + + return NaiveCpuRefPhysicalBlobObjectsScope( + builder, + produced_blob_object, + consumer_op_arg_parallel_attr, + get_physical_out_blob_objects=get_identity_physical_in_blob_objects, + ) + + +MatchNoBoxing = ( + boxing_hob.producer_parallel_desc == boxing_hob.consumer_parallel_desc +) & ( + (boxing_hob.producer_sbp_parallel == boxing_hob.consumer_sbp_parallel) + | (boxing_hob.producer_parallel_desc.parallel_num == 1) +) + + +@boxing_condition(MatchNoBoxing) +def NoBoxing(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return produced_blob_object + + +@boxing_condition(boxing_hob.Verbose & MatchNoBoxing) +def VerboseNoBoxing(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return produced_blob_object + + +def VerboseOptionalBoxing(boxing_method): + opt_boxing_method = FirstMatchedBoxing(boxing_method, VerboseNoBoxing) + debug_str = "VerboseOptional(%s)" % GetBoxingDebugString(boxing_method) + opt_boxing_method.__debug_str__ = debug_str + return opt_boxing_method + + +MatchNcclAllReduce = ( + boxing_hob.SingleMachine + & (boxing_hob.producer_parallel_desc.device_tag == "gpu") + & (boxing_hob.producer_parallel_desc == boxing_hob.consumer_parallel_desc) + & (boxing_hob.consumer_parallel_desc.parallel_num > 1) + & BlobIsPartialSum + & OpArgIsBroadcast +) + + +@boxing_condition(MatchNcclAllReduce) +def GpuNcclAllReduce(builder, produced_blob_object, consumer_op_arg_parallel_attr): + parallel_conf = consumer_op_arg_parallel_attr.parallel_desc_symbol.parallel_conf + bn_in_op2blob_object = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + bn_in_op2blob_object["in_0"] = produced_blob_object + op_attribute = _GetEagerNcclAllReduce(parallel_conf, bn_in_op2blob_object) + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + builder.NoBoxingStatelessCall( + cfg_op_attribute, parallel_conf, bn_in_op2blob_object, + ) + y_blob_object = bn_in_op2blob_object["out_0"] + y_blob_object.op_arg_parallel_attr.Assign(consumer_op_arg_parallel_attr) + return y_blob_object + + +MatchSplitOneToMany = ( + (boxing_hob.producer_parallel_desc.parallel_num == 1) + & (boxing_hob.consumer_parallel_desc.parallel_num > 1) + & boxing_hob.consumer_sbp_parallel.HasField("split_parallel") +) + +MatchConcatManyToOne = ( + (boxing_hob.consumer_parallel_desc.parallel_num == 1) + & (boxing_hob.producer_parallel_desc.parallel_num > 1) + & boxing_hob.producer_sbp_parallel.HasField("split_parallel") +) + +MatchConcatManyToSplitMany = ( + (boxing_hob.producer_parallel_desc.parallel_num > 1) + & (boxing_hob.consumer_parallel_desc.parallel_num > 1) + & boxing_hob.producer_sbp_parallel.HasField("split_parallel") + & boxing_hob.consumer_sbp_parallel.HasField("split_parallel") + & ( + (boxing_hob.producer_sbp_parallel != boxing_hob.consumer_sbp_parallel) + | ( + boxing_hob.producer_parallel_desc.parallel_num + != boxing_hob.consumer_parallel_desc.parallel_num + ) + ) +) + + +MatchNaiveCpuSplitToSplit = ( + (boxing_hob.producer_parallel_desc.device_tag 
== "cpu") + & (boxing_hob.consumer_parallel_desc.device_tag == "cpu") + & (MatchSplitOneToMany | MatchConcatManyToOne | MatchConcatManyToSplitMany) +) + + +@boxing_condition(MatchNaiveCpuSplitToSplit) +def NaiveCpuSplitToSplit(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return NaiveCpuRefPhysicalBlobObjectsScope( + builder, + produced_blob_object, + consumer_op_arg_parallel_attr, + get_physical_out_blob_objects=NaiveBoxingToPhysicalBlobObjects, + ) + + +MatchNaiveCpuPartialSumToSplit = ( + (boxing_hob.producer_parallel_desc.device_tag == "cpu") + & (boxing_hob.consumer_parallel_desc.device_tag == "cpu") + & (boxing_hob.producer_parallel_desc.parallel_num > 1) + & boxing_hob.producer_sbp_parallel.HasField("partial_sum_parallel") + & ( + (boxing_hob.consumer_parallel_desc.parallel_num == 1) + | boxing_hob.consumer_sbp_parallel.HasField("split_parallel") + ) +) + + +@boxing_condition(MatchNaiveCpuPartialSumToSplit) +def NaiveCpuPartialSumToSplit( + builder, produced_blob_object, consumer_op_arg_parallel_attr +): + return NaiveCpuRefPhysicalBlobObjectsScope( + builder, + produced_blob_object, + consumer_op_arg_parallel_attr, + get_physical_out_blob_objects=NaiveBoxingToPhysicalBlobObjects, + ) + + +def NaiveCpuRefPhysicalBlobObjectsScope( + builder, + produced_blob_object, + consumer_op_arg_parallel_attr, + get_physical_out_blob_objects, +): + physical_in_blob_objects = UnpackLogicalBoxingBlobObjectToPhysical( + builder, produced_blob_object + ) + consumer_parallel_desc_symbol = consumer_op_arg_parallel_attr.parallel_desc_symbol + out_parallel_num = consumer_parallel_desc_symbol.parallel_num + boxing_parallel_desc_symbol = GetConcatSplitBoxingParallelDescSymbol( + builder, + consumer_parallel_desc_symbol, + max(len(physical_in_blob_objects), out_parallel_num), + ) + physical_output_blob_objects = get_physical_out_blob_objects( + builder=builder, + produced_blob_object=produced_blob_object, + consumer_op_arg_parallel_attr=consumer_op_arg_parallel_attr, + physical_in_blob_objects=physical_in_blob_objects, + boxing_parallel_desc_symbol=boxing_parallel_desc_symbol, + out_parallel_num=out_parallel_num, + ) + phy_parallel_desc_symbols = builder.GetPhysicalParallelDescSymbols( + consumer_op_arg_parallel_attr.parallel_desc_symbol + ) + physical_output_blob_objects = RefBlobObjectWithParallelDesc( + builder, physical_output_blob_objects, phy_parallel_desc_symbols + ) + return PackPhysicalBoxingBlobObjectsToLogical( + builder, + physical_output_blob_objects, + consumer_op_arg_parallel_attr, + produced_blob_object.op_arg_blob_attr, + ) + + +def NaiveBoxingToPhysicalBlobObjects( + builder, + produced_blob_object, + consumer_op_arg_parallel_attr, + physical_in_blob_objects, + boxing_parallel_desc_symbol, + out_parallel_num, +): + op_attribute = ConstructNaiveBoxingOpConf( + produced_blob_object, + consumer_op_arg_parallel_attr, + len(physical_in_blob_objects), + out_parallel_num, + ) + return BuildNaiveCpuBoxing( + builder, + op_attribute, + physical_in_blob_objects, + boxing_parallel_desc_symbol, + out_parallel_num, + ) + + +def RefBlobObjectWithParallelDesc( + builder, physical_blob_objects, phy_parallel_desc_symbols +): + assert len(physical_blob_objects) == len( + phy_parallel_desc_symbols + ), "%s v.s. 
%s" % (len(physical_blob_objects), len(phy_parallel_desc_symbols)) + + def RefWithParallelDesc(physical_blob_object, phy_parallel_desc_symbol): + if physical_blob_object.parallel_desc_symbol == phy_parallel_desc_symbol: + return physical_blob_object + return builder.BroadcastBlobReference( + physical_blob_object, phy_parallel_desc_symbol + ) + + return [ + RefWithParallelDesc(*pair) + for pair in zip(physical_blob_objects, phy_parallel_desc_symbols) + ] + + +def PackPhysicalBoxingBlobObjectsToLogical( + builder, physical_blob_objects, op_arg_parallel_attr, op_arg_blob_attr +): + if len(physical_blob_objects) == 1: + return physical_blob_objects[0] + return builder.PackPhysicalBlobsToLogicalBlob( + physical_blob_objects, op_arg_parallel_attr, op_arg_blob_attr + ) + + +def BuildNaiveCpuBoxing( + builder, + op_attribute, + physical_in_blob_objects, + boxing_parallel_desc_symbol, + out_parallel_num, +): + bn_in_op2blob_object = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + for i in range(len(physical_in_blob_objects)): + bn_in_op2blob_object["in_%s" % i] = physical_in_blob_objects[i] + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + builder.NoBoxingStatelessCall( + cfg_op_attribute, + boxing_parallel_desc_symbol.parallel_conf, + bn_in_op2blob_object, + ) + return [bn_in_op2blob_object["out_%s" % i] for i in range(out_parallel_num)] + + +# S -> S or P -> S +def ConstructNaiveBoxingOpConf( + produced_blob_object, + consumer_op_arg_parallel_attr, + in_parallel_num, + out_parallel_num, +): + op_conf = op_conf_pb.OperatorConf() + op_conf.name = "undefined_boxing_op_name" + op_conf.device_tag = "cpu" + op_conf.boxing_conf.lbi.op_name = "undefined_boxing_op_name" + op_conf.boxing_conf.lbi.blob_name = "undefined_boxing_blob_name" + op_conf.boxing_conf.in_num = in_parallel_num + op_conf.boxing_conf.out_num = out_parallel_num + in_sbp_parallel = produced_blob_object.op_arg_parallel_attr.sbp_parallel + if in_sbp_parallel.has_split_parallel(): + op_conf.boxing_conf.concat_box.axis = in_sbp_parallel.split_parallel().axis() + elif in_parallel_num == 1: + op_conf.boxing_conf.concat_box.axis = 0 + else: + assert in_sbp_parallel.has_partial_sum_parallel() + op_conf.boxing_conf.add_box.SetInParent() + out_sbp_parallel = consumer_op_arg_parallel_attr.sbp_parallel + if out_sbp_parallel.has_split_parallel(): + out_axis = out_sbp_parallel.split_parallel().axis() + else: + assert out_parallel_num == 1 + out_axis = 0 + op_conf.boxing_conf.split_box.axis = out_axis + shape = produced_blob_object.op_arg_blob_attr.shape + op_conf.boxing_conf.split_box.part_num.extend( + balanced_splitter.BalancedPartNums(shape[out_axis], out_parallel_num) + ) + bn_in_op2blob_object = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + for i in range(in_parallel_num): + bn_in_op2blob_object["in_%s" % i] = produced_blob_object + return op_infer_util.Infer(op_conf, bn_in_op2blob_object) + + +def GetConcatSplitBoxingParallelDescSymbol( + builder, blob_parallel_desc_symbol, max_parallel_num +): + random_rank_id = random.randint(0, max_parallel_num - 1) + parallel_conf = placement_cfg.ParallelConf() + parallel_conf.set_device_tag("cpu") + for machine_id, _ in blob_parallel_desc_symbol.machine_id2device_id_list.items(): + parallel_conf.add_device_name("@%s:%s" % (machine_id, random_rank_id)) + return builder.GetParallelDescSymbol(parallel_conf) + + +def UnpackLogicalBoxingBlobObjectToPhysical(builder, produced_blob_object): + if 
produced_blob_object.parallel_desc_symbol.parallel_num == 1: + return [produced_blob_object] + return builder.UnpackLogicalBlobToPhysicalBlobs(produced_blob_object) + + +MatchCpuBroadcastOneToMany = ( + boxing_hob.SingleMachine + & (boxing_hob.producer_parallel_desc.device_tag == "cpu") + & (boxing_hob.consumer_parallel_desc.device_tag == "cpu") + & boxing_hob.ProducerDevicesContainedInConsumerDevices + & (boxing_hob.producer_parallel_desc.parallel_num == 1) + & (boxing_hob.consumer_parallel_desc.parallel_num > 1) + & boxing_hob.consumer_sbp_parallel.HasField("broadcast_parallel") +) + + +@boxing_condition(MatchCpuBroadcastOneToMany) +def CpuBroadcastOneToMany(builder, produced_blob_object, consumer_op_arg_parallel_attr): + return CpuOneToManyBroadcastBlobReference( + builder, + produced_blob_object, + consumer_op_arg_parallel_attr.parallel_desc_symbol, + ) + + +MatchBroadcastManyToOne = ( + ( + boxing_hob.producer_parallel_desc.device_tag + == boxing_hob.consumer_parallel_desc.device_tag + ) + & boxing_hob.ConsumerDevicesContainedInProducerDevices + & (boxing_hob.producer_parallel_desc.parallel_num > 1) + & (boxing_hob.consumer_parallel_desc.parallel_num == 1) + & boxing_hob.producer_sbp_parallel.HasField("broadcast_parallel") +) + + +@boxing_condition(MatchBroadcastManyToOne) +def BroadcastManyToOne(builder, produced_blob_object, consumer_op_arg_parallel_attr): + y_blob_objects = builder.UnpackLogicalBlobToPhysicalBlobs(produced_blob_object) + for y in y_blob_objects: + if y.parallel_desc_symbol == consumer_op_arg_parallel_attr.parallel_desc_symbol: + return y + raise NotImplementedError("op_arg's devices is not contained in blob's devices") + + +def Assign(builder, ref_blob_object, value_blob_object): + return BuildAssignInstruction( + builder, ref_blob_object, value_blob_object, _AssignOpConf() + ) + + +def CpuOneToManyBroadcastBlobReference( + builder, produced_blob_object, to_parallel_desc_symbol +): + x_parallel_desc_symbol = produced_blob_object.parallel_desc_symbol + x_machine_ids = list(dict(x_parallel_desc_symbol.machine_id2device_id_list).keys()) + to_machine_ids = list( + dict(to_parallel_desc_symbol.machine_id2device_id_list).keys() + ) + assert x_machine_ids == to_machine_ids, (x_machine_ids, to_machine_ids) + x_first_device_ids = x_parallel_desc_symbol.machine_id2device_id_list[ + x_machine_ids[0] + ] + assert len(x_first_device_ids) == 1, x_first_device_ids + if x_parallel_desc_symbol == to_parallel_desc_symbol: + return produced_blob_object + return builder.BroadcastBlobReference(produced_blob_object, to_parallel_desc_symbol) + + +def BuildCopyHdInstruction(builder, produced_blob_object, to_device_tag): + op_conf, lbi = _MakeCopyHdOpConfAndRetLbi() + return _BuildCopyInstruction(builder, produced_blob_object, op_conf, to_device_tag) + + +def _MakeCopyHdOpConfAndRetLbi(): + op_conf = op_conf_pb.OperatorConf() + op_conf.name = "copy_hd" + op_conf.device_tag = "gpu" + setattr(op_conf.copy_conf, "in", "%s/in" % op_conf.name) + op_conf.copy_conf.out = "out" + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = "out" + return op_conf, lbi + + +@contextmanager +def _CudaHostPinBlob(build, blob_object): + build.CudaHostRegisterBlob(blob_object) + try: + yield + finally: + build.CudaHostUnregisterBlob(blob_object) + + +def _BuildCopyInstruction(builder, produced_blob_object, op_conf, to_device_tag): + x_devices = produced_blob_object.parallel_desc_symbol.machine_id2device_id_list + x_device_tag = 
produced_blob_object.parallel_desc_symbol.device_tag + bn_in_op2blob_object = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + bn_in_op2blob_object["in"] = produced_blob_object + op_attribute = op_infer_util.Infer(op_conf, bn_in_op2blob_object) + assert to_device_tag != x_device_tag, (to_device_tag, x_device_tag) + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + if to_device_tag == "cpu" and x_device_tag == "gpu": + x_parallel_conf = produced_blob_object.parallel_desc_symbol.parallel_conf + builder.NoBoxingCudaD2HStatelessCall( + cfg_op_attribute, x_parallel_conf, bn_in_op2blob_object, TryReplaceDeviceTag + ) + elif to_device_tag == "gpu" and x_device_tag == "cpu": + out_parallel_desc_symbol = TryReplaceDeviceTag( + builder, produced_blob_object.parallel_desc_symbol, to_device_tag + ) + out_parallel_conf = out_parallel_desc_symbol.parallel_conf + with _CudaHostPinBlob(builder, produced_blob_object): + builder.NoBoxingCudaH2DStatelessCall( + cfg_op_attribute, out_parallel_conf, bn_in_op2blob_object, + ) + else: + raise NotImplementedError( + "invalid device found. to_device_tag: %s, x_device_tag: %s" + % (to_device_tag, x_device_tag) + ) + sbp_parallel = bn_in_op2blob_object["out"].op_arg_parallel_attr.sbp_parallel + sbp_parallel.CopyFrom(produced_blob_object.op_arg_parallel_attr.sbp_parallel) + return bn_in_op2blob_object["out"] + + +def _AssignOpConf(): + op_conf = op_conf_pb.OperatorConf() + op_conf.name = "assign" + op_conf.assign_conf.ref = "assign/ref" + op_conf.assign_conf.value = "assign/value" + device_tag = flow.current_scope().device_parallel_desc_symbol.device_tag + op_conf.device_tag = device_tag + return op_conf + + +def BuildAssignInstruction(builder, ref_blob_object, value_blob_object, op_conf): + ref_parallel_conf = ref_blob_object.parallel_desc_symbol.parallel_conf + ref_devices = ref_blob_object.parallel_desc_symbol.machine_id2device_id_list + value_devices = value_blob_object.parallel_desc_symbol.machine_id2device_id_list + assert ref_devices == value_devices, "\nref_devices: %s\nvalue_devices: %s" % ( + ref_devices, + value_devices, + ) + ref_device_tag = ref_blob_object.parallel_desc_symbol.device_tag + value_device_tag = value_blob_object.parallel_desc_symbol.device_tag + bn_in_op2blob_object = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + bn_in_op2blob_object["ref"] = ref_blob_object + bn_in_op2blob_object["value"] = value_blob_object + op_attribute = op_infer_util.Infer(op_conf, bn_in_op2blob_object) + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + if ref_device_tag == value_device_tag: + builder.NoBoxingStatelessCall( + cfg_op_attribute, ref_parallel_conf, bn_in_op2blob_object, + ) + elif ref_device_tag == "cpu" and value_device_tag == "gpu": + value_parallel_conf = value_blob_object.parallel_desc_symbol.parallel_conf + builder.NoBoxingCudaD2HStatelessCall( + cfg_op_attribute, + value_parallel_conf, + bn_in_op2blob_object, + TryReplaceDeviceTag, + ) + elif ref_device_tag == "gpu" and value_device_tag == "cpu": + with _CudaHostPinBlob(builder, value_blob_object): + builder.NoBoxingCudaH2DStatelessCall( + cfg_op_attribute, ref_parallel_conf, bn_in_op2blob_object, + ) + else: + raise NotImplementedError( + "invalid device found. 
+def TryReplaceDeviceTag(builder, parallel_desc_symbol, device_tag):
+    return boxing_middle.TryReplaceDeviceTag(builder, parallel_desc_symbol, device_tag)
+
+
+def ReplaceDeviceTag(parallel_desc_symbol, device_tag, builder=None):
+    return boxing_middle.ReplaceDeviceTag(
+        parallel_desc_symbol, device_tag, builder=builder
+    )
+
+
+def _GetEagerNcclAllReduce(parallel_conf, ibn2blob_object):
+    op_conf = op_conf_pb.OperatorConf()
+    op_conf.device_tag = "gpu"
+    op_conf.name = "eager_nccl_all_reduce"
+    op_conf.user_conf.op_type_name = "eager_nccl_all_reduce"
+    op_conf.user_conf.input["in"].s.append("eager_nccl_all_reduce/in_0")
+    op_conf.user_conf.output["out"].s.append("eager_nccl_all_reduce/out_0")
+    op_conf.user_conf.attr["parallel_conf"].at_string = str(parallel_conf)
+    return op_infer_util.Infer(op_conf, ibn2blob_object)
+
+
+NcclAllReduce = Sequential(
+    boxing_middle.BoxingToMiddle(
+        GpuNcclAllReduce,
+        boxing_middle.ProducerParallelDesc,
+        boxing_middle.BroadcastParallel,
+    ),
+    OptionalBoxing(CopyD2H),
+)
+
+BoxingIntraNodeOneToOne = Sequential(
+    boxing_middle.BoxingToMiddle(
+        OptionalBoxing(CopyD2H),
+        boxing_middle.ReplaceProducerDeviceTag("cpu"),
+        boxing_middle.ProducerSbpParallel,
+    ),
+    boxing_middle.BoxingToMiddle(
+        CpuBroadcastOneToOne,
+        boxing_middle.ReplaceConsumerDeviceTag("cpu"),
+        boxing_middle.ConsumerSbpParallel,
+    ),
+    OptionalBoxing(CopyH2D),
+)
+
+BoxingInterNodeOneToOne = Sequential(
+    boxing_middle.BoxingToMiddle(
+        OptionalBoxing(CopyD2H),
+        boxing_middle.ReplaceProducerDeviceTag("cpu"),
+        boxing_middle.ProducerSbpParallel,
+    ),
+    boxing_middle.BoxingToMiddle(
+        InterNodeOneToOne,
+        boxing_middle.ReplaceConsumerDeviceTag("cpu"),
+        boxing_middle.ConsumerSbpParallel,
+    ),
+    OptionalBoxing(CopyH2D),
+)
+
+BoxingInterNodeOneToMany = Sequential(
+    boxing_middle.BoxingToMiddle(
+        OptionalBoxing(CopyD2H),
+        boxing_middle.ReplaceProducerDeviceTag("cpu"),
+        boxing_middle.ProducerSbpParallel,
+    ),
+    boxing_middle.BoxingToMiddle(
+        InterNodeOneToMany,
+        boxing_middle.ReplaceConsumerDeviceTag("cpu"),
+        boxing_middle.ConsumerSbpParallel,
+    ),
+    OptionalBoxing(CopyH2D),
+)
+
+conditional_function_table = [
+    CopyH2D,
+    CopyD2H,
+    NoBoxing,
+    # one to one
+    BoxingIntraNodeOneToOne,
+    BoxingInterNodeOneToOne,
+    BoxingInterNodeOneToMany,
+    # B -> B
+    BroadcastManyToOne,
+    Sequential(
+        boxing_middle.BoxingToMiddle(
+            OptionalBoxing(BroadcastManyToOne),
+            boxing_middle.ProducerRandomParallelIdPerMachine(),
+            boxing_middle.ProducerSbpParallel,
+        ),
+        boxing_middle.BoxingToMiddle(
+            OptionalBoxing(CopyD2H),
+            boxing_middle.ReplaceProducerDeviceTag("cpu"),
+            boxing_middle.ProducerSbpParallel,
+        ),
+        boxing_middle.BoxingToMiddle(
+            OptionalBoxing(CpuBroadcastOneToOne),
+            boxing_middle.ConsumerRandomParallelIdPerMachine("cpu"),
+            boxing_middle.BroadcastParallel,
+        ),
+        boxing_middle.BoxingToMiddle(
+            OptionalBoxing(CpuBroadcastOneToMany),
+            boxing_middle.ReplaceConsumerDeviceTag("cpu"),
+            boxing_middle.BroadcastParallel,
+        ),
+        OptionalBoxing(CopyH2D),
+        exclude=(
+            BroadcastManyToOne,
+            CopyH2D,
+            CopyD2H,
+            NoBoxing,
+            BoxingIntraNodeOneToOne,
+        ),
+    ),
+    # B -> S
+    Sequential(
+        boxing_middle.BoxingToMiddle(
+            BroadcastManyToOne,
+            boxing_middle.ProducerRandomParallelIdPerMachine(),
+            boxing_middle.ProducerSbpParallel,
+        ),
+        boxing_middle.BoxingToMiddle(
+            OptionalBoxing(CopyD2H),
+            boxing_middle.ReplaceProducerDeviceTag("cpu"),
+            boxing_middle.ProducerSbpParallel,
+        ),
+        boxing_middle.BoxingToMiddle(
+            NaiveCpuSplitToSplit,
+            boxing_middle.ReplaceConsumerDeviceTag("cpu"),
+            boxing_middle.ConsumerSbpParallel,
+        ),
+        OptionalBoxing(CopyH2D),
+    ),
+    # P -> B
+    NcclAllReduce,  # e.g. gpu, 0:0-3 -> gpu, 0:0-3
+    Sequential(
+        boxing_middle.BoxingToMiddle(
+            OptionalBoxing(CopyD2H),
+            boxing_middle.ReplaceProducerDeviceTag("cpu"),
+            boxing_middle.ProducerSbpParallel,
+        ),
+        boxing_middle.BoxingToMiddle(
+            NaiveCpuPartialSumToSplit,
+            boxing_middle.ConsumerRandomParallelIdPerMachine("cpu"),
+            boxing_middle.BroadcastParallel,
+        ),
+        boxing_middle.BoxingToMiddle(
+            CpuBroadcastOneToMany,
+            boxing_middle.ReplaceConsumerDeviceTag("cpu"),
+            boxing_middle.BroadcastParallel,
+        ),
+        OptionalBoxing(CopyH2D),
+        exclude=(NcclAllReduce,),
+    ),
+    # P -> S
+    Sequential(
+        boxing_middle.BoxingToMiddle(
+            OptionalBoxing(CopyD2H),
+            boxing_middle.ReplaceProducerDeviceTag("cpu"),
+            boxing_middle.ProducerSbpParallel,
+        ),
+        boxing_middle.BoxingToMiddle(
+            NaiveCpuPartialSumToSplit,
+            boxing_middle.ReplaceConsumerDeviceTag("cpu"),
+            boxing_middle.ConsumerSbpParallel,
+        ),
+        OptionalBoxing(CopyH2D),
+    ),
+    # S -> B
+    Sequential(
+        boxing_middle.BoxingToMiddle(
+            OptionalBoxing(CopyD2H),
+            boxing_middle.ReplaceProducerDeviceTag("cpu"),
+            boxing_middle.ProducerSbpParallel,
+        ),
+        boxing_middle.BoxingToMiddle(
+            NaiveCpuSplitToSplit,
+            boxing_middle.ConsumerRandomParallelIdPerMachine("cpu"),
+            boxing_middle.BroadcastParallel,
+        ),
+        boxing_middle.BoxingToMiddle(
+            CpuBroadcastOneToMany,
+            boxing_middle.ReplaceConsumerDeviceTag("cpu"),
+            boxing_middle.BroadcastParallel,
+        ),
+        OptionalBoxing(CopyH2D),
+        exclude=(NcclAllReduce,),
+    ),
+    # S -> S
+    Sequential(
+        boxing_middle.BoxingToMiddle(
+            OptionalBoxing(CopyD2H),
+            boxing_middle.ReplaceProducerDeviceTag("cpu"),
+            boxing_middle.ProducerSbpParallel,
+        ),
+        boxing_middle.BoxingToMiddle(
+            NaiveCpuSplitToSplit,
+            boxing_middle.ReplaceConsumerDeviceTag("cpu"),
+            boxing_middle.ConsumerSbpParallel,
+        ),
+        OptionalBoxing(CopyH2D),
+    ),
+]
+
+
+class BoxingUtil(oneflow._oneflow_internal.deprecated.ForeignBoxingUtil):
+    def __init__(self):
+        oneflow._oneflow_internal.deprecated.ForeignBoxingUtil.__init__(self)
+
+    def BoxingTo(self, builder, blob_object, op_arg_parallel_attr):
+        return BoxingTo(builder, blob_object, op_arg_parallel_attr)
+
+    def TryReplaceDeviceTag(self, builder, parallel_desc_symbol, device_tag):
+        return TryReplaceDeviceTag(builder, parallel_desc_symbol, device_tag)
+
+    def Assign(self, builder, target_blob_object, source_blob_object):
+        return Assign(builder, target_blob_object, source_blob_object)
+
+
+_global_boxing_util = BoxingUtil()
diff --git a/oneflow/compatible_single_client_python/eager/eager_blob_util.py b/oneflow/compatible_single_client_python/eager/eager_blob_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..400d6e88189d1502d0ffd25f4eab168ddb712cbe
--- /dev/null
+++ b/oneflow/compatible_single_client_python/eager/eager_blob_util.py
@@ -0,0 +1,117 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.framework.dtype import ( + convert_proto_dtype_to_oneflow_dtype, +) +from oneflow.compatible_single_client_python.framework import blob_trait as blob_trait +from oneflow.compatible_single_client_python.framework import ( + python_callback as python_callback, +) +from oneflow.compatible_single_client_python.lib.core import async_util as async_util +import oneflow._oneflow_internal + + +@property +def dtype(self): + return convert_proto_dtype_to_oneflow_dtype(self.get_dtype()) + + +def numpy(self): + return _GetPhysicalBlobBodyCache(self.blob_object) + + +def numpy_list(self): + return _GetPhysicalBlobBodyCache(self.blob_object) + + +def RegisterMethod4EagerPhysicalBlob(): + oneflow._oneflow_internal.EagerPhysicalBlob.dtype = dtype + oneflow._oneflow_internal.EagerPhysicalBlob.numpy = numpy + oneflow._oneflow_internal.EagerPhysicalBlob.numpy_list = numpy_list + + +def FetchTensorBlobAsNumpyList(parallel_size, blob_object): + def AsyncFetchBlobBody(Yield): + fetcher = _MakeFetcherEagerBlobBodyAsNumpyFromOfBlob(Yield) + + def BuildFetchBlobBodyInstruction(builder): + builder.FetchBlobBody( + blob_object, python_callback.GetIdForRegisteredCallback(fetcher) + ) + builder.InsertRemoveForeignCallbackInstruction( + blob_object.object_id, + python_callback.GetIdForRegisteredCallback(fetcher), + ) + + oneflow._oneflow_internal.deprecated.PhysicalRun(BuildFetchBlobBodyInstruction) + + return async_util.Await(parallel_size, AsyncFetchBlobBody) + + +def _GetPhysicalBlobHeaderCache(blob_object): + return _FetchBlobHeader(blob_object) + + +def _GetPhysicalBlobBodyCache(blob_object): + return _FetchPhysicalBlobBody(blob_object) + + +def _FetchBlobHeader(blob_object): + def AsyncFetchBlobHeader(Yield): + fetcher = _MakeFetcherEagerPhysicalBlobHeaderFromOfBlob(Yield) + + def BuildFetchBlobHeaderInstruction(builder): + builder.FetchBlobHeader( + blob_object, python_callback.GetIdForRegisteredCallback(fetcher) + ) + builder.InsertRemoveForeignCallbackInstruction( + blob_object.object_id, + python_callback.GetIdForRegisteredCallback(fetcher), + ) + + oneflow._oneflow_internal.deprecated.PhysicalRun( + BuildFetchBlobHeaderInstruction + ) + + return async_util.Await(1, AsyncFetchBlobHeader)[0] + + +def _FetchPhysicalBlobBody(blob_object): + return FetchTensorBlobAsNumpyList(1, blob_object)[0] + + +def _MakeFetcherEagerPhysicalBlobHeaderFromOfBlob(Yield): + def Callback(ofblob): + Yield( + oneflow._oneflow_internal.EagerPhysicalBlobHeader( + ofblob.static_shape, + ofblob.shape, + oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype( + ofblob.dtype + ), + ) + ) + + return Callback + + +def _MakeFetcherEagerBlobBodyAsNumpyFromOfBlob(Yield): + def FetchFromOfBlob(ofblob): + Yield(ofblob.CopyToNdarray()) + + return FetchFromOfBlob diff --git a/oneflow/compatible_single_client_python/eager/gradient_util.py b/oneflow/compatible_single_client_python/eager/gradient_util.py new file mode 100644 index 0000000000000000000000000000000000000000..dcd40eaef80b8cffdf06a013578c09daeb778605 --- /dev/null +++ b/oneflow/compatible_single_client_python/eager/gradient_util.py @@ -0,0 +1,51 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+from oneflow.compatible_single_client_python.framework import (
+    session_context as session_ctx,
+)
+
+
+def GetDefaultBackwardBlobRegister():
+    return session_ctx.GetDefaultSession().backward_blob_register
+
+
+def ReleaseUnusedBlobObject(op_attribute, blob_register):
+    assert op_attribute.HasField("blob_last_used_signature"), op_attribute
+    signature_map = op_attribute.blob_last_used_signature.bn_in_op2blob_last_used
+    bn_in_op2lbi = op_attribute.arg_signature.bn_in_op2lbi
+    for bn_in_op, is_blob_last_used in signature_map.items():
+        if not is_blob_last_used:
+            continue
+        lbi = bn_in_op2lbi[bn_in_op]
+        lbn = "%s/%s" % (lbi.op_name, lbi.blob_name)
+        blob_register.ClearObject4BlobName(lbn)
+
+
+def TrySetBackwardUsedBlobObject(op_attribute, fw_blob_register, bw_blob_register):
+    assert op_attribute.HasField("blob_backward_used_signature"), op_attribute
+    signature_map = (
+        op_attribute.blob_backward_used_signature.bn_in_op2blob_backward_used
+    )
+    bn_in_op2lbi = op_attribute.arg_signature.bn_in_op2lbi
+    for bn_in_op, is_blob_backward_used in signature_map.items():
+        if not is_blob_backward_used:
+            continue
+        lbi = bn_in_op2lbi[bn_in_op]
+        lbn = "%s/%s" % (lbi.op_name, lbi.blob_name)
+        blob_object = fw_blob_register.GetObject4BlobName(lbn)
+        bw_blob_register.TrySetObject4BlobName(lbn, blob_object)
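+# An illustrative sketch (not part of the original patch): how the two helpers
+# above might cooperate during eager interpretation. `op_attribute` is a
+# hypothetical inferred OpAttribute whose signatures mark which blobs die after
+# this op and which ones the backward pass still needs.
+#
+#     fw_blob_register = oneflow._oneflow_internal.GetDefaultBlobRegister()
+#     bw_blob_register = GetDefaultBackwardBlobRegister()
+#     # keep blobs the backward pass will still read ...
+#     TrySetBackwardUsedBlobObject(op_attribute, fw_blob_register, bw_blob_register)
+#     # ... then drop blobs whose last use was this op
+#     ReleaseUnusedBlobObject(op_attribute, bw_blob_register)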
+""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.eager import gradient_util as gradient_util +from oneflow.compatible_single_client_python.eager import op_executor as op_executor +from oneflow.core.operator import op_attribute_pb2 as op_attribute_pb +from oneflow.core.job import scope_pb2 as scope_pb +from oneflow.core.job import placement_pb2 as placement_pb +from google.protobuf import text_format +from oneflow.compatible_single_client_python.framework import scope_util as scope_util +from oneflow.compatible_single_client_python.eager import ( + symbol_storage as symbol_storage, +) +import oneflow._oneflow_internal + + +def MakeScopeSymbol(job_conf, parallel_conf, is_mirrored): + parallel_hierarchy = None + if parallel_conf.has_hierarchy(): + parallel_hierarchy = oneflow._oneflow_internal.Size( + tuple(parallel_conf.hierarchy().dim()) + ) + return scope_util.MakeInitialScope( + job_conf, + parallel_conf.device_tag(), + list(parallel_conf.device_name()), + parallel_hierarchy, + is_mirrored, + ).symbol_id + + +def MakeParallelDescSymbol(parallel_conf): + symbol_id = None + + def BuildInstruction(builder): + nonlocal symbol_id + symbol_id = builder.GetParallelDescSymbol(parallel_conf).symbol_id + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildInstruction) + return symbol_id + + +def MirroredCast(op_attribute_str, parallel_conf): + op_attribute = text_format.Parse(op_attribute_str, op_attribute_pb.OpAttribute()) + blob_register = oneflow._oneflow_internal.GetDefaultBlobRegister() + is_cast_to_mirrored = op_attribute.op_conf.HasField("cast_to_mirrored_conf") + is_cast_from_mirrored = op_attribute.op_conf.HasField("cast_from_mirrored_conf") + assert is_cast_to_mirrored or is_cast_from_mirrored + _MirroredCastAndAddOutputBlobReleaser(op_attribute, blob_register) + bw_blob_register = gradient_util.GetDefaultBackwardBlobRegister() + gradient_util.TrySetBackwardUsedBlobObject( + op_attribute, blob_register, bw_blob_register + ) + + +def InterpretCompletedOp(op_attribute_str, parallel_conf): + op_attribute = text_format.Parse(op_attribute_str, op_attribute_pb.OpAttribute()) + blob_register = gradient_util.GetDefaultBackwardBlobRegister() + _InterpretCompletedOp(op_attribute, parallel_conf, blob_register) + gradient_util.ReleaseUnusedBlobObject(op_attribute, blob_register) + + +def _InterpretCompletedOp(op_attribute, parallel_conf, blob_register): + return op_executor.Interpret(op_attribute, parallel_conf, blob_register) + + +def _MirroredCastAndAddOutputBlobReleaser(op_attribute, blob_register): + op_executor.MirroredCast(op_attribute, blob_register) + _AddOutputBlobObjectReleaser4InputBlobObject(op_attribute, blob_register) + + +def _AddOutputBlobObjectReleaser4InputBlobObject(op_attribute, blob_register): + in_lbi = op_attribute.arg_signature.bn_in_op2lbi["in"] + in_lbn = "%s/%s" % (in_lbi.op_name, in_lbi.blob_name) + in_blob_object = blob_register.GetObject4BlobName(in_lbn) + release = _MakeReleaser4MirroredCastBlobObject(op_attribute, blob_register) + in_blob_object.add_releaser(release) + + +def _MakeReleaser4MirroredCastBlobObject(op_attribute, blob_register): + def ReleaseMirroredBlobObject(obj): + for obn in op_attribute.output_bns: + lbi = op_attribute.arg_signature.bn_in_op2lbi[obn] + lbn = "%s/%s" % (lbi.op_name, lbi.blob_name) + blob_object = blob_register.GetObject4BlobName(lbn) + blob_register.ClearObject4BlobName(lbn) + + return ReleaseMirroredBlobObject diff --git a/oneflow/compatible_single_client_python/eager/op_executor.py 
b/oneflow/compatible_single_client_python/eager/op_executor.py new file mode 100644 index 0000000000000000000000000000000000000000..b775ed1d0e1cacb282cdcbf5d49967da1927749c --- /dev/null +++ b/oneflow/compatible_single_client_python/eager/op_executor.py @@ -0,0 +1,505 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from oneflow.core.operator import op_node_signature_pb2 as op_node_signature_pb +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.operator import interface_blob_conf_pb2 as inter_face_blob_conf_util +from oneflow.compatible_single_client_python.eager import boxing_util as boxing_util +from oneflow.compatible_single_client_python.eager import ( + blob_register as blob_register_util, +) +from oneflow.compatible_single_client_python.eager import ( + symbol_storage as symbol_storage, +) +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import ( + python_callback as python_callback, +) +from oneflow.compatible_single_client_python.experimental import ( + name_scope as name_scope, +) +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.framework import scope_util as scope_util +from oneflow.compatible_single_client_python.eager import op_infer_util as op_infer_util +from oneflow.compatible_single_client_python.eager import ( + blob_register as blob_register_util, +) +from oneflow._oneflow_internal.oneflow.core.job import placement as placement_cfg +from oneflow._oneflow_internal.oneflow.core.register import logical_blob_id as lbi_util +from google.protobuf import text_format + +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +import numpy as np +import os + +default_blob_register = oneflow._oneflow_internal.GetDefaultBlobRegister() + + +def Interpret(op_attribute, parallel_conf, blob_register): + if op_attribute.op_conf.HasField("cast_to_mirrored_conf"): + return MirroredCast(op_attribute, blob_register) + if op_attribute.op_conf.HasField("cast_from_mirrored_conf"): + return MirroredCast(op_attribute, blob_register) + assert isinstance(parallel_conf, placement_cfg.ParallelConf) + if op_attribute.op_conf.HasField("distribute_split_conf"): + return DistributeSplitOrClone(op_attribute, parallel_conf, blob_register) + if op_attribute.op_conf.HasField("distribute_clone_conf"): + return DistributeSplitOrClone(op_attribute, parallel_conf, blob_register) + if op_attribute.op_conf.HasField("distribute_concat_conf"): + return DistributeConcatOrAdd(op_attribute, parallel_conf, blob_register) + if op_attribute.op_conf.HasField("distribute_add_conf"): + return 
DistributeConcatOrAdd(op_attribute, parallel_conf, blob_register) + if op_attribute.op_conf.HasField("variable_conf"): + return _FindOrCreateVarBlobObject(op_attribute, parallel_conf, blob_register) + if op_attribute.op_conf.HasField("foreign_watch_conf"): + return _Watch(op_attribute, parallel_conf, blob_register) + return _NaiveInterpret(op_attribute, parallel_conf, blob_register) + + +def OpKernelCall(opkernel_object, op_attribute, blob_register): + def BuildInstruction(builder): + with blob_register_util.BnInOp2BlobObjectScope( + blob_register, op_attribute + ) as bn_in_op2blob_object: + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + builder.StatefulCall( + cfg_op_attribute, + opkernel_object, + bn_in_op2blob_object, + boxing_util.BoxingTo, + ) + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildInstruction) + + +def MirroredCast(op_attribute, blob_register): + def BuildInstruction(builder): + with blob_register_util.BnInOp2BlobObjectScope( + blob_register, op_attribute + ) as bn_in_op2blob_object: + in_blob_object = bn_in_op2blob_object["in"] + parallel_desc_symbol = in_blob_object.parallel_desc_symbol + op_arg_parallel_attr = oneflow._oneflow_internal.GetOpArgParallelAttribute( + parallel_desc_symbol, str(op_attribute), "out" + ) + out_blob_object = builder.MakeReferenceBlobObject( + in_blob_object, op_arg_parallel_attr + ) + bn_in_op2blob_object["out"] = out_blob_object + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildInstruction) + + +def DistributeSplitOrClone(op_attribute, parallel_conf, blob_register): + parallel_sig = op_attribute.parallel_signature.bn_in_op2parallel_desc_symbol_id + + def GetInBlobObject(builder, ibn, bn_in_op2blob_object): + origin_blob_object = bn_in_op2blob_object[ibn] + in_op_parallel_desc_sym = oneflow._oneflow_internal.GetPlacementSymbol( + parallel_sig[ibn] + ) + in_op_arg_parallel_attr = oneflow._oneflow_internal.GetOpArgParallelAttribute( + in_op_parallel_desc_sym, str(op_attribute), ibn + ) + return boxing_util.BoxingTo( + builder, origin_blob_object, in_op_arg_parallel_attr + ) + + def BuildInstruction(builder): + with blob_register_util.BnInOp2BlobObjectScope( + blob_register, op_attribute + ) as bn_in_op2blob_object: + physical_out_blob_objects = builder.UnpackLogicalBlobToPhysicalBlobs( + GetInBlobObject(builder, "in", bn_in_op2blob_object) + ) + for i, blob_object in enumerate(physical_out_blob_objects): + bn_in_op2blob_object["out_%s" % i] = blob_object + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildInstruction) + + +def DistributeConcatOrAdd(op_attribute, parallel_conf, blob_register): + op_parallel_desc_sym = oneflow._oneflow_internal.GetPlacementSymbol( + op_attribute.parallel_signature.op_parallel_desc_symbol_id + ) + parallel_size = len(op_attribute.input_bns) + op_arg_parallel_attr = oneflow._oneflow_internal.GetOpArgParallelAttribute( + op_parallel_desc_sym, str(op_attribute), "out" + ) + op_arg_blob_attr = oneflow._oneflow_internal.GetOpArgBlobAttribute( + str(op_attribute), "out" + ) + parallel_sig = op_attribute.parallel_signature.bn_in_op2parallel_desc_symbol_id + + def GetInBlobObject(builder, i, bn_in_op2blob_object): + ibn = "in_%s" % i + origin_blob_object = bn_in_op2blob_object[ibn] + in_op_parallel_desc_sym = oneflow._oneflow_internal.GetPlacementSymbol( + parallel_sig[ibn] + ) + in_op_arg_parallel_attr = oneflow._oneflow_internal.GetOpArgParallelAttribute( + in_op_parallel_desc_sym, str(op_attribute), ibn + ) + return boxing_util.BoxingTo( + 
builder, origin_blob_object, in_op_arg_parallel_attr + ) + + def BuildInstruction(builder): + with blob_register_util.BnInOp2BlobObjectScope( + blob_register, op_attribute + ) as bn_in_op2blob_object: + + def GetPhysicalInBlob(i): + return GetInBlobObject(builder, i, bn_in_op2blob_object) + + in_blob_objects = [GetPhysicalInBlob(i) for i in range(parallel_size)] + bn_in_op2blob_object["out"] = builder.PackPhysicalBlobsToLogicalBlob( + in_blob_objects, op_arg_parallel_attr, op_arg_blob_attr + ) + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildInstruction) + + +def _FindOrCreateVarBlobObject(op_attribute, parallel_conf, blob_register): + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + name = name_scope.GetJobNameScopePrefix(job_name) + op_attribute.op_conf.name + sess = session_ctx.GetDefaultSession() + var_blob, _ = sess.TryGetVariableBlobOfJobFromStash(job_name, name) + if var_blob is not None: + blob_register.SetObject4BlobName( + var_blob.logical_blob_name, var_blob.blob_object + ) + return + _NaiveInterpret(op_attribute, parallel_conf, blob_register) + var_blob = _MakeEagerLogicalBlob(op_attribute, "out", blob_register=blob_register) + EagerInitVariableBlob(sess, op_attribute.op_conf, var_blob) + sess.StashVariableBlob4Job(job_name, op_attribute.op_conf.name, var_blob) + return var_blob + + +def _Watch(op_attribute, parallel_conf, blob_register): + lbi = op_attribute.arg_signature.bn_in_op2lbi["in"] + uuid = op_attribute.op_conf.foreign_watch_conf.handler_uuid + lbn = "%s/%s" % (lbi.op_name, lbi.blob_name) + in_blob_object = blob_register.GetObject4BlobName(lbn) + if not isinstance(lbi, lbi_util.LogicalBlobId): + cfg_lbi = lbi_util.LogicalBlobId() + cfg_lbi.set_op_name(lbi.op_name) + cfg_lbi.set_blob_name(lbi.blob_name) + lbi = cfg_lbi + if in_blob_object.op_arg_parallel_attr.is_mirrored(): + blob = oneflow._oneflow_internal.EagerMirroredBlob( + lbi, in_blob_object, default_blob_register + ) + else: + blob = oneflow._oneflow_internal.EagerConsistentBlob( + lbi, in_blob_object, default_blob_register + ) + uuid2watch_handler = session_ctx.GetDefaultSession().uuid2watch_handler + assert uuid in uuid2watch_handler + uuid2watch_handler[uuid](blob) + del uuid2watch_handler[uuid] + + +def _NaiveInterpret(op_attribute, parallel_conf, blob_register): + def BuildInstruction(builder): + with blob_register_util.BnInOp2BlobObjectScope( + blob_register, op_attribute + ) as bn_in_op2blob_object: + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + builder.StatelessCall( + cfg_op_attribute, + parallel_conf, + bn_in_op2blob_object, + boxing_util.BoxingTo, + ) + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildInstruction) + + +def _MakeEagerLogicalBlob(op_attribute, obn, blob_register): + lbi = op_attribute.arg_signature.bn_in_op2lbi[obn] + blob_object = blob_register.GetObject4BlobName( + "%s/%s" % (lbi.op_name, lbi.blob_name) + ) + mirrored_sig_map = op_attribute.mirrored_signature.bn_in_op2opt_mirrored_parallel + if not isinstance(lbi, lbi_util.LogicalBlobId): + cfg_lbi = lbi_util.LogicalBlobId() + cfg_lbi.set_op_name(lbi.op_name) + cfg_lbi.set_blob_name(lbi.blob_name) + lbi = cfg_lbi + if mirrored_sig_map[obn].HasField("mirrored_parallel"): + return oneflow._oneflow_internal.EagerMirroredBlob( + lbi, blob_object, default_blob_register + ) + else: + return oneflow._oneflow_internal.EagerConsistentBlob( + lbi, blob_object, default_blob_register + ) + + +def EagerInitVariableBlob(sess, var_op_conf, 
var_blob): + snapshot_path = sess.snapshot_mgr.get_snapshot_path(var_op_conf.name) + with flow.scope.placement("cpu", "0:0"): + if snapshot_path is None: + blob_object = _EagerRunModelInit(var_op_conf) + else: + blob_object = _EagerRunModelLoad(var_op_conf, snapshot_path) + + _Assign(var_blob.blob_object, blob_object) + + +def EagerSaveVariableBlob(snapshot_path): + var_blobs = session_ctx.GetDefaultSession().var_name2var_blob.values() + with flow.scope.placement("cpu", "0:0"): + _EagerRunModelSave(var_blobs, snapshot_path) + + +def _Assign(var_blob_object, value_blob_object): + def BuildAssignInstruction(builder): + new_parallel_desc_symbol = boxing_util.TryReplaceDeviceTag( + builder, var_blob_object.parallel_desc_symbol, "cpu" + ) + consumer_op_arg_parallel_attr = oneflow._oneflow_internal.OpArgParallelAttribute( + new_parallel_desc_symbol, + str(var_blob_object.op_arg_parallel_attr.sbp_parallel), + str(var_blob_object.op_arg_parallel_attr.opt_mirrored_parallel), + ) + tmp_blob_object = boxing_util.BoxingTo( + builder, value_blob_object, consumer_op_arg_parallel_attr + ) + boxing_util.Assign(builder, var_blob_object, tmp_blob_object) + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildAssignInstruction) + + +def _BuildNotMirroredScope(old_scope, builder): + return builder.BuildScopeWithNewIsMirrored(old_scope, False) + + +def _EagerRunModelInit(var_op_conf): + op_conf, _ = _GenModelInitOpConfAndRetLbi(var_op_conf) + bn_in_op2blob_object = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + + def BuildModelInitInstruction(builder): + upstream_signature = op_node_signature_pb.OpNodeSignature() + op_conf.scope_symbol_id = flow.current_scope().symbol_id + op_attribute = c_api_util.InferOpConf(op_conf, upstream_signature) + parallel_conf = flow.current_scope().device_parallel_desc_symbol.parallel_conf + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + builder.StatelessCall( + cfg_op_attribute, parallel_conf, bn_in_op2blob_object, boxing_util.BoxingTo + ) + + sess = session_ctx.GetDefaultSession() + with scope_util.ScopeContext(scope_util.MakeScope(_BuildNotMirroredScope)): + oneflow._oneflow_internal.deprecated.LogicalRun(BuildModelInitInstruction) + + return bn_in_op2blob_object["out_0"] + + +def _MakeModelIOPathInputBuilds(op_conf, path, bn_in_op2blob_object): + def BuildModelIOPathInputInstruction(builder): + op_attribute = op_infer_util.Infer(op_conf, ibn2blob_object={}) + parallel_conf = flow.current_scope().device_parallel_desc_symbol.parallel_conf + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + builder.StatelessCall( + cfg_op_attribute, parallel_conf, bn_in_op2blob_object, boxing_util.BoxingTo, + ) + + def FeedPath(ofblob): + ofblob.CopyFromNdarray(np.frombuffer(path.encode("ascii"), dtype=np.int8)) + + def BuildFeedPathInstruction(builder): + blob_object = bn_in_op2blob_object["out"] + builder.FeedBlob( + blob_object, python_callback.GetIdForRegisteredCallback(FeedPath) + ) + builder.InsertRemoveForeignCallbackInstruction( + blob_object.object_id, python_callback.GetIdForRegisteredCallback(FeedPath) + ) + + return BuildModelIOPathInputInstruction, BuildFeedPathInstruction + + +def _EagerRunModelLoad(var_op_conf, snapshot_path): + assert isinstance(snapshot_path, str) + assert os.path.basename(snapshot_path) == "out" + snapshot_path = os.path.dirname(snapshot_path) + assert os.path.basename(snapshot_path) == var_op_conf.name + snapshot_path = 
os.path.dirname(snapshot_path) + + path_input_op_conf, path_lbi = _GenModelIOPathInputOpConfAndRetLbi() + path_input_blob_objects = {} + ( + BuildModelIOPathInputInstruction, + BuildFeedPathInstruction, + ) = _MakeModelIOPathInputBuilds( + path_input_op_conf, snapshot_path, path_input_blob_objects + ) + + model_load_op_conf, _ = _GenModelLoadOpConfAndRetLbi(var_op_conf, path_lbi) + model_load_blob_objects = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + + def BuildModelLoadInstruction(builder): + path_blob_object = path_input_blob_objects["out"] + model_load_blob_objects["path"] = path_blob_object + op_attribute = op_infer_util.Infer( + model_load_op_conf, ibn2blob_object=model_load_blob_objects + ) + parallel_conf = path_blob_object.parallel_desc_symbol.parallel_conf + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + builder.StatelessCall( + cfg_op_attribute, + parallel_conf, + model_load_blob_objects, + boxing_util.BoxingTo, + ) + + sess = session_ctx.GetDefaultSession() + with scope_util.ScopeContext(scope_util.MakeScope(_BuildNotMirroredScope)): + oneflow._oneflow_internal.deprecated.LogicalRun( + BuildModelIOPathInputInstruction + ) + oneflow._oneflow_internal.deprecated.LogicalRun(BuildFeedPathInstruction) + oneflow._oneflow_internal.deprecated.LogicalRun(BuildModelLoadInstruction) + + return model_load_blob_objects["out_0"] + + +def _EagerRunModelSave(var_blobs, snapshot_path): + path_input_op_conf, path_lbi = _GenModelIOPathInputOpConfAndRetLbi() + path_input_blob_objects = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + ( + BuildModelIOPathInputInstruction, + BuildFeedPathInstruction, + ) = _MakeModelIOPathInputBuilds( + path_input_op_conf, snapshot_path, path_input_blob_objects + ) + + model_save_op_conf = _GenModelSaveOpConf(var_blobs, path_lbi) + model_save_blob_objects = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + + def BuildModelSaveInstruction(builder): + path_blob_object = path_input_blob_objects["out"] + model_save_blob_objects["path"] = path_blob_object + for i, blob in enumerate(var_blobs): + model_save_blob_objects["in_{}".format(i)] = blob.blob_object + + op_attribute = op_infer_util.Infer( + model_save_op_conf, ibn2blob_object=model_save_blob_objects + ) + parallel_conf = path_blob_object.parallel_desc_symbol.parallel_conf + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + builder.StatelessCall( + cfg_op_attribute, + parallel_conf, + model_save_blob_objects, + boxing_util.BoxingTo, + ) + + sess = session_ctx.GetDefaultSession() + with scope_util.ScopeContext(scope_util.MakeScope(_BuildNotMirroredScope)): + oneflow._oneflow_internal.deprecated.LogicalRun( + BuildModelIOPathInputInstruction + ) + oneflow._oneflow_internal.deprecated.LogicalRun(BuildFeedPathInstruction) + oneflow._oneflow_internal.deprecated.LogicalRun(BuildModelSaveInstruction) + + +def _GenModelInitOpConfAndRetLbi(var_op_conf): + variable_op_conf = op_conf_util.VariableOpConf() + variable_op_conf.CopyFrom(var_op_conf.variable_conf) + op_conf = op_conf_util.OperatorConf() + op_conf.name = "model_init" + op_conf.device_tag = "cpu" + op_conf.model_init_conf.out.append("out_0") + op_conf.model_init_conf.variable_op_name.append(var_op_conf.name) + op_conf.model_init_conf.original_variable_conf.append(variable_op_conf) + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = op_conf.model_init_conf.out[0] + return op_conf, 
lbi + + +def _GenModelLoadOpConfAndRetLbi(var_op_conf, path_lbi): + variable_op_conf = op_conf_util.VariableOpConf() + variable_op_conf.CopyFrom(var_op_conf.variable_conf) + + op_conf = op_conf_util.OperatorConf() + op_conf.name = "model_load" + op_conf.device_tag = "cpu" + op_conf.model_load_conf.path = "{}/{}".format(path_lbi.op_name, path_lbi.blob_name) + op_conf.model_load_conf.out.append("out_0") + op_conf.model_load_conf.variable_op_name.append(var_op_conf.name) + op_conf.model_load_conf.original_variable_conf.append(variable_op_conf) + + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = op_conf.model_load_conf.out[0] + return op_conf, lbi + + +def _GenModelIOPathInputOpConfAndRetLbi(): + op_conf = op_conf_util.OperatorConf() + op_conf.name = "model_io_path_input" + op_conf.device_tag = "cpu" + op_conf.input_conf.out = "out" + + blob_conf = inter_face_blob_conf_util.InterfaceBlobConf() + blob_conf.shape.dim.append(65536) + blob_conf.data_type = oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype( + flow.int8 + ) + blob_conf.is_dynamic = True + op_conf.input_conf.blob_conf.CopyFrom(blob_conf) + + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = op_conf.input_conf.out + return op_conf, lbi + + +def _GenModelSaveOpConf(var_blobs, path_lbi): + op_conf = op_conf_util.OperatorConf() + op_conf.name = "model_save" + op_conf.device_tag = "cpu" + op_conf.model_save_conf.path = "{}/{}".format(path_lbi.op_name, path_lbi.blob_name) + for blob in var_blobs: + getattr(op_conf.model_save_conf, "in").append(blob.logical_blob_name) + getattr(op_conf.model_save_conf, "key").append(blob.logical_blob_name) + + return op_conf diff --git a/oneflow/compatible_single_client_python/eager/op_infer_util.py b/oneflow/compatible_single_client_python/eager/op_infer_util.py new file mode 100644 index 0000000000000000000000000000000000000000..061825fc88706fd6fa538fedff47ca444d0670f3 --- /dev/null +++ b/oneflow/compatible_single_client_python/eager/op_infer_util.py @@ -0,0 +1,44 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import +from oneflow.core.operator import op_node_signature_pb2 as op_node_signature_pb +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow._oneflow_internal.oneflow.core.operator import ( + op_node_signature as op_node_signature_cfg, +) +from oneflow.compatible import single_client as flow +from google.protobuf import text_format + + +def Infer(op_conf, ibn2blob_object, scope_symbol_id=None): + if scope_symbol_id is None: + scope_symbol_id = flow.current_scope().symbol_id + op_conf.scope_symbol_id = scope_symbol_id + upstream_signature = MakeUpstreamSignature(ibn2blob_object) + return c_api_util.InferOpConf(op_conf, upstream_signature) + + +def MakeUpstreamSignature(ibn2blob_object): + upstream_signature_cfg = op_node_signature_cfg.OpNodeSignature() + + for ibn, blob_object in ibn2blob_object.items(): + blob_object.op_arg_blob_attr.DumpToOpNodeSignature(ibn, upstream_signature_cfg) + blob_object.op_arg_parallel_attr.DumpToOpNodeSignature( + ibn, upstream_signature_cfg + ) + return text_format.Parse( + str(upstream_signature_cfg), op_node_signature_pb.OpNodeSignature() + ) diff --git a/oneflow/compatible_single_client_python/eager/symbol.py b/oneflow/compatible_single_client_python/eager/symbol.py new file mode 100644 index 0000000000000000000000000000000000000000..0083b65a168d5b296f6f4f22ef809890108c548e --- /dev/null +++ b/oneflow/compatible_single_client_python/eager/symbol.py @@ -0,0 +1,34 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.core.job import placement_pb2 as placement_pb +import functools + + +class Symbol(object): + def __init__(self, symbol_id, data): + self.symbol_id_ = symbol_id + self.data_ = data + + @property + def symbol_id(self): + return self.symbol_id_ + + @property + def data(self): + return self.data_ diff --git a/oneflow/compatible_single_client_python/eager/symbol_storage.py b/oneflow/compatible_single_client_python/eager/symbol_storage.py new file mode 100644 index 0000000000000000000000000000000000000000..0f5d0cddda3c664f3f7f55c9990e8437eb487fd3 --- /dev/null +++ b/oneflow/compatible_single_client_python/eager/symbol_storage.py @@ -0,0 +1,54 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + + +def HasSymbol4Id(symbol_id): + global id2symbol + return symbol_id in id2symbol + + +def GetSymbol4Id(symbol_id): + global id2symbol + assert symbol_id in id2symbol + return id2symbol[symbol_id] + + +def SetSymbol4Id(symbol_id, symbol): + global id2symbol + assert symbol_id not in id2symbol + id2symbol[symbol_id] = symbol + + +id2symbol = {} + + +def HasSymbol4SerializedOpConf(serialized_op_conf): + global serialized_op_conf2symbol + return serialized_op_conf in serialized_op_conf2symbol + + +def GetSymbol4SerializedOpConf(serialized_op_conf): + global serialized_op_conf2symbol + return serialized_op_conf2symbol[serialized_op_conf] + + +def SetSymbol4SerializedOpConf(serialized_op_conf, symbol): + assert not HasSymbol4SerializedOpConf(serialized_op_conf) + global serialized_op_conf2symbol + serialized_op_conf2symbol[serialized_op_conf] = symbol + + +serialized_op_conf2symbol = {} diff --git a/oneflow/compatible_single_client_python/experimental/__init__.py b/oneflow/compatible_single_client_python/experimental/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/experimental/enable_typing_check.py b/oneflow/compatible_single_client_python/experimental/enable_typing_check.py new file mode 100644 index 0000000000000000000000000000000000000000..062c8846c874942b5a8b8d65054c759f305e12b8 --- /dev/null +++ b/oneflow/compatible_single_client_python/experimental/enable_typing_check.py @@ -0,0 +1,35 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if + + +@oneflow_export("experimental.enable_typing_check") +def api_enable_typing_check(val: bool = True) -> None: + """ enable typing check for global_function """ + return enable_if.unique([enable_typing_check])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.any_global_function_defined) +def enable_typing_check(val): + global typing_check_enabled + typing_check_enabled = val + + +typing_check_enabled = False diff --git a/oneflow/compatible_single_client_python/experimental/indexed_slices_ops.py b/oneflow/compatible_single_client_python/experimental/indexed_slices_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..02120b23811447852f792398e3d7971fefc9be68 --- /dev/null +++ b/oneflow/compatible_single_client_python/experimental/indexed_slices_ops.py @@ -0,0 +1,58 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import ( + distribute as distribute_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + input_blob_def as input_blob_util, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +import oneflow._oneflow_internal +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from typing import Optional, Tuple + + +@oneflow_export("experimental.indexed_slices_reduce_sum") +def indexed_slices_reduce_sum( + indices: input_blob_util.ArgBlobDef, + values: input_blob_util.ArgBlobDef, + name: Optional[str] = None, +) -> Tuple[oneflow._oneflow_internal.BlobDesc]: + op = ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("IndexedSlicesReduceSum_") + ) + .Op("indexed_slices_reduce_sum") + .Input("x_indices", [indices]) + .Input("x_values", [values]) + .Output("y_indices") + .Output("y_values") + .Output("num_unique") + .Build() + ) + return op.InferAndTryRun().RemoteBlobList() diff --git a/oneflow/compatible_single_client_python/experimental/interface_op_read_and_write.py b/oneflow/compatible_single_client_python/experimental/interface_op_read_and_write.py new file mode 100644 index 0000000000000000000000000000000000000000..7875c691c261ff9451b5c42cc2c9f6c625daa4dd --- /dev/null +++ b/oneflow/compatible_single_client_python/experimental/interface_op_read_and_write.py @@ -0,0 +1,186 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from oneflow.compatible import single_client as flow +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.lib.core import async_util as async_util +from oneflow.compatible_single_client_python.framework import ( + input_blob_def as input_blob_def_util, +) +from oneflow.compatible_single_client_python.framework import dtype as dtype_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import push_util as push_util +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.framework import runtime_mode as rt_mode +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.eager import op_executor as op_executor +from oneflow._oneflow_internal.oneflow.core.job import placement as placement_cfg +from oneflow._oneflow_internal.oneflow.core.register import logical_blob_id as lbi_util +from oneflow._oneflow_internal.oneflow.core.common import shape as shape_proto_cfg +import oneflow._oneflow_internal + + +def sync_default_session_if_normal(): + # TODO merge with same function in framework/check_point_v2.py + if rt_mode.CurrentMode() == rt_mode.NORMAL_MODE: + flow.sync_default_session() + else: + # do nothing + pass + + +blob_register = oneflow._oneflow_internal.GetDefaultBlobRegister() + + +def _GetInterfaceBlobObject(builder, op_name): + sess = session_ctx.GetDefaultSession() + if oneflow._oneflow_internal.EagerExecutionEnabled(): + return sess.var_name2var_blob[op_name].blob_object + sess = session_ctx.GetDefaultSession() + op_attribute = sess.OpAttribute4InterfaceOpName(op_name) + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + parallel_conf = sess.ParallelConf4LazyInterfaceOpName(op_name) + if not isinstance( + parallel_conf, oneflow._oneflow_internal.oneflow.core.job.placement.ParallelConf + ): + parallel_conf_cfg = placement_cfg.ParallelConf() + parallel_conf_cfg.set_device_tag(parallel_conf.device_tag) + for device_name in parallel_conf.device_name: + parallel_conf_cfg.add_device_name(device_name) + if parallel_conf.HasField("hierarchy"): + hierarchy = shape_proto_cfg.ShapeProto() + for dim in parallel_conf.hierarchy.dim: + hierarchy.add_dim(dim) + assert hierarchy.dim_size() > 0 + parallel_conf_cfg.mutable_hierarchy().CopyFrom(hierarchy) + parallel_conf = parallel_conf_cfg + + blob_object = builder.MakeLazyRefBlobObject( + op_name, cfg_op_attribute, parallel_conf + ) + return blob_object + + +def GetEagerInterfaceBlob(op_name): + sync_default_session_if_normal() + + sess = session_ctx.GetDefaultSession() + + def CreateBlob(): + job_name = sess.JobName4InterfaceOpName(op_name) + + def Build(builder, Yield): + blob_object = _GetInterfaceBlobObject(builder, op_name) + lbi = lbi_util.LogicalBlobId() + lbi.set_op_name(op_name) + op_attribute = sess.OpAttribute4InterfaceOpName(op_name) + assert len(op_attribute.output_bns) == 1 + lbi.set_blob_name(op_attribute.output_bns[0]) + if blob_object.op_arg_parallel_attr.is_mirrored(): + remote_blob = oneflow._oneflow_internal.EagerMirroredBlob( + lbi, blob_object, blob_register, job_name + ) + else: + remote_blob = oneflow._oneflow_internal.EagerConsistentBlob( + lbi, blob_object, blob_register, job_name + ) + + Yield(remote_blob) + + def 
AsyncGetInterfaceBlob(Yield): + oneflow._oneflow_internal.deprecated.LogicalRun( + lambda builder: Build(builder, Yield) + ) + + blob = async_util.Await(1, AsyncGetInterfaceBlob)[0] + return blob + + return sess.FindOrCreateLazyBlob(op_name, CreateBlob) + + +@oneflow_export("experimental.get_interface_blob_value") +def GetInterfaceBlobValue(op_name): + sync_default_session_if_normal() + + sess = session_ctx.GetDefaultSession() + job_name = sess.JobName4InterfaceOpName(op_name) + + def AsyncGetInterfaceBlobValue(Yield): + def build(builder): + blob_object = GetEagerInterfaceBlob(op_name).blob_object + lbi = lbi_util.LogicalBlobId() + lbi.set_op_name(op_name) + op_attribute = sess.OpAttribute4InterfaceOpName(op_name) + assert len(op_attribute.output_bns) == 1 + lbi.set_blob_name(op_attribute.output_bns[0]) + if not isinstance(lbi, lbi_util.LogicalBlobId): + cfg_lbi = lbi_util.LogicalBlobId() + cfg_lbi.set_op_name(lbi.op_name) + cfg_lbi.set_blob_name(lbi.blob_name) + lbi = cfg_lbi + if blob_object.op_arg_parallel_attr.is_mirrored(): + remote_blob = oneflow._oneflow_internal.EagerMirroredBlob( + lbi, blob_object, blob_register, job_name + ) + else: + remote_blob = oneflow._oneflow_internal.EagerConsistentBlob( + lbi, blob_object, blob_register, job_name + ) + value = remote_blob.numpy() + Yield(value) + + oneflow._oneflow_internal.deprecated.LogicalRun(build) + + return async_util.Await(1, AsyncGetInterfaceBlobValue)[0] + + +def FeedValueToInterfaceBlobObject(blob_object, ndarray): + sync_default_session_if_normal() + + def build(builder): + if blob_object.op_arg_parallel_attr.is_mirrored(): + input_blob_def = input_blob_def_util.MirroredTensorDef( + ndarray.shape, + dtype=dtype_util.convert_numpy_dtype_to_oneflow_dtype(ndarray.dtype), + ) + else: + input_blob_def = input_blob_def_util.FixedTensorDef( + ndarray.shape, + dtype=dtype_util.convert_numpy_dtype_to_oneflow_dtype(ndarray.dtype), + ) + push_util.FeedValueToEagerBlob(blob_object, input_blob_def, ndarray) + + oneflow._oneflow_internal.deprecated.LogicalRun(build) + + +@oneflow_export("experimental.set_interface_blob_value") +def FeedValueToInterfaceBlob(op_name, ndarray): + sync_default_session_if_normal() + + def AsyncFeedValueToInterfaceBlob(Yield): + def build(builder): + blob_object = GetEagerInterfaceBlob(op_name).blob_object + FeedValueToInterfaceBlobObject(blob_object, ndarray) + Yield() + + oneflow._oneflow_internal.deprecated.LogicalRun(build) + + async_util.Await(1, AsyncFeedValueToInterfaceBlob) diff --git a/oneflow/compatible_single_client_python/experimental/load_mnist.py b/oneflow/compatible_single_client_python/experimental/load_mnist.py new file mode 100644 index 0000000000000000000000000000000000000000..9bd14a3abe737c358d0d0b1d68bc0dcec2929eda --- /dev/null +++ b/oneflow/compatible_single_client_python/experimental/load_mnist.py @@ -0,0 +1,105 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import os +import hashlib +import numpy as np +from tqdm import tqdm +import requests + +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + + +def get_sha256hash(file_path, Bytes=1024): + sha256hash = hashlib.sha256() + with open(file_path, "rb") as f: + while True: + data = f.read(Bytes) + if data: + sha256hash.update(data) + else: + break + ret = sha256hash.hexdigest() + return ret + + +def download_mnist_file(out_path, url): + resp = requests.get(url=url, stream=True) + size = int(resp.headers["Content-Length"]) / 1024 + print("File size: %.4f kb, downloading..." % size) + with open(out_path, "wb") as f: + for data in tqdm( + iterable=resp.iter_content(1024), total=size, unit="k", desc=out_path + ): + f.write(data) + print("Done!") + + +def get_mnist_file(sha256, url, out_dir): + path = os.path.join(out_dir, "mnist.npz") + if not (os.path.isfile(path)): + download_mnist_file(path, url) + print("File mnist.npz already exist, path:", path) + if not get_sha256hash(path) == sha256: + cheksum_fail = "sha256 verification failed, remove {0} and try again".format( + path + ) + raise Exception(cheksum_fail) + return path + + +@oneflow_export("data.load_mnist") +def load_mnist( + train_batch_size=100, + test_batch_size=100, + data_format="NCHW", + url="https://oneflow-public.oss-cn-beijing.aliyuncs.com/datasets/mnist.npz", + hash_check="63d4344077849053dc3036b247fa012b2b381de53fd055a66b539dffd76cf08e", + out_dir=".", +): + r"""Load mnist dataset, return images and labels, + if dataset doesn't exist, then download it to directory that out_dir specified + + Args: + train_batch_size (int, optional): batch size for train. Defaults to 100. + test_batch_size (int, optional): batch size for test or evaluate. Defaults to 100. + data_format (str, optional): data format. Defaults to "NCHW". + url (str, optional): url to get mnist.npz. Defaults to "https://oneflow-public.oss-cn-beijing.aliyuncs.com/datasets/mnist.npz". + hash_check (str, optional): file hash value. Defaults to "63d4344077849053dc3036b247fa012b2b381de53fd055a66b539dffd76cf08e". + out_dir (str, optional): dir to save downloaded file. Defaults to "./". + + Returns: + [type]: (train_images, train_labels), (test_images, test_labels) + """ + + path = get_mnist_file(hash_check, url, out_dir) + with np.load(path, allow_pickle=True) as f: + x_train, y_train = f["x_train"], f["y_train"] + x_test, y_test = f["x_test"], f["y_test"] + + def normalize(x, y, batch_size): + x = x.astype(np.float32) / 255.0 + y = y.astype(np.int32) + if data_format == "NCHW": + images = x.reshape((-1, batch_size, 1, x.shape[1], x.shape[2])) + else: + images = x.reshape((-1, batch_size, x.shape[1], x.shape[2], 1)) + labels = y.reshape((-1, batch_size)) + return images, labels + + train_images, train_labels = normalize(x_train, y_train, train_batch_size) + test_images, test_labels = normalize(x_test, y_test, test_batch_size) + + return (train_images, train_labels), (test_images, test_labels) diff --git a/oneflow/compatible_single_client_python/experimental/name_scope.py b/oneflow/compatible_single_client_python/experimental/name_scope.py new file mode 100644 index 0000000000000000000000000000000000000000..29ced6abec14a3810bf7ce1bc4d8b0ecfbcfffbe --- /dev/null +++ b/oneflow/compatible_single_client_python/experimental/name_scope.py @@ -0,0 +1,113 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from contextlib import contextmanager + +from oneflow.compatible_single_client_python.framework import ( + session_context as session_context, +) +from oneflow.compatible_single_client_python.framework import scope_util as scope_util +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + oneflow_deprecate, +) +import oneflow._oneflow_internal +import traceback + + +@oneflow_export( + "name_scope", "experimental.name_scope", "deprecated.variable_scope", +) +@oneflow_deprecate() +def deprecated_name_scope(*args, **kwargs): + print( + "WARNING:", + "oneflow.compatible.single_client.name_scope/oneflow.compatible.single_client.experimental.name_scope/deprecated.variable_scope", + "will be removed in the future, use {} instead.".format( + "oneflow.compatible.single_client.scope.namespace" + ), + ) + print(traceback.format_stack()[-2]) + return name_scope(*args, **kwargs) + + +@oneflow_export("scope.namespace") +@contextmanager +def name_scope(name: str) -> None: + r"""Create a namespace. All variables within the namespace will have a prefix `[SCOPE NAME]-`. This is for convenience only and has no other effect on the system. + Usage:: + + with oneflow.compatible.single_client.scope.namespace("scope1"): + ... + with oneflow.compatible.single_client.scope.namespace("scope2"): + ... 
+ + Args: + name: Name of this namespace + + """ + assert isinstance(name, str) + name_scope_stack_push(name) + + def BuildScope(old_scope, builder): + return builder.BuildScopeWithNewScopeName(old_scope, name) + + sess = session_context.GetDefaultSession() + try: + with scope_util.ScopeContext(scope_util.MakeScope(BuildScope)): + yield + finally: + name_scope_stack_pop() + + +def name_scope_stack_push(name): + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + sess = session_context.GetDefaultSession() + if job_name not in sess.job_name2name_scope_stack: + sess.job_name2name_scope_stack[job_name] = [] + sess.job_name2name_scope_stack[job_name].append(name) + + +def name_scope_stack_pop(): + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + sess = session_context.GetDefaultSession() + assert job_name in sess.job_name2name_scope_stack + assert len(sess.job_name2name_scope_stack[job_name]) > 0 + return sess.job_name2name_scope_stack[job_name].pop() + + +def GetJobNameScopePrefix(job_name): + sess = session_context.GetDefaultSession() + if job_name not in sess.job_name2name_scope_stack: + return "" + if len(sess.job_name2name_scope_stack[job_name]) == 0: + return "" + return "-".join(sess.job_name2name_scope_stack[job_name]) + "-" + + +def PrependOpNamePrefixIfNeed(op_conf): + if op_conf.HasField("variable_conf"): + return + + if op_conf.HasField("decode_ofrecord_conf"): + return + + if op_conf.HasField("user_conf"): + return + + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + op_conf.name = GetJobNameScopePrefix(job_name) + op_conf.name diff --git a/oneflow/compatible_single_client_python/experimental/square_sum_op.py b/oneflow/compatible_single_client_python/experimental/square_sum_op.py new file mode 100644 index 0000000000000000000000000000000000000000..47c7bc9c664f3c100db6fab83e447ed60bc75a86 --- /dev/null +++ b/oneflow/compatible_single_client_python/experimental/square_sum_op.py @@ -0,0 +1,57 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
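Because `GetJobNameScopePrefix` dash-joins the per-job stack, nested namespaces compose into a single prefix. A sketch of the effect while a job is being traced (session and job setup elided):

from oneflow.compatible import single_client as flow

with flow.scope.namespace("outer"):
    with flow.scope.namespace("inner"):
        # GetJobNameScopePrefix(job_name) returns "outer-inner-" here, so an op
        # named "dense1" (unless it is a variable/user/decode op, which
        # PrependOpNamePrefixIfNeed skips) becomes "outer-inner-dense1"
        pass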
+""" +from __future__ import absolute_import + +import operator +from functools import reduce + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import ( + distribute as distribute_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + input_blob_def as input_blob_util, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +import oneflow._oneflow_internal +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from typing import Optional + + +@oneflow_export("experimental.square_sum") +def square_sum( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("SquareSum_") + ) + .Op("square_sum") + .Input("x", [x]) + .Output("y") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) diff --git a/oneflow/compatible_single_client_python/experimental/ssp_variable_proxy_op.py b/oneflow/compatible_single_client_python/experimental/ssp_variable_proxy_op.py new file mode 100644 index 0000000000000000000000000000000000000000..5f7f14b61dd93bd7f32c882e653152e7136f4b04 --- /dev/null +++ b/oneflow/compatible_single_client_python/experimental/ssp_variable_proxy_op.py @@ -0,0 +1,44 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from oneflow.compatible import single_client as flow +from typing import Tuple +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +import oneflow._oneflow_internal + + +@oneflow_export("experimental.ssp_variable_proxy") +def ssp_variable_proxy( + var: oneflow._oneflow_internal.BlobDesc, buffer_size: int = 1, name=None +) -> Tuple[oneflow._oneflow_internal.BlobDesc, oneflow._oneflow_internal.BlobDesc]: + r""" return ref_blob, value_blob """ + if name is None: + name = id_util.UniqueStr("SspVariableProxy_") + blob_dict = ( + flow.user_op_builder(name) + .Op("ssp_variable_proxy") + .Input("var", [var]) + .Output("ref") + .Output("value") + .Attr("buffer_size", buffer_size) + .Build() + .InferAndTryRun() + .RemoteBlobDict() + ) + return blob_dict["ref"][0], blob_dict["value"][0] diff --git a/oneflow/compatible_single_client_python/experimental/unique_op.py b/oneflow/compatible_single_client_python/experimental/unique_op.py new file mode 100644 index 0000000000000000000000000000000000000000..30975b3c3f7707d8414e002f0dcefc25eb517c3e --- /dev/null +++ b/oneflow/compatible_single_client_python/experimental/unique_op.py @@ -0,0 +1,59 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from typing import Optional, Tuple + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import ( + distribute as distribute_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + input_blob_def as input_blob_util, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal + + +@oneflow_export("experimental.unique_with_counts") +def unique_with_counts( + x: input_blob_util.ArgBlobDef, + out_idx: flow.dtype = flow.int32, + name: Optional[str] = None, +) -> Tuple[oneflow._oneflow_internal.BlobDesc]: + op = ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("UniqueWithCounts_") + ) + .Op("unique_with_counts") + .Input("x", [x]) + .Attr("out_idx", out_idx) + .Output("y") + .Output("idx") + .Output("count") + .Output("num_unique") + .Build() + ) + return op.InferAndTryRun().RemoteBlobList() diff --git a/oneflow/compatible_single_client_python/framework/__init__.py b/oneflow/compatible_single_client_python/framework/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/framework/attr_util.py b/oneflow/compatible_single_client_python/framework/attr_util.py new file mode 100644 index 0000000000000000000000000000000000000000..8cf79594a46530a6db7b8f18c72384f201feadf0 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/attr_util.py @@ -0,0 +1,129 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from oneflow._oneflow_internal.oneflow.core.common import shape as shape_cfg +from oneflow._oneflow_internal.oneflow.core.common import data_type as data_type_cfg +from oneflow._oneflow_internal.oneflow.core.framework import ( + user_op_attr as user_op_attr_cfg, +) + + +def SetAttrValue(attr_value, py_value, default_attr_value): + if default_attr_value.HasField("at_bool"): + if py_value is None: + py_value = True + assert type(py_value) is bool + attr_value.set_at_bool(py_value) + elif default_attr_value.HasField("at_int64"): + assert type(py_value) is int + attr_value.set_at_int64(py_value) + elif default_attr_value.HasField("at_double"): + assert type(py_value) is float + attr_value.set_at_double(py_value) + elif default_attr_value.HasField("at_string"): + assert type(py_value) is str + attr_value.set_at_string(py_value) + else: + raise ValueError( + "config with type %s is invalid. supported types: [bool, int, float, str]" + % type(py_value) + ) + + +def convert_to_user_attr_value(op_type_name, attr_name, attr_value): + attribute = user_op_attr_cfg.AttrValue() + assert isinstance(attr_name, str) + attr_type = oneflow._oneflow_internal.GetUserOpAttrType(op_type_name, attr_name) + if attr_type == user_op_attr_cfg.kAtInt32: + assert isinstance(attr_value, int) + attribute.set_at_int32(attr_value) + elif attr_type == user_op_attr_cfg.kAtInt64: + assert isinstance(attr_value, int) + attribute.set_at_int64(attr_value) + elif attr_type == user_op_attr_cfg.kAtBool: + assert isinstance(attr_value, bool) + attribute.set_at_bool(attr_value) + elif attr_type == user_op_attr_cfg.kAtFloat: + assert isinstance(attr_value, (float, int)) + attribute.set_at_float(attr_value) + elif attr_type == user_op_attr_cfg.kAtDouble: + assert isinstance(attr_value, (float, int)) + attribute.set_at_double(attr_value) + elif attr_type == user_op_attr_cfg.kAtString: + assert isinstance(attr_value, str) + attribute.set_at_string(attr_value) + elif attr_type == user_op_attr_cfg.kAtShape: + assert isinstance(attr_value, (tuple, list)) + attribute_mutable_at_shape = attribute.mutable_at_shape() + for x in attr_value: + assert isinstance(x, int) + attribute_mutable_at_shape.add_dim(x) + elif attr_type == user_op_attr_cfg.kAtDataType: + assert attr_value in flow.dtypes() + attr_value = oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype( + attr_value + ) + assert isinstance(attr_value, int) + attribute.set_at_data_type(data_type_cfg.DataType(attr_value)) + elif attr_type == user_op_attr_cfg.kAtListInt32: + assert isinstance(attr_value, (tuple, list)) + attribute_mutable_at_list_int32 = attribute.mutable_at_list_int32() + for x in attr_value: + assert isinstance(x, int) + attribute_mutable_at_list_int32.add_val(x) + elif attr_type == user_op_attr_cfg.kAtListInt64: + assert isinstance(attr_value, (tuple, list)) + attribute_mutable_at_list_int64 = attribute.mutable_at_list_int64() + for x in attr_value: + assert isinstance(x, int) + attribute_mutable_at_list_int64.add_val(x) + elif attr_type == user_op_attr_cfg.kAtListFloat: + assert isinstance(attr_value, (tuple, list)) + attribute_mutable_at_list_float = attribute.mutable_at_list_float() + for x in attr_value: + assert isinstance(x, (float, int)) + attribute_mutable_at_list_float.add_val(x) + elif attr_type == user_op_attr_cfg.kAtListDataType: + assert isinstance(attr_value, (tuple, list)) + attribute_mutable_at_list_data_type = attribute.mutable_at_list_data_type() + for x 
in attr_value:
+            assert x in flow.dtypes()
+            x = oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype(x)
+            assert isinstance(x, int)
+            attribute_mutable_at_list_data_type.add_val(data_type_cfg.DataType(x))
+    elif attr_type == user_op_attr_cfg.kAtListShape:
+        assert isinstance(attr_value, (tuple, list))
+        attribute_mutable_at_list_shape = (
+            attribute.mutable_at_list_shape().mutable_val()
+        )
+        for x in attr_value:
+            assert isinstance(x, (tuple, list))
+            shape = shape_cfg.ShapeProto()
+            for dim in x:
+                assert isinstance(dim, int)
+                shape.add_dim(dim)
+            attribute_mutable_at_list_shape.Add().CopyFrom(shape)
+    elif attr_type == user_op_attr_cfg.kAtListString:
+        assert isinstance(attr_value, (tuple, list))
+        attribute_mutable_at_list_string = attribute.mutable_at_list_string()
+        for x in attr_value:
+            assert isinstance(x, str)
+            attribute_mutable_at_list_string.add_val(x)
+    else:
+        raise ValueError("Invalid op attribute type {}".format(attr_type))
+    return attribute
diff --git a/oneflow/compatible_single_client_python/framework/balanced_splitter.py b/oneflow/compatible_single_client_python/framework/balanced_splitter.py
new file mode 100644
index 0000000000000000000000000000000000000000..f9a72178fb31eca0e8569681163736bc9d4f8584
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/balanced_splitter.py
@@ -0,0 +1,32 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+
+def BalancedPartNums(total, part_size):
+    base = int(total / part_size)
+    remainder = total % part_size
+    return [base + int(i < remainder) for i in range(part_size)]
+
+
+def BalancedRanges(total, part_size):
+    balanced_part_nums = BalancedPartNums(total, part_size)
+    ranges = []
+    start = 0
+    for part_num in balanced_part_nums:
+        end = start + part_num
+        ranges.append((start, end))
+        start = end
+    return ranges
diff --git a/oneflow/compatible_single_client_python/framework/blob.py b/oneflow/compatible_single_client_python/framework/blob.py
new file mode 100644
index 0000000000000000000000000000000000000000..7efcec681e6dafb34e996ba2e8ed1d0f3445c764
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/blob.py
@@ -0,0 +1,69 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
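Concretely, the splitter hands the first `total % part_size` parts one extra element each, and `BalancedRanges` tiles `[0, total)` without gaps or overlaps:

BalancedPartNums(10, 3)  # -> [4, 3, 3]
BalancedRanges(10, 3)    # -> [(0, 4), (4, 7), (7, 10)]
BalancedRanges(5, 5)     # -> [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)]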
+""" +from __future__ import absolute_import + +import numpy as np + + +class Blob(object): + def __init__(self, ndarray=None): + self.ndarray_ = ndarray + + def ndarray(self): + return self.ndarray_ + + def set_ndarray(self, ndarray): + self.ndarray_ = ndarray + + def __getattr__(self, attr): + return getattr(self.ndarray_, attr) + + +no_override_field = set( + [ + "__class__", + "__doc__", + "__new__", + "__init__", + "__del__", + "__call__", + "__getattr__", + "__getattribute__", + "__setattr__", + "__delattr__", + "__dir__", + "__get__", + "__set__", + "__delete__", + ] +) + + +def MakeBlobMethod(field_name): + def ConvertOtherArgs(args): + return [x.ndarray_ if isinstance(x, Blob) else x for x in args] + + return lambda self, *args: getattr(self.ndarray_, field_name)( + *ConvertOtherArgs(args) + ) + + +for field_name in dir(np.ndarray): + if field_name.startswith("__") == False: + continue + if field_name in no_override_field: + continue + setattr(Blob, field_name, MakeBlobMethod(field_name)) diff --git a/oneflow/compatible_single_client_python/framework/blob_trait.py b/oneflow/compatible_single_client_python/framework/blob_trait.py new file mode 100644 index 0000000000000000000000000000000000000000..5699d665fa040cbb8f3774fd464c8725bbb5fa10 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/blob_trait.py @@ -0,0 +1,101 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from oneflow.compatible import single_client as flow + + +def __add__(self, rhs): + return flow.math.add(self, rhs) + + +def __radd__(self, lhs): + return flow.math.add(lhs, self) + + +def __sub__(self, rhs): + return flow.math.subtract(self, rhs) + + +def __rsub__(self, lhs): + return flow.math.subtract(lhs, self) + + +def __mul__(self, rhs): + return flow.math.multiply(self, rhs) + + +def __rmul__(self, lhs): + return flow.math.multiply(lhs, self) + + +def __truediv__(self, rhs): + return flow.math.divide(self, rhs) + + +def __rtruediv__(self, lhs): + return flow.math.divide(lhs, self) + + +def __div__(self, rhs): + return flow.math.divide(self, rhs) + + +def __mod__(self, rhs): + return flow.math.mod(self, rhs) + + +def __eq__(self, rhs): + return flow.math.equal(self, rhs) + + +def __ne__(self, rhs): + return flow.math.not_equal(self, rhs) + + +def __lt__(self, rhs): + return flow.math.less(self, rhs) + + +def __le__(self, rhs): + return flow.math.less_equal(self, rhs) + + +def __gt__(self, rhs): + return flow.math.greater(self, rhs) + + +def __ge__(self, rhs): + return flow.math.greater_equal(self, rhs) + + +def RegisterBlobOperatorTraitMethod(blob_class): + blob_class.__add__ = __add__ + blob_class.__radd__ = __radd__ + blob_class.__sub__ = __sub__ + blob_class.__rsub__ = __rsub__ + blob_class.__mul__ = __mul__ + blob_class.__rmul__ = __rmul__ + blob_class.__truediv__ = __truediv__ + blob_class.__rtruediv__ = __rtruediv__ + blob_class.__div__ = __div__ + blob_class.__mod__ = __mod__ + blob_class.__eq__ = __eq__ + blob_class.__ne__ = __ne__ + blob_class.__lt__ = __lt__ + blob_class.__le__ = __le__ + blob_class.__gt__ = __gt__ + blob_class.__ge__ = __ge__ diff --git a/oneflow/compatible_single_client_python/framework/c_api_util.py b/oneflow/compatible_single_client_python/framework/c_api_util.py new file mode 100644 index 0000000000000000000000000000000000000000..181035bdf1964ca38ff1660f045568d742c87367 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/c_api_util.py @@ -0,0 +1,266 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from google.protobuf import text_format + +from oneflow.core.common import data_type_pb2 as dtype_util +from oneflow.core.common import error_pb2 as error_util +from oneflow.core.job import env_pb2 as env_pb2 +from oneflow.core.job import job_set_pb2 as job_set_pb +from oneflow.core.job import job_pb2 as job_pb +from oneflow.core.job import placement_pb2 as placement_pb +from oneflow.core.job import resource_pb2 as resource_util +from oneflow.core.operator import op_attribute_pb2 as op_attribute_pb +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.record import record_pb2 as record_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.core.framework.config_def_pb2 import ConfigDef +from oneflow.core.job.inter_user_job_info_pb2 import InterUserJobInfo + +# import oneflow.compatible.single_client as flow +from oneflow._oneflow_internal.oneflow.core.job import placement as placement_cfg +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal + + +def CurrentResource(): + resource = oneflow._oneflow_internal.CurrentResource() + return text_format.Parse(resource, resource_util.Resource()) + + +def EnvResource(): + resource = oneflow._oneflow_internal.EnvResource() + return text_format.Parse(resource, resource_util.Resource()) + + +def InitDefaultEnv(env_proto): + assert type(env_proto) is env_pb2.EnvProto + env_proto_str = text_format.MessageToString(env_proto) + oneflow._oneflow_internal.InitDefaultEnv(env_proto_str) + + +def InitEnv(env_proto, is_multi_client): + assert type(env_proto) is env_pb2.EnvProto + env_proto_str = text_format.MessageToString(env_proto) + oneflow._oneflow_internal.InitEnv(env_proto_str, is_multi_client) + + +def InitLazyGlobalSession(config_proto): + assert type(config_proto) is job_set_pb.ConfigProto + config_proto_str = text_format.MessageToString(config_proto) + oneflow._oneflow_internal.InitLazyGlobalSession(config_proto_str) + + +def GetInterUserJobInfo(): + inter_user_job_info = oneflow._oneflow_internal.GetSerializedInterUserJobInfo() + ret = InterUserJobInfo() + ret.ParseFromString(inter_user_job_info) + return ret + + +def JobBuildAndInferCtx_Open(job_name): + job_name = str(job_name) + oneflow._oneflow_internal.JobBuildAndInferCtx_Open(job_name) + + +def CurJobBuildAndInferCtx_SetJobConf(job_config_proto): + oneflow._oneflow_internal.CurJobBuildAndInferCtx_SetJobConf(job_config_proto) + + +def CurJobBuildAndInferCtx_SetTrainConf(train_config_cfg): + oneflow._oneflow_internal.CurJobBuildAndInferCtx_SetTrainConf(train_config_cfg) + + +def InferOpConf(op_conf_proto, upstream_signature): + serialized_op_conf = str(text_format.MessageToString(op_conf_proto)) + serialized_upstream_sig = str(text_format.MessageToString(upstream_signature)) + op_attribute_str = oneflow._oneflow_internal.InferOpConf( + serialized_op_conf, serialized_upstream_sig, + ) + return text_format.Parse(op_attribute_str, op_attribute_pb.OpAttribute()) + + +def IsInterfaceOpConf(op_conf): + op_type_field = op_conf.WhichOneof("op_type") + field_number = op_conf_util.OperatorConf.DESCRIPTOR.fields_by_name[ + op_type_field + ].number + return oneflow._oneflow_internal.IsInterfaceOpTypeCase(field_number) + + +def GetOpParallelSymbolId(op_conf_proto): + serialized_op_conf = str(text_format.MessageToString(op_conf_proto)) + return oneflow._oneflow_internal.GetOpParallelSymbolId(serialized_op_conf) + + +def 
CheckAndCompleteUserOpConf(op_conf_proto): + serialized_op_conf = str(text_format.MessageToString(op_conf_proto)) + new_op_conf = oneflow._oneflow_internal.CheckAndCompleteUserOpConf( + serialized_op_conf + ) + return text_format.Parse(new_op_conf, op_conf_util.OperatorConf()) + + +def CurJobBuildAndInferCtx_AddAndInferConsistentOp(op_conf_proto): + serialized_op_conf = str(text_format.MessageToString(op_conf_proto)) + add_and_infer = ( + oneflow._oneflow_internal.CurJobBuildAndInferCtx_AddAndInferConsistentOp + ) + op_attribute_str = add_and_infer(serialized_op_conf) + return text_format.Parse(op_attribute_str, op_attribute_pb.OpAttribute()) + + +def CurJobBuildAndInferCtx_AddAndInferMirroredOp(op_conf_proto): + serialized_op_conf = str(text_format.MessageToString(op_conf_proto)) + add_and_infer = ( + oneflow._oneflow_internal.CurJobBuildAndInferCtx_AddAndInferMirroredOp + ) + op_attribute_str = add_and_infer(serialized_op_conf) + return text_format.Parse(op_attribute_str, op_attribute_pb.OpAttribute()) + + +def CurJobBuildAndInferCtx_AddLossLogicalBlobName(lbn): + lbn = str(lbn) + oneflow._oneflow_internal.CurJobBuildAndInferCtx_AddLossLogicalBlobName(lbn) + + +def CurJobBuildAndInferCtx_AddLbiAndDiffWatcherUuidPair(lbi_and_uuid): + serialized = str(text_format.MessageToString(lbi_and_uuid)) + oneflow._oneflow_internal.CurJobBuildAndInferCtx_AddLbiAndDiffWatcherUuidPair( + serialized + ) + + +def JobBuildAndInferCtx_IsMirroredBlob(job_name, lbn): + job_name = str(job_name) + lbn = str(lbn) + return oneflow._oneflow_internal.JobBuildAndInferCtx_IsMirroredBlob(job_name, lbn) + + +def JobBuildAndInferCtx_MirroredBlobGetNumSubLbi(job_name, lbn): + job_name = str(job_name) + lbn = str(lbn) + return oneflow._oneflow_internal.JobBuildAndInferCtx_MirroredBlobGetNumSubLbi( + job_name, lbn + ) + + +def JobBuildAndInferCtx_MirroredBlobGetSubLbi(job_name, lbn, index): + job_name = str(job_name) + lbn = str(lbn) + ret = oneflow._oneflow_internal.JobBuildAndInferCtx_MirroredBlobGetSerializedSubLbi( + job_name, lbn, index + ) + return text_format.Parse(ret, logical_blob_id_util.LogicalBlobId()) + + +def JobBuildAndInferCtx_GetStaticShape(job_name, lbn): + job_name = str(job_name) + lbn = str(lbn) + axis_str = oneflow._oneflow_internal.JobBuildAndInferCtx_GetSerializedIdListAsStaticShape( + job_name, lbn + ) + int_list = text_format.Parse(axis_str, record_util.Int64List()) + return tuple(map(int, int_list.value)) + + +def JobBuildAndInferCtx_GetDataType(job_name, lbn): + job_name = str(job_name) + lbn = str(lbn) + dtype = oneflow._oneflow_internal.JobBuildAndInferCtx_GetDataType(job_name, lbn) + return int(dtype) + + +def JobBuildAndInferCtx_IsDynamic(job_name, lbn): + job_name = str(job_name) + lbn = str(lbn) + ret = oneflow._oneflow_internal.JobBuildAndInferCtx_IsDynamic(job_name, lbn) + return ret + + +def JobBuildAndInferCtx_DisableBoxing(job_name, lbn): + job_name = str(job_name) + lbn = str(lbn) + ret = oneflow._oneflow_internal.JobBuildAndInferCtx_DisableBoxing(job_name, lbn) + return ret + + +def JobBuildAndInferCtx_GetSplitAxisFromProducerView(job_name, lbn): + job_name = str(job_name) + lbn = str(lbn) + split_axis_str = oneflow._oneflow_internal.JobBuildAndInferCtx_GetSplitAxisFromProducerView( + job_name, lbn + ) + split_axis = text_format.Parse(split_axis_str, dtype_util.OptInt64()) + if split_axis.HasField("value"): + return split_axis.value + return None + + +def JobBuildAndInferCtx_GetParallelConfFromProducerView(job_name, lbn): + job_name = str(job_name) + lbn = str(lbn) + 
GetParallelConf = ( + oneflow._oneflow_internal.JobBuildAndInferCtx_GetSerializedParallelConfFromProducerView + ) + parallel_conf = GetParallelConf(job_name, lbn) + parallel_conf = text_format.Parse(parallel_conf, placement_pb.ParallelConf()) + # TODO(oyy) change temporary transformation after python code migrated into cpp code + parallel_conf_cfg = placement_cfg.ParallelConf() + parallel_conf_cfg.set_device_tag(parallel_conf.device_tag) + for device_name in parallel_conf.device_name: + parallel_conf_cfg.add_device_name(device_name) + + return parallel_conf_cfg + + +def GetMachine2DeviceIdListOFRecordFromParallelConf(parallel_conf): + serialized_parallel_conf = str(parallel_conf) + ofrecord = oneflow._oneflow_internal.GetMachine2DeviceIdListOFRecordFromParallelConf( + serialized_parallel_conf + ) + return text_format.Parse(ofrecord, record_util.OFRecord()) + + +def GetFunctionConfigDef(): + func_config_def = oneflow._oneflow_internal.GetFunctionConfigDef() + return text_format.Parse(func_config_def, ConfigDef()) + + +def GetScopeConfigDef(): + scope_config_def = oneflow._oneflow_internal.GetScopeConfigDef() + return text_format.Parse(scope_config_def, ConfigDef()) + + +def GetInterfaceOpAttributes(): + op_attributes = oneflow._oneflow_internal.GetSerializedInterfaceOpAttributes() + return text_format.Parse(op_attributes, op_attribute_pb.OpAttributeList()) + + +@oneflow_export("experimental.get_job_set") +def GetJobSet(): + job_set = oneflow._oneflow_internal.GetSerializedJobSet() + ret = job_set_pb.JobSet() + ret.ParseFromString(job_set) + return ret + + +def GetCurrentJob(): + serialized_job = oneflow._oneflow_internal.GetSerializedCurrentJob() + ret = job_pb.Job() + ret.ParseFromString(serialized_job) + return ret diff --git a/oneflow/compatible_single_client_python/framework/check_point.py b/oneflow/compatible_single_client_python/framework/check_point.py new file mode 100644 index 0000000000000000000000000000000000000000..bd359d401bd0a1228d62032b921aa258f261c7ea --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/check_point.py @@ -0,0 +1,249 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
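Every wrapper in `c_api_util` follows the same protobuf round-trip: serialize the Python message with `text_format`, cross the pybind boundary as a string, and parse the reply back into a Python proto. `CurrentResource()`, spelled out step by step:

from google.protobuf import text_format
from oneflow.core.job import resource_pb2 as resource_util
import oneflow._oneflow_internal

serialized = oneflow._oneflow_internal.CurrentResource()  # text proto from C++
resource = text_format.Parse(serialized, resource_util.Resource())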
+""" +import datetime +import os +import shutil + +import numpy as np +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.framework import ( + job_instance as job_instance, +) + +from oneflow.compatible_single_client_python.framework import ( + check_point_v2 as check_point_v2, +) +from oneflow.compatible_single_client_python.framework import config_util as config_util +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.eager import op_executor as op_executor + +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from typing import List, Union + + +@oneflow_export("train.CheckPoint") +class CheckPoint(object): + """Create a `CheckPoint` object to manage checkpoint manually. + + """ + + def __init__(self) -> None: + if not config_util.api_legacy_model_io_enabled(): + print( + "\033[1mWARNING: 'flow.train.CheckPoint' is deprecated. Please use the new API:\033[0m\n" + "flow.train.CheckPoint().save(path) => \033[1m\033[92mflow.checkpoint.save(path)\033[0m\n" + "flow.train.CheckPoint().load(path) => \033[1m\033[92mflow.load_variables(flow.checkpoint.get(path))\033[0m\n" + "flow.train.CheckPoint().init() is not needed any more.\n" + ) + + @session_ctx.try_init_default_session + def save(self, path: str) -> None: + r"""save a checkpoint to `path`. + + Args: + path: A `string` of path to save checkpoint. + """ + if not config_util.api_legacy_model_io_enabled(): + check_point_v2.SaveVarDict(path) + return + assert type(path) is str + enable_if.unique([lazy_checkpoint_save, eager_checkpoint_save])(path) + + @session_ctx.try_init_default_session + def init(self) -> None: + r"""Initialize models by default initializer of op or Job. + """ + if not config_util.api_legacy_model_io_enabled(): + return + enable_if.unique([lazy_checkpoint_init, eager_checkpoint_init])() + + @session_ctx.try_init_default_session + def load(self, path: str) -> None: + r"""load a checkpoint from `path` and initialize models. + + Args: + path: A `string` of path to load checkpoint. 
+ """ + if not config_util.api_legacy_model_io_enabled(): + check_point_v2.LoadVariables(check_point_v2.GetCheckpoint(path)) + return + assert type(path) is str + enable_if.unique([lazy_checkpoint_load, eager_checkpoint_load])(path) + + +@enable_if.condition(hob.in_normal_mode & ~hob.eager_execution_enabled) +def lazy_checkpoint_save(path): + session_ctx.GetDefaultSession().LaunchJob(_MakeModelSaveJobFunc(path)) + + +@enable_if.condition(hob.in_normal_mode & ~hob.eager_execution_enabled) +def lazy_checkpoint_init(): + session_ctx.GetDefaultSession().LaunchJob(_MakeModelInitJobFunc()) + + +@enable_if.condition(hob.in_normal_mode & ~hob.eager_execution_enabled) +def lazy_checkpoint_load(path): + session_ctx.GetDefaultSession().LaunchJob(_MakeModelLoadJobFunc(path)) + + +@enable_if.condition(hob.in_normal_mode & hob.eager_execution_enabled) +def eager_checkpoint_save(path): + op_executor.EagerSaveVariableBlob(path) + + +@enable_if.condition(hob.in_normal_mode & hob.eager_execution_enabled) +def eager_checkpoint_init(): + # eager variables are initialized in oneflow.compatible.single_client.get_variable() + pass + + +@enable_if.condition(hob.in_normal_mode & hob.eager_execution_enabled) +def eager_checkpoint_load(path): + session_ctx.GetDefaultSession().snapshot_mgr.load(path) + + +def _MakeModelInitJobFunc(): + def push_cb(blob): + pass + + def finish_cb(): + pass + + sess = session_ctx.GetDefaultSession() + return job_instance.MakeJobInstance( + str(sess.inter_user_job_info.global_model_init_job_name), + push_cb=push_cb, + finish_cb=finish_cb, + ) + + +def _MakeModelLoadJobFunc(path): + def push_cb(blob): + blob.CopyFromNdarray(np.frombuffer(path.encode("ascii"), dtype=np.int8)) + + def finish_cb(): + pass + + sess = session_ctx.GetDefaultSession() + return job_instance.MakeJobInstance( + str(sess.inter_user_job_info.global_model_load_job_name), + push_cb=push_cb, + finish_cb=finish_cb, + ) + + +def _MakeModelSaveJobFunc(path): + def push_cb(blob): + blob.CopyFromNdarray(np.frombuffer(path.encode("ascii"), dtype=np.int8)) + + def finish_cb(): + pass + + sess = session_ctx.GetDefaultSession() + return job_instance.MakeJobInstance( + str(sess.inter_user_job_info.global_model_save_job_name), + push_cb=push_cb, + finish_cb=finish_cb, + ) + + +@oneflow_export("train.SimpleCheckPointManager") +class SimpleCheckPointManager(object): + r"""`SimpleCheckPointManager` is a simple automatic checkpoint manager. 
+ + Args: + root_path: root path of snapshot + prefix: prefix of snapshot + """ + + def __init__(self, root_path: str, prefix: str = "snapshot_") -> None: + if not os.path.exists(root_path): + os.makedirs(root_path) + else: + assert os.path.isdir(root_path) + self._root_path = root_path + self._prefix = prefix + + def list_checkpoints(self) -> List[str]: + def is_snapshot(name): + if not name.startswith(self._prefix): + return False + snapshot_done = os.path.join(self._GetSnapshotPath(name), "snapshot_done") + return os.path.exists(snapshot_done) and os.path.isfile(snapshot_done) + + return sorted([f for f in os.listdir(self._root_path) if is_snapshot(f)]) + + def latest_checkpoint(self) -> Union[str, None]: + names = self.list_checkpoints() + if not names: + return None + else: + return names[-1] + + def initialize_or_restore(self) -> None: + name = self.latest_checkpoint() + if name: + check_point_v2.LoadVariables( + check_point_v2.GetCheckpoint(self._GetSnapshotPath(name)) + ) + else: + self.save() + + def save(self) -> None: + check_point_v2.SaveVarDict(self._GetSnapshotPath(self._NextSnapshotName()),) + + def _NextSnapshotName(self) -> str: + return self._prefix + datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f") + + def _GetSnapshotPath(self, name: str) -> str: + return os.path.join(self._root_path, name) + + +class SnapshotManager(object): + def __init__(self): + self.name2path_ = dict() + + def load(self, root_dir, refresh=True): + assert os.path.isdir(root_dir) + + if refresh: + self.name2path_ = dict() + + for file in os.listdir(root_dir): + file_path = os.path.join(root_dir, file) + if not os.path.isdir(file_path): + continue + + has_out_subfile = False + for f in os.listdir(file_path): + fpath = os.path.join(file_path, f) + if f == "out" and os.path.isfile(fpath): + has_out_subfile = True + + if not has_out_subfile: + continue + + assert file not in self.name2path_ + self.name2path_[file] = os.path.join(file_path, "out") + + def get_snapshot_path(self, name): + try: + return self.name2path_[name] + except KeyError: + return None diff --git a/oneflow/compatible_single_client_python/framework/check_point_v2.py b/oneflow/compatible_single_client_python/framework/check_point_v2.py new file mode 100644 index 0000000000000000000000000000000000000000..4070286c1b4a5bc1c4c240040964e1932c02a5e6 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/check_point_v2.py @@ -0,0 +1,674 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
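A usage sketch for `SimpleCheckPointManager` (the directory is illustrative): it restores the newest snapshot that carries a `snapshot_done` marker, or saves a fresh one if none exists, and names new snapshots by timestamp:

from oneflow.compatible import single_client as flow

mgr = flow.train.SimpleCheckPointManager("./snapshots")
mgr.initialize_or_restore()  # load the latest finished snapshot, else save one
# ... run training iterations ...
mgr.save()                   # writes ./snapshots/snapshot_<timestamp>/...
print(mgr.list_checkpoints())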
+""" +import os + +import numpy as np +from google.protobuf import text_format + +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from oneflow.core.operator import op_conf_pb2 as op_conf_pb +from oneflow.compatible_single_client_python.framework import config_util as config_util +from oneflow.compatible_single_client_python.framework import dtype as dtype_util +from oneflow.compatible_single_client_python.framework import runtime_mode as rt_mode +from oneflow.compatible_single_client_python.ops import ( + initializer_util as initializer_util, +) +from oneflow.core.job import initializer_conf_pb2 as initializer_conf_util +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.lib.core import async_util as async_util +from oneflow.compatible_single_client_python.eager import boxing_util as boxing_util +from oneflow.compatible_single_client_python.eager import op_infer_util as op_infer_util +from oneflow.core.framework import variable_meta_info_pb2 as variable_meta_info_pb +from oneflow.core.framework import user_op_attr_pb2 as attr_value_pb +from oneflow.compatible_single_client_python.experimental import ( + interface_op_read_and_write, +) +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.ops import get_variable as get_variable + +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow._oneflow_internal.oneflow.core.register import logical_blob_id as lbi_util +import oneflow._oneflow_internal +from oneflow._oneflow_internal import EagerBlobTrait +from typing import Any, Callable, Dict, List, Union, Sequence, Optional, Iterable, Tuple + + +META_INFO_FILENAME = "meta" +DATA_FILENAME = "out" +FAKE_JOB_NAME = "system_checkpoint" +OP_PREFIX = "system_checkpoint" + + +blob_register = oneflow._oneflow_internal.GetDefaultBlobRegister() + + +def sync_default_session_if_normal(): + # TODO merge with same function in experimental/interface_op_read_and_write.py + if rt_mode.CurrentMode() == rt_mode.NORMAL_MODE: + flow.sync_default_session() + else: + # do nothing + pass + + +class FileBackendVariableBlob: + def __init__( + self, + var_dir: str, + dtype: Optional[flow.dtype] = None, + shape: Optional[Sequence[int]] = None, + ): + data_path = os.path.join(var_dir, DATA_FILENAME) + assert os.path.isfile(data_path) + self.var_dir_ = var_dir + meta_info_path = os.path.join(self.var_dir_, META_INFO_FILENAME) + if os.path.exists(meta_info_path): + meta_info = variable_meta_info_pb.VariableMetaInfo() + with open(meta_info_path) as f: + text_format.Parse(f.read(), meta_info) + self.has_meta_info_ = True + else: + self.has_meta_info_ = False + + if self.has_meta_info_: + assert dtype is None and shape is None + self.shape_ = tuple(meta_info.shape.dim) + self.dtype_ = dtype_util.convert_proto_dtype_to_oneflow_dtype( + meta_info.data_type + ) + else: + if shape is not None and dtype is not None: + self.shape_ = shape + self.dtype_ = dtype + self.has_meta_info_ = True + elif shape is not None or dtype is not None: + raise RuntimeError("both or neither of shape and dtype should be None") + else: + pass + + if self.has_meta_info_: + itemsize = np.dtype( + 
dtype_util.convert_oneflow_dtype_to_numpy_dtype(self.dtype_) + ).itemsize + assert os.path.getsize(data_path) == np.prod(self.shape).item() * itemsize + + @property + def file_path(self) -> str: + return os.path.join(self.var_dir_, DATA_FILENAME) + + @property + def shape(self) -> Tuple[int]: + return self.shape_ + + @property + def quant_info(self): + raise NotImplementedError() + + @property + def dtype(self) -> flow.dtype: + return self.dtype_ + + def numpy(self) -> np.ndarray: + if not self.has_meta_info_: + raise RuntimeError("This variable does not have meta info") + return np.fromfile( + self.file_path, + dtype=dtype_util.convert_oneflow_dtype_to_numpy_dtype(self.dtype), + ).reshape(self.shape) + + +ValueContainer = Union[ + EagerBlobTrait, + FileBackendVariableBlob, + np.ndarray, + "oneflow.compatible.single_client.Tensor", +] + + +def _ElemCnt(shape): + return np.prod(shape).astype(int).item() + + +@oneflow_export("get_all_variables") +@session_ctx.try_init_default_session +def GetAllVariables() -> Dict[str, oneflow._oneflow_internal.EagerConsistentBlob]: + """ + Get all variables of all jobs as a dict. + """ + sync_default_session_if_normal() + + sess = session_ctx.GetDefaultSession() + interface_ops = sess.interface_ops + variables = {} + for op in interface_ops: + op_attr = sess.OpAttribute4InterfaceOpName(op) + if op_attr.op_conf.WhichOneof("op_type") != "variable_conf": + continue + variables[op] = interface_op_read_and_write.GetEagerInterfaceBlob(op) + return variables + + +def _LoadSingleVariable(path: str) -> Optional[FileBackendVariableBlob]: + if os.path.isfile(os.path.join(path, DATA_FILENAME)): + return FileBackendVariableBlob(path) + return None + + +def _GetCheckpoint( + path: str, +) -> Union[Dict[str, FileBackendVariableBlob], FileBackendVariableBlob]: + assert os.path.isdir(path), "Directory {} doesn't exist!".format(path) + single_var = _LoadSingleVariable(path) + if single_var is not None: + return single_var + var_dict = {} + for f in os.listdir(path): + var_dir = os.path.join(path, f) + var = _LoadSingleVariable(var_dir) + if var is not None: + var_dict[f] = var + return var_dict + + +@oneflow_export("checkpoint.get") +@session_ctx.try_init_default_session +def GetCheckpoint( + path: str, +) -> Union[Dict[str, FileBackendVariableBlob], FileBackendVariableBlob]: + """ + Load variable(s) from file system. 
+ """ + return _GetCheckpoint(path) + + +@oneflow_export("load") +def Load( + path: str, +) -> Union[Dict[str, FileBackendVariableBlob], FileBackendVariableBlob]: + return _GetCheckpoint(path) + + +def _GetOpNameFromLbn(lbn): + return lbn.split("/")[0] + + +def _GetScopeSymbolIdFromEagerBlob(blob): + name = _GetOpNameFromLbn(blob.logical_blob_name) + sess = session_ctx.GetDefaultSession() + op_conf = sess.OpAttribute4InterfaceOpName(name).op_conf + scope_symbol_id = op_conf.scope_symbol_id + return scope_symbol_id + + +def _ReadSlice( + container: ValueContainer, +) -> Iterable[Tuple[Sequence[int], Sequence[int], np.ndarray]]: + """ + Return a generator which iterates over the input blob or array and yields + (start_nd_idx, stop_nd_idx, slice_np_array) + """ + if isinstance(container, flow.Tensor): + + def ReadFromTensor(tensor, start_nd_idx, stop_nd_idx): + start_nd_idx = list(map(int, start_nd_idx)) + stop_nd_idx = list(map(int, stop_nd_idx)) + return tensor[ + tuple( + [ + slice(start_nd_idx[i], stop_nd_idx[i]) + for i in range(len(start_nd_idx)) + ] + ) + ].numpy() + + yield from _ForEachSlice(container, ReadFromTensor) + elif isinstance(container, EagerBlobTrait): + + def ReadFromEagerBlob(eager_blob, start_nd_idx, stop_nd_idx): + scope_symbol_id = _GetScopeSymbolIdFromEagerBlob(eager_blob) + return _LogicalSlice( + eager_blob.blob_object, start_nd_idx, stop_nd_idx, scope_symbol_id + ) + + yield from _ForEachSlice(container, ReadFromEagerBlob) + elif isinstance(container, FileBackendVariableBlob): + np_dtype = np.dtype( + dtype_util.convert_oneflow_dtype_to_numpy_dtype(container.dtype) + ) + with open(container.file_path, "rb") as f: + + def ReadFromFile(_, start_nd_idx, stop_nd_idx): + length = _ElemCnt(np.array(stop_nd_idx) - np.array(start_nd_idx)) + slice = f.read(length * np_dtype.itemsize) + return np.frombuffer(slice, dtype=np_dtype,).reshape( + np.array(stop_nd_idx) - np.array(start_nd_idx) + ) + + yield from _ForEachSlice(container, ReadFromFile) + elif isinstance(container, np.ndarray): + + def ReadFromNpArray(array, start_nd_idx, stop_nd_idx): + slice_objs = [] + for start, stop in zip(start_nd_idx, stop_nd_idx): + slice_objs.append(slice(start, stop)) + return array[tuple(slice_objs)] + + yield from _ForEachSlice(container, ReadFromNpArray) + else: + raise RuntimeError("Unknown type: {}".format(type(container).__name__)) + + +def _SaveVarDict( + path: str, + var_dict: Optional[ + Dict[str, Union[FileBackendVariableBlob, EagerBlobTrait]] + ] = None, +) -> None: + if var_dict is None: + var_dict = GetAllVariables() + + def IsFileOrNonEmptyDir(path): + if os.path.isfile(path): + return True + if os.path.isdir(path) and len(os.listdir(path)) != 0: + return True + return False + + assert not IsFileOrNonEmptyDir( + path + ), "{} is a file or non-empty directory! Note that flow.save is different from torch.save. 
It saves each weight as a separate file so that a directory instead of a file should be given.".format(
+        path
+    )
+    os.makedirs(path, exist_ok=True)
+    for name, var in var_dict.items():
+        meta_info = variable_meta_info_pb.VariableMetaInfo()
+        meta_info.shape.dim[:] = var.shape
+        meta_info.data_type = oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype(
+            var.dtype
+        )
+        var_dir = os.path.join(path, name)
+        param_path = os.path.join(var_dir, DATA_FILENAME)
+        os.makedirs(os.path.dirname(param_path))
+        with open(param_path, "wb") as f:
+            for _, _, slice in _ReadSlice(var):
+                f.write(slice.tobytes())
+        with open(os.path.join(var_dir, META_INFO_FILENAME), "w") as f:
+            f.write(text_format.MessageToString(meta_info))
+    # write an empty file 'snapshot_done', indicating that
+    # the save process finishes normally
+    with open(os.path.join(path, "snapshot_done"), "w"):
+        pass
+
+
+@oneflow_export("checkpoint.save")
+@session_ctx.try_init_default_session
+def SaveVarDict(
+    path: str,
+    var_dict: Optional[
+        Dict[str, Union[FileBackendVariableBlob, EagerBlobTrait]]
+    ] = None,
+) -> None:
+    """
+    Save `var_dict` to `path`
+    """
+    sync_default_session_if_normal()
+
+    return _SaveVarDict(path, var_dict)
+
+
+@oneflow_export("save")
+def save(obj, save_dir):
+    return _SaveVarDict(save_dir, obj)
+
+
+def _LogicalSlice(
+    input_blob_object: oneflow._oneflow_internal.BlobObject,
+    start: Sequence[int],
+    stop: Sequence[int],
+    scope_symbol_id: int,
+) -> np.ndarray:
+    """
+    Construct a logical_slice op and run it by oneflow eager,
+    return the sliced result as a numpy ndarray
+    """
+    op_name = id_util.UniqueStr(OP_PREFIX)
+
+    def AsyncSlice(Yield):
+        def build(builder):
+            op_conf = op_conf_pb.OperatorConf()
+            # device_tag doesn't matter for logical_slice op
+            device_tag = flow.current_scope().device_parallel_desc_symbol.device_tag
+            op_conf.device_tag = device_tag
+            op_conf.name = op_name
+            op_conf.user_conf.op_type_name = "logical_slice"
+            op_conf.user_conf.input["x"].s.append("{}/x_0".format(op_name))
+            op_conf.user_conf.output["y"].s.append("{}/y_0".format(op_name))
+            parallel_conf = input_blob_object.parallel_desc_symbol.parallel_conf
+            op_conf.user_conf.attr["parallel_conf"].at_string = str(parallel_conf)
+            op_conf.user_conf.attr["start"].at_list_int64.val[:] = start
+            op_conf.user_conf.attr["stop"].at_list_int64.val[:] = stop
+            op_conf.user_conf.attr["step"].at_list_int64.val[:] = [1] * len(start)
+            bn_in_op2blob_object = (
+                oneflow._oneflow_internal.deprecated.BnInOp2BlobObject()
+            )
+            bn_in_op2blob_object["x_0"] = input_blob_object
+            op_attribute = op_infer_util.Infer(
+                op_conf, bn_in_op2blob_object, scope_symbol_id
+            )
+            cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString(
+                str(op_attribute)
+            )
+            builder.StatelessCall(
+                cfg_op_attribute,
+                parallel_conf,
+                bn_in_op2blob_object,
+                boxing_util.BoxingTo,
+            )
+            Yield(bn_in_op2blob_object["y_0"])
+
+        oneflow._oneflow_internal.deprecated.LogicalRun(build)
+
+    lbi = lbi_util.LogicalBlobId()
+    lbi.set_op_name(op_name)
+    lbi.set_blob_name(op_name)
+
+    blob_object = async_util.Await(1, AsyncSlice)[0]
+
+    blob = oneflow._oneflow_internal.EagerConsistentBlob(
+        lbi,
+        blob_object=blob_object,
+        blob_register=blob_register,
+        job_name=FAKE_JOB_NAME,
+    )
+    return blob.numpy()
+
+
+def _GetCpu0VariableBlobFromNumpy(
+    np_array: np.ndarray, dtype: flow.dtype
+) -> oneflow._oneflow_internal.EagerConsistentBlob:
+    """
+    Add a variable on cpu 0, and feed the value of `np_array`
+
+    Note: dtype argument cannot be eliminated by
+ convert_numpy_dtype_to_oneflow_dtype(np_array.dtype), + because np.int8 == np.char and + numpy_dtype_to_oneflow_dtype(oneflow_dtype_to_numpy_dtype(flow.int8)) + may be flow.char + """ + with flow.scope.placement("cpu", "0:0"): + op_name = id_util.UniqueStr(OP_PREFIX) + op_conf = get_variable.GenerateVariableOpConf( + name=op_name, + shape=np_array.shape, + dtype=dtype, + initializer=initializer_util.zeros_initializer(dtype=dtype), + trainable=False, + ) + current_parallel_desc_sym = flow.current_scope().device_parallel_desc_symbol + device_tag = current_parallel_desc_sym.device_tag + op_conf.device_tag = device_tag + op_attribute = op_infer_util.Infer(op_conf, {}) + var_blob = get_variable.CreateEagerVariableBlob( + op_attribute, job_name=FAKE_JOB_NAME + ) + + interface_op_read_and_write.FeedValueToInterfaceBlobObject( + var_blob.blob_object, np_array + ) + return var_blob + + +def _LogicalSliceAssign( + ref_blob_object: oneflow._oneflow_internal.BlobObject, + value_blob_object: oneflow._oneflow_internal.BlobObject, + start: Sequence[int], + stop: Sequence[int], + scope_symbol_id: Optional[int], +) -> None: + """ + Construct a logical_slice_assign op and run it by oneflow eager + """ + + def BuildAssignInstruction(builder): + op_conf = op_conf_pb.OperatorConf() + # device_tag doesn't matter for logical_slice_assign op + device_tag = flow.current_scope().device_parallel_desc_symbol.device_tag + op_conf.device_tag = device_tag + op_name = id_util.UniqueStr(OP_PREFIX) + op_conf.name = op_name + op_conf.user_conf.op_type_name = "logical_slice_assign" + op_conf.user_conf.input["value"].s.append("{}/value_0".format(op_name)) + op_conf.user_conf.input["ref"].s.append("{}/ref_0".format(op_name)) + parallel_conf = ref_blob_object.parallel_desc_symbol.parallel_conf + op_conf.user_conf.attr["parallel_conf"].at_string = str(parallel_conf) + op_conf.user_conf.attr["start"].at_list_int64.val[:] = start + op_conf.user_conf.attr["stop"].at_list_int64.val[:] = stop + op_conf.user_conf.attr["step"].at_list_int64.val[:] = [1] * len(start) + bn_in_op2blob_object = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + bn_in_op2blob_object["ref_0"] = ref_blob_object + bn_in_op2blob_object["value_0"] = value_blob_object + op_attribute = op_infer_util.Infer( + op_conf, bn_in_op2blob_object, scope_symbol_id + ) + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + builder.StatelessCall( + cfg_op_attribute, parallel_conf, bn_in_op2blob_object, boxing_util.BoxingTo, + ) + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildAssignInstruction) + + +def FeedValueToVariable( + var_blob: Union[ + oneflow._oneflow_internal.EagerConsistentBlob, + "oneflow.compatible.single_client.Tensor", + ], + value: ValueContainer, + scope_symbol_id: Optional[int], +) -> None: + """ + Feed the value of `value` to the variable `var_blob` + """ + assert isinstance( + value, (EagerBlobTrait, FileBackendVariableBlob, np.ndarray, flow.Tensor) + ), "Unknown value type: {}".format(type(value).__name__) + + if isinstance(value, FileBackendVariableBlob): + if not value.has_meta_info_: + value = FileBackendVariableBlob( + value.var_dir_, var_blob.dtype, var_blob.shape + ) + assert var_blob.shape == value.shape, "{} vs {}".format(var_blob.shape, value.shape) + if isinstance(value, np.ndarray): + value_flow_dtype = dtype_util.convert_numpy_dtype_to_oneflow_dtype(value.dtype) + else: + value_flow_dtype = value.dtype + assert var_blob.dtype == value_flow_dtype, "{} vs {}".format( + 
var_blob.dtype, value_flow_dtype
+    )
+
+    if isinstance(var_blob, flow.Tensor):
+        raise ValueError("Tensor object arguments are not supported")
+    else:
+        assert isinstance(var_blob, EagerBlobTrait)
+        var_blob_object = var_blob.blob_object
+
+    for start, stop, slice in _ReadSlice(value):
+        slice_value_blob = _GetCpu0VariableBlobFromNumpy(slice, var_blob.dtype)
+        _LogicalSliceAssign(
+            var_blob_object, slice_value_blob.blob_object, start, stop, scope_symbol_id,
+        )
+
+
+@oneflow_export("load_variables")
+@session_ctx.try_init_default_session
+def LoadVariables(
+    value_dict: Dict[str, ValueContainer], ignore_mismatch: bool = True,
+):
+    """
+    Load the values in `value_dict` into oneflow variables.
+    For example, if `value_dict` is {'x': np.ones(x_shape)},
+    the value of variable "x" will be all ones.
+    If `ignore_mismatch` is False, an exception will be raised when
+    there is a name in `value_dict` not belonging to any variable.
+    """
+    sync_default_session_if_normal()
+
+    all_vars = GetAllVariables()
+    for name, value in value_dict.items():
+        if name in all_vars:
+            var_blob = interface_op_read_and_write.GetEagerInterfaceBlob(name)
+            scope_symbol_id = _GetScopeSymbolIdFromEagerBlob(var_blob)
+            FeedValueToVariable(var_blob, value, scope_symbol_id)
+        else:
+            if not ignore_mismatch:
+                raise RuntimeError('"{}" is not a variable name'.format(name))
+    oneflow._oneflow_internal.eager.single_client.Sync()
+
+
+def _ForEachSlice(
+    container: ValueContainer,
+    f: Union[
+        Callable[[EagerBlobTrait, Sequence[int], Sequence[int]], Any],
+        Callable[[FileBackendVariableBlob, Sequence[int], Sequence[int]], Any],
+        Callable[[np.ndarray, Sequence[int], Sequence[int]], Any],
+    ],
+):
+    """
+    Slice container into slices whose size < SLICE_BYTES. For every slice,
+    yield start_nd_idx, stop_nd_idx and f(slice)
+    """
+    assert isinstance(
+        container, (EagerBlobTrait, FileBackendVariableBlob, np.ndarray, flow.Tensor)
+    ), "Unknown type: {}".format(type(container).__name__)
+    assert container.shape is not None
+    # For current implementation (transport data by grpc), SLICE_BYTES must be lower than 64M
+    SLICE_BYTES = 32 * 1024 * 1024
+    if isinstance(container, np.ndarray):
+        np_dtype = container.dtype
+    else:
+        np_dtype = np.dtype(
+            dtype_util.convert_oneflow_dtype_to_numpy_dtype(container.dtype)
+        )
+    SLICE_LEN = SLICE_BYTES // np_dtype.itemsize
+    start_idx = 0
+    size = _ElemCnt(container.shape)
+    cnt = 1
+    for axis in reversed(range(len(container.shape))):
+        cnt *= container.shape[axis]
+        if cnt > SLICE_LEN:
+            break
+    unit_size = _ElemCnt(tuple(container.shape)[axis + 1 :])
+    max_unit_num = SLICE_LEN // unit_size
+    while start_idx < size:
+        remainder = container.shape[axis]
+        while remainder > 0:
+            unit_num = max_unit_num if remainder >= max_unit_num else remainder
+            length = unit_num * unit_size
+            remainder -= unit_num
+            stop_idx = start_idx + length
+            start_nd_idx = np.unravel_index(start_idx, container.shape)
+            stop_nd_idx = np.unravel_index(stop_idx - 1, container.shape)
+            stop_nd_idx = tuple([x + 1 for x in stop_nd_idx])
+            yield start_nd_idx, stop_nd_idx, f(container, start_nd_idx, stop_nd_idx)
+            start_idx = stop_idx
+
+
+def generate_values_by_initializer(initializer, shape, dtype):
+    np_dtype = np.dtype(dtype_util.convert_oneflow_dtype_to_numpy_dtype(dtype))
+    length = _ElemCnt(shape)
+    return np.array(initializer(length)).astype(np_dtype).reshape(shape)
+
+
+def init_by_initializer_conf(
+    var_blob: Union[EagerBlobTrait, "oneflow.compatible.single_client.Tensor"],
+    initializer_conf: initializer_conf_util.InitializerConf,
+    sync_between_multi_machine: bool,
+    scope_symbol_id: Optional[int],
+    random_seed: int = 0,
+):
+    initializer = initializer_util.GetInitializer(
+        initializer_conf, random_seed, var_blob.shape
+    )
+    # initializer is None if and only if the initializer_conf is empty_initializer
+    if initializer is None:
+        return
+
+    def GenerateValueAndAssign(var_blob, start_nd_idx, stop_nd_idx):
+        shape = np.array(stop_nd_idx) - np.array(start_nd_idx)
+        vals = generate_values_by_initializer(initializer, shape, var_blob.dtype)
+
+        if isinstance(var_blob, flow.Tensor):
+            raise ValueError("Tensor object arguments are not supported")
+        else:
+            assert isinstance(var_blob, EagerBlobTrait)
+            var_blob_object = var_blob.blob_object
+
+        slice_value_blob = _GetCpu0VariableBlobFromNumpy(vals, var_blob.dtype)
+        _LogicalSliceAssign(
+            var_blob_object,
+            slice_value_blob.blob_object,
+            start_nd_idx,
+            stop_nd_idx,
+            scope_symbol_id,
+        )
+
+    # we just want to run f on every slice without caring about the return value
+    for _ in _ForEachSlice(var_blob, GenerateValueAndAssign):
+        pass
+
+    if sync_between_multi_machine:
+        oneflow._oneflow_internal.eager.single_client.Sync()
+
+
+def Init() -> None:
+    sync_default_session_if_normal()
+
+    sess = session_ctx.GetDefaultSession()
+    for op_name, var_blob in GetAllVariables().items():
+        var_conf = sess.OpAttribute4InterfaceOpName(op_name).op_conf.variable_conf
+        if not (
+            var_conf.HasField("initializer")
+            or var_conf.HasField("initialize_with_snapshot")
+        ):
+            continue
+        if var_conf.HasField("initialize_with_snapshot"):
+            initialize_with_snapshot_conf = var_conf.initialize_with_snapshot
+            if initialize_with_snapshot_conf.HasField("key"):
+                snapshot_key = initialize_with_snapshot_conf.key
+            else:
+                snapshot_key = op_name
+            var_dir = os.path.dirname(
+                os.path.join(initialize_with_snapshot_conf.path, snapshot_key,)
+            )
+            LoadVariables({op_name: GetCheckpoint(var_dir)})
+            continue
+
+        scope_symbol_id = _GetScopeSymbolIdFromEagerBlob(var_blob)
+        init_by_initializer_conf(
+            var_blob, var_conf.initializer, False, scope_symbol_id, var_conf.random_seed
+        )
+
+    oneflow._oneflow_internal.eager.single_client.Sync()
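The checkpoint helpers above feed and initialize variables slice by slice through `_ForEachSlice`: a tensor is walked in contiguous chunks of at most `SLICE_LEN` elements, aligned to whole trailing-dimension units, and each chunk is described by nd start/stop indices. Below is a minimal standalone sketch of that chunking scheme; `iter_nd_slices` and `max_elems` are illustrative names, not part of the module.

```python
import numpy as np

def iter_nd_slices(shape, max_elems):
    """Yield (start_nd_idx, stop_nd_idx) pairs covering `shape` in
    contiguous chunks of at most `max_elems` elements, mirroring the
    simplified chunking logic of _ForEachSlice above."""
    size = int(np.prod(shape))
    # find the axis at which a full trailing block first exceeds max_elems
    cnt = 1
    for axis in reversed(range(len(shape))):
        cnt *= shape[axis]
        if cnt > max_elems:
            break
    unit_size = int(np.prod(shape[axis + 1 :]))  # elements per step on `axis`
    max_unit_num = max_elems // unit_size
    start_idx = 0
    while start_idx < size:
        remainder = shape[axis]
        while remainder > 0:
            unit_num = min(max_unit_num, remainder)
            remainder -= unit_num
            stop_idx = start_idx + unit_num * unit_size
            start_nd = np.unravel_index(start_idx, shape)
            stop_nd = tuple(x + 1 for x in np.unravel_index(stop_idx - 1, shape))
            yield start_nd, stop_nd
            start_idx = stop_idx

# A (4, 6) tensor with at most 8 elements per slice is cut row by row.
for start, stop in iter_nd_slices((4, 6), max_elems=8):
    print(start, stop)
```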
+""" +from __future__ import absolute_import + +from contextlib import contextmanager + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.experimental import ( + name_scope as name_scope, +) +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import ( + distribute_context as distribute_ctx, +) +from oneflow.compatible_single_client_python.framework import ( + placement_context as placement_context, +) +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.experimental import ( + name_scope as name_scope, +) +import oneflow._oneflow_internal + + +def GetCurJobConfigProto(): + return enable_if.unique([GetEagerCurJobConfigProto, GetLazyCurJobConfigProto])() + + +@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled) +def GetEagerCurJobConfigProto(): + function_desc = session_ctx.GetDefaultSession().CurrentEagerGlobalFunctionDesc() + assert function_desc is not None + return function_desc.job_config_proto + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def GetLazyCurJobConfigProto(): + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + function_desc = session_ctx.GetDefaultSession().GetLazyFunctionDesc(job_name) + assert function_desc is not None + return function_desc.job_config_proto + + +logged_op_confs = set({}) + + +def CurJobAddOp(op_conf, scope_symbol=None): + if distribute_ctx.IsMirroredStrategyEnabled(): + return CurJobAddMirroredOp(op_conf, scope_symbol) + return CurJobAddConsistentOp(op_conf, scope_symbol) + + +def CurJobAddConsistentOp(op_conf, scope_symbol=None): + if scope_symbol is None: + scope_symbol = flow.current_scope() + op_conf.scope_symbol_id = scope_symbol.symbol_id + if not op_conf.HasField("device_tag"): + device_tag = scope_symbol.device_parallel_desc_symbol.device_tag + op_conf.device_tag = device_tag + op_attr = c_api_util.CurJobBuildAndInferCtx_AddAndInferConsistentOp(op_conf) + if c_api_util.IsInterfaceOpConf(op_conf): + sess = session_ctx.GetDefaultSession() + sess.AddInfo4InterfaceOpName(op_conf.name, op_attr) + return op_attr + + +def CurJobAddMirroredOp(op_conf, scope_symbol=None): + assert not hob.consistent_view_enabled(None) + if scope_symbol is None: + scope_symbol = flow.current_scope() + op_conf.scope_symbol_id = scope_symbol.symbol_id + if not op_conf.HasField("device_tag"): + device_tag = scope_symbol.device_parallel_desc_symbol.device_tag + op_conf.device_tag = device_tag + op_attr = c_api_util.CurJobBuildAndInferCtx_AddAndInferMirroredOp(op_conf) + if c_api_util.IsInterfaceOpConf(op_conf): + sess = session_ctx.GetDefaultSession() + sess.AddInfo4InterfaceOpName(op_conf.name, op_attr) + return op_attr diff --git a/oneflow/compatible_single_client_python/framework/compiler.py b/oneflow/compatible_single_client_python/framework/compiler.py new file mode 100644 index 0000000000000000000000000000000000000000..283a95f8a81cc8e1c84661fb0c690ee472aa32e5 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/compiler.py @@ -0,0 +1,238 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. 
diff --git a/oneflow/compatible_single_client_python/framework/compiler.py b/oneflow/compatible_single_client_python/framework/compiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..283a95f8a81cc8e1c84661fb0c690ee472aa32e5
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/compiler.py
@@ -0,0 +1,238 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+from contextlib import contextmanager
+
+import inspect
+from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util
+from oneflow.compatible_single_client_python.framework import (
+    distribute as distribute_util,
+)
+from oneflow.compatible_single_client_python.framework import (
+    input_blob_def as input_blob_util,
+)
+from oneflow.compatible_single_client_python.framework import hob as hob
+from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if
+from oneflow.compatible_single_client_python.framework import (
+    placement_util as placement_util,
+)
+from oneflow.compatible_single_client_python.framework import (
+    placement_context as placement_ctx,
+)
+from oneflow.compatible_single_client_python.framework import (
+    remote_blob as remote_blob_util,
+)
+from oneflow.compatible_single_client_python.framework import (
+    runtime_mode as runtime_mode,
+)
+from oneflow.compatible_single_client_python.framework import push_util as push_util
+from oneflow.compatible_single_client_python.framework import (
+    session_context as session_ctx,
+)
+from oneflow.compatible_single_client_python.framework import scope_util as scope_util
+from oneflow.compatible_single_client_python.framework import typing as oft
+from oneflow.compatible_single_client_python.framework import typing_util as oft_util
+from oneflow.compatible_single_client_python.lib.core import (
+    func_inspect_util as func_inspect_util,
+)
+from oneflow.compatible_single_client_python import ops as ops
+import typing
+from oneflow.compatible import single_client as flow
+import oneflow._oneflow_internal
+
+
+def Compile(session, function_desc, config_proto):
+    with InterpretScope(session, function_desc, config_proto):
+        _CompileJob(session, function_desc)
+        session.StashJob(function_desc.job_func.__name__)
+        oneflow._oneflow_internal.CurJobBuildAndInferCtx_Complete()
+        session.StashJob(
+            function_desc.job_func.__name__,
+            function_desc.job_func.__name__ + "_after_complete",
+        )
+
+
+def EagerRun(session, function_desc, config_proto, args):
+    with InterpretScope(session, function_desc, config_proto):
+        ret = _InterpretGlobalFunction(function_desc, args)
+        oneflow._oneflow_internal.CurJobBuildAndInferCtx_Complete()
+        session_ctx.GetDefaultSession().UpdateInfo4InterfaceOp()
+    return ret
+
+
+@contextmanager
+def InterpretScope(session, function_desc, config_proto):
+    job_conf = function_desc.job_config_proto
+    job_conf.set_job_name(function_desc.job_func.__name__)
+    placement_scope = function_desc.function_attribute.default_placement_scope
+    if placement_scope is None:
+        tag_and_dev_ids = placement_util.GetDefaultMachineDeviceIds(session.resource)
+        hierarchy = None
+    else:
+        assert isinstance(placement_scope, placement_ctx.EmptyPlacementScope)
+        tag_and_dev_ids = (
+            placement_scope.device_tag,
+            placement_scope.machine_device_ids,
+        )
+        hierarchy = placement_scope.hierarchy
+
+    distribute_strategy = function_desc.function_attribute.default_distribute_strategy
+    if distribute_strategy is None:
+        distribute_strategy = distribute_util.DistributeConsistentStrategy()
+    is_mirrored = isinstance(
+        distribute_strategy, distribute_util.DistributeMirroredStrategy
+    )
+    assert isinstance(hierarchy, (list, tuple)) or hierarchy is None
+    if hierarchy is not None:
+        hierarchy = oneflow._oneflow_internal.Size(tuple(hierarchy))
+    scope = scope_util.MakeInitialScope(
+        job_conf, *tag_and_dev_ids, hierarchy, is_mirrored
+    )
+    with _JobBuildAndInferCtx(job_conf.job_name()), distribute_strategy:
+        c_api_util.CurJobBuildAndInferCtx_SetJobConf(job_conf)
+        with runtime_mode.ModeScope(runtime_mode.GLOBAL_MODE):
+            with scope_util.ScopeContext(scope):
+                yield
+
+
+def _CompileJob(session, function_desc):
+    func = function_desc.job_func
+    parameters = func.__oneflow_function_signature__.parameters
+    if len(parameters) == 0:
+        func.__oneflow_input_blob_defs__ = ()
+    elif all(p.annotation is inspect._empty for _, p in parameters.items()):
+        func.__oneflow_input_blob_defs__ = _GetArgDefault(func)
+    elif all(p.annotation is not inspect._empty for _, p in parameters.items()):
+        func.__oneflow_input_blob_defs__ = _MakeInputBlobDefFromParameterSignature(
+            parameters
+        )
+    else:
+        raise NotImplementedError(
+            "All parameters of a global function should be annotated"
+        )
+    inputs = _RecursiveMakeInputBlobs(func.__oneflow_input_blob_defs__)
+    ret = func(*inputs)
+    return_annotation = func.__oneflow_function_signature__.return_annotation
+    oft_util.CheckReturnByAnnotation(func.__name__, ret, return_annotation)
+    func.__oneflow_output_remote_blobs__ = _RecursiveMakeRetRemoteBlobs(
+        ret, allow_cpu_return_op=function_desc.function_attribute.allow_cpu_return_op
+    )
+
+
+def _InterpretGlobalFunction(function_desc, args):
+    func = function_desc.job_func
+    parameters = func.__oneflow_function_signature__.parameters
+    if len(parameters) == 0:
+        func.__oneflow_input_blob_defs__ = ()
+    elif all(p.annotation is inspect._empty for _, p in parameters.items()):
+        func.__oneflow_input_blob_defs__ = _GetArgDefault(func)
+    elif all(p.annotation is not inspect._empty for _, p in parameters.items()):
+        func.__oneflow_input_blob_defs__ = _MakeInputBlobDefFromParameterSignature(
+            parameters
+        )
+    else:
+        raise NotImplementedError(
+            "All parameters of a global function should be annotated"
+        )
+    inputs = push_util.MakeEagerInputBlobs(func.__oneflow_input_blob_defs__, args)
+    ret = func(*inputs)
+    return_annotation = func.__oneflow_function_signature__.return_annotation
+    oft_util.CheckReturnByAnnotation(func.__name__, ret, return_annotation)
+    return _RecursiveMakeRetRemoteBlobs(
+        ret, allow_cpu_return_op=function_desc.function_attribute.allow_cpu_return_op
+    )
+
+
+@contextmanager
+def _JobBuildAndInferCtx(job_name):
+    c_api_util.JobBuildAndInferCtx_Open(job_name)
+    try:
+        yield
+    finally:
+        oneflow._oneflow_internal.JobBuildAndInferCtx_Close()
+
+
+def _GetArgDefault(func):
+    if hasattr(func, "__oneflow_arg_default__"):
+        return func.__oneflow_arg_default__
+    return _CloneArgBlobDef(func_inspect_util.GetArgDefaults(func))
+
+
+def _CloneArgBlobDef(args):
+    if isinstance(args, input_blob_util.ArgBlobDef):
+        return args.Clone()
+    if isinstance(args, (tuple, list)):
+        return type(args)(_CloneArgBlobDef(x) for x in args)
+    if isinstance(args, dict):
+        return {k: _CloneArgBlobDef(v) for k, v in args.items()}
+    raise NotImplementedError(
+        "oneflow.compatible.single_client.global_function only accepts nested input blob defs"
+    )
+
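`_CompileJob` and `_InterpretGlobalFunction` above enforce an all-or-none rule: a global function must have either no annotated parameters (falling back to arg defaults) or all of them annotated, otherwise NotImplementedError is raised. A usage sketch of the fully annotated path, assuming an installed OneFlow build that ships the compatible single-client API:

```python
import numpy as np
from oneflow.compatible import single_client as flow
from oneflow.compatible.single_client import typing as tp

# Every parameter is annotated, so _CompileJob takes the
# _MakeInputBlobDefFromParameterSignature branch above; mixing
# annotated and unannotated parameters would raise NotImplementedError.
@flow.global_function(type="predict")
def double(x: tp.Numpy.Placeholder((2, 3), dtype=flow.float32)) -> tp.Numpy:
    return x * 2

print(double(np.ones((2, 3), dtype=np.float32)))
```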
+
+def _RecursiveMakeInputBlobs(input_blob_def):
+    if isinstance(input_blob_def, input_blob_util.ArgBlobDef):
+        return ops.InputOpByArgBlobDef(input_blob_def)
+    if isinstance(input_blob_def, (tuple, list)):
+        return type(input_blob_def)(_RecursiveMakeInputBlobs(x) for x in input_blob_def)
+    if isinstance(input_blob_def, dict):
+        return {k: _RecursiveMakeInputBlobs(v) for k, v in input_blob_def.items()}
+    raise NotImplementedError(
+        "oneflow.compatible.single_client.global_function accepts "
+        + "ArgBlobDefs or list/tuple/dict nested ArgBlobDefs as argument"
+    )
+
+
+def _MakeInputBlobDefFromParameterSignature(parameters):
+    def CheckAndRecursiveMake(p):
+        return _RecursiveMakeInputBlobDef(p.annotation)
+
+    return tuple(CheckAndRecursiveMake(p) for _, p in parameters.items())
+
+
+def _RecursiveMakeInputBlobDef(cls):
+    if oft.OriginFrom(cls, oft.OneflowNumpyDef):
+        return cls.NewInputBlobDef()
+    elif oft.OriginFrom(cls, typing.Tuple):
+        return tuple(_RecursiveMakeInputBlobDef(a) for a in cls.__args__)
+    else:
+        raise NotImplementedError(
+            ("\nannotation %s" % cls)
+            + " not supported"
+            + "\nonly oneflow.compatible.single_client.typing.Numpy.Placeholder and "
+            "oneflow.compatible.single_client.typing.ListNumpy.Placeholder are supported"
+        )
+
+
+def _RecursiveMakeRetRemoteBlobs(remote_blobs, **kwarg):
+    if remote_blobs is None:
+        return None
+    if isinstance(remote_blobs, oneflow._oneflow_internal.BlobDesc):
+        return ops.ReturnRemoteBlob(remote_blobs, **kwarg)
+    if isinstance(remote_blobs, (tuple, list)):
+        return type(remote_blobs)(
+            _RecursiveMakeRetRemoteBlobs(x, **kwarg) for x in remote_blobs
+        )
+    if isinstance(remote_blobs, dict):
+        return {
+            k: _RecursiveMakeRetRemoteBlobs(v, **kwarg) for k, v in remote_blobs.items()
+        }
+    raise NotImplementedError(
+        "oneflow.compatible.single_client.global_function returns "
+        + "RemoteBlob or list/tuple/dict nested RemoteBlob only"
+    )
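`_RecursiveMakeRetRemoteBlobs` above lets a global function return None, a single blob, or blobs nested in tuples, lists and dicts, with each leaf wrapped by a return op while the container structure is preserved. A sketch under the same assumptions as the previous example; whether a given container annotation is accepted is ultimately decided by `CheckReturnByAnnotation` in typing_util:

```python
from typing import Dict
import numpy as np
from oneflow.compatible import single_client as flow
from oneflow.compatible.single_client import typing as tp

# Returning a dict of blobs: _RecursiveMakeRetRemoteBlobs wraps each
# value with a return op and keeps the dict keys intact.
@flow.global_function(type="predict")
def stats(
    x: tp.Numpy.Placeholder((4,), dtype=flow.float32)
) -> Dict[str, tp.Numpy]:
    return {"sum": flow.math.reduce_sum(x), "mean": flow.math.reduce_mean(x)}

out = stats(np.arange(4, dtype=np.float32))
print(out["sum"], out["mean"])
```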
+""" +from __future__ import absolute_import, print_function + +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal +import traceback + + +@oneflow_export("config.load_library") +def api_load_library(val: str) -> None: + r"""Load necessary library for job + + Args: + val (str): path to shared object file + """ + return enable_if.unique([load_library, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def load_library(val): + assert type(val) is str + sess = session_ctx.GetDefaultSession() + sess.config_proto.load_lib_path.append(val) + + +@oneflow_export("config.load_library_now") +def api_load_library_now(val: str) -> None: + r"""Load necessary library for job now + + Args: + val (str): path to shared object file + """ + return enable_if.unique([load_library_now, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def load_library_now(val): + assert type(val) is str + oneflow._oneflow_internal.LoadLibraryNow(val) + + +@oneflow_export("config.machine_num") +def api_machine_num(val: int) -> None: + r"""Set available number of machine/node for running job . + + Args: + val (int): available number of machines + """ + return enable_if.unique([machine_num, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def machine_num(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is int + sess.config_proto.resource.machine_num = val + + +@oneflow_export("config.gpu_device_num") +def api_gpu_device_num(val: int) -> None: + r"""Set number of GPUs on each machine to run oneflow on. + + Args: + val (int): number of GPUs. It is identical on every machine. In other words, + you can't specify different number of GPUs you would like to use on each machine. + """ + if oneflow._oneflow_internal.flags.with_cuda(): + return enable_if.unique([gpu_device_num, do_nothing])(val) + else: + print( + "INFO: for CPU-only OneFlow, oneflow.compatible.single_client.config.gpu_device_num is equivalent to oneflow.compatible.single_client.config.cpu_device_num" + ) + print(traceback.format_stack()[-2]) + return enable_if.unique([cpu_device_num, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def gpu_device_num(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is int + sess.config_proto.resource.gpu_device_num = val + + +@oneflow_export("config.cpu_device_num") +def api_cpu_device_num(val: int) -> None: + r"""Set number of CPUs on each machine to run oneflow on. Usually you don't need to set this. + + Args: + val (int): number of CPUs. It is identical on every machine. + """ + return enable_if.unique([cpu_device_num, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def cpu_device_num(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is int + sess.config_proto.resource.cpu_device_num = val + + +@oneflow_export("config.comm_net_worker_num") +def api_comm_net_worker_num(val: int) -> None: + r"""Set up the workers number in epoll mode network, + If use RDMA mode network, then doesn't need. 
+
+    Args:
+        val (int): number of workers
+    """
+    return enable_if.unique([comm_net_worker_num, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
+def comm_net_worker_num(val):
+    sess = session_ctx.GetDefaultSession()
+    assert type(val) is int
+    sess.config_proto.resource.comm_net_worker_num = val
+
+
+@oneflow_export("config.max_mdsave_worker_num")
+def api_max_mdsave_worker_num(val: int) -> None:
+    r"""Set the maximum number of workers for the mdsave process.
+
+    Args:
+        val (int): max number of workers
+    """
+    return enable_if.unique([max_mdsave_worker_num, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
+def max_mdsave_worker_num(val):
+    sess = session_ctx.GetDefaultSession()
+    assert type(val) is int
+    sess.config_proto.resource.max_mdsave_worker_num = val
+
+
+@oneflow_export("config.enable_numa_aware_cuda_malloc_host")
+def api_numa_aware_cuda_malloc_host(val: bool = True) -> None:
+    r"""Whether or not to make NUMA aware of host memory allocated by CUDA.
+
+    Args:
+        val (bool, optional): True or False. Defaults to True.
+    """
+    print(
+        "'enable_numa_aware_cuda_malloc_host' has been deprecated, has no effect and will be removed in the future."
+    )
+
+
+@oneflow_export("config.compute_thread_pool_size")
+def api_compute_thread_pool_size(val: int) -> None:
+    r"""Set the size of the compute thread pool
+
+    Args:
+        val (int): size of the thread pool
+    """
+    return enable_if.unique([compute_thread_pool_size, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
+def compute_thread_pool_size(val):
+    sess = session_ctx.GetDefaultSession()
+    assert type(val) is int
+    sess.config_proto.resource.compute_thread_pool_size = val
+
+
+@oneflow_export("config.rdma_mem_block_mbyte")
+def api_rdma_mem_block_mbyte(val: int) -> None:
+    r"""Set the memory block size in rdma mode.
+
+    Args:
+        val (int): size of a block, e.g. 1024(mb)
+    """
+    return enable_if.unique([rdma_mem_block_mbyte, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
+def rdma_mem_block_mbyte(val):
+    sess = session_ctx.GetDefaultSession()
+    assert type(val) is int
+    sess.config_proto.resource.rdma_mem_block_mbyte = val
+
+
+@oneflow_export("config.rdma_recv_msg_buf_mbyte")
+def api_rdma_recv_msg_buf_mbyte(val: int) -> None:
+    r"""Set the buffer size for receiving messages in rdma mode
+
+    Args:
+        val (int): buffer size, e.g. 1024(mb)
+    """
+    return enable_if.unique([rdma_recv_msg_buf_mbyte, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
+def rdma_recv_msg_buf_mbyte(val):
+    sess = session_ctx.GetDefaultSession()
+    assert type(val) is int
+    sess.config_proto.resource.rdma_recv_msg_buf_mbyte = val
+
+
+@oneflow_export("config.reserved_host_mem_mbyte")
+def api_reserved_host_mem_mbyte(val: int) -> None:
+    r"""Set the size of reserved host memory
+
+    Args:
+        val (int): memory size, e.g. 1024(mb)
+    """
+    return enable_if.unique([reserved_host_mem_mbyte, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
+def reserved_host_mem_mbyte(val):
+    sess = session_ctx.GetDefaultSession()
+    assert type(val) is int
+    sess.config_proto.resource.reserved_host_mem_mbyte = val
+
+
+@oneflow_export("config.reserved_device_mem_mbyte")
+def api_reserved_device_mem_mbyte(val: int) -> None:
+    r"""Set the size of reserved device memory
+
+    Args:
+        val (int): memory size, e.g. 1024(mb)
+    """
+    return enable_if.unique([reserved_device_mem_mbyte, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
+def reserved_device_mem_mbyte(val):
+    sess = session_ctx.GetDefaultSession()
+    assert type(val) is int
+    sess.config_proto.resource.reserved_device_mem_mbyte = val
+
+
+@oneflow_export("config.use_rdma")
+def api_use_rdma(val: bool = True) -> None:
+    r"""Whether or not to use RDMA to speed up data transmission between cluster nodes.
+    If not, the normal epoll mode is used.
+
+    Args:
+        val (bool, optional): Defaults to True.
+    """
+    return enable_if.unique([use_rdma, do_nothing])(val=val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
+def use_rdma(val=True):
+    sess = session_ctx.GetDefaultSession()
+    assert type(val) is bool
+    sess.config_proto.resource.use_rdma = val
+
+
+@oneflow_export("config.thread_enable_local_message_queue")
+def api_thread_enable_local_message_queue(val: bool) -> None:
+    """Whether or not to enable the local message queue for threads.
+
+    Args:
+        val (bool): True or False
+    """
+    return enable_if.unique([thread_enable_local_message_queue, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
+def thread_enable_local_message_queue(val):
+    sess = session_ctx.GetDefaultSession()
+    assert type(val) is bool
+    sess.config_proto.resource.thread_enable_local_message_queue = val
+
+
+@oneflow_export("config.enable_debug_mode")
+def api_enable_debug_mode(val: bool) -> None:
+    r"""Whether or not to use debug mode.
+
+    Args:
+        val (bool): True or False
+    """
+    return enable_if.unique([enable_debug_mode, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
+def enable_debug_mode(val):
+    sess = session_ctx.GetDefaultSession()
+    assert type(val) is bool
+    sess.config_proto.resource.enable_debug_mode = val
+
+
+@oneflow_export("config.legacy_model_io_enabled")
+def api_legacy_model_io_enabled():
+    sess = session_ctx.GetDefaultSession()
+    return sess.config_proto.resource.enable_legacy_model_io
+
+
+@oneflow_export("config.enable_legacy_model_io")
+def api_enable_legacy_model_io(val: bool = True):
+    r"""Whether or not to use legacy model io.
+
+    Args:
+        val (bool): True or False
+    """
+    return enable_if.unique([enable_legacy_model_io, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
+def enable_legacy_model_io(val):
+    sess = session_ctx.GetDefaultSession()
+    assert type(val) is bool
+    sess.config_proto.resource.enable_legacy_model_io = val
+
+
+@oneflow_export("config.enable_model_io_v2")
+def api_enable_model_io_v2(val):
+    r"""Whether or not to use version 2 of the model input/output function.
+
+    Args:
+        val (bool): True or False
+    """
+    return enable_if.unique([enable_model_io_v2, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized)
+def enable_model_io_v2(val):
+    sess = session_ctx.GetDefaultSession()
+    assert type(val) is bool
+    sess.config_proto.resource.enable_model_io_v2 = val
+
+
+@oneflow_export("config.collect_act_event")
+def api_collect_act_event(val: bool = True) -> None:
+    r"""Whether or not to collect act events.
+
+    Args:
+        val (bool, optional): True or False. Defaults to True.
+ """ + return enable_if.unique([collect_act_event, do_nothing])(val=val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def collect_act_event(val=True): + sess = session_ctx.GetDefaultSession() + assert type(val) is int + sess.config_proto.profile_conf.collect_act_event = val + + +@oneflow_export("config.collective_boxing.enable_fusion") +def api_enable_fusion(val: bool = True) -> None: + r"""Whether or not allow fusion the operators + + Args: + val (bool, optional): True or False. Defaults to True. + """ + return enable_if.unique([enable_fusion, do_nothing])(val=val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def enable_fusion(val=True): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.collective_boxing_conf.enable_fusion = val + + +@oneflow_export("config.collective_boxing.num_callback_threads") +def api_num_callback_threads(val: int) -> None: + r"""Set up number of callback threads for boxing process. + Boxing is used to convert between different parallel properties of logical tensor + + Args: + val (int): number of callback threads + """ + return enable_if.unique([num_callback_threads, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def num_callback_threads(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is int + sess.config_proto.resource.collective_boxing_conf.num_callback_threads = val + + +@oneflow_export("config.enable_tensor_float_32_compute") +def api_enable_tensor_float_32_compute(val: bool = True) -> None: + r"""Whether or not to enable Tensor-float-32 on supported GPUs + + Args: + val (bool, optional): True or False. Defaults to True. + """ + return enable_if.unique([enable_tensor_float_32_compute, do_nothing])(val=val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def enable_tensor_float_32_compute(val=True): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.enable_tensor_float_32_compute = val + + +@oneflow_export("config.enable_mem_chain_merge") +def api_enable_mem_chain_merge(val: bool = True) -> None: + r"""Whether or not to enable MemChain merge. + + Args: + val (bool, optional): True or False. Defaults to True. + """ + return enable_if.unique([enable_mem_chain_merge, do_nothing])(val=val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def enable_mem_chain_merge(val=True): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.enable_mem_chain_merge = val + + +@oneflow_export("config.nccl_use_compute_stream") +def api_nccl_use_compute_stream(val: bool = False) -> None: + r"""Whether or not nccl use compute stream to reuse nccl memory and speedup + + Args: + val (bool, optional): True or False. Defaults to False. + """ + return enable_if.unique([nccl_use_compute_stream, do_nothing])(val=val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def nccl_use_compute_stream(val=False): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.nccl_use_compute_stream = val + + +@oneflow_export("config.disable_group_boxing_by_dst_parallel") +def api_disable_group_boxing_by_dst_parallel(val: bool = False) -> None: + r"""Whether or not disable group boxing by dst parallel pass to reduce boxing memory life cycle. + + Args: + val (bool, optional): True or False. Defaults to False. 
+ """ + return enable_if.unique([disable_group_boxing_by_dst_parallel, do_nothing])(val=val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def disable_group_boxing_by_dst_parallel(val=False): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.disable_group_boxing_by_dst_parallel = val + + +@oneflow_export("config.collective_boxing.nccl_num_streams") +def api_nccl_num_streams(val: int) -> None: + r"""Set up the number of nccl parallel streams while use boxing + + Args: + val (int): number of streams + """ + return enable_if.unique([nccl_num_streams, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def nccl_num_streams(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is int + sess.config_proto.resource.collective_boxing_conf.nccl_num_streams = val + + +@oneflow_export("config.collective_boxing.nccl_fusion_threshold_mb") +def api_nccl_fusion_threshold_mb(val: int) -> None: + r"""Set up threshold for oprators fusion + + Args: + val (int): int number, e.g. 10(mb) + """ + return enable_if.unique([nccl_fusion_threshold_mb, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def nccl_fusion_threshold_mb(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is int + sess.config_proto.resource.collective_boxing_conf.nccl_fusion_threshold_mb = val + + +@oneflow_export("config.collective_boxing.nccl_fusion_all_reduce_use_buffer") +def api_nccl_fusion_all_reduce_use_buffer(val: bool) -> None: + r"""Whether or not use buffer during nccl fusion progress + + Args: + val (bool): True or False + """ + return enable_if.unique([nccl_fusion_all_reduce_use_buffer, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def nccl_fusion_all_reduce_use_buffer(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.collective_boxing_conf.nccl_fusion_all_reduce_use_buffer = ( + val + ) + + +@oneflow_export("config.collective_boxing.nccl_fusion_all_reduce") +def api_nccl_fusion_all_reduce(val: bool) -> None: + r"""Whether or not use nccl fusion during all reduce progress + + Args: + val (bool): True or False + """ + return enable_if.unique([nccl_fusion_all_reduce, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def nccl_fusion_all_reduce(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.collective_boxing_conf.nccl_fusion_all_reduce = val + + +@oneflow_export("config.collective_boxing.nccl_fusion_reduce_scatter") +def api_nccl_fusion_reduce_scatter(val: bool) -> None: + r"""Whether or not use nccl fusion during reduce scatter progress + + Args: + val (bool): True or False + """ + return enable_if.unique([nccl_fusion_reduce_scatter, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def nccl_fusion_reduce_scatter(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.collective_boxing_conf.nccl_fusion_reduce_scatter = val + + +@oneflow_export("config.collective_boxing.nccl_fusion_all_gather") +def api_nccl_fusion_all_gather(val: bool) -> None: + r"""Whether or not use nccl fusion during all gather progress + + Args: + val (bool): True or False + """ + return enable_if.unique([nccl_fusion_all_gather, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & 
~hob.session_initialized) +def nccl_fusion_all_gather(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.collective_boxing_conf.nccl_fusion_all_gather = val + + +@oneflow_export("config.collective_boxing.nccl_fusion_reduce") +def api_nccl_fusion_reduce(val: bool) -> None: + r"""Whether or not use nccl fusion during reduce progress + + Args: + val (bool): True or False + """ + return enable_if.unique([nccl_fusion_reduce, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def nccl_fusion_reduce(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.collective_boxing_conf.nccl_fusion_reduce = val + + +@oneflow_export("config.collective_boxing.nccl_fusion_broadcast") +def api_nccl_fusion_broadcast(val: bool) -> None: + r"""Whether or not use nccl fusion during broadcast progress + + Args: + val (bool): True or False + """ + return enable_if.unique([nccl_fusion_broadcast, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def nccl_fusion_broadcast(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.collective_boxing_conf.nccl_fusion_broadcast = val + + +@oneflow_export("config.collective_boxing.nccl_fusion_max_ops") +def api_nccl_fusion_max_ops(val: int) -> None: + r"""Maximum number of ops for nccl fusion. + + Args: + val (int): Maximum number of ops + """ + return enable_if.unique([nccl_fusion_max_ops, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def nccl_fusion_max_ops(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is int + sess.config_proto.resource.collective_boxing_conf.nccl_fusion_max_ops = val + + +@oneflow_export("config.collective_boxing.nccl_enable_all_to_all") +def api_nccl_enable_all_to_all(val: bool) -> None: + r"""Whether or not use nccl all2all during s2s boxing + + Args: + val (bool): True or False + """ + return enable_if.unique([nccl_enable_all_to_all, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def nccl_enable_all_to_all(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.collective_boxing_conf.nccl_enable_all_to_all = val + + +@oneflow_export("config.collective_boxing.nccl_enable_mixed_fusion") +def api_nccl_enable_mixed_fusion(val: bool) -> None: + r"""Whether or not use nccl mixed fusion + + Args: + val (bool): True or False + """ + return enable_if.unique([nccl_enable_mixed_fusion, do_nothing])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.session_initialized) +def nccl_enable_mixed_fusion(val): + sess = session_ctx.GetDefaultSession() + assert type(val) is bool + sess.config_proto.resource.collective_boxing_conf.nccl_enable_mixed_fusion = val + + +@enable_if.condition(hob.in_normal_mode & hob.session_initialized) +def do_nothing(*args, **kwargs): + print("Nothing happened because the session is running") + return False diff --git a/oneflow/compatible_single_client_python/framework/distribute.py b/oneflow/compatible_single_client_python/framework/distribute.py new file mode 100644 index 0000000000000000000000000000000000000000..87105ed17ac099b87a5d0e62761979b6fabea193 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/distribute.py @@ -0,0 +1,269 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. 
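All of these setters share one calling convention: they only take effect in normal mode before the default session is initialized, otherwise the `do_nothing` branch wins and prints a warning. A short usage sketch, assuming an installed OneFlow build with the compatible single-client API:

```python
from oneflow.compatible import single_client as flow

# Config setters mutate the default session's config_proto; they must
# run before the session starts, or the do_nothing branch above is
# selected instead.
flow.config.cpu_device_num(2)
flow.config.enable_debug_mode(True)
```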
diff --git a/oneflow/compatible_single_client_python/framework/distribute.py b/oneflow/compatible_single_client_python/framework/distribute.py
new file mode 100644
index 0000000000000000000000000000000000000000..87105ed17ac099b87a5d0e62761979b6fabea193
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/distribute.py
@@ -0,0 +1,269 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+from contextlib import contextmanager
+
+from oneflow.compatible_single_client_python.framework import (
+    distribute_context as distribute_ctx,
+)
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    oneflow_deprecate,
+    oneflow_export_value,
+)
+import oneflow._oneflow_internal
+import traceback
+
+
+@oneflow_export("distribute.mirrored_strategy")
+@oneflow_deprecate()
+def deprecated_mirrored_strategy():
+    print(
+        "WARNING:",
+        "oneflow.compatible.single_client.distribute.mirrored_strategy",
+        "will be removed in the future, use {} instead.".format(
+            "oneflow.compatible.single_client.scope.mirrored_view"
+        ),
+    )
+    print(traceback.format_stack()[-2])
+    return DistributeMirroredStrategy()
+
+
+@oneflow_export("scope.mirrored_view")
+class DistributeMirroredStrategy(distribute_ctx.DistributeStrategy):
+    r"""Create a scope in mirrored view. All operators within the scope will be mirrored among different accelerators.
+    Usage::
+
+        with oneflow.compatible.single_client.scope.mirrored_view():
+            ...
+
+    """
+
+    def __init__(self):
+        distribute_ctx.DistributeStrategy.__init__(self, True)
+
+
+@oneflow_export("distribute.mirrored_strategy_enabled")
+@oneflow_deprecate()
+def deprecated_mirrored_strategy_enabled():
+    print(
+        "WARNING:",
+        "oneflow.compatible.single_client.distribute.mirrored_strategy_enabled",
+        "will be removed in the future, use {} instead.".format(
+            "oneflow.compatible.single_client.scope.mirrored_view_enabled"
+        ),
+    )
+    print(traceback.format_stack()[-2])
+    return MirroredStrategyEnabled()
+
+
+@oneflow_export("scope.mirrored_view_enabled")
+def MirroredStrategyEnabled() -> bool:
+    r"""
+
+    Returns:
+        bool: `True` if mirrored strategy is enabled in the current context where this function is called.
+
+    """
+    return distribute_ctx.IsMirroredStrategyEnabled()
+
+
+@oneflow_export("distribute.consistent_strategy")
+@oneflow_deprecate()
+def deprecated_consistent_strategy():
+    print(
+        "WARNING:",
+        "oneflow.compatible.single_client.distribute.consistent_strategy",
+        "will be removed in the future, use {} instead.".format(
+            "oneflow.compatible.single_client.scope.consistent_view"
+        ),
+    )
+    print(traceback.format_stack()[-2])
+    return DistributeConsistentStrategy()
+
+
+@oneflow_export("scope.consistent_view")
+class DistributeConsistentStrategy(distribute_ctx.DistributeStrategy):
+    r"""Create a scope in consistent view. All operators within the scope will be automatically parallelized among different accelerators for best performance and least data transfer.
+
+    Usage::
+
+        with oneflow.compatible.single_client.scope.consistent_view():
+            ...
+ + """ + + def __init__(self): + distribute_ctx.DistributeStrategy.__init__(self, False) + + +@oneflow_export("distribute.consistent_strategy_enabled") +@oneflow_deprecate() +def deprecated_consistent_strategy_enabled(): + print( + "WARNING:", + "oneflow.compatible.single_client.distribute.consistent_strategy_enabled", + "will be removed in the future, use {} instead.".format( + "oneflow.compatible.single_client.scope.consistent_view_enabled" + ), + ) + print(traceback.format_stack()[-2]) + return ConsistentStrategyEnabled() + + +@oneflow_export("scope.consistent_view_enabled") +def ConsistentStrategyEnabled() -> bool: + r""" + + Returns: + bool: `True` if consistent strategy is enabled in current context where this function is called. + + """ + return distribute_ctx.IsConsistentStrategyEnabled() + + +@oneflow_export("distribute.split") +def split(axis: int) -> oneflow._oneflow_internal.distribute.SplitDistribute: + r"""Generate a split scheme in which op will be splitted at `axis`. + + Args: + axis (int): At `axis` the op will be splitted. + + Returns: + SplitDistribute: Split scheme object, often required by `with_distribute` method of `Blob` or `oneflow.compatible.single_client.get_variable`. + + Example:: + weight = weight.with_distribute(distribute.split(1)) + + """ + assert type(axis) is int + return oneflow._oneflow_internal.distribute.split(axis) + + +@oneflow_export("distribute.broadcast") +def broadcast() -> oneflow._oneflow_internal.distribute.BroadcastDistribute: + r"""Generate a broadcast scheme. + + Returns: + BroadcastDistribute: Broadcast scheme object, often required by `with_distribute` method of `Blob` or `oneflow.compatible.single_client.get_variable`. + + Example:: + segment_ids = segment_ids.with_distribute(flow.distribute.broadcast()) + + """ + return oneflow._oneflow_internal.distribute.broadcast() + + +@oneflow_export("distribute.auto") +def auto() -> oneflow._oneflow_internal.distribute.AutoDistribute: + r"""Generate a broadcast scheme. + + Returns: + AutoDistribute: Auto distribute scheme object, often required by `with_distribute` method of `Blob` or `oneflow.compatible.single_client.get_variable`. + + """ + return oneflow._oneflow_internal.distribute.auto() + + +@oneflow_export("distribute.assert_is_valid_distribute") +def assert_is_valid_distribute( + distribute: oneflow._oneflow_internal.distribute.Distribute, +) -> None: + assert isinstance( + distribute, oneflow._oneflow_internal.distribute.Distribute + ), """not a valid distribute policy. + expected: 1) oneflow.compatible.single_client.distribute.split(axis); 2) oneflow.compatible.single_client.distribute.broadcast(); 3) oneflow.compatible.single_client.distribute.auto()""" + + +@oneflow_export("distributed.get_local_rank") +def get_local_rank(): + return oneflow._oneflow_internal.GetLocalRank() + + +@oneflow_export("distributed.get_rank") +def get_rank(): + r"""Returns the rank of current process group. + + Returns: + The rank of the process group. + + """ + return oneflow._oneflow_internal.GetRank() + + +@oneflow_export("distributed.get_world_size") +def get_world_size(): + r"""Returns the number of processes in the current process group. + + Returns: + The world size of the process group. 
+ + """ + return oneflow._oneflow_internal.GetWorldSize() + + +@oneflow_export("distributed.is_multi_client") +def is_multi_client(): + return oneflow._oneflow_internal.IsMultiClient() + + +@oneflow_export("sbp.split") +def split_sbp( + axis: int, +) -> oneflow._oneflow_internal.oneflow.core.job.sbp_parallel.SbpParallel: + r"""Generate a split scheme in which op will be splitted at `axis`. + + Args: + axis (int): At `axis` the op will be splitted. + + Returns: + SbpParallel: Split scheme object, often required by `to_consistent` method of `Tensor` + + Example:: + array = numpy.array([[1.0, 2.0], [3.0, 4.0]]) + t1 = flow.tensor(array) + ct2 = t1.to_consistent(sbp=flow.sbp.split(0), placement=("cuda", {0: [0, 1, 2, 3]})) + + """ + assert type(axis) is int + return oneflow._oneflow_internal.sbp.split(axis) + + +@oneflow_export_value("sbp.broadcast") +def broadcast_sbp() -> oneflow._oneflow_internal.oneflow.core.job.sbp_parallel.SbpParallel: + r"""Generate a broadcast scheme. + Returns: + SbpParallel: Broadcast scheme object,, often required by `to_consistent` method of `Tensor` + Example:: + array = numpy.array([[1.0, 2.0], [3.0, 4.0]]) + t1 = flow.tensor(array) + ct2 = t1.to_consistent(sbp=flow.sbp.broadcast, placement=("cuda", {0: [0, 1, 2, 3]})) + """ + return oneflow._oneflow_internal.sbp.broadcast() + + +@oneflow_export_value("sbp.partial_sum") +def partial_sum_sbp() -> oneflow._oneflow_internal.oneflow.core.job.sbp_parallel.SbpParallel: + r"""Generate a partial_sum scheme. + Returns: + SbpParallel: PartialSum scheme object,, often required by `to_consistent` method of `Tensor` + Example:: + array = numpy.array([[1.0, 2.0], [3.0, 4.0]]) + t1 = flow.tensor(array) + ct2 = t1.to_consistent(sbp=flow.sbp.partial_sum, placement=("cuda", {0: [0, 1, 2, 3]})) + """ + return oneflow._oneflow_internal.sbp.partial_sum() diff --git a/oneflow/compatible_single_client_python/framework/distribute_context.py b/oneflow/compatible_single_client_python/framework/distribute_context.py new file mode 100644 index 0000000000000000000000000000000000000000..038f6dece1fe02a6c9801e2ec894b06c25680c83 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/distribute_context.py @@ -0,0 +1,63 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.framework import scope_util as scope_util + + +class DistributeStrategy(object): + def __init__(self, is_mirrored): + self.is_mirrored_ = is_mirrored + self.scope_context_ = None + sess = session_ctx.GetDefaultSession() + # bypass the first DistributeStrategy for avoiding None old_scope + if sess.is_running and not sess.has_empty_is_mirrored_strategy_enabled_stack(): + + def BuildScope(old_scope, builder): + return builder.BuildScopeWithNewIsMirrored(old_scope, is_mirrored) + + self.scope_context_ = scope_util.ScopeContext( + scope_util.MakeScope(BuildScope) + ) + + def __enter__(self, *argc, **kwarg): + PushMirroredStrategyEnabled(self.is_mirrored_) + if self.scope_context_ is not None: + self.scope_context_.__enter__(*argc, **kwarg) + + def __exit__(self, *argc, **kwarg): + PopMirroredStrategyEnabled() + if self.scope_context_ is not None: + self.scope_context_.__exit__(*argc, **kwarg) + + +def PushMirroredStrategyEnabled(val): + session_ctx.GetDefaultSession().push_mirrored_strategy_enabled(val) + + +def IsMirroredStrategyEnabled(): + return session_ctx.GetDefaultSession().is_mirrored_strategy_enabled() + + +def IsConsistentStrategyEnabled(): + return session_ctx.GetDefaultSession().is_consistent_strategy_enabled() + + +def PopMirroredStrategyEnabled(): + session_ctx.GetDefaultSession().pop_mirrored_strategy_enabled() diff --git a/oneflow/compatible_single_client_python/framework/dtype.py b/oneflow/compatible_single_client_python/framework/dtype.py new file mode 100644 index 0000000000000000000000000000000000000000..9c973ba7130b0223c808a5c5a8db713e5ba28843 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/dtype.py @@ -0,0 +1,80 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +import numpy as np +from oneflow.core.common import data_type_pb2 as data_type_pb2 +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal + + +_dtypes = [ + flow.char, + flow.float, + flow.float32, + flow.double, + flow.float64, + flow.float16, + flow.int8, + flow.int32, + flow.int64, + flow.uint8, + flow.record, + flow.tensor_buffer, +] + + +@oneflow_export("dtypes") +def dtypes(): + return _dtypes + + +def convert_proto_dtype_to_oneflow_dtype(proto_dtype): + return oneflow._oneflow_internal.deprecated.GetDTypeByDataType(proto_dtype) + + +_ONEFLOW_DTYPE_TO_NUMPY_DTYPE = { + # could be np.ubyte on some platform + flow.char: np.byte, + flow.float: np.float32, + flow.float16: np.float16, + flow.float32: np.float32, + flow.float64: np.double, + flow.double: np.double, + flow.int8: np.int8, + flow.int32: np.int32, + flow.int64: np.int64, + flow.uint8: np.uint8, +} + + +@oneflow_export("convert_oneflow_dtype_to_numpy_dtype") +def convert_oneflow_dtype_to_numpy_dtype(oneflow_dtype: flow.dtype): + if oneflow_dtype not in _ONEFLOW_DTYPE_TO_NUMPY_DTYPE: + raise NotImplementedError + return _ONEFLOW_DTYPE_TO_NUMPY_DTYPE[oneflow_dtype] + + +def convert_numpy_dtype_to_oneflow_dtype(numpy_dtype: np.dtype): + for k, v in _ONEFLOW_DTYPE_TO_NUMPY_DTYPE.items(): + if v == numpy_dtype: + return k + raise NotImplementedError + + +del data_type_pb2 +del np diff --git a/oneflow/compatible_single_client_python/framework/env_util.py b/oneflow/compatible_single_client_python/framework/env_util.py new file mode 100644 index 0000000000000000000000000000000000000000..88342d88dc14f2511e330c7e4497c13892e1520b --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/env_util.py @@ -0,0 +1,443 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +import socket +import os +from contextlib import closing + +from oneflow.core.control import ctrl_bootstrap_pb2 as ctrl_bootstrap_pb +from oneflow.core.job import env_pb2 as env_pb +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import ( + placement_context as placement_ctx, +) +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.framework import scope_util as scope_util +from oneflow.core.job import resource_pb2 as resource_util +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + oneflow_deprecate, +) +import oneflow._oneflow_internal +import traceback + + +@oneflow_export("env.all_device_placement") +def api_all_device_placement(device_type: str) -> None: + r"""Return a placement containing all devices of all machines under env. + + Args: + device_type (str): cuda or cpu + """ + return oneflow._oneflow_internal.AllDevicePlacement(device_type) + + +@oneflow_export("enable_eager_execution") +def api_enable_eager_execution(val: bool = True) -> None: + r"""If True, job will execute in eager mode, else use lazy mode(static graph). + + Args: + val (bool, optional): Whether eager execution or not. Defaults to True. + """ + return enable_if.unique([enable_eager_environment])(val) + + +@enable_if.condition(hob.in_normal_mode & ~hob.any_global_function_defined) +def enable_eager_environment(val=True): + return oneflow._oneflow_internal.EnableEagerEnvironment(val) + + +@oneflow_export("env.init") +def api_env_init() -> bool: + r"""Init environment for job + + Returns: + bool: [description] + """ + return enable_if.unique([_env_init_single_client, do_nothing])() + + +@enable_if.condition(hob.in_normal_mode & ~hob.env_initialized) +def _env_init_single_client(): + return env_init(False) + + +def env_init(is_multi_client): + global default_env_proto + assert len(default_env_proto.machine) > 0 + CompleteEnvProto(default_env_proto, is_multi_client) + c_api_util.InitEnv(default_env_proto, is_multi_client) + if not is_multi_client: + if oneflow._oneflow_internal.CurrentMachineId() == 0: + scope_util.InitScopeStack() + else: + exit(0) + return True + + +def init_default_physical_env(): + default_physical_env_proto = _DefaultEnvProto() + log_dir = os.getenv("ONEFLOW_TEST_LOG_DIR") + if log_dir: + default_physical_env_proto.cpp_logging_conf.log_dir = log_dir + default_physical_env_proto.is_default_physical_env = True + CompleteEnvProto(default_physical_env_proto, False) + c_api_util.InitDefaultEnv(default_physical_env_proto) + + +@oneflow_export("env.current_resource", "current_resource") +def api_get_current_resource() -> resource_util.Resource: + r"""Get current resources, such as:machine nums, cpu/gpu device nums, + epoch network threed num, rdma params... 
+
+    Returns:
+        resource_util.Resource: the current resource config
+    """
+    return enable_if.unique([get_current_resource])()
+
+
+@enable_if.condition(hob.in_normal_mode & hob.env_initialized)
+def get_current_resource():
+    return c_api_util.CurrentResource()
+
+
+@oneflow_export("current_machine_id")
+def api_get_current_machine_id():
+    r"""Get the machine id of the current machine/node
+
+    Returns:
+        int: the machine id
+    """
+    return enable_if.unique([get_current_machine_id])()
+
+
+@enable_if.condition(hob.in_normal_mode & hob.env_initialized)
+def get_current_machine_id() -> int:
+    return oneflow._oneflow_internal.CurrentMachineId()
+
+
+@oneflow_export("env.machine")
+def api_machine(*val: list) -> None:
+    r"""Set machines' hostnames.
+
+    For instance:
+
+        oneflow.compatible.single_client.env.machine([{"addr": "192.168.1.1"}, {"addr": "192.168.1.2"}])
+
+    Args:
+        val: `list`, `tuple` or multiple arguments of `dict`. The first in the list is the master machine.
+    """
+    return enable_if.unique([machine, do_nothing])(*val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.env_initialized)
+def machine(*val):
+    del default_env_proto.machine[:]
+    if len(val) == 1 and isinstance(val[0], (list, tuple)):
+        val = val[0]
+    default_env_proto.ClearField("machine")
+    default_env_proto.machine.extend(_MakeMachine(val))
+
+
+@oneflow_export("env.ctrl_port")
+def api_ctrl_port(val: int) -> None:
+    r"""Set the port number used to control execution across multiple machines. Same on every machine.
+
+    Args:
+        val: a port number accessible to peer machines
+    """
+    return enable_if.unique([ctrl_port, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.env_initialized)
+def ctrl_port(val):
+    assert type(val) is int
+    default_env_proto.ctrl_port = val
+
+
+@oneflow_export("env.data_port")
+def api_data_port(val: int) -> None:
+    r"""Set the port number used for data transfer among multiple machines. Same on every machine.
+
+    Args:
+        val: a port number accessible to peer machines
+    """
+    return enable_if.unique([data_port, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.env_initialized)
+def data_port(val):
+    assert type(val) is int
+    default_env_proto.data_port = val
+
+
+@oneflow_export("env.grpc_use_no_signal")
+@oneflow_deprecate()
+def api_grpc_use_no_signal(val: bool = True) -> None:
+    r"""Set whether rpc uses signals or not (deprecated)
+
+    Args:
+        val (bool, optional): True or False. Defaults to True.
+    """
+    print(
+        "WARNING:",
+        "oneflow.compatible.single_client.env.grpc_use_no_signal is deprecated, users no longer need to set rpc use signal or not. \n",
+        traceback.format_stack()[-2],
+    )
+    return None
+
+
+@oneflow_export("env.log_dir")
+def api_log_dir(val: str) -> None:
+    r"""Specify a dir to store OneFlow's logging files. If not specified, it is `./log` by default.
+
+
+@oneflow_export("env.log_dir")
+def api_log_dir(val: str) -> None:
+    r"""Specify a dir to store OneFlow's logging files. If not specified, it is `./log` by default.
+
+    Args:
+        val (str): string, log file path
+    """
+    return enable_if.unique([log_dir, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.env_initialized)
+def log_dir(val):
+    assert type(val) is str
+    default_env_proto.cpp_logging_conf.log_dir = val
+
+
+@oneflow_export("env.logtostderr")
+def api_logtostderr(val: int) -> None:
+    r"""Set whether log messages go to stderr instead of log files.
+
+    Args:
+        val (int): 0 or 1
+    """
+    return enable_if.unique([logtostderr, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.env_initialized)
+def logtostderr(val):
+    assert type(val) is int
+    default_env_proto.cpp_logging_conf.logtostderr = val
+
+
+@oneflow_export("env.logbuflevel")
+def api_logbuflevel(val: int) -> None:
+    r"""Log messages at a level <= this flag are buffered.
+    Log messages at a higher level are flushed immediately.
+
+    Args:
+        val (int): int, number of level
+    """
+    return enable_if.unique([logbuflevel, do_nothing])(val)
+
+
+@enable_if.condition(hob.in_normal_mode & ~hob.env_initialized)
+def logbuflevel(val):
+    assert type(val) is int
+    default_env_proto.cpp_logging_conf.logbuflevel = val
+
+
+@enable_if.condition(hob.in_normal_mode & hob.env_initialized)
+def do_nothing(*args, **kwargs):
+    print("Nothing happened because environment has been initialized")
+    return False
+
+
+def CompleteEnvProto(env_proto, is_multi_client):
+    if is_multi_client:
+        _UpdateDefaultEnvProtoByMultiClientEnvVars(env_proto)
+    if not env_proto.HasField("ctrl_port"):
+        if len(env_proto.machine) == 1:
+            env_proto.ctrl_port = _FindFreePort()
+        else:
+            raise ValueError(
+                "a ctrl_port is required if running multi-node, set it with 'oneflow.compatible.single_client.env.ctrl_port([YOUR PORT])'"
+            )
+
+
+def _MakeMachine(machines):
+    if isinstance(machines, str):
+        machines = [machines]
+    rp_machine = env_pb.EnvProto().machine
+    for m_data in machines:
+        m = rp_machine.add()
+        if isinstance(m_data, str):
+            m.addr = m_data
+        elif isinstance(m_data, dict):
+            if "addr" in m_data:
+                m.addr = m_data["addr"]
+            if "ctrl_port_agent" in m_data:
+                m.ctrl_port_agent = m_data["ctrl_port_agent"]
+            if "data_port_agent" in m_data:
+                m.data_port_agent = m_data["data_port_agent"]
+        else:
+            raise NotImplementedError
+    addrs_for_check = set()
+    for machine_id, m in enumerate(rp_machine):
+        m.id = machine_id
+        assert m.addr not in addrs_for_check
+        addrs_for_check.add(m.addr)
+    return rp_machine
+
+
+# only used by CI
+@oneflow_export("env.init_bootstrap_confs")
+def api_init_bootstrap_confs(*val: list, **kargs) -> None:
+    return enable_if.unique([MakeBootstrapConfs, do_nothing])(*val, **kargs)
+
+
+def _MakeBootstrapConf(bootstrap_info: dict):
+    global config_master_addr
+    assert config_master_addr.HasField("host"), "must config master host first"
+    assert config_master_addr.HasField("port"), "must config master port first"
+    assert config_world_size != 0, "must config world size first"
+    bootstrap_conf = ctrl_bootstrap_pb.BootstrapConf()
+    bootstrap_conf.master_addr.CopyFrom(config_master_addr)
+    bootstrap_conf.world_size = config_world_size
+    assert "rank" in bootstrap_info
+    bootstrap_conf.rank = bootstrap_info["rank"]
+    if "host" in bootstrap_info:
+        bootstrap_conf.host = bootstrap_info["host"]
+    global config_bootstrap_ctrl_port
+    if config_bootstrap_ctrl_port != 0:
+        bootstrap_conf.ctrl_port = config_bootstrap_ctrl_port
+    global config_node_size
+    if config_node_size != 0:
+        bootstrap_conf.node_size = config_node_size
+    return bootstrap_conf
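Editor's note: `env.init_bootstrap_confs` (continued below) is the CI-only entry point that prebuilds one `BootstrapConf` per rank via `_MakeBootstrapConf` above. A hedged call sketch, mirroring the env-var driven usage shown in its docstring (addresses and ports are illustrative):

```python
# Sketch: how a CI driver might call the helper (values are illustrative).
flow.env.init_bootstrap_confs(
    ["192.168.1.16", "192.168.1.15"],  # node list, master first
    43256,                             # master port
    world_size=2,
    ctrl_port=34527,
)
```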
+# only used by CI
+@enable_if.condition(hob.in_normal_mode & ~hob.env_initialized)
+def MakeBootstrapConfs(
+    node_list, master_port, world_size=0, ctrl_port=-1, node_size=-1
+):
+    r"""Set ctrl_bootstrap_conf info.
+
+    For instance:
+
+        ONEFLOW_TEST_NODE_LIST=192.168.1.16,192.168.1.15 ONEFLOW_TEST_MASTER_PORT=43256
+        ONEFLOW_TEST_WORLD_SIZE=2 ONEFLOW_TEST_RANK_CTRL_PORT=34527
+
+    Args:
+        node_list: `str` or `list` of hosts. First in the list is the master machine.
+    """
+    if isinstance(node_list, str):
+        node_list = [node_list]
+    global global_ctrl_bootstrap_confs
+    assert len(global_ctrl_bootstrap_confs) == 0, "ctrl_bootstrap_conf has been inited"
+    global config_master_addr
+    config_master_addr.host = node_list[0]
+    config_master_addr.port = master_port
+    global config_world_size
+    # use the size of node_list as world_size if world_size is not configured
+    if world_size == 0:
+        config_world_size = len(node_list)
+    else:
+        assert (world_size % len(node_list)) == 0
+        config_world_size = world_size
+    global config_bootstrap_ctrl_port
+    if ctrl_port != -1:
+        config_bootstrap_ctrl_port = ctrl_port
+    global config_node_size
+    if node_size != -1:
+        config_node_size = node_size
+    rank = 0
+    for rank_host in node_list:
+        assert isinstance(rank_host, str)
+        bootstrap_conf = _MakeBootstrapConf({"rank": rank, "host": rank_host})
+        # init ctrl_bootstrap_conf on master
+        if rank == 0:
+            global default_env_proto
+            default_env_proto.ctrl_bootstrap_conf.CopyFrom(bootstrap_conf)
+        global_ctrl_bootstrap_confs.append(bootstrap_conf)
+        rank += 1
+    return global_ctrl_bootstrap_confs
+
+
+def _DefaultEnvProto():
+    env_proto = env_pb.EnvProto()
+    machine = env_proto.machine.add()
+    machine.id = 0
+    machine.addr = "127.0.0.1"
+    return env_proto
+
+
+# copied from
+# https://stackoverflow.com/questions/1365265/on-localhost-how-do-i-pick-a-free-port-number
+def _FindFreePort():
+    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
+        s.bind(("localhost", 0))
+        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        return s.getsockname()[1]
+
+
+def GetEnvDefaultParallelConf(device_tag):
+    if device_tag not in device_tag2default_parallel_conf:
+        parallel_conf = placement_ctx.MakeParallelConf4Resource(
+            device_tag, c_api_util.EnvResource()
+        )
+        device_tag2default_parallel_conf[device_tag] = parallel_conf
+    return device_tag2default_parallel_conf[device_tag]
+
+
+def HasAllMultiClientEnvVars():
+    return (
+        os.getenv("MASTER_ADDR")
+        and os.getenv("MASTER_PORT")
+        and os.getenv("WORLD_SIZE")
+        and os.getenv("RANK")
+        and os.getenv("LOCAL_RANK")
+    )
+
+
+def _UpdateDefaultEnvProtoByMultiClientEnvVars(env_proto):
+    assert HasAllMultiClientEnvVars()
+
+    def str2int(env_config):
+        assert env_config.isdigit()
+        return int(env_config)
+
+    bootstrap_conf = ctrl_bootstrap_pb.BootstrapConf()
+    master_addr = ctrl_bootstrap_pb.Address()
+    master_addr.host = os.getenv("MASTER_ADDR")
+    master_addr.port = str2int(os.getenv("MASTER_PORT"))
+    bootstrap_conf.master_addr.CopyFrom(master_addr)
+    bootstrap_conf.world_size = str2int(os.getenv("WORLD_SIZE"))
+    bootstrap_conf.rank = str2int(os.getenv("RANK"))
+    env_proto.ctrl_bootstrap_conf.CopyFrom(bootstrap_conf)
+
+
+device_tag2default_parallel_conf = {}
+
+default_env_proto = _DefaultEnvProto()
+
+config_master_addr = ctrl_bootstrap_pb.Address()
+
+config_world_size = 0
+
+config_bootstrap_ctrl_port = 0
+
+config_node_size = 0
+
+global_ctrl_bootstrap_confs = []
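Editor's note: for the multi-client path, `CompleteEnvProto` pulls bootstrap info from torch-launcher-style environment variables instead of the `env.*` setters. These are the five variables checked by `HasAllMultiClientEnvVars()` (values illustrative):

```python
# Sketch: environment a launcher must provide before env_init(True).
import os

os.environ.update(
    {
        "MASTER_ADDR": "192.168.1.16",
        "MASTER_PORT": "29500",
        "WORLD_SIZE": "2",
        "RANK": "0",
        "LOCAL_RANK": "0",
    }
)
```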
diff --git a/oneflow/compatible_single_client_python/framework/function_desc.py b/oneflow/compatible_single_client_python/framework/function_desc.py
new file mode 100644
index 0000000000000000000000000000000000000000..b37cbcf86c97ca4943fc86c135537b0343735693
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/function_desc.py
@@ -0,0 +1,108 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+from oneflow.compatible_single_client_python.framework import (
+    session_context as session_ctx,
+)
+from oneflow.compatible_single_client_python.framework import hob as hob
+from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if
+from oneflow._oneflow_internal.oneflow.core.job import job_conf as job_conf_cfg
+from oneflow.compatible_single_client_python.oneflow_export import oneflow_export
+import oneflow._oneflow_internal
+
+
+class FunctionAttribute(object):
+    def __init__(self):
+        self.default_placement_scope = None
+        self.default_distribute_strategy = None
+        self.allow_cpu_return_op = True
+
+
+class FunctionDesc(object):
+    def __init__(self, job_func=None, job_config_proto=None, function_attribute=None):
+        if job_config_proto is None:
+            job_config_proto = job_conf_cfg.JobConfigProto()
+        if function_attribute is None:
+            function_attribute = FunctionAttribute()
+        self.job_func = job_func
+        self.job_config_proto = job_config_proto
+        self.job_config_proto.mutable_predict_conf()
+        self.function_attribute = function_attribute
+
+    def IsTrainable(self):
+        if self.job_config_proto.has_train_conf():
+            return True
+        if self.job_config_proto.has_predict_conf():
+            return False
+        raise NotImplementedError
+
+    def HasAttr(self, attr_name):
+        if attr_name == "flag_name2flag_value":
+            return False
+        if attr_name in self.job_config_proto.flag_name2flag_value():
+            return True
+        return getattr(self.job_config_proto, "has_" + attr_name)()
+
+    def __getattr__(self, attr_name):
+        assert attr_name != "flag_name2flag_value"
+        flag_name2flag_value = self.job_config_proto.flag_name2flag_value()
+        name2default = session_ctx.GetDefaultSession().function_flag_name2default_val
+        if attr_name not in name2default:
+            assert getattr(self.job_config_proto, "has_" + attr_name)()
+            return getattr(self.job_config_proto, attr_name)()
+        attr_value = name2default[attr_name]
+        if attr_name in flag_name2flag_value:
+            attr_value = flag_name2flag_value[attr_name]
+        if attr_value.HasField("at_bool"):
+            return attr_value.at_bool
+        elif attr_value.HasField("at_int64"):
+            return attr_value.at_int64
+        elif attr_value.HasField("at_double"):
+            return attr_value.at_double
+        elif attr_value.HasField("at_string"):
+            return attr_value.at_string
+        else:
+            raise NotImplementedError()
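Editor's note: `FunctionDesc.__getattr__` implements a two-level lookup: a per-function flag set on the job config wins over the session-wide default, and the resulting value message is unwrapped via its `at_*` field. A condensed sketch of that order (dict-style access is a simplification of the cfg map API):

```python
# Condensed sketch of the flag resolution order above (simplified).
def resolve_flag(func_desc, session, attr_name):
    default = session.function_flag_name2default_val[attr_name]
    overrides = func_desc.job_config_proto.flag_name2flag_value()
    attr_value = overrides[attr_name] if attr_name in overrides else default
    for field in ("at_bool", "at_int64", "at_double", "at_string"):
        if attr_value.HasField(field):
            return getattr(attr_value, field)
    raise NotImplementedError()
```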
+
+
+@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled)
+def GetCurrentEagerGlobalFunctionDesc():
+    sess = session_ctx.GetDefaultSession()
+    ret = sess.CurrentEagerGlobalFunctionDesc()
+    assert ret is not None
+    return ret
+
+
+@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled)
+def GetCurrentLazyGlobalFunctionDesc():
+    sess = session_ctx.GetDefaultSession()
+    job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName()
+    ret = sess.GetLazyFunctionDesc(job_name)
+    assert ret is not None
+    return ret
+
+
+@oneflow_export("current_global_function_desc")
+def api_current_global_function_desc() -> FunctionDesc:
+    api_func = enable_if.unique(
+        [GetCurrentLazyGlobalFunctionDesc, GetCurrentEagerGlobalFunctionDesc]
+    )
+    return api_func()
diff --git a/oneflow/compatible_single_client_python/framework/function_util.py b/oneflow/compatible_single_client_python/framework/function_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..8d55780f5ad86e67d2b21de5eebbb162093c41ad
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/function_util.py
@@ -0,0 +1,1013 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+import copy
+import functools
+import re
+import inspect
+import sys
+import traceback
+from typing import Any, Callable, Optional, Union
+
+from oneflow.compatible_single_client_python.framework import (
+    session_context as session_ctx,
+)
+from oneflow.compatible_single_client_python.framework import hob as hob
+from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    oneflow_deprecate,
+)
+from oneflow.compatible_single_client_python.framework.function_desc import FunctionDesc
+from oneflow.compatible_single_client_python.framework import (
+    placement_context as placement_ctx,
+)
+from oneflow.compatible_single_client_python.framework import (
+    distribute_context as distribute_ctx,
+)
+from oneflow.compatible_single_client_python.framework import typing_util as oft_util
+from oneflow.compatible_single_client_python.framework import runtime_mode as rt_mode
+from oneflow.compatible_single_client_python.lib.core import pb_util as pb_util
+from oneflow._oneflow_internal.oneflow.core.common import data_type as data_type_cfg
+import oneflow._oneflow_internal
+
+
+@oneflow_export("FunctionConfig", "function_config", "ExecutionConfig")
+class FunctionConfig(object):
+    r"""OneFlow function's configurations.
+ """ + + def __init__(self) -> None: + self.function_desc = FunctionDesc() + + def __getattr__( + self, attr_name: str + ) -> Callable[[Optional[Union[bool, int, float, str]]], None]: + name2default = session_ctx.GetDefaultSession().function_flag_name2default_val + assert attr_name in name2default + flag_name2flag_value = ( + self.function_desc.job_config_proto.mutable_flag_name2flag_value() + ) + default_val = name2default[attr_name] + + def FunctionConfigSetter( + attr_value: Optional[Union[bool, int, float, str]] = None + ) -> None: + if default_val.HasField("at_bool"): + if attr_value is None: + attr_value = True + assert type(attr_value) is bool + flag_name2flag_value[attr_name].set_at_bool(attr_value) + elif default_val.HasField("at_int64"): + assert type(attr_value) is int + flag_name2flag_value[attr_name].set_at_int64(attr_value) + elif default_val.HasField("at_double"): + assert type(attr_value) is float + flag_name2flag_value[attr_name].set_at_double(attr_value) + elif default_val.HasField("at_string"): + assert type(attr_value) is str + flag_name2flag_value[attr_name].set_at_string(attr_value) + else: + raise NotImplementedError( + "config_flag `%s' with type %s is not supported" + % (attr_name, type(attr_value)) + ) + + return FunctionConfigSetter + + +@oneflow_export("global_function") +def api_oneflow_function( + type: str = "predict", function_config: FunctionConfig = None, +) -> Callable[[Callable], Callable]: + r"""Creates a callable OneFlow global function from a Python function. + + For instance:: + + @oneflow.compatible.single_client.global_function(flow.FunctionConfig()) + def train(): + # your model + + Args: + function_config (FunctionConfig, optional): a `FunctionConfig` object. Defaults to FunctionConfig(). + + Returns: + Callable[[Callable], Callable]: a callable which is called to execute the compiled function + """ + if isinstance(type, FunctionConfig): + function_config = type + print( + """WARNING: flow.global_function(func_config) is deprecated. Please replace it with flow.global_function(type, func_config). 
+ """ + ) + print(traceback.format_stack()[-2]) + else: + assert type in ["train", "predict"] + if function_config is None: + function_config = FunctionConfig() + if type == "train": + function_config.function_desc.job_config_proto.mutable_train_conf() + else: + function_config.function_desc.job_config_proto.mutable_predict_conf() + api = enable_if.unique([eager_oneflow_function, lazy_oneflow_function]) + return api(function_config) + + +@enable_if.condition(hob.in_normal_mode & hob.eager_execution_enabled) +def eager_oneflow_function(function_config=FunctionConfig()): + assert isinstance(function_config, FunctionConfig) + + def Decorator(job_func): + if not hasattr(job_func, "__oneflow_function_signature__"): + job_func.__oneflow_function_signature__ = inspect.signature(job_func) + oft_util.CheckGlobalFunctionAnnotation(job_func.__oneflow_function_signature__) + sess = session_ctx.GetDefaultSession() + function_desc = _CloneFunctionDesc(function_config.function_desc, job_func) + + @functools.wraps(job_func) + def Func(*args, **kwargs): + return _RunEagerJob(sess, function_desc, *args, **kwargs) + + for x in dir(job_func): + if x.startswith("__oneflow_"): + setattr(Func, x, getattr(job_func, x)) + return Func + + return Decorator + + +@enable_if.condition( + hob.in_normal_mode & ~hob.eager_execution_enabled & ~hob.session_initialized +) +def lazy_oneflow_function(function_config=FunctionConfig()): + assert isinstance(function_config, FunctionConfig) + + def Decorator(job_func): + if not hasattr(job_func, "__oneflow_function_signature__"): + job_func.__oneflow_function_signature__ = inspect.signature(job_func) + oft_util.CheckGlobalFunctionAnnotation(job_func.__oneflow_function_signature__) + sess = session_ctx.GetDefaultSession() + + @functools.wraps(job_func) + def Func(*args, **kwargs): + return _RunLazyJob(sess, job_func, *args, **kwargs) + + sess.AddJob(_CloneFunctionDesc(function_config.function_desc, job_func)) + for x in dir(job_func): + if x.startswith("__oneflow_"): + setattr(Func, x, getattr(job_func, x)) + return Func + + return Decorator + + +def global_function_or_identity(*args, **kwargs): + if rt_mode.CurrentMode() == rt_mode.NORMAL_MODE: + return api_oneflow_function(*args, **kwargs) + else: + assert rt_mode.CurrentMode() == rt_mode.GLOBAL_MODE + identity_decorator = lambda func: func + return identity_decorator + + +def _CloneFunctionDesc(func_desc, job_func): + new_func_desc = FunctionDesc(job_func=job_func) + new_func_desc.job_config_proto.CopyFrom(func_desc.job_config_proto) + new_func_desc.function_attribute = copy.deepcopy(func_desc.function_attribute) + return new_func_desc + + +def oneflow_function_config(*field_paths): + def Decorator(func): + global _class_property2return_obj_class + for field_path in field_paths: + fields = field_path.split(".") + assert len(fields) > 0 + cls = FunctionConfig + for index, field in enumerate(fields): + assert field != "function_desc" + assert re.match("^[_\w]+[_\w\d]*$", field) + if (cls, field) not in _class_property2return_obj_class: + class_name = ".".join(["function_config"] + fields[: index + 1]) + + def Init(self, function_desc): + self.function_desc = function_desc + + config_class = type(class_name, (object,), dict(__init__=Init)) + setattr(cls, field, _MakeInnerJobConfigClassProperty(config_class)) + _class_property2return_obj_class[cls, field] = config_class + cls = _class_property2return_obj_class[cls, field] + cls.__call__ = _MakeLeafJobConfigCall(func) + return func + + return Decorator + + 
+_class_property2return_obj_class = {}
+
+
+def _MakeInnerJobConfigClassProperty(return_obj_class):
+    return property(lambda self: return_obj_class(self.function_desc))
+
+
+def _MakeLeafJobConfigCall(method):
+    return lambda self, *argv, **kwarg: method(self.function_desc, *argv, **kwarg)
+
+
+def _RunEagerJob(session, function_desc, *args):
+    return session.TryInit().EagerRun(function_desc, *args)
+
+
+def _RunLazyJob(session, job_func, *args, **kwargs):
+    return session.TryInit().LazyRun(job_func, *args, **kwargs)
+
+
+@oneflow_function_config("default_data_type")
+def set_default_data_type(func_desc, value):
+    r"""Set default data type for job
+
+    Args:
+        func_desc ([type]): job function
+        value ([type]): data type. e.g. flow.float
+    """
+    func_desc.job_config_proto.set_default_data_type(
+        data_type_cfg.DataType(
+            oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype(value)
+        )
+    )
+
+
+@oneflow_function_config("default_initializer_conf")
+def set_default_initializer_conf(func_desc, value):
+    r"""Set default initial configuration for job
+
+    Args:
+        func_desc ([type]): [description]
+        value ([type]): [description]
+    """
+    assert type(value) is dict
+    pb_util.PythonDict2CFG(
+        value, func_desc.job_config_proto.mutable_default_initializer_conf()
+    )
+
+
+@oneflow_function_config("exp_run_conf")
+def set_exp_run_conf(func_desc, value):
+    r"""Set experimental configuration for job
+
+    Args:
+        func_desc ([type]): [description]
+        value ([type]): [description]
+    """
+    assert type(value) is dict
+    pb_util.PythonDict2CFG(value, func_desc.job_config_proto.mutable_exp_run_conf())
+
+
+@oneflow_function_config("static_mem_alloc_policy_white_list.has")
+def static_mem_alloc_policy_white_list_has_policy(func_desc, policy):
+    r"""Get items from white list related to static memory allocation policy
+
+    Args:
+        func_desc ([type]): [description]
+        policy ([type]): [description]
+
+    Returns:
+        [type]: [description]
+    """
+    return getattr(
+        func_desc.job_config_proto.mutable_memory_allocation_algorithm_conf(), policy
+    )()
+
+
+@oneflow_function_config("static_mem_alloc_policy_white_list.add")
+def static_mem_alloc_policy_white_list_add_policy(func_desc, policy):
+    r"""Add item to white list related to static memory allocation policy
+
+    Args:
+        func_desc ([type]): [description]
+        policy ([type]): [description]
+    """
+    getattr(
+        func_desc.job_config_proto.mutable_memory_allocation_algorithm_conf(),
+        "set_" + policy,
+    )(True)
+
+
+@oneflow_function_config("static_mem_alloc_policy_white_list.remove")
+def static_mem_alloc_policy_white_list_remove_policy(func_desc, policy):
+    r"""Remove item of white list related to static memory allocation policy
+
+    Args:
+        func_desc ([type]): [description]
+        policy ([type]): [description]
+    """
+    getattr(
+        func_desc.job_config_proto.mutable_memory_allocation_algorithm_conf(),
+        "set_" + policy,
+    )(False)
+
+
+@oneflow_function_config("static_mem_alloc_policy_white_list.policy_mem_size_first")
+def policy_mem_size_first(func_desc):
+    r"""A static memory allocation policy called: mem_size_first
+
+    Args:
+        func_desc ([type]): [description]
+
+    Returns:
+        [type]: [description]
+    """
+    return "use_mem_size_first_algo"
+
+
+@oneflow_function_config(
+    "static_mem_alloc_policy_white_list.policy_mutual_exclusion_first"
+)
+def policy_mutual_exclusion_first(func_desc):
+    r"""A static memory allocation policy called: mutual_exclusion_first
+
+    Args:
+        func_desc ([type]): [description]
+
+    Returns:
+        [type]: [description]
+    """
+    return "use_mutual_exclusion_first_algo"
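Editor's note: `set_default_initializer_conf` and `set_exp_run_conf` above take plain Python dicts and convert them with `pb_util.PythonDict2CFG`; the dict layout must mirror the underlying proto message. A hedged example (the initializer schema shown is an assumption, not confirmed by this patch):

```python
func_config = flow.FunctionConfig()
# Assumed initializer schema; keys must match the InitializerConf proto.
func_config.default_initializer_conf({"constant_conf": {"value": 0.0}})
```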
+@oneflow_function_config("static_mem_alloc_policy_white_list.policy_time_line") +def policy_time_line(func_desc): + r"""A static memory allocation policy called: time_line + + Args: + func_desc ([type]): [description] + + Returns: + [type]: [description] + """ + return "use_time_line_algo" + + +@oneflow_function_config("static_mem_alloc_algo_white_list.show") +def show_static_mem_alloc_algo_white_list(func_desc): + r"""Show configuration of static memory allocation policy, + including: "use_mem_size_first_algo", "use_mutual_exclusion_first_algo", "use_time_line_algo" + + Args: + func_desc ([type]): [description] + + Returns: + [type]: [description] + """ + return [ + "use_mem_size_first_algo", + "use_mutual_exclusion_first_algo", + "use_time_line_algo", + ] + + +@oneflow_function_config("enable_cudnn") +def set_enable_cudnn(func_desc, value=True): + r"""Whether use cudnn to accelerate job or not. + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + func_desc.job_config_proto.set_enable_cudnn(value) + + +@oneflow_function_config("cudnn_buf_limit_mbyte") +def set_cudnn_buf_limit_mbyte(func_desc, value): + r"""Set cudnn buffer limit, e.g. 1024mb + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + func_desc.job_config_proto.set_cudnn_buf_limit_mbyte(value) + + +@oneflow_function_config("cudnn_conv_force_fwd_algo") +def set_cudnn_conv_force_fwd_algo(func_desc, value): + r"""Set value to cudnn conv_force_forward algorithm + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + func_desc.job_config_proto.set_cudnn_conv_force_fwd_algo(value) + + +@oneflow_function_config("cudnn_conv_force_bwd_data_algo") +def set_cudnn_conv_force_bwd_data_algo(func_desc, value): + r"""Set value to cudnn conv_force_backward_data algorithm + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + func_desc.job_config_proto.set_cudnn_conv_force_bwd_data_algo(value) + + +@oneflow_function_config("cudnn_conv_force_bwd_filter_algo") +def set_cudnn_conv_force_bwd_filter_algo(func_desc, value): + r"""Set value to cudnn conv_force_backward_filter algorithm + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + func_desc.job_config_proto.set_cudnn_conv_force_bwd_filter_algo(value) + + +@oneflow_function_config("cudnn_conv_heuristic_search_algo") +def set_cudnn_conv_heuristic_search_algo(func_desc, value): + r"""Set value to cudnn conv_heuristic_search algorithm + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + func_desc.job_config_proto.set_cudnn_conv_heuristic_search_algo(value) + + +@oneflow_function_config("enable_cudnn_fused_normalization_add_relu") +def set_enable_cudnn_fused_normalization_add_relu(func_desc, value): + r"""Whether enable cudnn_fused_normalization_add_relu. + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + func_desc.job_config_proto.set_enable_cudnn_fused_normalization_add_relu(value) + + +@oneflow_function_config("enable_fuse_add_to_output") +def set_enable_fuse_add_to_output(func_desc, value): + r"""Whether enable fuse_add_to_output. + If enabled, try to fuse a binary element-wise add to one of the predecessors to improve performance. 
+ + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + func_desc.job_config_proto.set_enable_fuse_add_to_output(value) + + +@oneflow_function_config("enable_fuse_cast_scale") +def set_enable_fuse_cast_scale(func_desc, value=True): + r"""Whether enable fuse_cast_scale. + If enabled, try to fuse cast and scalar_mul_by_tensor to improve performance. + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + func_desc.job_config_proto.set_enable_fuse_cast_scale(value) + + +@oneflow_function_config("cudnn_conv_use_deterministic_algo_only") +def set_cudnn_conv_use_deterministic_algo_only(func_desc, value): + r"""Set value to cudnn conv_use_deterministic_only algorithm + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + func_desc.job_config_proto.set_cudnn_conv_use_deterministic_algo_only(value) + + +@oneflow_function_config("enable_reused_mem") +def set_enable_reused_mem(func_desc, value=True): + r"""Whether enable reuse memory or not + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + func_desc.job_config_proto.set_enable_reused_mem(value) + + +@oneflow_function_config("enable_inplace") +def set_enable_inplace(func_desc, value=True): + r"""Whether enable inplace or not + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + func_desc.job_config_proto.set_enable_inplace(value) + + +@oneflow_function_config("enable_inplace_in_reduce_struct") +def set_enable_inplace_in_reduce_struct(func_desc, value=True): + print( + "'enable_inplace_in_reduce_struct' has been deprecated, has no effect and will be removed in the future." + ) + + +@oneflow_function_config("enable_nccl") +def set_enable_nccl(func_desc, value=True): + print( + "'enable_nccl' has been deprecated, has no effect and will be removed in the future." + ) + + +@oneflow_function_config("use_nccl_inter_node_communication") +def set_use_nccl_inter_node_communication(func_desc, value=True): + print( + "'use_nccl_inter_node_communication' has been deprecated, has no effect and will be removed in the future." + ) + + +@oneflow_function_config("use_boxing_v2") +def set_use_boxing_v2(func_desc, value=True): + print( + "'use_boxing_v2' has been deprecated, has no effect and will be removed in the future." + ) + + +@oneflow_function_config("do_parallel_cast_before_widening_type_cast") +def set_do_parallel_cast_before_widening_type_cast(func_desc, value=True): + func_desc.job_config_proto.set_do_parallel_cast_before_widening_type_cast(value) + + +@oneflow_function_config("enable_all_reduce_group") +def set_enable_all_reduce_group(func_desc, value=True): + print( + "'enable_all_reduce_group' has been deprecated, has no effect and will be removed in the future." + ) + + +@oneflow_function_config("all_reduce_group_num") +def set_all_reduce_group_num(func_desc, value): + print( + "'all_reduce_group_num' has been deprecated, has no effect and will be removed in the future." + ) + + +@oneflow_function_config("all_reduce_lazy_ratio") +def set_all_reduce_lazy_ratio(func_desc, value): + print( + "'all_reduce_lazy_ratio' has been deprecated, has no effect and will be removed in the future." + ) + + +@oneflow_function_config("all_reduce_group_min_mbyte") +def set_all_reduce_group_min_mbyte(func_desc, value): + print( + "'all_reduce_group_min_mbyte' has been deprecated, has no effect and will be removed in the future." 
+ ) + + +@oneflow_function_config("all_reduce_group_size_warmup") +def set_all_reduce_group_size_warmup(func_desc, value): + print( + "'all_reduce_group_size_warmup' has been deprecated, has no effect and will be removed in the future." + ) + + +@oneflow_function_config("all_reduce_fp16") +def set_all_reduce_fp16(func_desc, value=True): + print( + "'all_reduce_fp16' has been deprecated, has no effect and will be removed in the future." + ) + + +@oneflow_function_config( + "optimizer_placement_optimization_mode", + "train.optimizer_placement_optimization_mode", +) +def set_optimizer_placement_optimization_mode(func_desc, mode): + r"""Enable optimizer_placement_optimization with mode 'mode' + + Args: + func_desc ([type]): [description] + mode (str): [description]. + """ + assert mode in ["non_distributed", "distributed_split"] + func_desc.job_config_proto.set_optimizer_placement_optimization_mode(mode) + + +@oneflow_function_config( + "optimizer_placement_optimization_threshold", + "train.optimizer_placement_optimization_threshold", +) +def set_optimizer_placement_optimization_threshold(func_desc, value): + func_desc.job_config_proto.set_optimizer_placement_optimization_threshold(value) + + +@oneflow_function_config("enable_non_distributed_optimizer") +def set_enable_non_distributed_optimizer(func_desc, value=True): + r"""Whether enable non_distributed optimizer or not + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + if value: + set_optimizer_placement_optimization_mode(func_desc, "non_distributed") + + +@oneflow_function_config("disable_all_reduce_sequence") +def set_disable_all_reduce_sequence(func_desc, value=True): + print( + "'disable_all_reduce_sequence' has been deprecated, has no effect and will be removed in the future." + ) + + +@oneflow_function_config("prune_parallel_cast_ops") +def set_prune_parallel_cast_ops(func_desc, value=True): + r"""Whether prune parallel cast operations or not. + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + func_desc.job_config_proto.set_prune_parallel_cast_ops(value) + + +@oneflow_function_config("prune_cast_to_static_shape_ops") +def set_prune_cast_to_static_shape_ops(func_desc, value=True): + r"""Whether or not set prune_cast to static shape opretions + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + func_desc.job_config_proto.set_prune_cast_to_static_shape_ops(value) + + +@oneflow_function_config("prune_amp_white_identity_ops") +def set_prune_amp_white_identity_ops(func_desc, value=True): + r"""Whether prune amp_white_identity operations or not. + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + func_desc.job_config_proto.set_prune_amp_white_identity_ops(value) + + +@oneflow_function_config("non_distributed_optimizer_group_size_mbyte") +def set_non_distributed_optimizer_group_size_mbyte(func_desc, value): + print( + "'non_distributed_optimizer_group_size_mbyte' has been deprecated, has no effect and will be removed in the future." + ) + + +@oneflow_function_config( + "enable_true_half_config_when_conv", "cudnn_conv_enable_true_half" +) +def set_cudnn_conv_enable_true_half(func_desc, value=True): + r"""Whether use true_half mode or not during convolution calculation process while using cudnn. + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. 
+ """ + func_desc.job_config_proto.set_cudnn_conv_enable_pseudo_half(not value) + + +@oneflow_function_config( + "cudnn_conv_enable_pseudo_half", "enable_cudnn_conv_pseudo_half" +) +def set_cudnn_conv_enable_pseudo_half(func_desc, value): + r"""Whether enable pseudo_half mode or not during convolution calculation process while using cudnn + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + func_desc.job_config_proto.set_cudnn_conv_enable_pseudo_half(value) + + +@oneflow_function_config("enable_float_compute_for_half_gemm") +def set_enable_float_compute_for_half_gemm(func_desc, value=True): + r"""Whether enable float_compute or not , + if True, means that the type of intermedia value is float when compute half gemm. + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + print( + """WARNING: enable_float_compute_for_half_gemm has been deprecated, because we always use float compute for half gemm. Please remove it. + """ + ) + print(traceback.format_stack()[-3]) + + +@oneflow_function_config("enable_quantization_aware_training") +@oneflow_function_config("enable_qat") +def set_enable_quantization_aware_training(func_desc, value=True): + r"""If true, then job will use quantization aware training + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + func_desc.job_config_proto.set_enable_quantization_aware_training(value) + + +@oneflow_function_config("qat.per_channel_weight_quantization") +def set_qat_per_channel(func_desc, value=True): + func_desc.job_config_proto.mutable_qat_config().set_per_channel_weight_quantization( + value + ) + + +@oneflow_function_config("qat.symmetric") +def set_qat_symmetric(func_desc, value=True): + func_desc.job_config_proto.mutable_qat_config().set_symmetric(value) + + +@oneflow_function_config("qat.moving_min_max_momentum") +def set_qat_moving_min_max_momentum(func_desc, value: float): + func_desc.job_config_proto.mutable_qat_config().set_moving_min_max_momentum(value) + + +@oneflow_function_config("qat.moving_min_max_stop_update_after_iters") +def set_qat_moving_min_max_momentum(func_desc, value: float): + func_desc.job_config_proto.mutable_qat_config().set_moving_min_max_stop_update_after_iters( + value + ) + + +@oneflow_function_config("qat.target_backend") +def set_qat_symmetric(func_desc, value: str): + func_desc.job_config_proto.mutable_qat_config().set_target_backend(value) + + +@oneflow_function_config("enable_auto_mixed_precision") +def set_enable_auto_mixed_precision(func_desc, value=True): + r"""If true, then job will use mixed precision mode, it means use both float16 and float32 during model training. + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + func_desc.job_config_proto.set_enable_auto_mixed_precision(value) + + +@oneflow_function_config("enable_keep_header_only") +def set_enable_keep_header_only(func_desc, value=True): + r"""deprecated api. + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + print("Sorry! 
+
+
+@oneflow_function_config("enable_keep_header_only")
+def set_enable_keep_header_only(func_desc, value=True):
+    r"""deprecated api.
+
+    Args:
+        func_desc ([type]): [description]
+        value (bool, optional): [description]. Defaults to True.
+    """
+    print("Sorry! enable_keep_header_only is deprecated and it doesn't work.\n")
+
+
+@oneflow_function_config("concurrency_width")
+def set_concurrency_width(func_desc, value):
+    r"""Set up concurrency width
+
+    Args:
+        func_desc ([type]): [description]
+        value ([type]): [description]
+    """
+    func_desc.job_config_proto.set_concurrency_width(value)
+
+
+@oneflow_function_config("train.model_update_conf")
+def set_model_update_conf(func_desc, value):
+    r"""Set up optimizer and update method of learning rate for job
+
+    Args:
+        func_desc ([type]): [description]
+        value ([type]): [description]
+    """
+    print(
+        """WARNING: func_config.train.* has been deprecated. Please replace it by the new optimizer api.
+        """
+    )
+    print(traceback.format_stack()[-3])
+    assert type(value) is dict
+    pb_msg = func_desc.job_config_proto.mutable_train_conf().mutable_model_update_conf()
+    pb_util.PythonDict2CFG(value, pb_msg)
+
+
+@oneflow_function_config("indexed_slices_optimizer_conf")
+def set_indexed_slices_optimizer_conf(func_desc, value):
+    r"""Set indexed slices configuration of optimizer
+
+    Args:
+        func_desc ([type]): [description]
+        value ([type]): [description]
+    """
+    assert type(value) is dict
+    pb_msg = func_desc.job_config_proto.mutable_indexed_slices_optimizer_conf()
+    pb_util.PythonDict2CFG(value, pb_msg)
+
+
+@oneflow_function_config("enable_fuse_model_update_ops")
+def set_enable_fuse_model_update_ops(func_desc, value=True):
+    r"""Whether enable fuse_model_update_ops.
+    If enabled, try to fuse cast + scale + l1_l2_regularize_gradient + model_update to one op to improve performance.
+
+    Args:
+        func_desc ([type]): [description]
+        value ([type]): [description]
+    """
+    func_desc.job_config_proto.set_enable_fuse_model_update_ops(value)
+
+
+@oneflow_function_config("enable_gradients_stats_aggregation")
+def set_enable_gradients_stats_aggregation(func_desc, value=True):
+    r"""Whether enable gradients_stats_aggregation.
+    If enabled, gradients stats ops (norm, finite, ...) will be aggregated.
+
+    Args:
+        func_desc ([type]): [description]
+        value ([type]): [description]
+    """
+    func_desc.job_config_proto.set_enable_gradients_stats_aggregation(value)
+
+
+@oneflow_function_config("train.loss_scale_factor")
+def set_loss_scale_factor(func_desc, value):
+    r"""Set scale factor for loss
+
+    Args:
+        func_desc ([type]): [description]
+        value ([type]): [description]
+    """
+    print(
+        """WARNING: func_config.train.* has been deprecated. Please replace it by the new optimizer api.
+        """
+    )
+    print(traceback.format_stack()[-3])
+    func_desc.job_config_proto.mutable_train_conf().set_loss_scale_factor(value)
+
+
+@oneflow_function_config("train.primary_lr")
+def set_primary_lr(func_desc, value):
+    r"""Set the primary learning rate for job
+
+    Args:
+        func_desc ([type]): [description]
+        value ([type]): [description]
+    """
+    print(
+        """WARNING: func_config.train.* has been deprecated. Please replace it by the new optimizer api.
+        """
+    )
+    print(traceback.format_stack()[-3])
+    func_desc.job_config_proto.mutable_train_conf().set_primary_lr(value)
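Editor's note: the deprecated `train.*` setters above point users at the newer optimizer API. A hedged sketch of the replacement style; the `flow.optimizer` calls follow single-client OneFlow conventions and are an assumption here, since that module is outside this patch, and `build_model` is a user-defined stand-in:

```python
@flow.global_function(type="train")
def train_job(images: flow.typing.Numpy.Placeholder((32, 1, 28, 28))):
    loss = build_model(images)  # hypothetical user model
    # Replaces func_config.train.primary_lr(0.1) / train.model_update_conf(...):
    lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.1])
    flow.optimizer.SGD(lr_scheduler, momentum=0.9).minimize(loss)
    return loss
```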
+ """ + ) + print(traceback.format_stack()[-3]) + func_desc.job_config_proto.mutable_train_conf().set_secondary_lr(value) + + +@oneflow_function_config("train.num_gradient_accumulation_steps") +def set_num_gradient_accumulation_steps(func_desc, value): + func_desc.job_config_proto.set_num_gradient_accumulation_steps(value) + + +@oneflow_function_config("default_placement_scope") +def set_default_placement(func_desc, value): + r"""Set the default placement for job + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + assert isinstance(value, placement_ctx.EmptyPlacementScope) + func_desc.function_attribute.default_placement_scope = value + + +@oneflow_function_config("use_xla_jit") +def set_use_xla_jit(func_desc, value=True): + r"""Whether use xla or not + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + func_desc.job_config_proto.mutable_xrt_config().set_use_xla_jit(value) + + +@oneflow_function_config("use_tensorrt") +def set_use_tensorrt(func_desc, value=True): + r"""Whether use tensorrt or not + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + func_desc.job_config_proto.mutable_xrt_config().set_use_tensorrt(value) + + +@oneflow_function_config("tensorrt.use_fp16") +def set_tensorrt_use_fp16(func_desc, value=True): + r"""Whether use tensorrt fp16 or not + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + set_use_tensorrt(func_desc, True) + func_desc.job_config_proto.mutable_xrt_config().mutable_tensorrt_config().set_use_fp16( + value + ) + + +@oneflow_function_config("tensorrt.use_int8") +def set_tensorrt_use_int8(func_desc, value=True): + r"""Whether use tensorrt int8 mode or not + + Args: + func_desc ([type]): [description] + value (bool, optional): [description]. Defaults to True. + """ + set_use_tensorrt(func_desc, True) + func_desc.job_config_proto.mutable_xrt_config().mutable_tensorrt_config().set_use_int8( + value + ) + + +@oneflow_function_config("tensorrt.int8_calibration") +def set_tensorrt_int8_calibration(func_desc, value): + r"""Set up calibration of tensorrt int8 + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + assert func_desc.job_config_proto.xrt_config().tensorrt_config().use_int8() + func_desc.job_config_proto.mutable_xrt_config().mutable_tensorrt_config().set_int8_calibration( + value + ) + + +@oneflow_function_config("default_logical_view") +def set_default_distribute_strategy(func_desc, value): + r"""Set up default distribute strategy for job + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + assert isinstance(value, distribute_ctx.DistributeStrategy) + func_desc.function_attribute.default_distribute_strategy = value + + +@oneflow_function_config("allow_cpu_return_op") +def allow_cpu_return_op(func_desc, value): + r"""Whether allow operaions returned from cpu or not + + Args: + func_desc ([type]): [description] + value ([type]): [description] + """ + func_desc.function_attribute.allow_cpu_return_op = value + + +@oneflow_function_config("default_distribute_strategy") +@oneflow_deprecate() +def deprecated_set_default_distribute_strategy(*args, **kwargs): + print( + "WARNING:", + "function_config.default_distribute_strategy", + "has been deprecated. 
Please use {} instead.".format( + "function_config.default_logical_view" + ), + ) + print(traceback.format_stack()[-3], file=sys.stderr) + set_default_distribute_strategy(*args, **kwargs) diff --git a/oneflow/compatible_single_client_python/framework/functional.py b/oneflow/compatible_single_client_python/framework/functional.py new file mode 100644 index 0000000000000000000000000000000000000000..ee25d3791d62cc28d135669dfb2d17698dbf75d3 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/functional.py @@ -0,0 +1,78 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal + + +def RecursveDetermine(arg): + if isinstance(arg, flow.Tensor): + if not arg.is_determined: + arg.determine() + return arg._local_or_consistent_tensor + elif isinstance(arg, list) or isinstance(arg, tuple): + arg = list(arg) + for i in range(len(arg)): + arg[i] = RecursveDetermine(arg[i]) + return arg + elif isinstance(arg, dict): + for k, v in arg.items(): + arg[k] = RecursveDetermine(v) + else: + return arg + + +class Function: + def __init__(self, func_name, handle): + self.func_name = func_name + self.handle = handle + + def __call__(self, *args, **kwargs): + args = list(args) + for i in range(len(args)): + args[i] = RecursveDetermine(args[i]) + for k, v in kwargs.items(): + kwargs[k] = RecursveDetermine(v) + return self.handle(*args, **kwargs) + + +def RegisterFunctionalApis(): + import inspect + from oneflow.compatible.single_client import F + from oneflow.compatible.single_client.experimental import F as expr_F + + for s in dir(oneflow._oneflow_internal.F): + f = getattr(oneflow._oneflow_internal.F, s) + if inspect.isbuiltin(f): + func_name = s + if s in _function_name_aliases: + func_name = _function_name_aliases[s] + setattr( + F, func_name, Function(func_name, f), + ) + setattr( + expr_F, func_name, Function(func_name, f), + ) + setattr(F, s, Function(func_name, f)) + setattr( + expr_F, s, Function(func_name, f), + ) + + del inspect + + +_function_name_aliases = { + "add_scalar": "scalar_add", +} diff --git a/oneflow/compatible_single_client_python/framework/generator.py b/oneflow/compatible_single_client_python/framework/generator.py new file mode 100644 index 0000000000000000000000000000000000000000..945525fe8deab5e1f95155de0ba815c981ecb874 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/generator.py @@ -0,0 +1,38 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + + +@oneflow_export("Generator") +def create_generator(device=None): + if device is None: + device = "auto" + return oneflow._oneflow_internal.create_generator(device) + + +@oneflow_export("default_generator") +def default_generator(device=None): + if device is None: + device = "auto" + return oneflow._oneflow_internal.default_generator(device) + + +@oneflow_export("manual_seed") +def manual_seed(seed): + oneflow._oneflow_internal.manual_seed(seed) diff --git a/oneflow/compatible_single_client_python/framework/hob.py b/oneflow/compatible_single_client_python/framework/hob.py new file mode 100644 index 0000000000000000000000000000000000000000..5b312a81e12c70dcd8b9454b5459f68942ae432c --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/hob.py @@ -0,0 +1,87 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible_single_client_python.framework import runtime_mode as rt_mode +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.lib.core.high_order_bool import ( + bool_functor, +) +import oneflow._oneflow_internal + + +@bool_functor("Current mode is %s" % rt_mode.NORMAL_MODE) +def in_normal_mode(ctx): + return rt_mode.CurrentMode() == rt_mode.NORMAL_MODE + + +@bool_functor("Current mode is %s" % rt_mode.GLOBAL_MODE) +def in_global_mode(ctx): + return rt_mode.CurrentMode() == rt_mode.GLOBAL_MODE + + +@bool_functor("Current mode is %s" % rt_mode.DEVICE_MODE) +def in_device_mode(ctx): + return rt_mode.CurrentMode() == rt_mode.DEVICE_MODE + + +@bool_functor("Environment initialized") +def env_initialized(ctx): + assert in_normal_mode(ctx) + return oneflow._oneflow_internal.IsEnvInited() + + +@bool_functor("Any global function defined") +def any_global_function_defined(ctx): + assert in_normal_mode(ctx) + return session_ctx.GetDefaultSession().AnyGlobalFunctionDefined() + + +@bool_functor("Eager execution enabled") +def eager_execution_enabled(ctx): + return oneflow._oneflow_internal.EagerExecutionEnabled() + + +@bool_functor("Session initialized") +def session_initialized(ctx): + assert in_normal_mode(ctx) + return session_ctx.GetDefaultSession().is_running + + +@bool_functor("Current global function is trainable") +def is_trainable(ctx): + assert in_global_mode(ctx) + if oneflow._oneflow_internal.EagerExecutionEnabled(): + return session_ctx.GetDefaultSession().CurrentEagerGlobalFunctionDesc() + else: + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + return session_ctx.GetDefaultSession().GetFunctionDesc(job_name) + + 
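Editor's note: `hob` predicates are `bool_functor` objects, so they compose with `&`, `|` and `~` into lazily-evaluated conditions; this is the contract every `@enable_if.condition(...)` guard in this patch relies on. Sketch:

```python
from oneflow.compatible_single_client_python.framework import hob
from oneflow.compatible_single_client_python.lib.core import enable_if

# Built once at import time, evaluated each time the guarded API is dispatched:
@enable_if.condition(hob.in_normal_mode & ~hob.env_initialized)
def only_before_env_init():
    ...
```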
+@bool_functor("Current machine is master") +def is_current_machine_master(ctx): + return oneflow._oneflow_internal.CurrentMachineId() == 0 + + +@bool_functor("Consistent view enabled") +def consistent_view_enabled(ctx): + return flow.scope.consistent_view_enabled() + + +@bool_functor("Mirrored view enabled") +def mirrored_view_enabled(ctx): + return flow.scope.mirrored_view_enabled() diff --git a/oneflow/compatible_single_client_python/framework/id_util.py b/oneflow/compatible_single_client_python/framework/id_util.py new file mode 100644 index 0000000000000000000000000000000000000000..87082cb3d0e4a1dbfb42ab69c04de0e0c338e2ca --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/id_util.py @@ -0,0 +1,24 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal + + +@oneflow_export("util.unique_str") +def UniqueStr(prefix): + return oneflow._oneflow_internal.UniqueStr(prefix) diff --git a/oneflow/compatible_single_client_python/framework/input_blob_def.py b/oneflow/compatible_single_client_python/framework/input_blob_def.py new file mode 100644 index 0000000000000000000000000000000000000000..8137b9a4c1f5e00a2b421b604d63f41e59c7c935 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/input_blob_def.py @@ -0,0 +1,308 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +import sys +from functools import reduce +from typing import Any, Optional, Sequence, Union + +import numpy as np + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.operator import interface_blob_conf_pb2 as inter_face_blob_conf_util +from oneflow.core.job import sbp_parallel_pb2 as sbp_parallel_pb +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import ( + compile_context as compile_context, +) +from oneflow.compatible_single_client_python.framework import ( + distribute as distribute_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + placement_context as placement_ctx, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow._oneflow_internal.oneflow.core.register import logical_blob_id as lbi_util +import oneflow._oneflow_internal +from functools import reduce +import traceback + + +class ArgBlobDef(object): + def __init__( + self, + shape, + dtype, + name=None, + distribute=oneflow._oneflow_internal.distribute.auto(), + ): + lbi = lbi_util.LogicalBlobId() + if name is None: + name = id_util.UniqueStr("Input_") + lbi.set_op_name(name) + lbi.set_blob_name("out") + self.lbi_ = lbi + assert type(shape) is tuple + for dim in shape: + assert type(dim) is int + assert dim > 0 + self.shape_ = shape + self.dtype_ = dtype + self.distribute_ = distribute + + @property + def lbi(self): + return self.lbi_ + + @property + def op_name(self): + return self.lbi_.op_name() + + @property + def blob_name(self): + return self.lbi_.blob_name() + + @property + def unique_name(self): + return self.op_name + "/" + self.blob_name + self._Distribute2Str() + + @property + def shape(self): + return self.shape_ + + @property + def dtype(self): + return self.dtype_ + + @property + def is_dynamic(self): + raise NotImplementedError + + def with_distribute(self, distribute): + return type(self)(shape=self.shape_, dtype=self.dtype_, name=self.op_name,) + + def Clone(self, op_name=None): + return type(self)(shape=self.shape_, dtype=self.dtype_, name=op_name,) + + def AddAndInferOp(self, op_conf): + raise NotImplementedError + + def EagerAddAndInferOp(self, op_conf): + raise NotImplementedError + + def CheckAndAsyncPush(self, session, arg_ndarray): + self._CheckNdarray(arg_ndarray) + self._AsyncPush(session, arg_ndarray) + + def _CheckNdarray(self, ndarray): + raise NotImplementedError + + def _AsyncPush(self, session, arg_ndarray): + raise NotImplementedError + + def ToInterfaceBlobConf(self): + interface_blob_conf = inter_face_blob_conf_util.InterfaceBlobConf() + interface_blob_conf.shape.dim.extend(self.shape_) + interface_blob_conf.data_type = oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype( + self.dtype_ + ) + interface_blob_conf.is_dynamic = self.is_dynamic + # NOTE(chengcheng): rm batch_axis, so set split_axis always = 0 for safe. 
will support + # set sbp in future, or will delete in multi-client + sbp_parallel = sbp_parallel_pb.SbpParallel() + sbp_parallel.split_parallel.axis = 0 + interface_blob_conf.parallel_distribution.sbp_parallel.extend([sbp_parallel]) + return interface_blob_conf + + def _Distribute2Str(self): + if ( + type(self.distribute_) + is oneflow._oneflow_internal.distribute.AutoDistribute + ): + return "" + elif ( + type(self.distribute_) + is oneflow._oneflow_internal.distribute.SplitDistribute + ): + return ":S" + str(self.distribute_.axis) + elif ( + type(self.distribute_) + is oneflow._oneflow_internal.distribute.BroadcastDistribute + ): + return ":B" + else: + raise NotImplementedError + + +class FixedTensorDef(ArgBlobDef): + def __init__( + self, + shape: Sequence[int], + dtype: flow.dtype = flow.float, + name: Optional[str] = None, + ) -> None: + ArgBlobDef.__init__( + self, shape, dtype=dtype, name=name, + ) + + @property + def is_dynamic(self) -> bool: + return False + + def AddAndInferOp(self, op_conf: op_conf_util.OperatorConf) -> Any: + return compile_context.CurJobAddConsistentOp(op_conf) + + def EagerAddAndInferOp(self, op_conf: op_conf_util.OperatorConf) -> Any: + parallel_symbol = flow.current_scope().device_parallel_desc_symbol + if ( + parallel_symbol.device_tag == "gpu" + and list(dict(parallel_symbol.machine_id2device_id_list).keys()) == [0] + and parallel_symbol.parallel_num == 1 + ): + device_tag = "gpu" + device_ids = "@0:%s" % (parallel_symbol.machine_id2device_id_list[0][0]) + else: + device_tag = "cpu" + device_ids = "@0:0" + with flow.scope.placement(device_tag, device_ids): + return compile_context.CurJobAddConsistentOp(op_conf) + + def _CheckNdarray(self, ndarray: np.ndarray) -> None: + assert isinstance(ndarray, np.ndarray) + assert ndarray.shape == self.shape + + def _AsyncPush(self, session: object, arg_ndarray: np.ndarray) -> None: + session.AsyncPush(self.op_name, _MakePushNdarrayCallback(arg_ndarray)) + + +class MirroredTensorDef(ArgBlobDef): + def __init__( + self, + shape: Sequence[int], + dtype: flow.dtype = flow.float, + name: Optional[str] = None, + ) -> None: + assert type(shape) is tuple + ArgBlobDef.__init__(self, shape, dtype=dtype, name=name) + self.sub_consistent_blob_list_ = [] + + @property + def is_dynamic(self) -> bool: + return True + + def AddAndInferOp(self, op_conf: op_conf_util.OperatorConf) -> None: + _AddAndInferMirroredOp( + self.unique_name, op_conf, self.sub_consistent_blob_list_ + ) + + def EagerAddAndInferOp(self, op_conf: op_conf_util.OperatorConf) -> Any: + return compile_context.CurJobAddMirroredOp(op_conf) + + def _CheckNdarray(self, ndarray_list: Sequence[np.ndarray]) -> None: + assert isinstance(ndarray_list, (list, tuple)) + assert len(self.sub_consistent_blob_list_) == len(ndarray_list) + + def GetElemCnt(shape): + return reduce(lambda x, y: x * y, shape, 1) + + for consistent_blob, ndarray in zip( + self.sub_consistent_blob_list_, ndarray_list + ): + assert type(ndarray) is np.ndarray + assert len(ndarray.shape) == len(self.shape) + assert GetElemCnt(ndarray.shape) <= GetElemCnt(self.shape) + + def _AsyncPush(self, session: object, ndarray_list: Sequence[np.ndarray]) -> None: + for i in range(len(ndarray_list)): + sub_blob = self.sub_consistent_blob_list_[i] + session.AsyncPush( + sub_blob.op_name, _MakePushNdarrayCallback(ndarray_list[i]) + ) + + +def _AddAndInferMirroredOp(mirrored_lbn, op_conf, sub_consistent_blob_list): + compile_context.CurJobAddMirroredOp(op_conf) + job_name = 
oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + num_sub_lbi = c_api_util.JobBuildAndInferCtx_MirroredBlobGetNumSubLbi( + job_name, mirrored_lbn + ) + for i in range(num_sub_lbi): + sub_lbi = c_api_util.JobBuildAndInferCtx_MirroredBlobGetSubLbi( + job_name, mirrored_lbn, i + ) + lbi = lbi_util.LogicalBlobId() + lbi.set_op_name(sub_lbi.op_name) + lbi.set_blob_name(sub_lbi.blob_name) + sub_consistent_blob_list.append( + oneflow._oneflow_internal.ConsistentBlob( + lbi, "", oneflow._oneflow_internal.distribute.auto() + ) + ) + + +def _MakePushNdarrayCallback(ndarray): + copied = np.copy(ndarray, order="C") + + def Copy(ofblob): + capacity = reduce(lambda x, y: x * y, ofblob.static_shape, 1) + elem_cnt = reduce(lambda x, y: x * y, copied.shape, 1) + assert elem_cnt <= capacity, "%s v.s. %s" % (copied.shape, ofblob.static_shape) + ofblob.CopyFromNdarray(copied) + + return Copy + + +@oneflow_export("FixedTensorDef") +class DeprecatedFixedTensorDef(FixedTensorDef): + def __init__(self, *args, **kwargs): + running_script = traceback.format_stack()[-2].split(",")[0].split(" ")[3] + if not running_script.endswith('input_blob_def.py"'): + print( + "WARNING: oneflow.compatible.single_client.FixedTensorDef has been deprecated. " + "Please use oneflow.compatible.single_client.typing.Numpy.Placeholder instead." + ) + print( + """For instance: + - def job_func(images=oneflow.compatible.single_client.FixedTensorDef((32, 1, 28, 28), dtype=flow.float)) + + def job_func(images:oneflow.compatible.single_client.typing.Numpy.Placeholder((32, 1, 28, 28), dtype=flow.float))""" + ) + print(traceback.format_stack()[-2]) + + super().__init__(*args, **kwargs) + + +@oneflow_export("MirroredTensorDef") +class DeprecatedMirroredTensorDef(MirroredTensorDef): + def __init__(self, *args, **kwargs): + running_script = traceback.format_stack()[-2].split(",")[0].split(" ")[3] + if not running_script.endswith('input_blob_def.py"'): + print( + "WARNING: oneflow.compatible.single_client.MirroredTensorDef has been deprecated. " + "Please use oneflow.compatible.single_client.typing.ListNumpy.Placeholder instead." + ) + print( + """For instance: + - def job_func(images=oneflow.compatible.single_client.MirroredTensorDef((32, 1, 28, 28), dtype=flow.float)) + + def job_func(images:oneflow.compatible.single_client.typing.ListNumpy.Placeholder((32, 1, 28, 28), dtype=flow.float))""" + ) + print(traceback.format_stack()[-2]) + + super().__init__(*args, **kwargs) diff --git a/oneflow/compatible_single_client_python/framework/interpret_util.py b/oneflow/compatible_single_client_python/framework/interpret_util.py new file mode 100644 index 0000000000000000000000000000000000000000..e9e7632e5830ebabcdc2d39986275baa8a505dc6 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/interpret_util.py @@ -0,0 +1,85 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
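# Illustrative sketch (not part of the patch above; assumes a working
# single-client build): the migration the deprecation warnings recommend,
# written out as a runnable job. "job_func" and the (32, 1, 28, 28) shape are
# taken from the warning text itself; the relu body is an arbitrary stand-in.
import numpy as np
from oneflow.compatible import single_client as flow
from oneflow.compatible.single_client import typing as tp

@flow.global_function(type="predict")
def job_func(images: tp.Numpy.Placeholder((32, 1, 28, 28), dtype=flow.float)) -> tp.Numpy:
    return flow.math.relu(images)

out = job_func(np.zeros((32, 1, 28, 28), dtype=np.float32))  # returns a numpy array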
+""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.framework import ( + compile_context as compile_ctx, +) +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.eager import op_executor as op_executor +from oneflow.compatible_single_client_python.eager import gradient_util as gradient_util +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal + +blob_register = oneflow._oneflow_internal.GetDefaultBlobRegister() + + +def Forward(op_conf, scope_symbol=None): + if scope_symbol is None: + scope_symbol = flow.current_scope() + func = enable_if.unique([LazyInfer, EagerForward]) + return func(compile_ctx.CurJobAddOp, op_conf, scope_symbol) + + +def OpKernelForward(op_conf, opkernel_object): + func = enable_if.unique([LazyOpKernelInfer, EagerOpKernelForward]) + return func(compile_ctx.CurJobAddOp, op_conf, opkernel_object) + + +def ConsistentForward(op_conf, scope_symbol=None): + if scope_symbol is None: + scope_symbol = flow.current_scope() + func = enable_if.unique([LazyInfer, EagerForward]) + return func(compile_ctx.CurJobAddConsistentOp, op_conf, scope_symbol) + + +def OpKernelConsistentForward(op_conf, opkernel_object): + func = enable_if.unique([LazyOpKernelInfer, EagerOpKernelForward]) + return func(compile_ctx.CurJobAddConsistentOp, op_conf, opkernel_object) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def LazyInfer(add_and_infer, op_conf, scope_symbol=None): + return add_and_infer(op_conf, scope_symbol) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def LazyOpKernelInfer(add_and_infer, op_conf, opkernel_object): + return add_and_infer(op_conf, opkernel_object.scope_symbol) + + +@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled) +def EagerForward(add_and_infer, op_conf, scope_symbol=None): + op_attribute = add_and_infer(op_conf, scope_symbol) + parallel_conf = scope_symbol.device_parallel_desc_symbol.parallel_conf + op_executor.Interpret(op_attribute, parallel_conf, blob_register) + bw_blob_register = gradient_util.GetDefaultBackwardBlobRegister() + gradient_util.TrySetBackwardUsedBlobObject( + op_attribute, blob_register, bw_blob_register + ) + return op_attribute + + +@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled) +def EagerOpKernelForward(add_and_infer, op_conf, opkernel_object): + op_attribute = add_and_infer(op_conf, opkernel_object.scope_symbol) + op_executor.OpKernelCall(opkernel_object, op_attribute, blob_register) + bw_blob_register = gradient_util.GetDefaultBackwardBlobRegister() + gradient_util.TrySetBackwardUsedBlobObject( + op_attribute, blob_register, bw_blob_register + ) + return op_attribute diff --git a/oneflow/compatible_single_client_python/framework/job_instance.py b/oneflow/compatible_single_client_python/framework/job_instance.py new file mode 100644 index 0000000000000000000000000000000000000000..63c9da2eab181e333f9d8c18baf9adcbdd660558 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/job_instance.py @@ -0,0 +1,150 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import sys +import traceback + +from oneflow.compatible_single_client_python.framework import ofblob as ofblob +import oneflow._oneflow_internal + + +def MakeUserJobInstance(job_name, finish_cb=None): + return MakeJobInstance(job_name, finish_cb=finish_cb) + + +def MakePullJobInstance(job_name, op_name, pull_cb, finish_cb=None): + return MakeJobInstance( + job_name, + sole_output_op_name_in_user_job=op_name, + pull_cb=pull_cb, + finish_cb=finish_cb, + ) + + +def MakePushJobInstance(job_name, op_name, push_cb, finish_cb=None): + return MakeJobInstance( + job_name, + sole_input_op_name_in_user_job=op_name, + push_cb=push_cb, + finish_cb=finish_cb, + ) + + +def MakeArgPassJobInstance(job_name, src_op_name, dst_op_name, finish_cb=None): + return MakeJobInstance( + job_name, + sole_output_op_name_in_user_job=src_op_name, + sole_input_op_name_in_user_job=dst_op_name, + finish_cb=finish_cb, + ) + + +def MakeJobInstance(*arg, **kw): + def _DoNothing(): + pass + + if "finish_cb" not in kw or kw["finish_cb"] is None: + kw["finish_cb"] = _DoNothing + job_instance = JobInstance(*arg, **kw) + # python object lifetime is a headache + # _flying_job_instance prevents job_instance earlier destructation + global _flying_job_instance + _flying_job_instance[id(job_instance)] = job_instance + + def DereferenceJobInstance(job_instance): + global _flying_job_instance + del _flying_job_instance[id(job_instance)] + + job_instance.AddPostFinishCallback(DereferenceJobInstance) + return job_instance + + +class JobInstance(oneflow._oneflow_internal.JobInstance): + def __init__( + self, + job_name, + sole_input_op_name_in_user_job=None, + sole_output_op_name_in_user_job=None, + push_cb=None, + pull_cb=None, + finish_cb=None, + ): + oneflow._oneflow_internal.JobInstance.__init__(self) + self.thisown = 0 + self.job_name_ = str(job_name) + self.sole_input_op_name_in_user_job_ = str(sole_input_op_name_in_user_job) + self.sole_output_op_name_in_user_job_ = str(sole_output_op_name_in_user_job) + self.push_cb_ = push_cb + self.pull_cb_ = pull_cb + self.finish_cb_ = finish_cb + self.post_finish_cbs_ = [] + + def job_name(self): + try: + return self.job_name_ + except Exception as e: + print(traceback.format_exc()) + raise e + + def sole_input_op_name_in_user_job(self): + try: + return self.sole_input_op_name_in_user_job_ + except Exception as e: + print(traceback.format_exc()) + raise e + + def sole_output_op_name_in_user_job(self): + try: + return self.sole_output_op_name_in_user_job_ + except Exception as e: + print(traceback.format_exc()) + raise e + + def PushBlob(self, of_blob_ptr): + try: + self.push_cb_(ofblob.OfBlob(of_blob_ptr)) + except Exception as e: + print(traceback.format_exc()) + raise e + + def PullBlob(self, of_blob_ptr): + try: + self.pull_cb_(ofblob.OfBlob(of_blob_ptr)) + except Exception as e: + print(traceback.format_exc()) + raise e + + def Finish(self): + try: + self.finish_cb_() + except Exception as e: + print(traceback.format_exc()) + raise e + finally: + try: + for post_finish_cb in self.post_finish_cbs_: + post_finish_cb(self) + except Exception as e: + 
print(traceback.format_exc())
+                raise e
+
+    def AddPostFinishCallback(self, cb):
+        self.post_finish_cbs_.append(cb)
+
+
+# keeps JobInstance objects alive until their post-finish callbacks have run
+_flying_job_instance = {}
diff --git a/oneflow/compatible_single_client_python/framework/job_set_util.py b/oneflow/compatible_single_client_python/framework/job_set_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..1606d2d104de5b80483a1aab107ee7dcc57c6610
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/job_set_util.py
@@ -0,0 +1,57 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+from typing import Optional, TypeVar
+
+from oneflow.core.job.job_set_pb2 import JobSet
+from oneflow.compatible_single_client_python.oneflow_export import oneflow_export
+
+_VT = TypeVar("_VT")
+
+
+@oneflow_export("inter_job_reuse_mem_strategy")
+def inter_job_reuse_mem_strategy(
+    strategy_str: str, job_set: Optional[JobSet] = None, **kwargs: _VT
+) -> None:
+    r"""Set the inter-job memory sharing strategy for a job set.
+
+    Args:
+        strategy_str: a `string`, one of `reuse_mem_priority`, `parallelism_priority`
+            or `custom_parallelism` (which also requires a `job_name_groups` kwarg).
+        job_set: a `JobSet` object. If None, the default job set is used.
+    """
+    assert type(strategy_str) is str
+    if job_set is None:
+        job_set = _default_job_set
+    if strategy_str == "reuse_mem_priority":
+        job_set.inter_job_reuse_mem_strategy.reuse_mem_priority.SetInParent()
+        assert job_set.inter_job_reuse_mem_strategy.HasField("reuse_mem_priority")
+    elif strategy_str == "parallelism_priority":
+        job_set.inter_job_reuse_mem_strategy.parallelism_priority.SetInParent()
+        assert job_set.inter_job_reuse_mem_strategy.HasField("parallelism_priority")
+    elif strategy_str == "custom_parallelism":
+        assert kwargs["job_name_groups"] is not None
+        for job_name_group in kwargs["job_name_groups"]:
+            group = (
+                job_set.inter_job_reuse_mem_strategy.custom_parallelism.nonparallel_group.add()
+            )
+            for job_name in job_name_group:
+                assert type(job_name) is str
+                group.job_name.append(job_name)
+
+
+_default_job_set = JobSet()
diff --git a/oneflow/compatible_single_client_python/framework/local_blob.py b/oneflow/compatible_single_client_python/framework/local_blob.py
new file mode 100644
index 0000000000000000000000000000000000000000..8983fd0ce3ae9d403eddf84b197066a33689b23e
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/local_blob.py
@@ -0,0 +1,113 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
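# Illustrative sketch (assumes a single-client build; the job names are
# hypothetical): how the strategy setter defined in job_set_util.py above is
# typically called.
from oneflow.compatible import single_client as flow

flow.inter_job_reuse_mem_strategy("reuse_mem_priority")
# or group jobs explicitly for the custom strategy:
flow.inter_job_reuse_mem_strategy(
    "custom_parallelism", job_name_groups=[["train_job", "eval_job"]]
)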
+See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import numpy as np +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +import oneflow._oneflow_internal +import traceback + + +class LocalBlob(object): + # TODO(chengcheng): maybe not need LocalBlob. + def __init__(self, ndarray, is_dynamic): + self.ndarray_ = ndarray + self.is_dynamic_ = is_dynamic + + @property + def is_dynamic(self): + return self.is_dynamic_ + + def ndarray_list(self): + print( + "WARNING:", + "LocalBlob.ndarray_list is deprecated, please use LocalBlob.numpy()\n", + traceback.format_stack()[-2], + ) + return self.numpy_list() + + def numpy_list(self): + return [self.numpy()] + + def ndarray(self): + print( + "WARNING:", + "LocalBlob.ndarray is deprecated, please use LocalBlob.numpy()\n", + traceback.format_stack()[-2], + ) + return self.numpy() + + def numpy(self, parallel_id=None): + assert parallel_id is None or parallel_id == 0 + return self.ndarray_ + + def parallel_num(self): + return 1 + + def __getattr__(self, attr): + return getattr(self.numpy(), attr) + + +def MakeLocalBlob4EagerBlob(eager_blob): + # TODO(chengcheng): refactor eager local blob. + assert isinstance(eager_blob, oneflow._oneflow_internal.EagerBlobTrait) + if isinstance(eager_blob, oneflow._oneflow_internal.EagerMirroredBlob): + assert eager_blob.numpy_size() == 1 + return LocalBlob(eager_blob.numpy(), is_dynamic=eager_blob.is_dynamic,) + elif isinstance(eager_blob, oneflow._oneflow_internal.EagerConsistentBlob): + return LocalBlob(eager_blob.numpy(), is_dynamic=False) + else: + raise NotImplementedError + + +non_override_field = set( + [ + "__class__", + "__doc__", + "__new__", + "__init__", + "__del__", + "__call__", + "__getattr__", + "__getattribute__", + "__setattr__", + "__delattr__", + "__dir__", + "__get__", + "__set__", + "__delete__", + ] +) + + +def MakeBlobMethod(field_name): + def ConvertOtherArgs(args): + return [x.numpy() if isinstance(x, LocalBlob) else x for x in args] + + return lambda self, *args: getattr(self.numpy(), field_name)( + *ConvertOtherArgs(args) + ) + + +for field_name in dir(np.ndarray): + if field_name.startswith("__") == False: + continue + if field_name in non_override_field: + continue + if hasattr(LocalBlob, field_name) == False: + setattr(LocalBlob, field_name, MakeBlobMethod(field_name)) diff --git a/oneflow/compatible_single_client_python/framework/model.py b/oneflow/compatible_single_client_python/framework/model.py new file mode 100644 index 0000000000000000000000000000000000000000..eba5a9b1fff2759c75a576acc6be582e51a5bf38 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/model.py @@ -0,0 +1,835 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
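# Illustrative sketch for the LocalBlob wrapper defined below (constructing
# one directly is only for demonstration; in practice instances come back
# from job results):
import numpy as np
from oneflow.compatible_single_client_python.framework.local_blob import LocalBlob

lb = LocalBlob(np.arange(6).reshape(2, 3), is_dynamic=False)
print(lb.numpy().sum())    # 15, explicit conversion
print(lb.shape)            # (2, 3), __getattr__ forwards to the ndarray
print((lb + lb).tolist())  # ndarray dunders installed by MakeBlobMethod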
+""" +from __future__ import absolute_import + +__all__ = [ + "DataModule", + "NumpyDataModule", + "TrainingConfig", + "ValidationConfig", + "CheckpointConfig", + "Callback", + "Model", +] + +from abc import ABC +from typing import Optional, Any, Union, Tuple, List + +import inspect +import numpy as np + +import oneflow._oneflow_internal +from oneflow.compatible_single_client_python.framework.check_point_v2 import ( + LoadVariables, + SaveVarDict, + GetCheckpoint, +) +from oneflow.compatible_single_client_python.framework.function_util import ( + api_oneflow_function, +) +from oneflow.compatible_single_client_python.framework.function_util import ( + FunctionConfig as ExecutionConfig, +) +from oneflow.compatible_single_client_python.framework.local_blob import LocalBlob +from oneflow.compatible_single_client_python.framework.session_util import ( + api_clear_default_session, +) +from oneflow.compatible_single_client_python.framework.tensor import Tensor +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.ops.optimizer import Optimizer +from oneflow.compatible_single_client_python.nn.optimizer.optimizer import ( + Optimizer as OOPOptimizer, +) +from oneflow.compatible_single_client_python.framework import typing as oneflow_typing +from oneflow.compatible_single_client_python.framework import dtype as dtype_util + + +@oneflow_export("model.DataModule") +class DataModule(Module): + def __init__(self, *args, **kwargs): + super().__init__() + + def forward(self, step_idx: int = 0, optimizer_idx: int = 0): + # Do nothing, to be overrided by subclass. + pass + + def infer_oneflow_data_placeholder( + self, batch: Tuple[Any] = None, optimizer_idx: int = 0 + ): + return None + + +@oneflow_export("model.NumpyDataModule") +class NumpyDataModule(DataModule): + def __init__(self, *args, **kwargs): + super().__init__() + + def forward(self, step_idx: int = 0, optimizer_idx: int = 0): + # Do nothing, to be overrided by subclass. + pass + + def __call__(self, *args): + ret = self.forward(*args) + return ret + + def infer_oneflow_data_placeholder( + self, batch: Tuple[np.ndarray, ...] = None, optimizer_idx: int = 0 + ): + assert isinstance(batch, tuple), "model.NumpyDataModule must return a tuple." + data_placeholder_list = [] + for item in batch: + assert isinstance( + item, np.ndarray + ), "model.NumpyDataModule must return a tuple of numpy." 
+ of_dtype = dtype_util.convert_numpy_dtype_to_oneflow_dtype(item.dtype) + numpy_placeholder = oneflow_typing.Numpy.Placeholder( + shape=item.shape, dtype=of_dtype + ) + data_placeholder_list.append(numpy_placeholder) + return data_placeholder_list + + +@oneflow_export("model.TrainingConfig") +class TrainingConfig: + def __init__(self): + super().__init__() + self.exe_cfg = ExecutionConfig() + self.data = None + self.error_msg = "" + + def config_execution(self, exe_cfg: ExecutionConfig = None): + self.exe_cfg = exe_cfg + + def config_data(self, data: DataModule = None): + self.data = data + + def check_valid(self): + is_valid = True + self.error_msg = "" + if not isinstance(self.exe_cfg, ExecutionConfig): + self.error_msg += "model.TrainingConfig exe_cfg is not ExecutionConfig;" + is_valid = False + if self.data is None: + self.error_msg += "model.TrainingConfig data is None;" + is_valid = False + if not isinstance(self.data, DataModule): + self.error_msg += "model.TrainingConfig data is not DataModule;" + is_valid = False + return is_valid + + +@oneflow_export("model.ValidationConfig") +class ValidationConfig: + def __init__(self): + super().__init__() + self.exe_cfg = ExecutionConfig() + self.data = None + self.step_interval = 10 + self.error_msg = "" + + def config_execution(self, exe_cfg: ExecutionConfig = None): + self.exe_cfg = exe_cfg + + def config_data(self, data: DataModule = None): + self.data = data + + def config_step_interval(self, step_interval: int = 1): + self.step_interval = step_interval + + def check_valid(self): + is_valid = True + self.error_msg = "" + if self.data is None: + self.error_msg += "model.ValidationConfig data is None;" + is_valid = False + if not isinstance(self.data, DataModule): + self.error_msg += "model.ValidationConfig data is not DataModule;" + is_valid = False + if self.step_interval <= 0 or not isinstance(self.step_interval, int): + self.error_msg += ( + "model.ValidationConfig step_interval is <= 0 or is not int;" + ) + is_valid = False + return is_valid + + +@oneflow_export("model.CheckpointConfig") +class CheckpointConfig(object): + def __init__(self,): + self.need_load = False + self.load_dirpath = None + self.need_save = False + self.save_dirpath = None + self.save_step_interval = 1 + self.error_msg = "" + + def config_load(self, dirpath: str = None): + self.need_load = True + assert dirpath is not None, "dirpath should not be None" + self.load_dirpath = dirpath + + def config_save(self, dirpath: str = None, step_interval: int = 1): + self.need_save = True + self.save_dirpath = dirpath + assert dirpath is not None, "dirpath should not be None" + self.save_step_interval = step_interval + assert step_interval > 0, "step_interval should not <= 0" + assert isinstance(step_interval, int), "step_interval should be int" + + def check_valid(self): + # Configs has already been checked + is_valid = True + self.error_msg = "" + return is_valid + + +@oneflow_export("model.Callback") +class Callback(ABC): + r""" Abstract base class used to build new callbacks. + """ + + def on_training_step_end( + self, + outputs: Optional[ + Union[LocalBlob, Tuple[LocalBlob, ...], Tensor, Tuple[Tensor, ...]] + ], + step_idx: int = 0, + optimizer_idx: int = 0, + ): + # Do nothing, to be overrided by subclass. + pass + + def on_validation_step_end( + self, + outputs: Optional[ + Union[LocalBlob, Tuple[LocalBlob, ...], Tensor, Tuple[Tensor, ...]] + ], + step_idx: int = 0, + ): + # Do nothing, to be overrided by subclass. 
+ pass + + +@oneflow_export("Model", "model.Model") +class Model( + ABC, Module, +): + r"""A high level API for model training and validation. + """ + + def __init__(self, *args, **kwargs): + super().__init__() + + self._is_deprecated_function_style = ( + kwargs["is_deprecated_function_style"] + if "is_deprecated_function_style" in kwargs + else False + ) + + def forward(self, *args, **kwargs): + r"""Same as `nn.Module.forward()`, here is to define the operations you want to use for prediction. + """ + raise NotImplementedError + + def training_step(self, *args, **kwargs): + r"""Operates on a single batch of data from the training set and return loss. + """ + raise NotImplementedError() + + def validation_step(self, *args, **kwargs): + r"""Operates on a single batch of data from the validation set. + """ + raise NotImplementedError() + + def configure_optimizers(self): + r"""Choose what optimizers and learning-rate schedulers to use in your optimization. + Normally you'd need one. But in the case of GANs or similar you might have multiple. + """ + raise NotImplementedError() + + def fit( + self, + training_config: Optional[TrainingConfig] = None, + validation_config: Optional[ValidationConfig] = None, + checkpoint_config: Optional[CheckpointConfig] = None, + callbacks: Optional[Union[Callback, List[Callback]]] = None, + max_steps: int = 100, + ): + r""" Runs the full training and validation routine. + """ + self._max_steps = max_steps + api_clear_default_session() + self._sub_models = self._get_and_check_sub_models( + training_config, validation_config, checkpoint_config, callbacks + ) + + if len(self._sub_models) == 0: + return + + if self._checkpoint_model.is_valid: + self._checkpoint_model.load() + for step_idx in range(0, self._max_steps): + for sub_model in self._sub_models: + try: + sub_model.step(step_idx) + except Exception as e: + print( + "Model step_idx {} {} failed.".format(step_idx, sub_model.name) + ) + raise e + + def method_overrided(self, method_name: str = None) -> bool: + return getattr(self.__class__, method_name) != getattr(Model, method_name) + + def _get_and_check_sub_models( + self, + training_config: Optional[TrainingConfig] = None, + validation_config: Optional[ValidationConfig] = None, + checkpoint_config: Optional[CheckpointConfig] = None, + callbacks: Optional[Union[Callback, List[Callback]]] = None, + ): + sub_models = [] + + self._train_model = ( + TrainModel(training_config, self, callbacks) + if self._is_deprecated_function_style + else TrainModelOOPStyle(training_config, self, callbacks) + ) + if self._train_model.is_valid: + sub_models.append(self._train_model) + else: + if training_config is not None: + print( + self._train_model.error_msg, + "{}'s fit() will not do training.".format(self.__class__.__name__), + ) + + self._val_model = ( + ValidateModel(validation_config, self, callbacks) + if self._is_deprecated_function_style + else ValidateModelOOPStyle(validation_config, self, callbacks) + ) + if self._val_model.is_valid: + sub_models.append(self._val_model) + else: + if validation_config is not None: + print( + self._val_model.error_msg, + "{}'s fit() will not do validation.".format( + self.__class__.__name__ + ), + ) + + if len(sub_models) == 0: + print( + "{}'s fit() will do nothing because there has no valid configuration.".format( + self.__class__.__name__ + ) + ) + return sub_models + + self._checkpoint_model = ( + CheckpointModel(checkpoint_config, self, callbacks) + if self._is_deprecated_function_style + else 
CheckpointModelOOPStyle(checkpoint_config, self, callbacks) + ) + if self._checkpoint_model.is_valid: + sub_models.append(self._checkpoint_model) + else: + if checkpoint_config is not None: + print( + self._checkpoint_model.error_msg, + "{}'s fit() will not do checkpoint.".format( + self.__class__.__name__ + ), + ) + + return sub_models + + +class SubModel(ABC): + def __init__(self, name, cfg, model, callbacks): + self._cfg = cfg + assert isinstance(model, Model) + self._model = model + self._cbs = callbacks + + self.name = name + self.is_valid = True + self.error_msg = ( + self._model.__class__.__name__ + " " + self.name + " error message: " + ) + + if not self._get_and_check_cfg(): + self.is_valid = False + + if not self._get_and_check_cbs(): + self.is_valid = False + + def step(self, step_idx: int = 0): + raise NotImplementedError + + def _get_and_check_cfg(self): + if self._cfg is None: + self.error_msg += "config is None;" + return False + + if not self._cfg.check_valid(): + self.error_msg += self._cfg.error_msg + return False + else: + return True + + def _get_and_check_cbs(self): + if self._cbs is None: + self._cbs = [] + return True + + if isinstance(self._cbs, Callback): + self._cbs = [self._cbs] + return True + + if isinstance(self._cbs, list): + for cb in self._cbs: + assert isinstance( + cb, Callback + ), "model callbacks' type must be model.Callback or List[model.Callback]." + return True + + assert ( + False + ), "model callbacks' type must be model.Callback or List[model.Callback]." + + def _method_callback(self, method_name: str = None, *args, **kwargs): + for cb in self._cbs: + method = getattr(cb, method_name) + method(*args, **kwargs) + + +class TrainModel(SubModel): + def __init__( + self, + cfg: TrainingConfig = None, + model: Model = None, + callbacks: Optional[Union[Callback, List[Callback]]] = None, + ): + super().__init__("training", cfg, model, callbacks) + + if not self._get_and_check_step(): + self.is_valid = False + + if not self._get_and_check_opts(): + self.is_valid = False + + if self.is_valid and not self._get_and_check_jobs(): + self.is_valid = False + + def step(self, step_idx: int = 0): + assert self.is_valid, self.error_msg + for optimizer_idx in range(0, len(self._opts)): + outputs = None + if self._is_numpy_input: + batch = None + if step_idx == 0: + batch = self._first_numpy_batch[optimizer_idx] + else: + batch = self._cfg.data(step_idx, optimizer_idx) + outputs = self._jobs[optimizer_idx](*batch).get() + else: + outputs = self._jobs[optimizer_idx]().get() + + self._method_callback( + "on_training_step_end", + outputs=outputs, + step_idx=step_idx, + optimizer_idx=optimizer_idx, + ) + + def _get_and_check_step(self): + if not self._model.method_overrided("training_step"): + self.error_msg += "model.training_step() is empty;" + return False + else: + return True + + def _get_and_check_opts(self): + self._opts = [] + if not self._model.method_overrided("configure_optimizers"): + self.error_msg += "model.configure_optimizers() is empty;" + return False + + opt_conf = self._model.configure_optimizers() + if isinstance(opt_conf, Optimizer): + self._opts = [opt_conf] + elif isinstance(opt_conf, (list, tuple)): + for opt in opt_conf: + assert isinstance( + opt, Optimizer + ), "model.configure_optimizers() must return Optimizer \ + or List[Optimizer, ...] or Tuple[Optimizer, ...]" + self._opts = opt_conf + else: + assert ( + False + ), "model.configure_optimizers() must return Optimizer \ + or List[Optimizer, ...] 
or Tuple[Optimizer, ...]" + + return True + + def _get_and_check_jobs(self): + # TOOD(strint): rm numpy in sub-model + self._is_numpy_input = ( + True if isinstance(self._cfg.data, NumpyDataModule) else False + ) + self._jobs = [] + + if self._is_numpy_input: + self._first_numpy_batch = [] + for optimizer_idx in range(0, len(self._opts)): + batch = self._cfg.data(0, optimizer_idx) + self._first_numpy_batch.insert(optimizer_idx, batch) + self._jobs.insert( + optimizer_idx, self._construct_numpy_job(batch, optimizer_idx) + ) + else: + for optimizer_idx in range(0, len(self._opts)): + self._jobs.insert(optimizer_idx, self._construct_job(optimizer_idx)) + + return True + + def _construct_job(self, optimizer_idx: int = 0): + def job(): + batch = self._cfg.data(0, optimizer_idx) + outputs = self._model.training_step( + batch=batch, optimizer_idx=optimizer_idx + ) + loss = None + if isinstance(outputs, tuple) and len(outputs) > 0: + loss = outputs[0] + else: + loss = outputs + self._opts[optimizer_idx].minimize(loss) + return outputs + + job.__name__ = ( + self._model.__class__.__name__ + "_Model_train_job_" + str(optimizer_idx) + ) + deco = api_oneflow_function(type="train", function_config=self._cfg.exe_cfg) + return deco(job) + + def _construct_numpy_job(self, batch, optimizer_idx): + def job(*input_batch): + outputs = self._model.training_step( + batch=input_batch, optimizer_idx=optimizer_idx + ) + loss = None + if isinstance(outputs, tuple) and len(outputs) > 0: + loss = outputs[0] + else: + loss = outputs + self._opts[optimizer_idx].minimize(loss) + return outputs + + _infer_job_signature(self._cfg.data, batch, optimizer_idx, job) + + job.__name__ = ( + self._model.__class__.__name__ + + "_Model_train_numpy_job_" + + str(optimizer_idx) + ) + deco = api_oneflow_function(type="train", function_config=self._cfg.exe_cfg) + return deco(job) + + +class ValidateModel(SubModel): + def __init__( + self, + cfg: ValidationConfig = None, + model: Model = None, + callbacks: Optional[Union[Callback, List[Callback]]] = None, + ): + super().__init__("validation", cfg, model, callbacks) + + if not self._get_and_check_step(): + self.is_valid = False + + if self.is_valid and not self._get_and_check_job(): + self.is_valid = False + + def step(self, step_idx: int = 0): + assert self.is_valid + if (step_idx + 1) % self._cfg.step_interval == 0: + outputs = None + if self._is_numpy_input: + batch = None + if step_idx == 0: + batch = self._first_numpy_batch + else: + batch = self._cfg.data(step_idx, 0) + outputs = self._job(*batch).get() + else: + outputs = self._job().get() + self._method_callback( + "on_validation_step_end", step_idx=step_idx, outputs=outputs, + ) + + def _get_and_check_step(self): + if not self._model.method_overrided("validation_step"): + self.error_msg += "model.validation_step() is empty;" + return False + else: + return True + + def _get_and_check_job(self): + # TOOD(strint): rm numpy in sub-model + self._is_numpy_input = ( + True if isinstance(self._cfg.data, NumpyDataModule) else False + ) + self._job = None + if not self._is_numpy_input: + self._job = self._construct_job() + else: + batch = self._cfg.data(0, 0) + self._first_numpy_batch = batch + self._job = self._construct_numpy_job(batch) + + return True + + def _construct_job(self): + def job(): + batch = self._cfg.data(0, 0) + return self._model.validation_step(batch) + + job.__name__ = self._model.__class__.__name__ + "_Model_eval_job" + deco = api_oneflow_function(type="predict", function_config=self._cfg.exe_cfg) + return deco(job) 
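# Illustrative sketch (plain Python): the return convention that
# _construct_job()/_construct_numpy_job() above rely on -- training_step()
# may return either the loss itself or a tuple whose first element is it.
def extract_loss(outputs):
    if isinstance(outputs, tuple) and len(outputs) > 0:
        return outputs[0]
    return outputs

assert extract_loss(0.25) == 0.25
assert extract_loss((0.25, {"acc": 0.9})) == 0.25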
+ + def _construct_numpy_job(self, batch: Tuple[np.ndarray, ...] = None): + def job(*input_batch): + return self._model.validation_step(batch=input_batch) + + _infer_job_signature(self._cfg.data, batch, 0, job) + + job.__name__ = self._model.__class__.__name__ + "_Model_eval_numpy_job" + deco = api_oneflow_function(type="predict", function_config=self._cfg.exe_cfg) + return deco(job) + + +class CheckpointModel(SubModel): + def __init__( + self, + cfg: CheckpointConfig = None, + model: Model = None, + callbacks: Optional[Union[Callback, List[Callback]]] = None, + ): + super().__init__("checkpointing", cfg, model, callbacks) + + def load(self): + assert self.is_valid + if self._cfg.need_load: + self._load_checkpoint(self._cfg.load_dirpath) + + def step(self, step_idx: int = 0): + assert self.is_valid + if self._cfg.need_save: + if (step_idx + 1) % self._cfg.save_step_interval == 0: + self._save_checkpoint( + dirpath=self._cfg.save_dirpath + "-" + str(step_idx) + ) + + def _load_checkpoint( + self, dirpath: str, + ): + r"""Load model states from a checkpoint. + """ + LoadVariables(GetCheckpoint(path=dirpath)) + + def _save_checkpoint( + self, dirpath: str, + ): + r"""Save model states as a checkpoint. + """ + SaveVarDict(path=dirpath) + + +class TrainModelOOPStyle(SubModel): + def __init__( + self, + cfg: TrainingConfig = None, + model: Model = None, + callbacks: Optional[Union[Callback, List[Callback]]] = None, + ): + super().__init__("training", cfg, model, callbacks) + + if not self._get_and_check_step(): + self.is_valid = False + + if not self._get_and_check_opts(): + self.is_valid = False + + def step(self, step_idx: int = 0): + assert self.is_valid, self.error_msg + for optimizer_idx in range(0, len(self._opts)): + batch = self._cfg.data(step_idx, optimizer_idx) + outputs = self._model.training_step( + batch=batch, optimizer_idx=optimizer_idx + ) + loss = None + if isinstance(outputs, tuple) and len(outputs) > 0: + loss = outputs[0] + else: + loss = outputs + + loss.backward() + opt = self._opts[optimizer_idx] + opt.step() + opt.zero_grad() + + self._method_callback( + "on_training_step_end", + outputs=outputs, + step_idx=step_idx, + optimizer_idx=optimizer_idx, + ) + + def _get_and_check_step(self): + if not self._model.method_overrided("training_step"): + self.error_msg += "model.training_step() is empty;" + return False + else: + return True + + def _get_and_check_opts(self): + self._opts = [] + if not self._model.method_overrided("configure_optimizers"): + self.error_msg += "model.configure_optimizers() is empty;" + return False + + opt_conf = self._model.configure_optimizers() + if isinstance(opt_conf, OOPOptimizer): + self._opts = [opt_conf] + elif isinstance(opt_conf, (list, tuple)): + for opt in opt_conf: + assert isinstance( + opt, OOPOptimizer + ), "model.configure_optimizers() must return Optimizer \ + or List[Optimizer, ...] or Tuple[Optimizer, ...]" + self._opts = opt_conf + else: + assert ( + False + ), "model.configure_optimizers() must return Optimizer \ + or List[Optimizer, ...] 
or Tuple[Optimizer, ...]" + + return True + + +class ValidateModelOOPStyle(SubModel): + def __init__( + self, + cfg: ValidationConfig = None, + model: Model = None, + callbacks: Optional[Union[Callback, List[Callback]]] = None, + ): + super().__init__("validation", cfg, model, callbacks) + + if not self._get_and_check_step(): + self.is_valid = False + + def step(self, step_idx: int = 0): + assert self.is_valid + if (step_idx + 1) % self._cfg.step_interval == 0: + outputs = None + with oneflow._oneflow_internal.autograd.no_grad(): + inputs = self._cfg.data(step_idx, 0) + model_previous_mode = self._model.training + self._model.train() + outputs = self._model.validation_step(inputs) + self._model.train(model_previous_mode) + self._method_callback( + "on_validation_step_end", step_idx=step_idx, outputs=outputs, + ) + + def _get_and_check_step(self): + if not self._model.method_overrided("validation_step"): + self.error_msg += "model.validation_step() is empty;" + return False + else: + return True + + +class CheckpointModelOOPStyle(SubModel): + def __init__( + self, + cfg: CheckpointConfig = None, + model: Model = None, + callbacks: Optional[Union[Callback, List[Callback]]] = None, + ): + super().__init__("checkpointing", cfg, model, callbacks) + + def load(self): + assert self.is_valid + if self._cfg.need_load: + self._load_checkpoint(self._cfg.load_dirpath) + + def step(self, step_idx: int = 0): + assert self.is_valid + if self._cfg.need_save: + if (step_idx + 1) % self._cfg.save_step_interval == 0: + self._save_checkpoint( + dirpath=self._cfg.save_dirpath + "-" + str(step_idx) + ) + + def _load_checkpoint( + self, dirpath: str, + ): + r"""Load model states from a checkpoint. + """ + stat_dict = GetCheckpoint(path=dirpath) + self._model.load_state_dict(stat_dict) + + def _save_checkpoint( + self, dirpath: str, + ): + r"""Save model states as a checkpoint. + """ + stat_dict = self._model.state_dict() + SaveVarDict(path=dirpath, var_dict=stat_dict) + + +def _infer_job_signature(data_module, batch, optimizer_idx, job): + para_list = [] + placeholder_list = data_module.infer_oneflow_data_placeholder(batch, optimizer_idx) + for i, placeholder in enumerate(placeholder_list): + para_name = ( + data_module.__class__.__name__ + + "_opt_" + + str(optimizer_idx) + + "_para_" + + str(i) + ) + para_list.append( + inspect.Parameter( + name=para_name, + kind=inspect.Parameter.POSITIONAL_OR_KEYWORD, + annotation=placeholder, + ) + ) + + origin_sig = inspect.signature(job) + new_sig = origin_sig.replace(parameters=para_list) + job.__oneflow_function_signature__ = new_sig diff --git a/oneflow/compatible_single_client_python/framework/module.py b/oneflow/compatible_single_client_python/framework/module.py new file mode 100644 index 0000000000000000000000000000000000000000..6e6fc90803a6145f866a2cae2dd2ff4c70176d86 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/module.py @@ -0,0 +1,51 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
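# Illustrative end-to-end sketch for the Model API defined in model.py above
# (assumes a single-client build; MyDataModule/MyModel and the dense/SGD
# calls are stand-ins, not part of the patch):
import numpy as np
from oneflow.compatible import single_client as flow

class MyDataModule(flow.model.NumpyDataModule):
    def forward(self, step_idx=0, optimizer_idx=0):
        x = np.random.rand(4, 8).astype(np.float32)
        y = np.random.rand(4, 1).astype(np.float32)
        return (x, y)

class MyModel(flow.Model):
    def __init__(self):
        super().__init__(is_deprecated_function_style=True)

    def training_step(self, batch, optimizer_idx=0):
        x, y = batch
        pred = flow.layers.dense(x, 1, name="fc")
        return flow.math.reduce_mean(flow.math.square(pred - y))

    def configure_optimizers(self):
        lr_sched = flow.optimizer.PiecewiseConstantScheduler([], [1e-3])
        return flow.optimizer.SGD(lr_sched)

train_cfg = flow.model.TrainingConfig()
train_cfg.config_data(MyDataModule())
MyModel().fit(training_config=train_cfg, max_steps=10)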
+""" + +from __future__ import absolute_import +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.framework import id_util as id_util + + +@oneflow_export("deprecated.nn.Module") +class Module(object): + def __init__(self, name=None): + if name is None: + name = id_util.UniqueStr("Module_") + self.module_name_ = name + self.call_seq_no_ = 0 + + @property + def module_name(self): + return self.module_name_ + + @property + def call_seq_no(self): + return self.call_seq_no_ + + # only for overriding + # do not call module.foward(*args) directly + def forward(self, *args): + raise NotImplementedError() + + def __call__(self, *args): + ret = self.forward(*args) + self.call_seq_no_ = self.call_seq_no_ + 1 + return ret + + def __del__(self): + assert ( + getattr(type(self), "__call__") is Module.__call__ + ), "do not override __call__" diff --git a/oneflow/compatible_single_client_python/framework/ofblob.py b/oneflow/compatible_single_client_python/framework/ofblob.py new file mode 100644 index 0000000000000000000000000000000000000000..093d644333f860591d3bb11cda2efc3b4ddf85da --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/ofblob.py @@ -0,0 +1,106 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +import collections +from functools import reduce + +import numpy as np +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from google.protobuf import text_format +from oneflow.compatible_single_client_python.framework.dtype import ( + convert_proto_dtype_to_oneflow_dtype, +) +from oneflow.compatible_single_client_python.lib.core.box import Box + + +class OfBlob(object): + def __init__(self, of_blob_ptr): + self.of_blob_ptr_ = of_blob_ptr + + @property + def dtype(self): + return convert_proto_dtype_to_oneflow_dtype( + oneflow._oneflow_internal.Ofblob_GetDataType(self.of_blob_ptr_) + ) + + @property + def static_shape(self): + num_axes = oneflow._oneflow_internal.OfBlob_NumAxes(self.of_blob_ptr_) + dst_ndarray = np.ndarray(num_axes, dtype=np.int64) + oneflow._oneflow_internal.OfBlob_CopyStaticShapeTo( + self.of_blob_ptr_, dst_ndarray + ) + return tuple(dst_ndarray.tolist()) + + @property + def shape(self): + num_axes = oneflow._oneflow_internal.OfBlob_NumAxes(self.of_blob_ptr_) + dst_ndarray = np.zeros(num_axes, dtype=np.int64) + oneflow._oneflow_internal.OfBlob_CopyShapeTo(self.of_blob_ptr_, dst_ndarray) + return tuple(dst_ndarray.tolist()) + + def set_shape(self, shape): + assert isinstance(shape, (list, tuple)) + assert len(shape) == oneflow._oneflow_internal.OfBlob_NumAxes(self.of_blob_ptr_) + oneflow._oneflow_internal.OfBlob_CopyShapeFrom( + self.of_blob_ptr_, np.array(shape, dtype=np.int64) + ) + + @property + def num_axes(self): + return oneflow._oneflow_internal.OfBlob_NumAxes(self.of_blob_ptr_) + + @property + def is_dynamic(self): + return oneflow._oneflow_internal.OfBlob_IsDynamic(self.of_blob_ptr_) + + def CopyToNdarray(self): + return self._CopyToNdarray() + + def CopyFromNdarray(self, src_ndarray): + if self.is_dynamic: + self.set_shape(src_ndarray.shape) + else: + shape_tensor = np.zeros(self.num_axes, dtype=np.int64) + oneflow._oneflow_internal.OfBlob_CopyShapeTo( + self.of_blob_ptr_, shape_tensor + ) + shape = tuple(shape_tensor.tolist()) + assert src_ndarray.shape == shape + return self._CopyBodyFromNdarray(src_ndarray) + + def _CopyBodyFromNdarray(self, src_ndarray): + method_name = oneflow._oneflow_internal.Dtype_GetOfBlobCopyFromBufferFuncName( + oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype(self.dtype) + ) + copy_method = getattr(oneflow._oneflow_internal, method_name) + copy_method(self.of_blob_ptr_, src_ndarray) + + def _CopyToNdarray(self): + method_name = oneflow._oneflow_internal.Dtype_GetOfBlobCopyToBufferFuncName( + oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype(self.dtype) + ) + copy_method = getattr(oneflow._oneflow_internal, method_name) + shape_tensor = np.zeros(self.num_axes, dtype=np.int64) + oneflow._oneflow_internal.OfBlob_CopyShapeTo(self.of_blob_ptr_, shape_tensor) + shape = tuple(shape_tensor.tolist()) + tensor = np.zeros( + shape, dtype=flow.convert_oneflow_dtype_to_numpy_dtype(self.dtype) + ) + copy_method(self.of_blob_ptr_, tensor) + return tensor diff --git a/oneflow/compatible_single_client_python/framework/op_expr_util.py b/oneflow/compatible_single_client_python/framework/op_expr_util.py new file mode 100644 index 0000000000000000000000000000000000000000..79967654447f1358694831d99571559b5bcef645 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/op_expr_util.py @@ -0,0 +1,48 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from oneflow.compatible_single_client_python.framework.attr_util import ( + convert_to_user_attr_value, +) + + +def user_op_expr_call(self, *args, **kwargs): + args = list(args) + for i in range(len(args)): + arg = args[i] + if isinstance(arg, flow.Tensor): + if not arg.is_determined: + arg.determine() + args[i] = arg._local_or_consistent_tensor + + attrs = oneflow._oneflow_internal.MutableCfgAttrMap() + for attr_name, attr_value in kwargs.items(): + assert isinstance(attr_name, str) + attrs[attr_name] = convert_to_user_attr_value( + self.op_type_name, attr_name, attr_value + ) + + try: + results = self.apply(args, attrs) + except oneflow._oneflow_internal.exception.Exception: + raise oneflow._oneflow_internal.exception.GetThreadLocalLastError() + + return results + + +def RegisterMethod4UserOpExpr(): + oneflow._oneflow_internal.one.UserOpExpr.__call__ = user_op_expr_call diff --git a/oneflow/compatible_single_client_python/framework/op_util.py b/oneflow/compatible_single_client_python/framework/op_util.py new file mode 100644 index 0000000000000000000000000000000000000000..8e17b9026038dba1b8ec6a4857831ed457c08f4f --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/op_util.py @@ -0,0 +1,46 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
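# Illustrative sketch (plain Python): RegisterMethod4UserOpExpr above is a
# monkey-patch -- it installs user_op_expr_call as UserOpExpr.__call__. The
# same idiom in miniature:
class Expr:
    pass

def expr_call(self, *args, **kwargs):
    return ("apply", args, kwargs)

Expr.__call__ = expr_call
print(Expr()(1, alpha=2.0))  # ('apply', (1,), {'alpha': 2.0})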
+""" +from __future__ import absolute_import + +from oneflow.core.operator.op_conf_pb2 import OperatorConf +import oneflow._oneflow_internal + + +def IsOpConfOnlyCpuSupported(op_conf): + assert isinstance(op_conf, OperatorConf) + """ + global _cpu_only_op_type_cases + if _cpu_only_op_type_cases == None: + _cpu_only_op_type_cases = set() + for field in OperatorConf.DESCRIPTOR.oneofs_by_name["op_type"].fields: + if oneflow._oneflow_internal.IsOpTypeCaseCpuSupportOnly(field.number): + _cpu_only_op_type_cases.add(field.number) + op_type_field = op_conf.WhichOneof("op_type") + return OperatorConf.DESCRIPTOR.fields_by_name[op_type_field].number in _cpu_only_op_type_cases + """ + op_type_field = op_conf.WhichOneof("op_type") + if op_type_field == "user_conf": + return IsUserOpOnlyCpuSupported(op_conf.user_conf.op_type_name) + else: + field_number = OperatorConf.DESCRIPTOR.fields_by_name[op_type_field].number + return oneflow._oneflow_internal.IsOpTypeCaseCpuSupportOnly(field_number) + + +def IsUserOpOnlyCpuSupported(op_type_name): + return oneflow._oneflow_internal.IsOpTypeNameCpuSupportOnly(op_type_name) + + +# _cpu_only_op_type_cases = None diff --git a/oneflow/compatible_single_client_python/framework/ops.py b/oneflow/compatible_single_client_python/framework/ops.py new file mode 100644 index 0000000000000000000000000000000000000000..e6eff14ecb80d6ea414d2340a60ebf5b19561f3c --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/ops.py @@ -0,0 +1,241 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from oneflow.core.common import data_type_pb2 as data_type_util +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import ( + compile_context as compile_context, +) +from oneflow.compatible_single_client_python.framework import ( + distribute as distribute_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + stable_api, +) +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from typing import Union, Optional, Sequence + + +@oneflow_export("repeat") +@stable_api +def api_repeat( + input: oneflow._oneflow_internal.BlobDesc, + repeat_num: int, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + func = enable_if.unique([repeat]) + return func(input, repeat_num, name=name) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def repeat(input, repeat_num, name=None): + assert not flow.eager_execution_enabled() + return ( + flow.user_op_builder(name if name is not None else id_util.UniqueStr("Repeat_")) + .Op("repeat") + .Input("in", [input]) + .Output("out") + .Attr("repeat_num", repeat_num) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("acc") +def api_acc( + one: oneflow._oneflow_internal.BlobDesc, + max_acc_num: int, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + func = enable_if.unique([acc]) + return func(one, max_acc_num, name=name) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def acc(one, max_acc_num, name=None): + assert not flow.eager_execution_enabled() + return ( + flow.user_op_builder(name if name is not None else id_util.UniqueStr("Acc_")) + .Op("acc") + .Input("in", [one]) + .Output("out") + .Attr("max_acc_num", max_acc_num) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("unpack") +def api_unpack( + input: oneflow._oneflow_internal.BlobDesc, + unpack_num: int, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + func = enable_if.unique([unpack]) + return func(input, unpack_num, name=name) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def unpack(input, unpack_num, name=None): + assert not flow.eager_execution_enabled() + return ( + flow.user_op_builder(name if name is not None else id_util.UniqueStr("Unpack_")) + .Op("unpack") + .Input("in", [input]) + .Output("out") + .Attr("unpack_num", unpack_num) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("pack") +def api_pack( + input: oneflow._oneflow_internal.BlobDesc, pack_num: int, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + func = enable_if.unique([pack]) + return func(input, pack_num, name=name) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def pack(input, pack_num, name=None): + assert not flow.eager_execution_enabled() + return ( + flow.user_op_builder(name if name is not None else id_util.UniqueStr("Pack_")) + 
.Op("pack") + .Input("in", [input]) + .Output("out") + .Attr("pack_num", pack_num) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("parallel_cast") +def api_parallel_cast( + input: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, + distribute: Optional[oneflow._oneflow_internal.distribute.Distribute] = None, + gradient_distribute: Optional[ + oneflow._oneflow_internal.distribute.Distribute + ] = None, +) -> oneflow._oneflow_internal.BlobDesc: + func = enable_if.unique([parallel_cast]) + return func( + input, name=name, distribute=distribute, gradient_distribute=gradient_distribute + ) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def parallel_cast(input, name=None, distribute=None, gradient_distribute=None): + if name is None: + name = id_util.UniqueStr("ParallelCast_") + + def distribute_to_str(dist): + dist_str = "" + if dist is None: + pass + elif type(dist) is oneflow._oneflow_internal.distribute.SplitDistribute: + dist_str = "S({})".format(dist.axis) + elif type(dist) is oneflow._oneflow_internal.distribute.BroadcastDistribute: + dist_str = "B" + else: + raise ValueError("unsupported distribute") + return dist_str + + sbp_parallel = distribute_to_str(distribute) + grad_sbp_parallel = distribute_to_str(gradient_distribute) + op = ( + flow.user_op_builder(name) + .Op("parallel_cast") + .Input("in", [input]) + .Output("out") + .Attr("sbp_parallel", sbp_parallel) + .Attr("grad_sbp_parallel", grad_sbp_parallel) + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() + + +@oneflow_export("hierarchical_parallel_cast") +def api_hierarchical_parallel_cast( + input: oneflow._oneflow_internal.BlobDesc, + parallel_distribution: Sequence[str], + grad_mode: Optional[str] = None, + grad_parallel_distribution: Sequence[str] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + func = enable_if.unique([hierarchical_parallel_cast]) + return func( + input, + parallel_distribution=parallel_distribution, + grad_mode=grad_mode, + grad_parallel_distribution=grad_parallel_distribution, + name=name, + ) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def hierarchical_parallel_cast( + input, parallel_distribution, grad_mode, grad_parallel_distribution, name, +): + if name is None: + name = id_util.UniqueStr("HierarchicalParallelCast_") + + def distribute_to_str(dist): + if dist is None: + return "" + elif type(dist) is str: + return dist + elif type(dist) is oneflow._oneflow_internal.distribute.SplitDistribute: + return "S({})".format(dist.axis) + elif type(dist) is oneflow._oneflow_internal.distribute.BroadcastDistribute: + return "B" + else: + raise ValueError("unsupported distribute") + + op = ( + flow.user_op_builder(name) + .Op("hierarchical_parallel_cast") + .Input("in", [input]) + .Output("out") + .Attr( + "parallel_distribution", list(map(distribute_to_str, parallel_distribution)) + ) + .Attr("grad_mode", grad_mode or "restore") + .Attr( + "grad_parallel_distribution", + list(map(distribute_to_str, grad_parallel_distribution)) + if grad_parallel_distribution + else [], + ) + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() diff --git a/oneflow/compatible_single_client_python/framework/placement_context.py b/oneflow/compatible_single_client_python/framework/placement_context.py new file mode 100644 index 0000000000000000000000000000000000000000..5e2ea7fd36111fd04e084a117ad955a796b120bb --- /dev/null +++ 
b/oneflow/compatible_single_client_python/framework/placement_context.py @@ -0,0 +1,126 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import collections +import re + +from oneflow.core.job import placement_pb2 as placement_pb +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import op_util as op_util +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible import single_client as flow +from oneflow._oneflow_internal.oneflow.core.job import placement as placement_cfg +import oneflow._oneflow_internal + + +class PlacementScope(object): + pass + + +class EmptyPlacementScope(PlacementScope): + def __init__(self, device_tag, machine_device_ids, hierarchy): + if isinstance(machine_device_ids, (list, tuple)) == False: + machine_device_ids = [machine_device_ids] + self.device_tag_ = device_tag + self.machine_device_ids_ = machine_device_ids + self.hierarchy_ = hierarchy + + @property + def device_tag(self): + return self.device_tag_ + + @property + def machine_device_ids(self): + return self.machine_device_ids_ + + @property + def hierarchy(self): + return self.hierarchy_ + + def __enter__(self): + # do nothing + pass + + def __exit__(self, *args): + # do nothing + pass + + +class GlobalModePlacementScope(PlacementScope): + def __init__(self, scope_ctx): + self.scope_ctx_ = scope_ctx + + def __enter__(self): + self.scope_ctx_.__enter__() + + def __exit__(self, *args): + self.scope_ctx_.__exit__(*args) + + +def MakeParallelConf4Resource(device_tag, resource): + if device_tag == "gpu": + assert resource.HasField("gpu_device_num") + machine_device_ids = GetGpuMachineDeviceIds(resource) + elif device_tag == "cpu": + assert resource.HasField("cpu_device_num") + machine_device_ids = GetCpuMachineDeviceIds(resource) + else: + raise NotImplementedError + return oneflow._oneflow_internal.MakeParallelConf(device_tag, machine_device_ids) + + +def MakeMachineId2DeviceIdList(parallel_conf): + parallel_conf_str = str(parallel_conf) + global _parallel_conf_str2ofrecord + if parallel_conf_str not in _parallel_conf_str2ofrecord: + ofrecord = c_api_util.GetMachine2DeviceIdListOFRecordFromParallelConf( + parallel_conf + ) + _parallel_conf_str2ofrecord[parallel_conf_str] = { + int(k): list(v.int32_list.value) for k, v in ofrecord.feature.items() + } + return _parallel_conf_str2ofrecord[parallel_conf_str] + + +def GetParallelSize(key2list): + size = 0 + for k, v in key2list.items(): + size += len(v) + return size + + +def GetGpuMachineDeviceIds(resource): + assert resource.machine_num > 0 + assert resource.HasField("gpu_device_num") + return [ + "%s:0-%s" % (m_id, resource.gpu_device_num - 1) + for m_id in range(resource.machine_num) + ] + + +def GetCpuMachineDeviceIds(resource): + assert resource.machine_num > 0 + assert resource.HasField("cpu_device_num") + return 
[
+        "%s:0-%s" % (m_id, resource.cpu_device_num - 1)
+        for m_id in range(resource.machine_num)
+    ]
+
+
+_parallel_conf_str2ofrecord = {}
diff --git a/oneflow/compatible_single_client_python/framework/placement_util.py b/oneflow/compatible_single_client_python/framework/placement_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..76743e41fc7f84e621a6aced2e1f5794b4169620
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/placement_util.py
@@ -0,0 +1,149 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+import re
+from oneflow.compatible_single_client_python.framework import (
+    placement_context as placement_ctx,
+)
+from oneflow.compatible_single_client_python.framework import (
+    session_context as session_ctx,
+)
+from oneflow.compatible_single_client_python.framework import scope_util as scope_util
+from oneflow.compatible_single_client_python.framework import hob as hob
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    oneflow_deprecate,
+)
+from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if
+from oneflow.compatible import single_client as flow
+import traceback
+import oneflow._oneflow_internal
+
+
+@oneflow_export("device_prior_placement", "fixed_placement")
+@oneflow_deprecate()
+def deprecated_placement(*args, **kwargs):
+    print(
+        "WARNING:",
+        "oneflow.compatible.single_client.device_prior_placement/oneflow.compatible.single_client.fixed_placement",
+        "will be removed in the future, use {} instead.".format(
+            "oneflow.compatible.single_client.scope.placement"
+        ),
+    )
+    print(traceback.format_stack()[-2])
+    return api_placement(*args, **kwargs)
+
+
+@oneflow_export("scope.placement")
+def api_placement(
+    device_tag: str, machine_device_ids: str, hierarchy=None
+) -> placement_ctx.PlacementScope:
+    r"""Create a scope. All ops within the scope will run on the device(s) specified by "device_tag" and "machine_device_ids".
+
+    Args:
+        device_tag (str): Device tag, "cpu" or "gpu" only
+        machine_device_ids (str): A string or a list of strings specifying which machine(s) and device(s) to use. Each entry has the format "<NODE INDEX>:<DEVICE START INDEX>-<DEVICE END INDEX>". For example, "0:0" means device 0 of machine 0, and "1:4-6" means devices 4, 5 and 6 of machine 1.
+
+    Returns:
+        placement_ctx.PlacementScope: Placement scope
+
+    For example:
+
+    If you run the program on a single machine, you can assign a specific device like this:
+
+    .. code-block:: python
+
+        with flow.scope.placement("gpu", "0:0"):
+            logits = lenet(images, train=False)
+            loss = flow.nn.sparse_softmax_cross_entropy_with_logits(labels, logits, name="softmax_loss")
+            flow.losses.add_loss(loss)
+
+    Or, if you run a distributed program, you can assign devices on several machines like this:
+
+    .. code-block:: python
+
+        # configure machines ids, ips, etc.
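+        # each "<machine>:<start>-<end>" entry below selects devices 0-7,
+        # so machines 0 and 1 contribute 8 devices each (16 in total)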
+ with flow.scope.placement("gpu", ['0:0-7', '1:0-7']): + logits = lenet(images, train=False) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits(labels, logits, name="softmax_loss") + flow.losses.add_loss(loss) + + """ + + if oneflow._oneflow_internal.flags.with_cuda() == False and device_tag == "gpu": + device_tag = "cpu" + assert ( + isinstance(hierarchy, (list, tuple, oneflow._oneflow_internal.Size)) + or hierarchy is None + ) + func = enable_if.unique( + [ + GetEmptyPlacementScope, + GetNormalModePlacementScope, + GetGlobalModePlacementScope, + ] + ) + return func(device_tag, machine_device_ids, hierarchy) + + +@enable_if.condition( + hob.in_normal_mode & hob.env_initialized & ~hob.session_initialized +) +def GetEmptyPlacementScope(device_tag, machine_device_ids, hierarchy=None): + return placement_ctx.EmptyPlacementScope(device_tag, machine_device_ids, hierarchy) + + +@enable_if.condition(hob.in_normal_mode & hob.session_initialized) +def GetNormalModePlacementScope(device_tag, machine_device_ids, hierarchy=None): + if isinstance(machine_device_ids, tuple): + machine_device_ids = list(machine_device_ids) + if not isinstance(machine_device_ids, list): + machine_device_ids = [machine_device_ids] + sess = session_ctx.GetDefaultSession() + if hierarchy is not None: + hierarchy = oneflow._oneflow_internal.Size(tuple(hierarchy)) + scope = scope_util.MakeScope( + lambda old_scope, builder: builder.BuildScopeWithNewParallelDesc( + old_scope, device_tag, machine_device_ids, hierarchy + ) + ) + return scope_util.ScopeContext(scope) + + +@enable_if.condition(hob.in_global_mode) +def GetGlobalModePlacementScope(device_tag, machine_device_ids, hierarchy=None): + if isinstance(machine_device_ids, (list, tuple)) == False: + machine_device_ids = [machine_device_ids] + sess = session_ctx.GetDefaultSession() + if hierarchy is not None: + hierarchy = oneflow._oneflow_internal.Size(tuple(hierarchy)) + + def BuildScope(old_scope, builder): + return builder.BuildScopeWithNewParallelDesc( + old_scope, device_tag, machine_device_ids, hierarchy + ) + + scope_ctx = scope_util.ScopeContext(scope_util.MakeScope(BuildScope)) + return placement_ctx.GlobalModePlacementScope(scope_ctx) + + +def GetDefaultMachineDeviceIds(resource): + if resource.HasField("gpu_device_num") and resource.gpu_device_num > 0: + return "gpu", placement_ctx.GetGpuMachineDeviceIds(resource) + elif resource.HasField("cpu_device_num"): + return "cpu", placement_ctx.GetCpuMachineDeviceIds(resource) + else: + raise NotImplementedError diff --git a/oneflow/compatible_single_client_python/framework/profiler.py b/oneflow/compatible_single_client_python/framework/profiler.py new file mode 100644 index 0000000000000000000000000000000000000000..cc63035794dbb7ac6a346400fef36f7b17bc925c --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/profiler.py @@ -0,0 +1,29 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal + + +@oneflow_export("profiler.range_push") +def RangePush(range_name): + oneflow._oneflow_internal.profiler.RangePush(range_name) + + +@oneflow_export("profiler.range_pop") +def RangePop(): + oneflow._oneflow_internal.profiler.RangePop() diff --git a/oneflow/compatible_single_client_python/framework/pull_util.py b/oneflow/compatible_single_client_python/framework/pull_util.py new file mode 100644 index 0000000000000000000000000000000000000000..32aff237c278c6fd3579b2324de09e858b6d1fa8 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/pull_util.py @@ -0,0 +1,285 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import threading +from oneflow.compatible_single_client_python.framework import ( + local_blob as local_blob_util, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +import oneflow._oneflow_internal +import numpy as np + + +class FutureRemoteBlobs(object): + def __init__(self): + self.inited_ = False + + def get(self): + raise NotImplementedError + + def async_get(self, callback): + raise NotImplementedError + + def SetResult(self, remote_blobs): + raise NotImplementedError + + def Inited(self): + assert self.inited_ is False + self.inited_ = True + return self + + +class LazyFutureRemoteBlobs(FutureRemoteBlobs): + def __init__(self, session): + super().__init__() + self.session_ = session + self.cond_var_ = threading.Condition() + self.out_remote_blob_pullers_ = [] + self.finished_cnt_ = 0 + self.data_delivered_ = False + self.async_get_callback_ = lambda: None + + # user api + def get(self): + assert self.inited_ + assert self.data_delivered_ == False + self._Wait() + self.data_delivered_ = True + return self._TrySyncAndGetResultNdarray(self.out_remote_blob_pullers_) + + # user api + def async_get(self, callback): + assert self.inited_ + assert self.data_delivered_ == False + pullers_cnt = self._GetPullersCnt() + + def Callback(): + assert self.finished_cnt_ <= pullers_cnt + if self.finished_cnt_ == pullers_cnt: + callback( + self._TrySyncAndGetResultNdarray(self.out_remote_blob_pullers_) + ) + + try: + self.cond_var_.acquire() + if self.finished_cnt_ == pullers_cnt: + Callback() + else: + self.async_get_callback_ = Callback + finally: + self.cond_var_.release() + self.data_delivered_ = True + + def SetResult(self, out_remote_blobs): + assert self.inited_ == False + assert isinstance(self.out_remote_blob_pullers_, list) + assert len(self.out_remote_blob_pullers_) == 0 + pullers = self._MakeRemoteBlobPullers(out_remote_blobs) + self.out_remote_blob_pullers_ = pullers + for puller in self._FlatConsistentBlobPullers(pullers): + puller.AsyncPull(self._FinishCallback) + return self + + def _FinishCallback(self): + self.cond_var_.acquire() + self.finished_cnt_ += 1 
+ self.cond_var_.notify() + self.async_get_callback_() + self.cond_var_.release() + + def _Wait(self): + pullers_cnt = self._GetPullersCnt() + self.cond_var_.acquire() + while self.finished_cnt_ != pullers_cnt: + self.cond_var_.wait() + self.cond_var_.release() + + def _TrySyncAndGetResultNdarray(self, pullers): + if self.session_.HasAnyCallbackAfterFunctionReturn(): + self.session_.Sync() + return self._GetResultLocalBlob(pullers) + + def _GetResultLocalBlob(self, pullers): + assert self.inited_ + if isinstance(pullers, _BlobPuller): + return pullers.result + if isinstance(pullers, (list, tuple)): + return type(pullers)(self._GetResultLocalBlob(x) for x in pullers) + if isinstance(pullers, dict): + return {k: self._GetResultLocalBlob(v) for k, v in pullers.items()} + raise NotImplementedError + + def _GetPullersCnt(self): + cnt = 0 + for _ in self._FlatConsistentBlobPullers(self.out_remote_blob_pullers_): + cnt += 1 + return cnt + + def _FlatConsistentBlobPullers(self, pullers): + if isinstance(pullers, _BlobPuller): + for x in pullers.FlatConsistentBlobPullers(): + yield x + elif isinstance(pullers, list) or isinstance(pullers, tuple): + for elem in pullers: + for x in self._FlatConsistentBlobPullers(elem): + yield x + elif isinstance(pullers, dict): + for _, v in pullers.items(): + for x in self._FlatConsistentBlobPullers(v): + yield x + else: + raise NotImplementedError + + def _MakeRemoteBlobPullers(self, out_remote_blobs): + if isinstance(out_remote_blobs, oneflow._oneflow_internal.ConsistentBlob): + return _ConsistentBlobPuller(out_remote_blobs, self.session_) + if isinstance(out_remote_blobs, oneflow._oneflow_internal.MirroredBlob): + return _MirroredBlobPuller(out_remote_blobs, self.session_) + if isinstance(out_remote_blobs, list) or isinstance(out_remote_blobs, tuple): + return type(out_remote_blobs)( + self._MakeRemoteBlobPullers(x) for x in out_remote_blobs + ) + if isinstance(out_remote_blobs, dict): + return { + k: self._MakeRemoteBlobPullers(v) for k, v in out_remote_blobs.items() + } + raise NotImplementedError + + +class _BlobPuller(object): + def __init__(self, session): + self.session_ = session + + def FlatConsistentBlobPullers(self): + raise NotImplementedError + + @property + def result(self): + raise NotImplementedError + + +class _ConsistentBlobPuller(_BlobPuller): + def __init__(self, consistent_blob, session): + _BlobPuller.__init__(self, session) + self.result_ = None + self.consistent_blob_ = consistent_blob + + @property + def result(self): + assert self.result_ is not None + return self.result_ + + def FlatConsistentBlobPullers(self): + yield self + + def AsyncPull(self, pull_cb): + def PullCallback(of_blob): + self.result_ = local_blob_util.LocalBlob( + of_blob.CopyToNdarray(), self.consistent_blob_.is_dynamic + ) + pull_cb() + + self.session_.AsyncPull(self.consistent_blob_.op_name, PullCallback) + + +class _MirroredBlobPuller(_BlobPuller): + def __init__(self, mirrored_blob, session): + _BlobPuller.__init__(self, session) + self.mirrored_blob_ = mirrored_blob + self.sub_pullers_ = tuple( + _ConsistentBlobPuller(x, self.session_) + for x in mirrored_blob.sub_consistent_blob_list + ) + self.local_mirrored_blob_ = None + + @property + def result(self): + if self.local_mirrored_blob_ is not None: + return self.local_mirrored_blob_ + local_blob_list = [x.result.numpy() for x in self.sub_pullers_] + local_numpy = local_blob_list[0] + # TODO(chengcheng): check list length = 1 in single client. 
fix after multi-client
+        if len(local_blob_list) > 1:
+            print("WARNING: the returned tensor list will be concatenated along axis 0.")
+            local_numpy = np.concatenate(local_blob_list, axis=0)
+        self.local_mirrored_blob_ = local_blob_util.LocalBlob(
+            local_numpy, self.mirrored_blob_.is_dynamic
+        )
+        return self.local_mirrored_blob_
+
+    def FlatConsistentBlobPullers(self):
+        for x in self.sub_pullers_:
+            yield x
+
+
+class EagerFutureRemoteBlobs(FutureRemoteBlobs):
+    def __init__(self):
+        super().__init__()
+        self.blob_getters_ = None
+
+    def get(self):
+        return self._GetResultLocalBlob(self.blob_getters_)
+
+    def async_get(self, callback):
+        assert callable(callback)
+        callback(self._GetResultLocalBlob(self.blob_getters_))
+
+    def SetResult(self, remote_blobs):
+        assert self.inited_ is False
+        assert self.blob_getters_ is None
+        self.blob_getters_ = self._MakeRemoteBlobGetters(remote_blobs)
+        return self
+
+    def _MakeRemoteBlobGetters(self, remote_blobs):
+        if isinstance(remote_blobs, (list, tuple)):
+            return type(remote_blobs)(
+                self._MakeRemoteBlobGetters(blob) for blob in remote_blobs
+            )
+        elif isinstance(remote_blobs, dict):
+            return {k: self._MakeRemoteBlobGetters(v) for k, v in remote_blobs.items()}
+        elif isinstance(remote_blobs, oneflow._oneflow_internal.EagerBlobTrait):
+            return _EagerBlobGetter(remote_blobs)
+        else:
+            raise NotImplementedError
+
+    def _GetResultLocalBlob(self, getter):
+        assert self.inited_
+        if isinstance(getter, _EagerBlobGetter):
+            return getter.result
+        elif isinstance(getter, (list, tuple)):
+            return type(getter)(self._GetResultLocalBlob(g) for g in getter)
+        elif isinstance(getter, dict):
+            return {k: self._GetResultLocalBlob(v) for k, v in getter.items()}
+        else:
+            raise NotImplementedError(type(getter))
+
+
+class _EagerBlobGetter(object):
+    def __init__(self, eager_blob):
+        assert isinstance(eager_blob, oneflow._oneflow_internal.EagerBlobTrait)
+        self.eager_blob_ = eager_blob
+        self.local_tensor_ = None
+
+    @property
+    def result(self):
+        if self.local_tensor_ is not None:
+            return self.local_tensor_
+
+        self.local_tensor_ = local_blob_util.MakeLocalBlob4EagerBlob(self.eager_blob_)
+        return self.local_tensor_
diff --git a/oneflow/compatible_single_client_python/framework/push_util.py b/oneflow/compatible_single_client_python/framework/push_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..445ba0bb61cd524705cc1bc126eda0364ad41a2d
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/push_util.py
@@ -0,0 +1,293 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+""" +from __future__ import absolute_import + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework import ( + input_blob_def as input_blob_def, +) +from oneflow.compatible_single_client_python.framework import dtype as dtype_util +from oneflow.compatible_single_client_python.framework import ( + python_callback as python_callback, +) +from oneflow.compatible_single_client_python.framework import ( + balanced_splitter as balanced_splitter, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.eager import boxing_util as boxing_util +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow._oneflow_internal.oneflow.core.register import logical_blob_id as lbi_util +import oneflow._oneflow_internal +import numpy +from functools import reduce + +blob_register = oneflow._oneflow_internal.GetDefaultBlobRegister() + + +def AsyncPush(session, job_func, *arg): + assert len(arg) == len(job_func.__oneflow_input_blob_defs__) + for i in range(len(arg)): + _AsyncPushArg(session, job_func.__oneflow_input_blob_defs__[i], arg[i]) + + +def _AsyncPushArg(session, arg_blob_def, arg_ndarray): + if isinstance(arg_blob_def, (list, tuple)): + assert isinstance(arg_ndarray, (list, tuple)), "type(arg_ndarray): %s" % ( + type(arg_ndarray) + ) + assert len(arg_blob_def) == len(arg_ndarray), "%s v.s. %s" % ( + len(arg_blob_def), + len(arg_ndarray), + ) + for blob_def, ndarray in zip(arg_blob_def, arg_ndarray): + _AsyncPushArg(session, blob_def, ndarray) + elif isinstance(arg_blob_def, dict): + assert type(arg_blob_def) is type(arg_ndarray) + assert set(arg_blob_def.keys()) == set(arg_ndarray.keys()) + for k, blob_def in arg_blob_def.items(): + _AsyncPushArg(session, blob_def, arg_ndarray[k]) + else: + assert isinstance(arg_blob_def, input_blob_def.ArgBlobDef) + arg_blob_def.CheckAndAsyncPush(session, arg_ndarray) + + +def MakeEagerInputBlobs(arg_blob_def, arg_ndarray): + if isinstance(arg_blob_def, (list, tuple)): + assert isinstance(arg_ndarray, (list, tuple)), "type(arg_ndarray): %s" % ( + type(arg_ndarray) + ) + assert len(arg_blob_def) == len(arg_ndarray) + return type(arg_blob_def)( + MakeEagerInputBlobs(blob_def, ndarray) + for blob_def, ndarray in zip(arg_blob_def, arg_ndarray) + ) + elif isinstance(arg_blob_def, dict): + assert type(arg_blob_def) is type(arg_ndarray) + assert set(arg_blob_def.keys()) == set(arg_ndarray.keys()) + return { + k: MakeEagerInputBlobs(blob_def, arg_ndarray[k]) + for k, blob_def in arg_blob_def.items() + } + else: + return _CreateEagerInputBlobAndFeedValue(arg_blob_def, arg_ndarray) + + +def _CheckInputArgBlobDefValueMatch(arg_blob_def, arg_value): + if isinstance(arg_blob_def, input_blob_def.FixedTensorDef): + assert isinstance(arg_value, numpy.ndarray) + assert arg_blob_def.shape == arg_value.shape + elif isinstance(arg_blob_def, input_blob_def.MirroredTensorDef): + assert isinstance(arg_value, (list, tuple)) + for v in arg_value: + assert isinstance(v, numpy.ndarray) + assert len(v.shape) == len(arg_blob_def.shape) + assert numpy.prod(v.shape) <= numpy.prod(arg_blob_def.shape) + else: + raise NotImplementedError + + +def FeedValueToEagerBlob(blob_object, blob_def, ndarray): + physical_blob_objects = _GetPhysicalBlobObjects(blob_object, None) + feed_ctx 
= FeedContext(blob_object.op_arg_parallel_attr, ndarray) + for i, physical_blob_object in enumerate(physical_blob_objects): + feed_ctx.set_rank(i) + _FeedValueToInputPhysicalBlob(feed_ctx, blob_def, physical_blob_object) + + +def _CreateEagerInputBlobAndFeedValue(arg_blob_def, arg_ndarray): + _CheckInputArgBlobDefValueMatch(arg_blob_def, arg_ndarray) + arg_blob_object, lbi = _MakeInputBlobObject(arg_blob_def) + FeedValueToEagerBlob(arg_blob_object, arg_blob_def, arg_ndarray) + get_blob = None + if not isinstance(lbi, lbi_util.LogicalBlobId): + cfg_lbi = lbi_util.LogicalBlobId() + cfg_lbi.set_op_name(lbi.op_name) + cfg_lbi.set_blob_name(lbi.blob_name) + lbi = cfg_lbi + if isinstance(arg_blob_def, input_blob_def.FixedTensorDef): + + def get_blob(lbi, blob_object, blob_register): + blob = oneflow._oneflow_internal.EagerConsistentBlob( + lbi, blob_object, blob_register + ) + with flow.scope.consistent_view(): + return flow.identity(blob) + + elif isinstance(arg_blob_def, input_blob_def.MirroredTensorDef): + get_blob = oneflow._oneflow_internal.EagerMirroredBlob + else: + raise NotImplementedError + return get_blob(lbi, blob_object=arg_blob_object, blob_register=blob_register) + + +def _MakeInputBlobObject(arg_blob_def): + input_op_conf, lbi = _MakeInputOpConfAndRetLbi(arg_blob_def) + bn_in_op2blob_object = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + + def BuildInputInstruction(builder): + op_attribute = arg_blob_def.EagerAddAndInferOp(input_op_conf) + scope = flow.current_scope() + parallel_conf = scope.device_parallel_desc_symbol.parallel_conf + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + builder.StatelessCall( + cfg_op_attribute, parallel_conf, bn_in_op2blob_object, boxing_util.BoxingTo, + ) + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildInputInstruction) + return bn_in_op2blob_object["out"], lbi + + +def _GetPhysicalBlobObjects(logical_blob_object, lbi): + blob_register = oneflow._oneflow_internal.GetDefaultBlobRegister() + physical_blob_objects = None + + def BuildLogical2PhysicalInstruction(builder): + nonlocal physical_blob_objects + physical_blob_objects = builder.UnpackLogicalBlobToPhysicalBlobs( + logical_blob_object + ) + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildLogical2PhysicalInstruction) + return physical_blob_objects + + +def _MakeInputOpConfAndRetLbi(arg_blob_def): + assert isinstance(arg_blob_def, input_blob_def.ArgBlobDef) + op_conf = op_conf_util.OperatorConf() + op_conf.name = id_util.UniqueStr("Input_") + op_conf.input_conf.out = "out" + op_conf.input_conf.blob_conf.CopyFrom(arg_blob_def.ToInterfaceBlobConf()) + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = op_conf.input_conf.out + return op_conf, lbi + + +class FeedContext(object): + def __init__(self, op_arg_parallel_attr, arg_ndarray, rank=0): + self.op_arg_parallel_attr_ = op_arg_parallel_attr + self.arg_ndarray_ = arg_ndarray + self.rank_ = rank + # balanced_range is used in split_parallel + self.balanced_range_ = None + + def set_rank(self, rank): + self.rank_ = rank + + def GetFixedTensor(self, logical_shape): + assert isinstance(self.arg_ndarray_, numpy.ndarray) + assert self.arg_ndarray_.shape == logical_shape, "%s v.s. 
%s" % ( + self.arg_ndarray_.shape, + logical_shape, + ) + sbp_parallel = self.op_arg_parallel_attr_.sbp_parallel + parallel_num = self.op_arg_parallel_attr_.parallel_desc_symbol.parallel_num + if sbp_parallel.has_broadcast_parallel() or parallel_num == 1: + return self._AsContiguousNdArray(self.arg_ndarray_) + elif sbp_parallel.has_split_parallel(): + axis = sbp_parallel.split_parallel().axis() + start, end = self._GetBalancedRanges(logical_shape[axis])[self.rank_] + slc = [slice(None)] * len(logical_shape) + slc[axis] = slice(start, end) + ndarray = self.arg_ndarray_[tuple(slc)] + return self._AsContiguousNdArray(ndarray) + else: + raise NotImplementedError + + def _GetBalancedRanges(self, dim): + parallel_num = self.op_arg_parallel_attr_.parallel_desc_symbol.parallel_num + if self.balanced_range_ is None: + self.balanced_range_ = balanced_splitter.BalancedRanges(dim, parallel_num) + return self.balanced_range_ + + def GetMirroredTensor(self, static_shape): + capacity = reduce(lambda x, y: x * y, static_shape, 1) + assert isinstance(self.arg_ndarray_, (list, tuple)) + parallel_num = self.op_arg_parallel_attr_.parallel_desc_symbol.parallel_num + assert len(self.arg_ndarray_) == parallel_num + assert all(isinstance(a, numpy.ndarray) for a in self.arg_ndarray_) + assert self.rank_ >= 0 + assert self.rank_ < parallel_num + ndarray = self.arg_ndarray_[self.rank_] + elem_cnt = reduce(lambda x, y: x * y, ndarray.shape, 1) + assert elem_cnt <= capacity, "%s v.s. %s" % (ndarray.shape, static_shape) + return self._AsContiguousNdArray(ndarray) + + def _AsContiguousNdArray(self, ndarray): + if isinstance(ndarray, numpy.ndarray): + return ( + ndarray + if ndarray.flags["C_CONTIGUOUS"] + else numpy.ascontiguousarray(ndarray) + ) + elif isinstance(ndarray, (tuple, list)): + return type(ndarray)(self._AsContiguousNdArray(a) for a in ndarray) + else: + raise NotImplementedError + + +def _FeedValueToInputPhysicalBlob(feed_ctx, blob_def, blob_object): + assert isinstance(blob_def, input_blob_def.ArgBlobDef) + assert isinstance(blob_object, oneflow._oneflow_internal.BlobObject) + + FeedBlob = _MakeFeedBlobCallback(feed_ctx, blob_def, blob_object) + assert callable(FeedBlob) + + def BuildFeedInstruction(builder): + builder.FeedBlob( + blob_object, python_callback.GetIdForRegisteredCallback(FeedBlob) + ) + builder.InsertRemoveForeignCallbackInstruction( + blob_object.object_id, python_callback.GetIdForRegisteredCallback(FeedBlob) + ) + + oneflow._oneflow_internal.deprecated.PhysicalRun(BuildFeedInstruction) + + +def _MakeFeedBlobCallback(feed_ctx, blob_def, blob_object): + if isinstance(blob_def, input_blob_def.FixedTensorDef): + + def FeedBlob(ofblob): + ndarray = feed_ctx.GetFixedTensor(blob_def.shape) + dtype = dtype_util.convert_oneflow_dtype_to_numpy_dtype(ofblob.dtype) + assert ndarray.dtype == dtype, "%s v.s. %s" % (ndarray.dtype, dtype) + assert ndarray.shape == ofblob.static_shape, "%s v.s. %s" % ( + ndarray.shape, + ofblob.static_shape, + ) + if ofblob.CopyFromNdarray(ndarray) is False: + raise ValueError + + elif isinstance(blob_def, input_blob_def.MirroredTensorDef): + + def FeedBlob(ofblob): + ndarray = feed_ctx.GetMirroredTensor(ofblob.static_shape) + assert isinstance(ndarray, numpy.ndarray) + dtype = dtype_util.convert_oneflow_dtype_to_numpy_dtype(ofblob.dtype) + assert ndarray.dtype == dtype, "%s v.s. 
%s" % (ndarray.dtype, dtype) + if ofblob.CopyFromNdarray(ndarray) is False: + raise ValueError + + else: + raise NotImplementedError + + return FeedBlob diff --git a/oneflow/compatible_single_client_python/framework/python_callback.py b/oneflow/compatible_single_client_python/framework/python_callback.py new file mode 100644 index 0000000000000000000000000000000000000000..fb04c91fb6b633b9d38f10ef80866cd579b2182e --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/python_callback.py @@ -0,0 +1,111 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import traceback + +from oneflow.compatible_single_client_python.framework import ofblob as ofblob +from oneflow._oneflow_internal.oneflow.core.operator import ( + op_attribute as op_attribute_cfg, +) +from oneflow._oneflow_internal.oneflow.core.job import placement as placement_cfg +from oneflow._oneflow_internal.oneflow.core.job import job_conf as job_conf_cfg +from oneflow._oneflow_internal.oneflow.core.job import scope as scope_cfg +import oneflow._oneflow_internal + + +def GetIdForRegisteredCallback(cb): + assert callable(cb) + global unique_id2handler + unique_id2handler[id(cb)] = cb + return id(cb) + + +def DeleteRegisteredCallback(cb): + global unique_id2handler + assert id(cb) in unique_id2handler + del unique_id2handler[id(cb)] + + +class PythonCallback(oneflow._oneflow_internal.ForeignCallback): + def __init__(self): + oneflow._oneflow_internal.ForeignCallback.__init__(self) + + def OfBlobCall(self, unique_id, of_blob_ptr): + try: + _WatcherHandler(unique_id, of_blob_ptr) + except Exception as e: + print(traceback.format_exc()) + raise e + + def RemoveForeignCallback(self, unique_id): + global unique_id2handler + try: + del unique_id2handler[unique_id] + except Exception as e: + print(traceback.format_exc()) + raise e + + def EagerInterpretCompletedOp(self, op_attribute, parallel_conf): + try: + # TODO(hanbinbin): str() will be removed after proto obj is replaced with cfg obj in python side + interpreter_callback.InterpretCompletedOp(str(op_attribute), parallel_conf) + except Exception as e: + print(traceback.format_exc()) + raise e + + def EagerMirroredCast(self, op_attribute, parallel_conf): + try: + # TODO(hanbinbin): str() will be removed after proto obj is replaced with cfg obj in python side + interpreter_callback.MirroredCast(str(op_attribute), parallel_conf) + except Exception as e: + print(traceback.format_exc()) + raise e + + def MakeScopeSymbol(self, job_conf, parallel_conf, is_mirrored): + try: + # TODO(hanbinbin): str() will be removed after proto obj is replaced with cfg obj in python side + return interpreter_callback.MakeScopeSymbol( + job_conf, parallel_conf, is_mirrored + ) + except Exception as e: + print(traceback.format_exc()) + raise e + + def MakeParallelDescSymbol(self, parallel_conf): + try: + return interpreter_callback.MakeParallelDescSymbol(parallel_conf) + except Exception as e: + 
+            print(traceback.format_exc())
+            raise e
+
+
+def _WatcherHandler(unique_id, of_blob_ptr):
+    global unique_id2handler
+    assert unique_id in unique_id2handler
+    handler = unique_id2handler[unique_id]
+    assert callable(handler)
+    handler(ofblob.OfBlob(of_blob_ptr))
+
+
+unique_id2handler = {}
+
+# static lifetime
+# registered in the file compatible_single_client_python/framework/register_python_callback.py
+global_python_callback = PythonCallback()
+
+# initialized in the file compatible_single_client_python/framework/register_python_callback.py to avoid an import loop
+interpreter_callback = None
diff --git a/oneflow/compatible_single_client_python/framework/register_class_method_util.py b/oneflow/compatible_single_client_python/framework/register_class_method_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..72a8ee072b82ac2be14a3f29bd9949245042eaee
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/register_class_method_util.py
@@ -0,0 +1,46 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from oneflow.compatible_single_client_python.eager import (
+    eager_blob_util as eager_blob_util,
+)
+from oneflow.compatible_single_client_python.framework import (
+    op_expr_util as op_expr_util,
+)
+from oneflow.compatible_single_client_python.framework import functional as functional
+from oneflow.compatible_single_client_python.framework import generator as generator
+from oneflow.compatible_single_client_python.framework import (
+    remote_blob as remote_blob_util,
+)
+from oneflow.compatible_single_client_python.framework import blob_trait as blob_trait
+import oneflow._oneflow_internal
+
+
+def RegisterMethod4Class():
+    op_expr_util.RegisterMethod4UserOpExpr()
+    functional.RegisterFunctionalApis()
+
+    eager_blob_util.RegisterMethod4EagerPhysicalBlob()
+
+    blob_trait.RegisterBlobOperatorTraitMethod(
+        oneflow._oneflow_internal.EagerPhysicalBlob
+    )
+    blob_trait.RegisterBlobOperatorTraitMethod(oneflow._oneflow_internal.ConsistentBlob)
+    blob_trait.RegisterBlobOperatorTraitMethod(oneflow._oneflow_internal.MirroredBlob)
+
+    remote_blob_util.RegisterMethod4EagerBlobTrait()
+    remote_blob_util.RegisterMethod4LazyConsistentBlob()
+    remote_blob_util.RegisterMethod4LazyMirroredBlob()
+    remote_blob_util.RegisterMethod4EagerConsistentBlob()
diff --git a/oneflow/compatible_single_client_python/framework/register_python_callback.py b/oneflow/compatible_single_client_python/framework/register_python_callback.py
new file mode 100644
index 0000000000000000000000000000000000000000..fcd325286a7965d655a638077e658446ee2b4fc3
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/register_python_callback.py
@@ -0,0 +1,26 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.framework import ( + python_callback as python_callback, +) +from oneflow.compatible_single_client_python.eager import ( + interpreter_callback as interpreter_callback, +) +import oneflow._oneflow_internal + +python_callback.interpreter_callback = interpreter_callback diff --git a/oneflow/compatible_single_client_python/framework/remote_blob.py b/oneflow/compatible_single_client_python/framework/remote_blob.py new file mode 100644 index 0000000000000000000000000000000000000000..b78a1b6117b499e9b081f88d0c4c156dba7b3297 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/remote_blob.py @@ -0,0 +1,243 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from oneflow.compatible import single_client as flow +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + placement_context as placement_ctx, +) +from oneflow.compatible_single_client_python.framework import blob_trait as blob_trait +from oneflow.compatible_single_client_python.framework.dtype import ( + convert_proto_dtype_to_oneflow_dtype, +) +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.eager import ( + eager_blob_util as eager_blob_util, +) +from oneflow.compatible_single_client_python.eager import gradient_util as gradient_util +from oneflow.compatible_single_client_python.eager import boxing_util as boxing_util +from oneflow._oneflow_internal.oneflow.core.job import placement as placement_cfg +from oneflow._oneflow_internal.oneflow.core.register import logical_blob_id as lbi_util +import oneflow._oneflow_internal +import traceback +import sys + +blob_register = oneflow._oneflow_internal.GetDefaultBlobRegister() + + +def RemoteBlob(lbi, **kw): + api = enable_if.unique([EagerLogicalBlob, LazyRemoteBlob]) + return api(lbi, **kw) + + +@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled) +def EagerLogicalBlob(lbi, **kw): + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + lbn = lbi.op_name + "/" + lbi.blob_name + if not isinstance(lbi, lbi_util.LogicalBlobId): + cfg_lbi = lbi_util.LogicalBlobId() 
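+        # convert a protobuf LogicalBlobId into the cfg flavor expected by
+        # the _oneflow_internal blob constructors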
+        cfg_lbi.set_op_name(lbi.op_name)
+        cfg_lbi.set_blob_name(lbi.blob_name)
+        lbi = cfg_lbi
+    blob_type = oneflow._oneflow_internal.EagerConsistentBlob
+    if c_api_util.JobBuildAndInferCtx_IsMirroredBlob(job_name, lbn):
+        blob_type = oneflow._oneflow_internal.EagerMirroredBlob
+    job_name = ""
+    if ("job_name" in kw) and (kw["job_name"] is not None):
+        job_name = kw["job_name"]
+    blob_object = None
+    if "blob_object" in kw:
+        blob_object = kw["blob_object"]
+    distribute = oneflow._oneflow_internal.distribute.auto()
+    if "distribute" in kw:
+        distribute = kw["distribute"]
+    return blob_type(lbi, blob_object, blob_register, job_name, distribute)
+
+
+@enable_if.condition(~hob.eager_execution_enabled)
+def LazyRemoteBlob(lbi, **kw):
+    job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName()
+    lbn = lbi.op_name + "/" + lbi.blob_name
+    blob_type = oneflow._oneflow_internal.LazyConsistentBlob
+    if c_api_util.JobBuildAndInferCtx_IsMirroredBlob(job_name, lbn):
+        blob_type = oneflow._oneflow_internal.LazyMirroredBlob
+    if not isinstance(lbi, lbi_util.LogicalBlobId):
+        cfg_lbi = lbi_util.LogicalBlobId()
+        cfg_lbi.set_op_name(lbi.op_name)
+        cfg_lbi.set_blob_name(lbi.blob_name)
+        lbi = cfg_lbi
+    job_name = ""
+    if ("job_name" in kw) and (kw["job_name"] is not None):
+        job_name = kw["job_name"]
+    distribute = oneflow._oneflow_internal.distribute.auto()
+    if "distribute" in kw:
+        distribute = kw["distribute"]
+    return blob_type(lbi, job_name, distribute)
+
+
+@property
+def dtype(self):
+    ret = convert_proto_dtype_to_oneflow_dtype(self.get_dtype())
+    assert isinstance(ret, flow.dtype)
+    return ret
+
+
+def with_distribute(self, distribute):
+    new = type(self)(
+        self.lbi, self.job_name, oneflow._oneflow_internal.distribute.auto()
+    )
+    new.set_distribute(distribute)
+    return new
+
+
+def with_gradient_distribute(self, distribute):
+    return flow.parallel_cast(self, gradient_distribute=distribute)
+
+
+def get_lazy_shape_log_warning(self):
+    if flow.scope.mirrored_view_enabled():
+        return ("%s\n%s\n%s") % (
+            "WARNING:",
+            "You are accessing a consistent blob's shape in mirrored view; this may cause problems.",
+            "You should add 'x = flow.cast_to_current_logical_view(x)'.",
+        )
+    else:
+        return ""
+
+
+def get_mirror_shape_log_warning(self):
+    if flow.scope.consistent_view_enabled():
+        return ("%s\n%s\n%s") % (
+            "WARNING:",
+            "You are accessing a mirrored blob's shape in consistent view; this may cause problems.",
+            "You should add 'x = flow.cast_to_current_logical_view(x)'.",
+        )
+    else:
+        return ""
+
+
+def RegisterMethod4BlobDef(blob_class):
+    blob_class.dtype = dtype
+    blob_class.with_distribute = with_distribute
+    blob_class.with_gradient_distribute = with_gradient_distribute
+
+
+def RegisterMethod4LazyConsistentBlob():
+    RegisterMethod4BlobDef(oneflow._oneflow_internal.LazyConsistentBlob)
+    oneflow._oneflow_internal.LazyConsistentBlob.get_lazy_shape_log_warning = (
+        get_lazy_shape_log_warning
+    )
+
+
+def RegisterMethod4LazyMirroredBlob():
+    RegisterMethod4BlobDef(oneflow._oneflow_internal.LazyMirroredBlob)
+    oneflow._oneflow_internal.LazyMirroredBlob.get_mirror_shape_log_warning = (
+        get_mirror_shape_log_warning
+    )
+
+
+@property
+def sub_consistent_blob_list(self):
+    raise NotImplementedError
+
+
+def numpy(self, rank=None):
+    assert rank is None or rank == 0
+    return self._Numpy()
+
+
+def numpy_list(self, rank=None):
+    assert rank is None or rank == 0
+    return [self._Numpy()]
+
+
+def BlobObjectNumpy(blob_object, tmp_name=None):
+    if tmp_name is None:
+        tmp_name = id_util.UniqueStr("numpy-tmp-")
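+    # boxing moves the blob onto a single device (machine 0, device 0),
+    # registers it under tmp_name, then reads it back as an ndarray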
+ + def FetchBlobNumpy(blob_object): + consistent_blob_name = None + + def BoxingToSingleDevice(builder): + parallel_conf = placement_cfg.ParallelConf() + parallel_conf.set_device_tag(blob_object.parallel_desc_symbol.device_tag) + parallel_conf.add_device_name("{}:{}".format(0, 0)) + tmp_parallel_desc_symbol = builder.GetParallelDescSymbol(parallel_conf) + tmp_op_arg_parallel_attr = oneflow._oneflow_internal.OpArgParallelAttribute( + tmp_parallel_desc_symbol, + str(blob_object.op_arg_parallel_attr.sbp_parallel), + str(blob_object.op_arg_parallel_attr.opt_mirrored_parallel), + ) + with flow.scope.placement( + parallel_conf.device_tag(), list(parallel_conf.device_name()), + ): + tmp_blob_object = boxing_util.BoxingTo( + builder, blob_object, tmp_op_arg_parallel_attr + ) + nonlocal consistent_blob_name + consistent_blob_name = tmp_name + if not blob_register.HasObject4BlobName(consistent_blob_name): + blob_register.SetObject4BlobName(consistent_blob_name, tmp_blob_object) + + oneflow._oneflow_internal.deprecated.LogicalRun(BoxingToSingleDevice) + return oneflow._oneflow_internal.EagerPhysicalBlob( + consistent_blob_name, + blob_register, + eager_blob_util._GetPhysicalBlobHeaderCache, + ).numpy() + + return FetchBlobNumpy(blob_object) + + +def _Numpy(self): + tmp_name = "{}-consistent".format(self.logical_blob_name) + return BlobObjectNumpy(self.blob_object, tmp_name) + + +def RegisterMethod4EagerBlobTrait(): + oneflow._oneflow_internal.EagerBlobTrait.sub_consistent_blob_list = ( + sub_consistent_blob_list + ) + oneflow._oneflow_internal.EagerBlobTrait.dtype = dtype + oneflow._oneflow_internal.EagerBlobTrait._Numpy = _Numpy + oneflow._oneflow_internal.EagerBlobTrait.numpy = numpy + oneflow._oneflow_internal.EagerBlobTrait.numpy_list = numpy_list + + +def eager_with_distribute(self, distribute): + new = type(self)( + self.lbi, + blob_object=self.blob_object, + blob_register=blob_register, + job_name=self.job_name, + distribute=self.distribute, + ) + new.set_distribute(distribute) + return new + + +def RegisterMethod4EagerConsistentBlob(): + oneflow._oneflow_internal.EagerConsistentBlob.dtype = dtype + oneflow._oneflow_internal.EagerConsistentBlob.with_distribute = ( + eager_with_distribute + ) + oneflow._oneflow_internal.EagerConsistentBlob.with_gradient_distribute = ( + with_gradient_distribute + ) diff --git a/oneflow/compatible_single_client_python/framework/runtime_mode.py b/oneflow/compatible_single_client_python/framework/runtime_mode.py new file mode 100644 index 0000000000000000000000000000000000000000..0e063b035e2201dc89e4a9e7e03f19b8d267937a --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/runtime_mode.py @@ -0,0 +1,41 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from contextlib import contextmanager + +NORMAL_MODE = "NORMAL_MODE" +GLOBAL_MODE = "GLOBAL_MODE" +DEVICE_MODE = "DEVICE_MODE" + + +def CurrentMode(): + return mode_statck[0] + + +def IsValidMode(mode): + return mode == NORMAL_MODE or mode == GLOBAL_MODE or mode == DEVICE_MODE + + +@contextmanager +def ModeScope(mode): + global mode_statck + mode_statck.insert(0, mode) + try: + yield + finally: + mode_statck.pop(0) + + +mode_statck = [NORMAL_MODE] diff --git a/oneflow/compatible_single_client_python/framework/scope_symbol.py b/oneflow/compatible_single_client_python/framework/scope_symbol.py new file mode 100644 index 0000000000000000000000000000000000000000..1cf976910a42938b6358eefe46469b083bab594b --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/scope_symbol.py @@ -0,0 +1,165 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.eager.symbol import Symbol +from oneflow.compatible_single_client_python.eager import ( + symbol_storage as symbol_storage, +) +from oneflow._oneflow_internal.oneflow.core.job import scope as scope_cfg +from oneflow._oneflow_internal.oneflow.core.job import placement as placement_cfg +import oneflow._oneflow_internal +import collections +import re + + +class ScopeSymbol(Symbol): + def __init__(self, symbol_id, scope_proto, parent_scope_symbol=None): + Symbol.__init__(self, symbol_id, scope_proto) + self.parent_scope_symbol_ = parent_scope_symbol + self.job_desc_symbol_ = oneflow._oneflow_internal.GetJobConfSymbol( + scope_proto.job_desc_symbol_id() + ) + self.device_parallel_desc_symbol_ = oneflow._oneflow_internal.GetPlacementSymbol( + scope_proto.device_parallel_desc_symbol_id() + ) + self.host_parallel_desc_symbol_ = oneflow._oneflow_internal.GetPlacementSymbol( + scope_proto.host_parallel_desc_symbol_id() + ) + self.auto_increment_id_ = 0 + + def auto_increment_id(self): + self.auto_increment_id_ = self.auto_increment_id_ + 1 + return self.auto_increment_id_ + + @property + def session_id(self): + return self.data.session_id() + + @property + def job_desc_symbol(self): + return self.job_desc_symbol_ + + @property + def device_parallel_desc_symbol(self): + return self.device_parallel_desc_symbol_ + + @property + def parent_scope_symbol(self): + return self.parent_scope_symbol_ + + def BuildBySetter(self, instruction_builder, setter): + scope_proto = self._CloneScopeProto() + setter(scope_proto) + return instruction_builder.GetScopeSymbol(scope_proto) + + def BuildWithNewParallelDesc( + self, instruction_builder, device_tag, machine_device_ids + ): + if isinstance(machine_device_ids, str): + machine_device_ids = [machine_device_ids] + + def SetScopeProto(scope_proto): + parallel_conf = MakeParallelConf(device_tag, machine_device_ids) + device_parallel_desc_sym = instruction_builder.GetParallelDescSymbol( + parallel_conf + ) + parallel_conf = MakeParallelConf("cpu", machine_device_ids) + 
+            host_parallel_desc_sym = instruction_builder.GetParallelDescSymbol(
+                parallel_conf
+            )
+            scope_proto.set_device_parallel_desc_symbol_id(
+                device_parallel_desc_sym.symbol_id
+            )
+            scope_proto.set_host_parallel_desc_symbol_id(
+                host_parallel_desc_sym.symbol_id
+            )
+
+        return self.BuildBySetter(instruction_builder, SetScopeProto)
+
+    def BuildWithNewParallelConf(self, instruction_builder, parallel_conf):
+        (
+            device_tag,
+            machine_device_ids,
+            hierarchy,
+        ) = oneflow._oneflow_internal.GetDeviceTagAndMachineDeviceIdsAndHierarchy(
+            parallel_conf
+        )
+        return self.BuildWithNewParallelDesc(
+            instruction_builder, device_tag, machine_device_ids
+        )
+
+    def BuildWithNewIsMirrored(self, instruction_builder, is_mirrored):
+        def SetScopeProto(scope_proto):
+            if is_mirrored:
+                scope_proto.mutable_opt_mirrored_parallel_conf().mutable_mirrored_parallel()
+            else:
+                scope_proto.mutable_opt_mirrored_parallel_conf().clear_mirrored_parallel()
+
+        return self.BuildBySetter(instruction_builder, SetScopeProto)
+
+    def BuildWithNewScopeName(self, instruction_builder, scope_name):
+        def SetScopeProto(scope_proto):
+            scope_proto.add_scope_op_name_prefixes(scope_name)
+
+        return self.BuildBySetter(instruction_builder, SetScopeProto)
+
+    def _CloneScopeProto(self):
+        scope_proto = scope_cfg.ScopeProto()
+        scope_proto.CopyFrom(self.data)
+        return scope_proto
+
+
+def BuildInitialScope(
+    instruction_builder,
+    session_id,
+    job_conf,
+    device_tag,
+    machine_device_ids,
+    is_mirrored,
+):
+    scope_proto = scope_cfg.ScopeProto()
+    scope_proto.set_session_id(session_id)
+    job_conf_sym = instruction_builder.GetJobConfSymbol(job_conf)
+    scope_proto.set_job_desc_symbol_id(job_conf_sym.symbol_id)
+    parallel_conf = MakeParallelConf(device_tag, machine_device_ids)
+    device_parallel_desc_sym = instruction_builder.GetParallelDescSymbol(parallel_conf)
+    scope_proto.set_device_parallel_desc_symbol_id(device_parallel_desc_sym.symbol_id)
+    parallel_conf = MakeParallelConf("cpu", machine_device_ids)
+    host_parallel_desc_sym = instruction_builder.GetParallelDescSymbol(parallel_conf)
+    scope_proto.set_host_parallel_desc_symbol_id(host_parallel_desc_sym.symbol_id)
+    if is_mirrored:
+        scope_proto.mutable_opt_mirrored_parallel_conf().mutable_mirrored_parallel()
+    else:
+        scope_proto.mutable_opt_mirrored_parallel_conf().clear_mirrored_parallel()
+    return instruction_builder.GetScopeSymbol(scope_proto)
+
+
+def MakeParallelConf(device_tag, machine_device_ids):
+    assert isinstance(machine_device_ids, (list, tuple))
+
+    parallel_conf = placement_cfg.ParallelConf()
+    parallel_conf.set_device_tag(device_tag)
+    for machine_device_id in machine_device_ids:
+        assert isinstance(
+            machine_device_id, str
+        ), "type of machine_device_id (%s) is not string" % type(machine_device_id)
+        assert re.match(r"^\d+:\d+(-\d+)?$", machine_device_id) is not None, (
+            "machine_device_id: %s is not valid" % machine_device_id
+        )
+        parallel_conf.add_device_name(machine_device_id)
+
+    return parallel_conf
diff --git a/oneflow/compatible_single_client_python/framework/scope_util.py b/oneflow/compatible_single_client_python/framework/scope_util.py
new file mode 100644
index 0000000000000000000000000000000000000000..61141b6a18f8124836facd90ca232f5190cbbd77
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/scope_util.py
@@ -0,0 +1,119 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import traceback +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.framework import attr_util as attr_util +from oneflow._oneflow_internal.oneflow.core.job import job_conf as job_conf_cfg +from contextlib import contextmanager +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + oneflow_deprecate, +) +import oneflow._oneflow_internal + + +@oneflow_export("experimental.scope.config") +def api_scope_config(**kwargs): + name2default = session_ctx.GetDefaultSession().scope_attr_name2default_val + + def SetScopeProto(scope_proto): + for attr_name, py_value in kwargs.items(): + assert attr_name in name2default + attr_util.SetAttrValue( + scope_proto.mutable_attr_name2attr_value()[attr_name], + py_value, + name2default[attr_name], + ) + + sess = session_ctx.GetDefaultSession() + scope = MakeScope( + lambda old_scope, builder: builder.BuildScopeByProtoSetter( + old_scope, SetScopeProto + ) + ) + return ScopeContext(scope) + + +@oneflow_export("current_scope") +def api_current_scope(): + r""" Return current scope + """ + return oneflow._oneflow_internal.GetCurrentScope() + + +@oneflow_export("scope.current_scope") +@oneflow_deprecate() +def deprecated_current_scope(*args, **kwargs): + print( + "WARNING:", + "oneflow.compatible.single_client.scope.current_scope", + "will be removed in the future, use {} instead.".format( + "oneflow.compatible.single_client.current_scope" + ), + ) + print(traceback.format_stack()[-2]) + + return api_current_scope(*args, **kwargs) + + +def MakeScope(build_func): + scope = None + old_scope = oneflow._oneflow_internal.GetCurrentScope() + assert old_scope is not None + + def BuildScope(builder): + nonlocal scope + scope = build_func(old_scope, builder) + assert scope is not None + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildScope) + return scope + + +def MakeInitialScope(job_conf, device_tag, machine_device_ids, hierarchy, is_mirrored): + scope = None + + def BuildInitialScope(builder): + nonlocal scope + session_id = session_ctx.GetDefaultSession().id + scope = builder.BuildInitialScope( + session_id, job_conf, device_tag, machine_device_ids, hierarchy, is_mirrored + ) + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildInitialScope) + return scope + + +def InitScopeStack(): + job_conf = job_conf_cfg.JobConfigProto() + job_conf.mutable_predict_conf() + job_conf.set_job_name("") + scope = MakeInitialScope(job_conf, "cpu", ["0:0"], None, is_mirrored=False) + oneflow._oneflow_internal.InitGlobalScopeStack(scope) + + +@contextmanager +def ScopeContext(scope): + old_scope = oneflow._oneflow_internal.GetCurrentScope() + oneflow._oneflow_internal.GlobalScopeStackPush(scope) + try: + yield + finally: + assert oneflow._oneflow_internal.GetCurrentScope() is scope + oneflow._oneflow_internal.GlobalScopeStackPop() + assert oneflow._oneflow_internal.GetCurrentScope() is old_scope diff --git a/oneflow/compatible_single_client_python/framework/session_context.py b/oneflow/compatible_single_client_python/framework/session_context.py new 
file mode 100644 index 0000000000000000000000000000000000000000..f829a4655aac11faa855f2ee552f131537c2861a --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/session_context.py @@ -0,0 +1,60 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import functools +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal + + +class SessionStatus: + OPEN = "OPEN" + + RUNNING = "RUNNING" + + CLOSED = "CLOSED" + + +def GetDefaultSession(): + global _sess_id2sess + default_sess_id = oneflow._oneflow_internal.GetDefaultSessionId() + assert default_sess_id in _sess_id2sess + return _sess_id2sess[default_sess_id] + + +def OpenDefaultSession(sess): + global _sess_id2sess + assert sess.id not in _sess_id2sess + _sess_id2sess[sess.id] = sess + + +def TryCloseDefaultSession(): + global _sess_id2sess + default_sess_id = oneflow._oneflow_internal.GetDefaultSessionId() + assert default_sess_id in _sess_id2sess + if default_sess_id in _sess_id2sess: + _sess_id2sess[default_sess_id].TryClose() + del _sess_id2sess[default_sess_id] + + +def try_init_default_session(func): + @functools.wraps(func) + def Func(*args, **kwargs): + GetDefaultSession().TryInit() + return func(*args, **kwargs) + + return Func + + +_sess_id2sess = {} diff --git a/oneflow/compatible_single_client_python/framework/session_util.py b/oneflow/compatible_single_client_python/framework/session_util.py new file mode 100644 index 0000000000000000000000000000000000000000..c0aa494a27b14e805dbccc4c6275be547c0c993d --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/session_util.py @@ -0,0 +1,539 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +import threading +from oneflow.core.job.job_set_pb2 import ConfigProto +from oneflow.core.job import job_set_pb2 as job_set_util +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import compiler as compiler +from oneflow.compatible_single_client_python.framework import config_util as config_util +from oneflow.compatible_single_client_python.framework import env_util as env_util +from oneflow.compatible_single_client_python.framework import typing_util as oft_util +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.framework import ( + job_instance as job_instance_util, +) +from oneflow.compatible_single_client_python.framework import push_util as push_util +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.eager import op_executor as op_executor +from oneflow.compatible_single_client_python.experimental import ( + interface_op_read_and_write, +) +from oneflow.core.job.job_set_pb2 import ConfigProto +from oneflow.compatible_single_client_python.framework.function_desc import FunctionDesc +from oneflow.compatible_single_client_python.framework import module as module_util +from oneflow.compatible_single_client_python.framework.pull_util import ( + LazyFutureRemoteBlobs, + EagerFutureRemoteBlobs, +) +from oneflow.compatible_single_client_python.framework.session_context import ( + SessionStatus, +) +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + oneflow_deprecate, +) +from oneflow.compatible_single_client_python.framework.function_desc import FunctionDesc +from oneflow.compatible_single_client_python.framework.check_point import ( + SnapshotManager, +) +from oneflow.compatible_single_client_python.framework import ( + check_point_v2 as check_point_v2, +) +from contextlib import contextmanager +from typing import Callable +import inspect +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +import traceback +from google.protobuf import text_format + + +class Session(object): + def __init__(self, sess_id): + self.job_name2function_desc_ = {} + self.job_name2job_ = {} + self.status_ = SessionStatus.OPEN + self.cond_var_ = threading.Condition() + self.running_job_cnt_ = 0 + self.inter_user_job_info_ = None + self.uuid2watch_handler_ = {} + self.config_proto_ = None + self.resource_ = None + self.job_name2var_name2var_blob_ = {} + self.job_name2module_name2module_ = {} + self.existed_module_names_ = set() + self.var_name2var_blob_ = {} + # parallel desc symbol id in op attribute does not always correct + # for lazy ops as parallel conf may be updated in some passes + # (like optimizer_placement_optimization_pass) + self.interface_op_name2op_attr_ = {} + self.interface_op_name2job_name_ = {} + self.lazy_interface_op_name2parallel_conf_ = {} + self.op_name2lazy_blob_cache_ = {} + self.job_name2name_scope_stack_ = {} + self.eager_global_function_desc_stack_ = [] + self.function_flag_name2default_val_ = {} + self._UpdateFunctionFlagName2DefaultVal() + self.scope_attr_name2default_val_ = {} + self._UpdateScopeAttrName2DefaultVal() + self.sess_ = oneflow._oneflow_internal.RegsiterSession(sess_id) + self.backward_blob_register_ = 
oneflow._oneflow_internal.BlobRegister() + self.snapshot_mgr_ = SnapshotManager() + self.eager_config_proto_ctx_ = None + + @property + def id(self): + return self.sess_.id + + @property + def status(self): + return self.status_ + + @property + def is_running(self): + return self.status_ is SessionStatus.RUNNING + + @property + def config_proto(self): + if self.config_proto_ is None: + self.config_proto_ = _GetDefaultConfigProto() + return self.config_proto_ + + @property + def resource(self): + if self.resource_ is None: + return flow.env.current_resource() + else: + return self.resource_ + + @property + def uuid2watch_handler(self): + return self.uuid2watch_handler_ + + @property + def function_flag_name2default_val(self): + return self.function_flag_name2default_val_ + + @property + def scope_attr_name2default_val(self): + return self.scope_attr_name2default_val_ + + @property + def inter_user_job_info(self): + return self.inter_user_job_info_ + + @property + def job_name2name_scope_stack(self): + return self.job_name2name_scope_stack_ + + @property + def backward_blob_register(self): + return self.backward_blob_register_ + + @property + def snapshot_mgr(self): + return self.snapshot_mgr_ + + @property + def var_name2var_blob(self): + return self.var_name2var_blob_ + + def GetLazyFunctionDesc(self, job_name): + if job_name in self.job_name2function_desc_: + return self.job_name2function_desc_[job_name] + return None + + def AnyGlobalFunctionDefined(self): + return len(self.job_name2function_desc_) > 0 + + def GetJobConfigProto(self, job_name): + return self.job_name2function_desc_[job_name].job_config_proto + + def GetFunctionDesc(self, job_name): + return self.job_name2function_desc_[job_name] + + def _UpdateFunctionFlagName2DefaultVal(self): + items = c_api_util.GetFunctionConfigDef().attr_name2attr_def.items() + self.function_flag_name2default_val_ = {k: v.default_val for k, v in items} + + def _UpdateScopeAttrName2DefaultVal(self): + items = c_api_util.GetScopeConfigDef().attr_name2attr_def.items() + self.scope_attr_name2default_val_ = {k: v.default_val for k, v in items} + + def TryInit(self): + if self.status_ is SessionStatus.OPEN: + self.Init() + return self + + def UpdateInfo4InterfaceOp(self): + for op_attr in c_api_util.GetInterfaceOpAttributes().op_attribute: + self.interface_op_name2op_attr_[op_attr.op_conf.name] = op_attr + for job in c_api_util.GetJobSet().job: + op_name2parallel_conf = {} + for placement_group in job.placement.placement_group: + for op_name in placement_group.op_set.op_name: + op_name2parallel_conf[op_name] = placement_group.parallel_conf + for op_conf in job.net.op: + if c_api_util.IsInterfaceOpConf(op_conf): + self.interface_op_name2job_name_[ + op_conf.name + ] = job.job_conf.job_name + self.lazy_interface_op_name2parallel_conf_[ + op_conf.name + ] = op_name2parallel_conf[op_conf.name] + + def Init(self): + assert self.status_ is SessionStatus.OPEN + self.status_ = SessionStatus.RUNNING + if not oneflow._oneflow_internal.IsEnvInited(): + flow.env.init() + _TryCompleteConfigProto(self.config_proto) + self.resource_ = self.config_proto.resource + if not oneflow._oneflow_internal.EagerExecutionEnabled(): + c_api_util.InitLazyGlobalSession(self.config_proto) + for job_name, func_desc in self.job_name2function_desc_.items(): + compiler.Compile(self, func_desc, self.config_proto) + self.existed_module_names_ = set() + self.job_name2var_name2var_blob_ = dict() + assert len(self.job_name2function_desc_.items()) > 0 + 
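# Every job registered via @global_function has been compiled above;
+            # starting the lazy global session launches the runtime that
+            # executes those compiled jobs.
+            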
oneflow._oneflow_internal.StartLazyGlobalSession() + self.inter_user_job_info_ = c_api_util.GetInterUserJobInfo() + # Get latest op_attr and job_name after compiler.Compile + self.UpdateInfo4InterfaceOp() + if not config_util.api_legacy_model_io_enabled(): + check_point_v2.Init() + else: + self.eager_config_proto_ctx_ = oneflow._oneflow_internal.LogicalConfigProtoContext( + str(self.config_proto) + ) + return self + + def FindOrCreateLazyBlob(self, op_name, Create): + if op_name not in self.op_name2lazy_blob_cache_: + self.op_name2lazy_blob_cache_[op_name] = Create() + return self.op_name2lazy_blob_cache_[op_name] + + def TryClose(self): + if self.status_ is SessionStatus.RUNNING: + self.Close() + + def Close(self): + assert self.status_ is SessionStatus.RUNNING + self.Sync() + assert len(self.job_name2var_name2var_blob_) == 0 + del self.var_name2var_blob_ + del self.job_name2module_name2module_ + self.ReleaseLazyRefBlob() + self.ForceReleaseEagerBlobs() + oneflow._oneflow_internal.StopLazyGlobalSession() + oneflow._oneflow_internal.DestroyLazyGlobalSession() + self.resource_ = None + if self.eager_config_proto_ctx_: + del self.eager_config_proto_ctx_ + + def AddJob(self, function_desc): + assert self.status_ is SessionStatus.OPEN + assert isinstance(function_desc, FunctionDesc) + self.job_name2function_desc_[function_desc.job_func.__name__] = function_desc + + def StashJob(self, job_name=None, key=None): + assert self.status_ is SessionStatus.RUNNING, "current status {}".format( + self.status_ + ) + job = c_api_util.GetCurrentJob() + if job_name is not None: + assert ( + job.job_conf.job_name == job_name + ), "{} is not current job name".format(job_name) + else: + job_name = job.job_conf.job_name + if key is None: + key = job_name + self.job_name2job_[key] = job + + def Job(self, job_name): + assert self.status_ is SessionStatus.RUNNING + if job_name not in self.job_name2job_: + return None + return self.job_name2job_[job_name] + + def Sync(self): + assert self.status_ is SessionStatus.RUNNING + self.cond_var_.acquire() + while self.running_job_cnt_ > 0: + self.cond_var_.wait() + assert self.running_job_cnt_ == 0 + self.cond_var_.release() + + def ReleaseLazyRefBlob(self): + self.op_name2lazy_blob_cache_.clear() + + def ForceReleaseEagerBlobs(self): + oneflow._oneflow_internal.GetDefaultBlobRegister().ForceReleaseAll() + self.backward_blob_register_.ForceReleaseAll() + + def LazyRun(self, job_func, *arg): + assert self.status_ is SessionStatus.RUNNING + remote_blobs = self.LaunchUserJob(job_func, *arg) + if remote_blobs is None: + return + future_blob = LazyFutureRemoteBlobs(self).SetResult(remote_blobs).Inited() + annotation = inspect.signature(job_func).return_annotation + return oft_util.TransformGlobalFunctionResult(future_blob, annotation) + + def EagerRun(self, function_desc, *arg): + with self._EagerGlobalFunctionDescScope(function_desc): + remote_blobs = compiler.EagerRun( + self, function_desc, self.config_proto, arg + ) + if remote_blobs is None: + return + future_blob = EagerFutureRemoteBlobs().SetResult(remote_blobs).Inited() + + annotation = inspect.signature(function_desc.job_func).return_annotation + return oft_util.TransformGlobalFunctionResult(future_blob, annotation) + + def LaunchUserJob(self, job_func, *arg): + assert self.status_ is SessionStatus.RUNNING + job_name = job_func.__name__ + push_util.AsyncPush(self, job_func, *arg) + self.LaunchJob(job_instance_util.MakeUserJobInstance(job_name)) + return job_func.__oneflow_output_remote_blobs__ + + def LaunchJob(self, 
job_instance): + assert self.status_ is SessionStatus.RUNNING + self._IncRunningJobCnt() + job_instance.AddPostFinishCallback(lambda _: self._DecRunningJobCnt()) + oneflow._oneflow_internal.LaunchJob(job_instance) + + def AsyncPush(self, op_name, push_data_cb): + assert self.status_ is SessionStatus.RUNNING + push_job_name = self.inter_user_job_info.input_or_var_op_name2push_job_name[ + op_name + ] + self.LaunchJob( + job_instance_util.MakePushJobInstance(push_job_name, op_name, push_data_cb) + ) + + def AsyncPull(self, op_name, pull_data_cb): + assert self.status_ is SessionStatus.RUNNING + pull_job_name = self.inter_user_job_info.output_or_var_op_name2pull_job_name[ + op_name + ] + self.LaunchJob( + job_instance_util.MakePullJobInstance(pull_job_name, op_name, pull_data_cb) + ) + + def HasAnyCallbackAfterFunctionReturn(self): + return len(self.uuid2watch_handler) > 0 + + def StashVariableBlob4Job(self, job_name, var_name, var_blob): + if var_name not in self.var_name2var_blob_: + self.var_name2var_blob_[var_name] = var_blob + if job_name not in self.job_name2var_name2var_blob_: + self.job_name2var_name2var_blob_[job_name] = dict() + assert var_name not in self.job_name2var_name2var_blob_[job_name] + self.job_name2var_name2var_blob_[job_name][var_name] = var_blob + + def AddInfo4InterfaceOpName(self, interface_op_name, op_attribute): + if flow.eager_execution_enabled(): + self.interface_op_name2op_attr_[interface_op_name] = op_attribute + self.interface_op_name2job_name_[ + interface_op_name + ] = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + else: + # In lazy mode, we update fields with + # the latest info in another function after compiler.Compile + pass + + def OpAttribute4InterfaceOpName(self, interface_op_name): + return self.interface_op_name2op_attr_[interface_op_name] + + def ParallelConf4LazyInterfaceOpName(self, interface_op_name): + return self.lazy_interface_op_name2parallel_conf_[interface_op_name] + + def JobName4InterfaceOpName(self, interface_op_name): + return self.interface_op_name2job_name_[interface_op_name] + + @property + def interface_ops(self): + return self.interface_op_name2op_attr_.keys() + + # return global_variable_blob, job_variable_blob + def TryGetVariableBlobOfJobFromStash(self, job_name, var_name): + if var_name not in self.var_name2var_blob_: + return None, None + + global_variable_blob = self.var_name2var_blob_[var_name] + + if job_name not in self.job_name2var_name2var_blob_: + return global_variable_blob, None + + var_name2var_blob = self.job_name2var_name2var_blob_[job_name] + if var_name not in var_name2var_blob: + return global_variable_blob, None + + return global_variable_blob, var_name2var_blob[var_name] + + def CurrentEagerGlobalFunctionDesc(self): + if len(self.eager_global_function_desc_stack_) == 0: + return None + return self.eager_global_function_desc_stack_[0] + + def has_empty_is_mirrored_strategy_enabled_stack(self): + return self.sess_.is_mirrored_strategy_enabled_stack_size() == 0 + + def push_mirrored_strategy_enabled(self, val): + assert isinstance(val, bool) + self.sess_.push_mirrored_strategy_enabled(val) + + def pop_mirrored_strategy_enabled(self): + self.sess_.pop_mirrored_strategy_enabled() + + def is_mirrored_strategy_enabled(self): + return self.sess_.is_mirrored_strategy_enabled() + + def is_consistent_strategy_enabled(self): + return self.sess_.is_consistent_strategy_enabled() + + @contextmanager + def _EagerGlobalFunctionDescScope(self, function_desc): + assert 
len(self.backward_blob_register.blob_name2object) == 0
+        assert len(self.job_name2var_name2var_blob_) == 0
+        self.eager_global_function_desc_stack_.insert(0, function_desc)
+        try:
+            yield
+        finally:
+            self.existed_module_names_ = set()
+            self.job_name2var_name2var_blob_ = dict()
+            self.eager_global_function_desc_stack_.pop(0)
+            keys = list(dict(self.backward_blob_register.blob_name2object).keys())
+            for key in keys:
+                self.backward_blob_register.ClearObject4BlobName(key)
+
+    def _IncRunningJobCnt(self):
+        assert self.status_ is SessionStatus.RUNNING
+        self.cond_var_.acquire()
+        self.running_job_cnt_ += 1
+        self.cond_var_.release()
+
+    def _DecRunningJobCnt(self):
+        self.cond_var_.acquire()
+        self.running_job_cnt_ -= 1
+        self.cond_var_.notify()
+        self.cond_var_.release()
+
+    def __del__(self):
+        oneflow._oneflow_internal.ClearSessionById(self.id)
+
+
+@oneflow_export("find_or_create_module")
+def api_find_or_create_module(
+    module_name: str, create: Callable[[], None], reuse: bool = False
+):
+    func = enable_if.unique([find_or_create_module])
+    return func(module_name, create, reuse)
+
+
+@enable_if.condition(hob.in_global_mode)
+def find_or_create_module(module_name, create, reuse=False):
+    assert callable(create)
+    sess = session_ctx.GetDefaultSession()
+    job_name = flow.current_global_function_desc().job_config_proto.job_name()
+    if job_name not in sess.job_name2module_name2module_:
+        sess.job_name2module_name2module_[job_name] = {}
+    module_name2module = sess.job_name2module_name2module_[job_name]
+    if module_name not in module_name2module:
+        module = create()
+        assert isinstance(module, module_util.Module)
+        module_name2module[module_name] = module
+    else:
+        if not reuse:
+            assert module_name not in sess.existed_module_names_, (
+                "duplicated module_name `%s' in global_function `%s'"
+                % (module_name, job_name)
+            )
+        else:
+            # do nothing
+            pass
+    sess.existed_module_names_.add(module_name)
+    return module_name2module[module_name]
+
+
+@oneflow_export("eager_execution_enabled")
+def api_eager_execution_enabled() -> bool:
+    """Check whether eager execution mode is enabled for the current job.
+
+    Returns:
+        bool: True if eager execution mode is enabled, otherwise False.
+    """
+    return oneflow._oneflow_internal.EagerExecutionEnabled()
+
+
+@oneflow_export("clear_default_session")
+def api_clear_default_session() -> None:
+    r"""Clear the default session. All compiled OneFlow functions will be deleted.
+    """
+    func = enable_if.unique([clear_default_session])
+    return func()
+
+
+@enable_if.condition(hob.in_normal_mode)
+def clear_default_session():
+    session_ctx.TryCloseDefaultSession()
+    session_ctx.OpenDefaultSession(Session(oneflow._oneflow_internal.NewSessionId()))
+
+
+@oneflow_export("sync_default_session")
+def api_sync_default_session() -> None:
+    r"""Synchronize the default session. Block until every synchronous OneFlow function and its callback finishes running.
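+
+    A minimal illustrative sketch (assuming a global function `foo` has
+    already been defined via oneflow.compatible.single_client.global_function)::
+
+        foo()  # launch the compiled job
+        oneflow.compatible.single_client.sync_default_session()  # block until foo and its callbacks finish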
+ """ + func = enable_if.unique([sync_default_session]) + return func() + + +@enable_if.condition(hob.in_normal_mode) +def sync_default_session() -> None: + session_ctx.GetDefaultSession().Sync() + + +def _TryCompleteConfigProto(config_proto): + if config_proto.resource.machine_num == 0: + config_proto.resource.machine_num = oneflow._oneflow_internal.GetNodeSize() + + +def _GetDefaultConfigProto(): + config_proto = job_set_util.ConfigProto() + config_proto.resource.machine_num = 0 + if oneflow._oneflow_internal.flags.with_cuda(): + config_proto.resource.gpu_device_num = 1 + else: + config_proto.resource.cpu_device_num = 1 + config_proto.resource.gpu_device_num = 0 + config_proto.session_id = session_ctx.GetDefaultSession().id + return config_proto + + +@oneflow_export("InitEagerGlobalSession") +def TmpInitEagerGlobalSession(): + config_pb = _GetDefaultConfigProto() + config_proto_str = text_format.MessageToString(config_pb) + oneflow._oneflow_internal.InitEagerGlobalSession(config_proto_str) diff --git a/oneflow/compatible_single_client_python/framework/sysconfig.py b/oneflow/compatible_single_client_python/framework/sysconfig.py new file mode 100644 index 0000000000000000000000000000000000000000..c93189119250bf13a30b8c63b02099724046661c --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/sysconfig.py @@ -0,0 +1,82 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +import os +import imp +import importlib.util + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from typing import List +import oneflow._oneflow_internal + + +@oneflow_export("sysconfig.get_include") +def get_include() -> str: + return os.path.join(os.path.dirname(oneflow.__file__), "include") + + +@oneflow_export("sysconfig.get_lib") +def get_lib() -> str: + return os.path.dirname(oneflow.__file__) + + +@oneflow_export("sysconfig.get_compile_flags") +def get_compile_flags() -> List[str]: + flags = [] + flags.append("-I{}".format(get_include())) + flags.append("-DHALF_ENABLE_CPP11_USER_LITERALS=0") + if oneflow._oneflow_internal.flags.with_cuda(): + flags.append("-DWITH_CUDA") + if oneflow._oneflow_internal.flags.use_cxx11_abi(): + flags.append("-D_GLIBCXX_USE_CXX11_ABI=1") + else: + flags.append("-D_GLIBCXX_USE_CXX11_ABI=0") + return flags + + +@oneflow_export("sysconfig.get_link_flags") +def get_link_flags() -> List[str]: + flags = [] + flags.append("-L{}".format(get_lib())) + file, oneflow_internal_lib_path, _ = imp.find_module( + "_oneflow_internal", [get_lib()] + ) + if file: + file.close() + flags.append("-l:{}".format(os.path.basename(oneflow_internal_lib_path))) + return flags + + +@oneflow_export("sysconfig.with_cuda") +def with_cuda() -> bool: + return oneflow._oneflow_internal.flags.with_cuda() + + +@oneflow_export("sysconfig.with_xla") +def with_xla() -> bool: + return oneflow._oneflow_internal.flags.with_xla() + + +@oneflow_export("sysconfig.has_rpc_backend_grpc") +def has_rpc_backend_grpc() -> bool: + return oneflow._oneflow_internal.flags.has_rpc_backend_grpc() + + +@oneflow_export("sysconfig.has_rpc_backend_local") +def has_rpc_backend_local() -> bool: + return oneflow._oneflow_internal.flags.has_rpc_backend_local() diff --git a/oneflow/compatible_single_client_python/framework/tensor.py b/oneflow/compatible_single_client_python/framework/tensor.py new file mode 100644 index 0000000000000000000000000000000000000000..405e27a0872a8b84e0e860d7d6f3677ea28508dc --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/tensor.py @@ -0,0 +1,974 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from oneflow.core.job import initializer_conf_pb2 as initializer_conf_util +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +import oneflow._oneflow_internal +import numpy as np +import inspect +from typing import Union +from oneflow._oneflow_internal.oneflow.core.job import placement as placement_cfg +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + check_point_v2 as check_point_v2, +) +from oneflow.compatible_single_client_python.framework.function_util import ( + global_function_or_identity, +) +from oneflow.compatible_single_client_python.framework import runtime_mode as rt_mode +from oneflow.compatible_single_client_python.framework import ofblob as ofblob_util +from oneflow.compatible_single_client_python.lib.core import async_util as async_util +from oneflow.compatible_single_client_python.ops import ( + initializer_util as initializer_util, +) +from oneflow.compatible_single_client_python.framework import dtype as dtype_util +from oneflow.compatible_single_client_python.framework import ( + tensor_str as tensor_str_util, +) +from oneflow.compatible import single_client as flow + + +def register_local_tensor_method(name=None): + def decorator(method): + if name is None: + op_name = method.__name__ + else: + op_name = name + setattr(oneflow._oneflow_internal.Tensor, op_name, method) + return method + + return decorator + + +@register_local_tensor_method("numpy") +def _local_tensor_numpy(eager_local_tensor): + if eager_local_tensor.dtype == flow.tensor_buffer: + shapes, dtypes = eager_local_tensor._tensor_buffer_shapes_and_dtypes + tensors = flow.experimental.tensor_buffer_to_list_of_tensors( + Tensor(eager_local_tensor), shapes, dtypes + ) + return [t.numpy() for t in tensors] + method_name = eager_local_tensor._get_copy_mirrored_tensor_to_numpy_func_name() + copy_to_numpy = getattr(eager_local_tensor, method_name) + ndarray = np.empty( + tuple(eager_local_tensor.shape), + dtype=flow.convert_oneflow_dtype_to_numpy_dtype(eager_local_tensor.dtype), + ) + copy_to_numpy(ndarray) + return ndarray + + +@register_local_tensor_method("copy_") +def _copy_from_numpy_to_eager_local_tensor(eager_local_tensor, np_arr): + method_name = eager_local_tensor._get_copy_mirrored_tensor_from_numpy_func_name() + copy_from_numpy = getattr(eager_local_tensor, method_name) + assert np_arr.dtype == flow.convert_oneflow_dtype_to_numpy_dtype( + eager_local_tensor.dtype + ) + if np_arr.shape == (): + assert tuple(eager_local_tensor.shape) == (1,) + else: + assert np_arr.shape == tuple(eager_local_tensor.shape) + copy_from_numpy(np_arr) + + +@register_local_tensor_method("_init_by_initializer_conf") +def _init_eager_local_tensor_by_initializer_conf( + eager_local_tensor, initializer_conf, random_seed=0 +): + shape = tuple(eager_local_tensor.shape) + initializer = initializer_util.GetInitializer(initializer_conf, random_seed, shape) + # initializer is None if and only if the initializer_conf is empty_initializer + if initializer is None: + return + + _copy_from_numpy_to_eager_local_tensor( + eager_local_tensor, + check_point_v2.generate_values_by_initializer( + initializer, shape, eager_local_tensor.dtype + ), + ) + + +@oneflow_export("tensor") +def construct_tensor( + data, + dtype=None, + device=None, + requires_grad=False, + placement=None, + sbp=None, + 
is_consistent=False, + is_lazy=False, +): + if _is_scalar(data) or _input_args_is_data(data): + if ( + not _input_args_is_numpy(data) + and dtype is None + and _input_dtype_is_float(data) + ): + dtype = flow.float32 + data = np.array(data) + if dtype is None: + dtype = dtype_util.convert_numpy_dtype_to_oneflow_dtype(data.dtype) + return Tensor( + data, + dtype=dtype, + device=device, + requires_grad=requires_grad, + placement=placement, + sbp=sbp, + is_consistent=is_consistent, + is_lazy=is_lazy, + ) + else: + raise TypeError("Construction error, invalid combination of arguments") + + +@oneflow_export("Tensor") +class Tensor: + def __init__( + self, + *args, + dtype=None, + device=None, + requires_grad=False, + placement=None, + sbp=None, + is_consistent=False, + is_lazy=False, + data_initializer=None, + determining_initializer=None, + ): + assert len(args) > 0 + dtype = dtype if dtype is not None else oneflow._oneflow_internal.float32 + if isinstance(device, str): + device = flow.device(device) + if placement is None: + device = ( + device + if device is not None + else oneflow._oneflow_internal.device("cpu") + ) + if _input_args_is_tensor(*args): + self._local_or_consistent_tensor = flow.to( + *args, device=args[0].device, dtype=args[0].dtype, copy=True + ) + self._undetermined_tensor = None + elif _input_args_is_consistent_or_local(*args): + self._local_or_consistent_tensor = args[0] + self._undetermined_tensor = None + elif _input_args_is_data(*args): + self._local_or_consistent_tensor = None + self._construct_with_data( + *args, + dtype=dtype, + device=device, + requires_grad=requires_grad, + placement=placement, + sbp=sbp, + is_consistent=is_consistent, + is_lazy=is_lazy, + ) + elif _input_args_is_shape(*args): + shape = args + self._local_or_consistent_tensor = None + self._undetermined_tensor = UndeterminedTensor( + shape, + dtype, + device=device, + requires_grad=requires_grad, + placement=placement, + sbp=sbp, + is_consistent=is_consistent, + is_lazy=is_lazy, + data_initializer=data_initializer, + ) + if determining_initializer is None: + determining_initializer = _default_initializer_for_determining + self._determining_initializer = determining_initializer + else: + # Maybe some other arguments to be supported, reported as error for now + raise TypeError("new() received an invalid combination of arguments") + + @property + def shape(self): + if self._local_or_consistent_tensor is not None: + return self._local_or_consistent_tensor.shape + else: + return self._undetermined_tensor.shape + + @property + def device(self): + if self._local_or_consistent_tensor is not None: + return self._local_or_consistent_tensor.device + else: + return self._undetermined_tensor.device + + @register_local_tensor_method("ndim") + @property + def ndim(self): + return len(self.shape) + + @property + def is_cuda(self): + if self._local_or_consistent_tensor is not None: + return self._local_or_consistent_tensor.is_cuda + else: + return self._undetermined_tensor.is_cuda + + @property + def dtype(self): + if self._local_or_consistent_tensor is not None: + return self._local_or_consistent_tensor.dtype + else: + return self._undetermined_tensor.dtype + + # internal decorator + def _auto_determine(func): + def wrapped_func(*args, **kwargs): + tensor = args[0] + if not tensor.is_determined: + tensor.determine() + return func(*args, **kwargs) + + return wrapped_func + + @property + @_auto_determine + def data(self): + if self._local_or_consistent_tensor is not None: + return 
flow.Tensor(self._local_or_consistent_tensor.data) + else: + return None + + @property + def grad(self): + if self._local_or_consistent_tensor is not None: + if self._local_or_consistent_tensor.grad is not None: + return flow.Tensor(self._local_or_consistent_tensor.grad) + else: + return None + + @grad.setter + @_auto_determine + def grad(self, new_grad): + def check_grad(grad, new_grad): + assert grad.shape == new_grad.shape, "Shape of new grad is not equal" + assert grad.device == new_grad.device, "Device of new grad is not equal" + assert grad.dtype == new_grad.dtype, "Data type of new grad is not equal" + assert type(grad) == type(new_grad), "Type of new grad is not equal" + + if self._local_or_consistent_tensor is not None: + if new_grad is None: + self._local_or_consistent_tensor.set_grad(None) + else: + new_grad_detach = new_grad.detach()._local_or_consistent_tensor + check_grad(self._local_or_consistent_tensor.grad, new_grad_detach) + self._local_or_consistent_tensor.set_grad(new_grad_detach) + + @property + def grad_fn(self): + if self._local_or_consistent_tensor is not None: + return self._local_or_consistent_tensor.grad_fn + else: + return None + + @property + def requires_grad(self): + if self._local_or_consistent_tensor is not None: + return self._local_or_consistent_tensor.requires_grad + else: + return self._undetermined_tensor.requires_grad + + @property + def is_leaf(self): + if self._local_or_consistent_tensor is not None: + return self._local_or_consistent_tensor.is_leaf + else: + return True + + @requires_grad.setter + def requires_grad(self, requires_grad): + if self._local_or_consistent_tensor is not None: + self._local_or_consistent_tensor.requires_grad = requires_grad + else: + self._undetermined_tensor.requires_grad = requires_grad + + @register_local_tensor_method() + def size(self, idx=None): + if idx is None: + return self.shape + else: + return self.shape[idx] + + @register_local_tensor_method() + def dim(self): + return self.ndim + + @register_local_tensor_method() + def ndimension(self): + return self.ndim + + @_auto_determine + def detach(self): + if self._local_or_consistent_tensor is not None: + return flow.Tensor(self._local_or_consistent_tensor.detach()) + else: + return None + + def requires_grad_(self, requires_grad=True): + self.requires_grad = requires_grad + + def get_device(self): + if self._local_or_consistent_tensor is not None: + return self._local_or_consistent_tensor.device + else: + return self._undetermined_tensor.device + + @register_local_tensor_method() + def nelement(self): + prod = 1 + for dim in self.shape: + prod *= dim + return prod + + @register_local_tensor_method() + def numel(self): + return self.nelement() + + def retain_grad(self): + assert self.is_determined + self._local_or_consistent_tensor.retain_grad() + + def data_ptr(self): + TODO() + + def element_size(self): + return self.dtype.bytes + + @_auto_determine + def numpy(self): + internal_tensor = self._local_or_consistent_tensor + if not internal_tensor.is_lazy and not internal_tensor.is_consistent: + return _local_tensor_numpy(internal_tensor) + + raise NotImplementedError() + + @register_local_tensor_method() + def tolist(self): + return self.numpy().tolist() + + @_auto_determine + @register_local_tensor_method() + def backward(self, gradient=None, retain_graph=False, create_graph=False): + flow.autograd.backward(self, gradient, retain_graph, create_graph) + + @register_local_tensor_method() + def _transform_ellipsis_type(self, key): + d = self.ndim - len(key) # exclude 
all Ellipsis type + new_key = list() + for k in key: + if isinstance(k, type(Ellipsis)): + new_key.append(slice(None, None, None)) + while d > 0: + new_key.append(slice(None, None, None)) + d -= 1 + else: + new_key.append(k) + return tuple(new_key) + + @register_local_tensor_method() + def _get_slice_obj(self, key): + def get_or_default(x, default): + return x if x is not None else default + + def get_canonical_index(index, length, *, start=0): + if index < 0: + index += length + if index > length or index < 0: + raise IndexError(f"Index should be in [0, {length}), but got {index}") + return max(min(index, length), start) + + def get_slice_if_int(x): + if isinstance(x, slice): + return x + return slice(x, x + 1) + + if isinstance(key, tuple): + assert all(isinstance(x, (slice, int)) for x in key) + else: + assert isinstance(key, (slice, int)) + key = (key,) + + key = list(map(get_slice_if_int, key)) + + assert len(key) <= len(self.shape) + for i in range(len(key), len(self.shape)): + key += (slice(None, None, None),) + + starts = [ + get_canonical_index(get_or_default(x.start, 0), self.shape[i]) + for i, x in enumerate(key) + ] + stops = [ + get_canonical_index( + get_or_default(x.stop, self.shape[i]), self.shape[i], start=starts[i] + ) + for i, x in enumerate(key) + ] + steps = [get_or_default(x.step, 1) for x in key] + assert all(x > 0 for x in steps) + # np.abs is for compatibility of negative steps in the future + shape = (np.abs(np.array(stops) - np.array(starts)) - 1) // np.abs( + np.array(steps) + ) + 1 + shape = shape.tolist() + return starts, stops, steps, shape + + @_auto_determine + @register_local_tensor_method() + def __getitem__(self, key): + # TODO: support inplace __getitem__ + assert ( + isinstance(key, int) or isinstance(key, tuple) or isinstance(key, slice) + ), "Unsupported key type!" 
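+        # Integer indices collapse their dimension. The slice op below keeps
+        # every dimension, so int positions are recorded as squeeze_dims and
+        # squeezed away after flow.experimental.slice runs.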
+ + squeeze_dims = None + if isinstance(key, tuple): + key = self._transform_ellipsis_type(key) + squeeze_dims = list( + filter(lambda idx: isinstance(key[idx], int), range(len(key))) + ) + elif isinstance(key, int): + squeeze_dims = [0] + else: + # do nothing + pass + + start, stop, step, _ = self._get_slice_obj(key) + res = flow.experimental.slice(self, list(zip(start, stop, step))) + if squeeze_dims is not None: + res = res.squeeze(dim=squeeze_dims) + return res + + @_auto_determine + @register_local_tensor_method() + def __setitem__(self, key, value): + if isinstance(key, tuple): + key = self._transform_ellipsis_type(key) + unsqueeze_dims = list( + filter(lambda idx: isinstance(key[idx], int), range(len(key))) + ) + elif isinstance(key, int): + unsqueeze_dims = [0] + else: + unsqueeze_dims = [] + + start, stop, step, shape = self._get_slice_obj(key) + if isinstance(value, (int, float)): + scalar = value + value = flow.Tensor(*shape) + value.fill_(scalar) + else: + prepended_broadcasting_dims = range( + len(self.shape) - len(unsqueeze_dims) - len(value.shape) + ) + for dim in prepended_broadcasting_dims: + value = flow.experimental.unsqueeze(value, dim) + for dim in unsqueeze_dims: + value = flow.experimental.unsqueeze(value, dim) + value = flow.experimental.expand(value, *shape) + + flow.experimental.tmp.logical_slice_assign( + self, value, list(zip(start, stop, step)) + ) + return self + + @register_local_tensor_method() + def __str__(self): + return self.__repr__() + + @register_local_tensor_method() + def __repr__(self): + return tensor_str_util._gen_tensor_str(self) + + @register_local_tensor_method() + def __gt__(self, other): + return self.gt(other) + + @register_local_tensor_method() + def __lt__(self, other): + return self.lt(other) + + @register_local_tensor_method() + def __ge__(self, other): + return self.ge(other) + + @register_local_tensor_method() + def __le__(self, other): + return self.le(other) + + def __array__(self): + TODO() + + def __sizeof__(self): + TODO() + + def __deepcopy__(self, memo): + TODO() + + @register_local_tensor_method() + def __mul__(self, other): + return self.mul(other) + + @register_local_tensor_method() + def __rmul__(self, other): + return self.mul(other) + + @register_local_tensor_method() + def __add__(self, other): + return self.add(other) + + @register_local_tensor_method() + def __radd__(self, other): + return self.add(other) + + @register_local_tensor_method() + def __sub__(self, other): + return self.sub(other) + + @register_local_tensor_method() + def __rsub__(self, other): + return flow.experimental.sub(other, self) + + @register_local_tensor_method() + def __truediv__(self, other): + return self.div(other) + + @register_local_tensor_method() + def __rtruediv__(self, other): + return flow.experimental.div(other, self) + + @register_local_tensor_method() + def __neg__(self): + return flow.experimental.neg(self) + + @register_local_tensor_method() + def __pow__(self, b): + return flow.experimental.pow(self, b) + + def _determine_if_needed(self, determining_initializer=None): + if not self.is_determined: + self.determine(determining_initializer) + + def determine(self, determining_initializer=None): + assert not self.is_determined + if determining_initializer is None: + determining_initializer = self._determining_initializer + self._local_or_consistent_tensor = determining_initializer(self) + self._undetermined_tensor = None + + @property + def is_determined(self): + if self._local_or_consistent_tensor is not None: + assert 
self._undetermined_tensor is None + return True + else: + assert self._undetermined_tensor is not None + return False + + def set_placement(self, placement): + assert isinstance(placement, flow.placement) + assert self._local_or_consistent_tensor is None + assert self._undetermined_tensor is not None + self._undetermined_tensor.placement = placement + self._undetermined_tensor.device = None + + def set_sbp(self, sbp): + assert isinstance(sbp, oneflow._oneflow_internal.Distribute) + assert self._local_or_consistent_tensor is None + assert self._undetermined_tensor is not None + self._undetermined_tensor.sbp = sbp + + def set_is_consistent(self, is_consistent): + assert isinstance(is_consistent, bool) + assert self._local_or_consistent_tensor is None + assert self._undetermined_tensor is not None + self._undetermined_tensor.is_consistent = is_consistent + + def set_is_lazy(self, is_lazy): + assert isinstance(is_lazy, bool) + assert self._local_or_consistent_tensor is None + assert self._undetermined_tensor is not None + self._undetermined_tensor.is_lazy = is_lazy + + def set_data_initializer(self, data_initializer): + assert isinstance(data_initializer, initializer_conf_util.InitializerConf) + assert self._local_or_consistent_tensor is None + assert self._undetermined_tensor is not None + self._undetermined_tensor.data_initializer = data_initializer + + @property + def placement(self): + if self._local_or_consistent_tensor is not None: + return self._local_or_consistent_tensor.placement + else: + return self._undetermined_tensor.placement + + @property + def is_lazy(self): + if self._local_or_consistent_tensor is not None: + return self._local_or_consistent_tensor.is_lazy + else: + return self._undetermined_tensor.is_lazy + + @property + def is_consistent(self): + if self._local_or_consistent_tensor is not None: + return self._local_or_consistent_tensor.is_consistent + else: + return self._undetermined_tensor.is_consistent + + @property + def sbp(self): + if self._local_or_consistent_tensor is not None: + return self._local_or_consistent_tensor.sbp + else: + return self._undetermined_tensor.sbp + + @register_local_tensor_method() + def uniform_(self, a=0, b=1): + initializer_conf = flow.random_uniform_initializer( + minval=a, maxval=b, dtype=self.dtype + ) + return self._init_by_initializer_conf(initializer_conf) + + @register_local_tensor_method() + def kaiming_uniform_( + self, a=0, mode="fan_in", nonlinearity="leaky_relu", *, data_format="NCHW" + ): + initializer_conf = flow.kaiming_initializer( + shape=self.shape, + distribution="random_uniform", + mode=mode, + nonlinearity=nonlinearity, + negative_slope=a, + data_format=data_format, + ) + return self._init_by_initializer_conf(initializer_conf) + + @register_local_tensor_method() + def kaiming_normal_( + self, a=0, mode="fan_in", nonlinearity="leaky_relu", *, data_format="NCHW" + ): + initializer_conf = flow.kaiming_initializer( + shape=self.shape, + distribution="random_normal", + mode=mode, + nonlinearity=nonlinearity, + negative_slope=a, + data_format=data_format, + ) + return self._init_by_initializer_conf(initializer_conf) + + @register_local_tensor_method() + def xavier_normal_(self, gain=1.0, *, data_format="NCHW"): + assert gain == 1.0, "Only gain == 1.0 is supported now" + initializer_conf = flow.xavier_normal_initializer(data_format=data_format) + return self._init_by_initializer_conf(initializer_conf) + + @register_local_tensor_method() + def xavier_uniform_(self, gain=1.0, *, data_format="NCHW"): + assert gain == 1.0, "Only 
gain == 1.0 is supported now" + initializer_conf = flow.xavier_uniform_initializer(data_format=data_format) + return self._init_by_initializer_conf(initializer_conf) + + @register_local_tensor_method() + def normal_(self, mean=0, std=1): + initializer_conf = flow.random_normal_initializer(mean=mean, stddev=std) + return self._init_by_initializer_conf(initializer_conf) + + @register_local_tensor_method() + def fill_(self, value): + initializer_conf = flow.constant_initializer(value=value, dtype=self.dtype) + return self._init_by_initializer_conf(initializer_conf) + + @_auto_determine + def zeros_(self): + internal_tensor = self._local_or_consistent_tensor + if internal_tensor.is_lazy: + TODO() + if internal_tensor.is_consistent: + TODO() + internal_tensor.zeros_() + + @_auto_determine + @register_local_tensor_method() + def register_hook(self, hook): + assert self.is_leaf, "register_hook only supports leaf tensor for now" + assert ( + self.requires_grad + ), "register_hook only supports tensor with requires_grad=True" + + def hook_returning_determined_tensor(grad): + new_grad = hook(grad) + if isinstance(new_grad, Tensor) and not new_grad.is_determined: + new_grad.determine() + new_grad = new_grad._local_or_consistent_tensor + return new_grad + + self._local_or_consistent_tensor._register_hook( + hook_returning_determined_tensor + ) + + @_auto_determine + def copy_(self, other: Union["Tensor", np.ndarray]): + internal_tensor = self._local_or_consistent_tensor + if internal_tensor.is_lazy: + TODO() + if internal_tensor.is_consistent: + TODO() + + if isinstance(other, (Tensor, check_point_v2.FileBackendVariableBlob)): + src_np = other.numpy() + else: + assert isinstance(other, np.ndarray) + src_np = other + + _copy_from_numpy_to_eager_local_tensor(internal_tensor, src_np) + + def _init_by_initializer_conf(self, initializer_conf): + if self.is_determined: + if self.is_consistent: + with self._placement_scope(): + check_point_v2.init_by_initializer_conf( + self, initializer_conf, True, None + ) + else: + _init_eager_local_tensor_by_initializer_conf( + self._local_or_consistent_tensor, initializer_conf + ) + else: + self.set_data_initializer(initializer_conf) + return self + + def _placement_scope(self): + if self.is_consistent: + return _convert_to_placement_scope(self.placement) + else: + return _convert_to_placement_scope(self.device) + + def _construct_with_data( + self, + *args, + dtype=None, + device=None, + requires_grad=False, + placement=None, + sbp=None, + is_consistent=False, + is_lazy=False, + ): + numpy_data = None + if _input_args_is_tuple_or_list(*args): + numpy_data = np.array(args[0]) + elif _input_args_is_numpy(*args): + numpy_data = np.ascontiguousarray(args[0]) + numpy_data = numpy_data.astype(flow.convert_oneflow_dtype_to_numpy_dtype(dtype)) + shape = oneflow._oneflow_internal.Size(tuple(numpy_data.shape)) + self._determining_initializer = _numpy_initializer_for_determining + self._undetermined_tensor = UndeterminedTensor( + shape, + dtype, + device=device, + requires_grad=requires_grad, + placement=placement, + sbp=sbp, + is_consistent=is_consistent, + is_lazy=is_lazy, + numpy_data=numpy_data, + ) + + +class UndeterminedTensor: + def __init__( + self, + shape, + dtype, + device=None, + requires_grad=False, + placement=None, + sbp=None, + is_consistent=False, + is_lazy=False, + data_initializer=None, + numpy_data=None, + ): + if not isinstance(shape, oneflow._oneflow_internal.Size): + if not isinstance(shape, tuple): + shape = tuple(shape) + shape = 
oneflow._oneflow_internal.Size(shape) + data_initializer = ( + data_initializer + if data_initializer is not None + else flow.empty_initializer(dtype=dtype) + ) + device = ( + device if device is not None else oneflow._oneflow_internal.device("cpu") + ) + self.shape = shape + self.dtype = dtype + self.device = device + self.requires_grad = requires_grad + self.placement = placement + self.sbp = sbp + self.is_consistent = is_consistent + self.is_lazy = is_lazy + self.data_initializer = data_initializer + self.numpy_data = numpy_data + + @property + def is_cuda(self): + device_type = None + if self.placement is not None: + device_type = self.placement.device_tag + elif self.device is not None: + device_type = self.device.type + else: + raise ValueError("Neither placement nor device found.") + return device_type == "gpu" or device_type == "cuda" + + +def _default_initializer_for_determining(tensor): + assert not tensor.is_determined + undetermined_tensor = tensor._undetermined_tensor + if undetermined_tensor.is_consistent: + raise NotImplementedError() + else: + shape = undetermined_tensor.shape + dtype = undetermined_tensor.dtype + determined_tensor = oneflow._oneflow_internal.Tensor( + shape, + dtype, + undetermined_tensor.device, + undetermined_tensor.is_lazy, + undetermined_tensor.requires_grad, + True, + ) + _init_eager_local_tensor_by_initializer_conf( + determined_tensor, undetermined_tensor.data_initializer + ) + return determined_tensor + + +def _numpy_initializer_for_determining(tensor): + assert not tensor.is_determined + undetermined_tensor = tensor._undetermined_tensor + numpy_data = undetermined_tensor.numpy_data + assert numpy_data is not None + + if undetermined_tensor.is_consistent: + raise NotImplementedError() + else: + determined_tensor = oneflow._oneflow_internal.Tensor( + undetermined_tensor.shape, + undetermined_tensor.dtype, + undetermined_tensor.device, + undetermined_tensor.is_lazy, + undetermined_tensor.requires_grad, + True, + ) + _copy_from_numpy_to_eager_local_tensor(determined_tensor, numpy_data) + + return determined_tensor + + +def _input_args_is_tuple_or_list(*args): + return len(args) == 1 and isinstance(args[0], (tuple, list)) + + +def _input_args_is_numpy(*args): + return len(args) == 1 and isinstance(args[0], np.ndarray) + + +def _input_args_is_consistent_or_local(*args): + return len(args) == 1 and isinstance(args[0], oneflow._oneflow_internal.Tensor) + + +def _input_args_is_tensor(*args): + return len(args) == 1 and isinstance(args[0], flow.Tensor) + + +def _input_args_is_data(*args): + return _input_args_is_numpy(*args) or _input_args_is_tuple_or_list(*args) + + +def _input_args_is_shape(*args): + return all(isinstance(x, int) for x in args) + + +def register_tensor_op(op_name): + def set_tensor_op(method): + setattr(Tensor, op_name, method) + setattr(oneflow._oneflow_internal.Tensor, op_name, method) + return method + + return set_tensor_op + + +def _convert_to_placement_scope(placement_or_device): + if isinstance(placement_or_device, flow.placement): + placement = placement_or_device + return flow.scope.placement( + placement.device_tag, + list(placement.parallel_conf.device_name()), + placement.hierarchy, + ) + else: + device = placement_or_device + # TODO(jianhao): replace 0 with real machine id + machine_id = 0 + # TODO(jianhao): support cuda in of + if device.type == "cuda": + device_tag = "gpu" + else: + device_tag = device.type + return flow.scope.placement( + device_tag, "{}:{}".format(machine_id, device.index), None + ) + + +def 
_is_scalar(data): + return isinstance(data, (int, float, bool, complex)) + + +def _flatten_list_or_tuple(list_or_tuple): + for item in list_or_tuple: + if isinstance(item, (list, tuple)): + yield from _flatten_list_or_tuple(item) + else: + yield item + + +def _input_dtype_is_float(data): + if _is_scalar(data): + return isinstance(data, float) + elif isinstance(data, (list, tuple)): + return any(isinstance(x, float) for x in _flatten_list_or_tuple(data)) + return False diff --git a/oneflow/compatible_single_client_python/framework/tensor_str.py b/oneflow/compatible_single_client_python/framework/tensor_str.py new file mode 100644 index 0000000000000000000000000000000000000000..2025daaf86a2d3b6cb17296d5db6fcd98ecde070 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/tensor_str.py @@ -0,0 +1,54 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import numpy as np +from oneflow.compatible import single_client as flow + + +def _add_suffixes(tensor_str, suffixes, indent): + tensor_strs = [tensor_str] + last_line_len = len(tensor_str) - tensor_str.rfind("\n") + 1 + linewidth = 80 + for suffix in suffixes: + suffix_len = len(suffix) + if last_line_len + suffix_len + 2 > linewidth: + tensor_strs.append(",\n" + " " * indent + suffix) + last_line_len = indent + suffix_len + else: + tensor_strs.append(", " + suffix) + last_line_len += suffix_len + 2 + tensor_strs.append(")") + return "".join(tensor_strs) + + +def _gen_tensor_str(tensor): + prefix = "tensor(" + indent = len(prefix) + suffixes = [] + + if tensor.device.type != "cpu" or ( + tensor.device.type == "cuda" and tensor.device.index != 0 + ): + suffixes.append("device='" + str(tensor.device) + "'") + suffixes.append("dtype=" + str(tensor.dtype)) + if tensor.grad_fn is not None: + name = tensor.grad_fn.name() + suffixes.append("grad_fn=<{}>".format(name)) + elif tensor.requires_grad: + suffixes.append("requires_grad=True") + tensor_str = np.array2string( + tensor.numpy(), precision=4, separator=", ", prefix=prefix + ) + return _add_suffixes(prefix + tensor_str, suffixes, indent) diff --git a/oneflow/compatible_single_client_python/framework/tensor_tuple_util.py b/oneflow/compatible_single_client_python/framework/tensor_tuple_util.py new file mode 100644 index 0000000000000000000000000000000000000000..4c7deb582c24eba230860578936ad6cf772c1a32 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/tensor_tuple_util.py @@ -0,0 +1,44 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import collections
+from typing import Union, Sequence, Tuple, Optional
+
+from oneflow.compatible_single_client_python.framework.tensor import Tensor as PyTensor
+from oneflow._oneflow_internal import TensorTuple, Tensor
+
+
+def convert_to_tensor_tuple(
+    args: Optional[Union[PyTensor, Sequence[PyTensor], Tensor, Sequence[Tensor]]]
+):
+    if args is None:
+        return TensorTuple()
+    elif isinstance(args, collections.abc.Sequence):
+        if isinstance(args[0], PyTensor):
+            for tensor in args:
+                if not tensor.is_determined:
+                    tensor.determine()
+            return TensorTuple([x._local_or_consistent_tensor for x in args])
+        return TensorTuple(args)
+    else:
+        tensor_tuple = TensorTuple()
+        if isinstance(args, PyTensor):
+            if not args.is_determined:
+                args.determine()
+            tensor_tuple.append(args._local_or_consistent_tensor)
+        else:
+            tensor_tuple.append(args)
+        return tensor_tuple
diff --git a/oneflow/compatible_single_client_python/framework/typing.py b/oneflow/compatible_single_client_python/framework/typing.py
new file mode 100644
index 0000000000000000000000000000000000000000..26dda4dbe40ee3f6a49d1f81f565d28606e7ab96
--- /dev/null
+++ b/oneflow/compatible_single_client_python/framework/typing.py
@@ -0,0 +1,160 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+from oneflow.compatible import single_client as flow
+from typing import Sequence, Optional
+from oneflow.compatible_single_client_python.oneflow_export import oneflow_export
+from oneflow.compatible_single_client_python.framework import (
+    input_blob_def as input_blob_def,
+)
+import typing
+import inspect
+import sys
+
+
+class PyStructCompatibleToBlob(object):
+    pass
+
+
+@oneflow_export("typing.Numpy")
+class Numpy(PyStructCompatibleToBlob):
+    """`Numpy` is a type hint for the numpy output of a OneFlow global function.
+    For instance::
+
+        @oneflow.compatible.single_client.global_function()
+        def foo() -> oneflow.compatible.single_client.typing.Numpy:
+            loss = ... # your network
+            return loss
+
+        loss = foo() # get a numpy.ndarray
+        print(loss)
+    """
+
+    def Placeholder(shape: Sequence[int], dtype=flow.float):
+        """`Numpy.Placeholder` is a typing function for the numpy input of a OneFlow global function.
+        A `numpy.ndarray` passed in a `Numpy.Placeholder`'s place must have an identical shape.
+        For instance::
+
+            @oneflow.compatible.single_client.global_function()
+            def foo(
+                image_blob: oneflow.compatible.single_client.typing.Numpy.Placeholder(
+                    (2, 255, 255, 3), dtype=flow.float32
+                )
+            ):
+                # your network
+
+            foo(np.random.randn(2, 255, 255, 3).astype(np.float32))
+
+        """
+        assert type(shape) is tuple, "shape should be a tuple. %s found" % shape
%s found" % shape + return type("Numpy.Placeholder", (NumpyDef,), dict(shape=shape, dtype=dtype),) + + +@oneflow_export("typing.ListNumpy") +class ListNumpy(PyStructCompatibleToBlob): + """`ListNumpy` is a type hint for numpy output of a OneFlow global function + For instance:: + + @oneflow.compatible.single_client.global_function() + def foo() -> oneflow.compatible.single_client.typing.ListNumpy: + mirrored_tensors = ... # your network + return mirrored_tensors + + mirrored_tensors = foo() # get a list of numpy.ndarray + for tensor in mirrored_tensors: + print(mirrored_tensors) + """ + + def Placeholder(shape: Sequence[int], dtype=flow.float): + """`ListNumpy.Placeholder` is a typing function for numpy input of a OneFlow global function. + A `list` of `numpy.ndarray` takes a `ListNumpy.Placeholder`'s place. Each `numpy.ndarray` in the `list` could have any shape as long as it has the same rank and a smaller/equal size. + For instance:: + + @oneflow.compatible.single_client.global_function() + def foo( + image_blob: oneflow.compatible.single_client.typing.ListNumpy.Placeholder( + (2, 255, 255, 3), dtype=flow.float32 + ) + ): + # your network + + input1 = np.random.randn(2, 255, 255, 3).astype(np.float32) + input2 = np.random.randn(2, 251, 251, 3).astype(np.float32) + foo([input1]) + foo([input2]) + + """ + assert type(shape) is tuple, "shape should be a tuple. %s found" % shape + return type( + "ListNumpy.Placeholder", (ListOfNumpyDef,), dict(shape=shape, dtype=dtype), + ) + + +class OneflowNumpyDef(object): + @classmethod + def NewInputBlobDef(subclass): + raise NotImplementedError + + +class NumpyDef(OneflowNumpyDef): + @classmethod + def NewInputBlobDef(subclass): + return input_blob_def.FixedTensorDef(subclass.shape, dtype=subclass.dtype) + + +class ListOfNumpyDef(OneflowNumpyDef): + @classmethod + def NewInputBlobDef(subclass): + return input_blob_def.MirroredTensorDef(subclass.shape, dtype=subclass.dtype) + + +@oneflow_export("typing.Callback") +class Callback(typing.Generic[typing.TypeVar("T")]): + pass + + +@oneflow_export("typing.Bundle") +class Bundle(typing.Generic[typing.TypeVar("T")]): + """ + One or a collection of typing.Numpy/typing.ListNumpy, + such as x, [x], (x,), {"key": x} and the mixed form of them. + """ + + pass + + +def OriginFrom(parameterised, generic): + if inspect.isclass(parameterised) and inspect.isclass(generic): + return issubclass(parameterised, generic) + if generic == OneflowNumpyDef: + assert not inspect.isclass(parameterised) + return False + if (sys.version_info.major, sys.version_info.minor) >= (3, 7): + if not hasattr(parameterised, "__origin__"): + return False + if generic == typing.Dict: + return parameterised.__origin__ is dict + if generic == typing.Tuple: + return parameterised.__origin__ is tuple + if generic == typing.List: + return parameterised.__origin__ is list + if generic == Callback: + return parameterised.__origin__ is Callback + if generic == Bundle: + return parameterised.__origin__ is Bundle + + raise NotImplementedError("python typing is a monster torturing everyone.") diff --git a/oneflow/compatible_single_client_python/framework/typing_util.py b/oneflow/compatible_single_client_python/framework/typing_util.py new file mode 100644 index 0000000000000000000000000000000000000000..5f5f8fa61d77f243cddb69291d21d410afc7abd4 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/typing_util.py @@ -0,0 +1,311 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import typing +import inspect +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import ( + local_blob as local_blob_util, +) +from oneflow.compatible_single_client_python.framework import pull_util as pull_util +from oneflow.compatible_single_client_python.framework import typing as oft +from oneflow.compatible_single_client_python.experimental import ( + enable_typing_check as enable_typing_check, +) +import oneflow._oneflow_internal + + +def CheckGlobalFunctionAnnotation(signature): + parameters = signature.parameters + if all(p.annotation is not inspect._empty for _, p in parameters.items()): + for _, p in parameters.items(): + assert ( + p.kind == inspect._ParameterKind.POSITIONAL_OR_KEYWORD + ), "no parameters like *args or **kwargs supported" + CheckGlobalFunctionParamAnnotation(p.annotation) + elif enable_typing_check.typing_check_enabled: + for param_name, p in parameters.items(): + if p.annotation is inspect._empty: + raise NotImplementedError("parameter %s is not annotated" % param_name) + else: + # do nothing + pass + return_annotation = signature.return_annotation + if return_annotation is not inspect._empty: + CheckGlobalFunctionReturnAnnotation(return_annotation) + elif enable_typing_check.typing_check_enabled: + raise NotImplementedError("no return annotation found.") + else: + # do nothing + pass + + +def CheckGlobalFunctionParamAnnotation(cls): + if oft.OriginFrom(cls, typing.Tuple): + assert cls.__args__ is not None, "T in typing.Tuple[T, ...] cannot be omitted" + assert len(cls.__args__) > 0 + for cls_arg in cls.__args__: + CheckGlobalFunctionParamAnnotation(cls_arg) + elif oft.OriginFrom(cls, oft.OneflowNumpyDef): + pass + else: + raise NotImplementedError("invalid parameter annotation %s found" % cls) + + +def CheckGlobalFunctionReturnAnnotation(cls): + if cls is None: + pass + elif oft.OriginFrom(cls, oft.Callback): + assert ( + cls.__args__ is not None + ), "T in oneflow.compatible.single_client.typing.Callback[T] cannot be omitted" + assert len(cls.__args__) == 1 + _CheckGlobalFunctionReturnAnnotation(cls.__args__[0]) + elif oft.OriginFrom(cls, oft.Bundle): + assert cls.__args__[0] in ( + oft.Numpy, + oft.ListNumpy, + ), "T in oneflow.compatible.single_client.typing.Bundle[T] must be one of (oneflow.compatible.single_client.typing.Numpy, oneflow.compatible.single_client.typing.ListNumpy)" + assert len(cls.__args__) == 1 + _CheckGlobalFunctionReturnAnnotation(cls.__args__[0]) + else: + _CheckGlobalFunctionReturnAnnotation(cls) + + +def _CheckGlobalFunctionReturnAnnotation(cls): + if oft.OriginFrom(cls, typing.Tuple): + assert cls.__args__ is not None, "T in typing.Tuple[T, ...] 
cannot be omitted" + assert len(cls.__args__) > 0 + for cls_arg in cls.__args__: + _CheckGlobalFunctionReturnAnnotation(cls_arg) + elif oft.OriginFrom(cls, typing.List): + assert cls.__args__ is not None, "T in typing.List[T] cannot be omitted" + assert len(cls.__args__) == 1 + _CheckGlobalFunctionReturnAnnotation(cls.__args__[0]) + elif oft.OriginFrom(cls, typing.Dict): + assert cls.__args__ is not None, "(K, V) in typing.Dict[K,V] cannot be omitted" + assert len(cls.__args__) == 2 + _CheckGlobalFunctionReturnAnnotation(cls.__args__[1]) + elif oft.OriginFrom(cls, oft.PyStructCompatibleToBlob): + pass + else: + raise NotImplementedError("invalid return annotation %s found" % cls) + + +def CheckReturnByAnnotation(function_name, ret, annotation): + if annotation is inspect._empty: + return + if annotation is None: + error_str = ( + "%s does not matched return annotation %s of global_function %s." + % (ret, annotation, function_name) + ) + assert ret is None, error_str + elif oft.OriginFrom(annotation, oft.Callback): + _CheckReturnByAnnotation(function_name, ret, annotation.__args__[0]) + elif oft.OriginFrom(annotation, oft.Bundle): + if isinstance(ret, oneflow._oneflow_internal.BlobDesc): + _CheckReturnByAnnotation(function_name, ret, annotation.__args__[0]) + elif isinstance(ret, (list, tuple)): + for elem in ret: + CheckReturnByAnnotation(function_name, elem, annotation) + elif type(ret) is dict: + for val in ret.values(): + CheckReturnByAnnotation(function_name, val, annotation) + else: + raise NotImplementedError("invalid return %s found" % (type(ret))) + else: + _CheckReturnByAnnotation(function_name, ret, annotation) + + +def _CheckReturnByAnnotation(function_name, ret, annotation): + error_str = "%s does not matched return annotation %s of global_function %s." % ( + ret, + annotation, + function_name, + ) + if oft.OriginFrom(annotation, typing.Tuple): + assert type(ret) is tuple, error_str + assert len(ret) == len(annotation.__args__), "%s length compare: %s v.s. %s" % ( + error_str, + len(ret), + len(annotation.__args__), + ) + for ret_i, annotation_i in zip(ret, annotation.__args__): + _CheckReturnByAnnotation(function_name, ret_i, annotation_i) + elif oft.OriginFrom(annotation, typing.List): + assert type(ret) is list, error_str + assert len(annotation.__args__) == 1, ( + "%s element type in list must be unique" % error_str + ) + for ret_i in ret: + _CheckReturnByAnnotation(function_name, ret_i, annotation.__args__[0]) + elif oft.OriginFrom(annotation, typing.Dict): + assert len(annotation.__args__) == 2 + assert type(ret) is dict, error_str + for key, val in ret.items(): + assert type(key) is annotation.__args__[0], ( + "type of %s:%s and %s:%s do not matched return annotation (%s, %s) of global_function %s." + % ( + key, + type(key), + val, + type(val), + annotation.__args__[0], + annotation.__args__[1], + function_name, + ) + ) + _CheckReturnByAnnotation(function_name, val, annotation.__args__[1]) + elif oft.OriginFrom(annotation, oft.Numpy): + assert isinstance( + ret, oneflow._oneflow_internal.BlobDesc + ), "type(ret): %s" % type(ret) + # TODO(chengcheng) oft.Numpy support dynamic. + assert not ret.is_dynamic, ( + "only fixed shaped blob compatible to oneflow.compatible.single_client.typing.Numpy. 
" + "you can change annotation to oneflow.compatible.single_client.typing.ListNumpy " + ) + elif oft.OriginFrom(annotation, oft.ListNumpy): + assert isinstance( + ret, oneflow._oneflow_internal.BlobDesc + ), "type(ret): %s" % type(ret) + else: + raise NotImplementedError("invalid return annotation %s found" % annotation) + + +def TransformGlobalFunctionResult(future_blob, annotation): + if annotation is inspect._empty: + return future_blob + elif annotation is None: + assert future_blob is None + return None + elif oft.OriginFrom(annotation, oft.Callback): + annotation = annotation.__args__[0] + + def Transform(f): + return lambda x: f(TransformReturnedLocalBlob(x, annotation)) + + return lambda f: future_blob.async_get(Transform(f)) + elif oft.OriginFrom(annotation, oft.Bundle): + return TransformReturnedBundle(future_blob.get(), annotation) + else: + return TransformReturnedLocalBlob(future_blob.get(), annotation) + + +def TransformReturnedBundle(bundle_blob, annotation): + """ + Transform returned bundle blob from global_function(job_func), + the returned bundle blob could be the form like x, [x], (x, ), + {"key": x} or the mixed form of them. + """ + if isinstance(bundle_blob, (local_blob_util.LocalBlob,),): + return TransformReturnedLocalBlob(bundle_blob, annotation.__args__[0]) + elif isinstance(bundle_blob, (list, tuple)): + return type(bundle_blob)( + TransformReturnedBundle(elem, annotation) for elem in bundle_blob + ) + elif type(bundle_blob) is dict: + return { + key: TransformReturnedBundle(val, annotation) + for key, val in bundle_blob.items() + } + else: + raise NotImplementedError( + "invalid return %s : %s found" % (bundle_blob, type(bundle_blob)) + ) + + +def TransformReturnedLocalBlob(local_blob, annotation): + if oft.OriginFrom(annotation, typing.Tuple): + assert type(local_blob) is tuple + assert len(local_blob) == len(annotation.__args__) + pairs = zip(local_blob, annotation.__args__) + return tuple(TransformReturnedLocalBlob(*pair) for pair in pairs) + elif oft.OriginFrom(annotation, typing.List): + assert type(local_blob) is list + assert len(annotation.__args__) == 1 + return [ + TransformReturnedLocalBlob(elem, annotation.__args__[0]) + for elem in local_blob + ] + elif oft.OriginFrom(annotation, typing.Dict): + assert type(local_blob) is dict + assert len(annotation.__args__) == 2 + vals = [ + TransformReturnedLocalBlob(val, annotation.__args__[1]) + for val in local_blob.values() + ] + return dict(zip(local_blob.keys(), vals)) + elif oft.OriginFrom(annotation, oft.PyStructCompatibleToBlob): + return TransformLocalBlob(local_blob, annotation) + else: + raise NotImplementedError( + "invalid watch callback parameter annotation %s found" % annotation + ) + + +def CheckWatchCallbackParameterAnnotation(parameters): + assert len(parameters) == 1, "watch callback should accept only one parameter" + annotation = parameters[list(parameters.keys())[0]].annotation + if annotation is inspect._empty: + if enable_typing_check.typing_check_enabled: + raise NotImplementedError("the watch callback's parameter is not annotated") + return + if not oft.OriginFrom(annotation, oft.PyStructCompatibleToBlob): + raise NotImplementedError( + ("invalid watch callback paremeter annotation %s found. " % annotation) + + "candidate annotations: oneflow.compatible.single_client.typing.Numpy, oneflow.compatible.single_client.typing.ListNumpy. 
" + ) + + +def CheckWatchedBlobByAnnotation(blob, annotation): + if annotation is inspect._empty: + return + if oft.OriginFrom(annotation, oft.Numpy): + # TODO(chengcheng) oft.Numpy support dynamic. + assert not blob.is_dynamic, ( + "only fixed shaped blob compatible to oneflow.compatible.single_client.typing.Numpy. " + "you can change annotation to oneflow.compatible.single_client.typing.ListNumpy " + ) + elif oft.OriginFrom(annotation, oft.ListNumpy): + pass + else: + raise NotImplementedError( + "invalid watch callback parameter annotation %s found" % annotation + ) + + +def TransformWatchedBlob(future_blob, handler): + parameters = inspect.signature(handler).parameters + annotation = parameters[list(parameters.keys())[0]].annotation + if annotation is inspect._empty: + return future_blob + return TransformLocalBlob(future_blob, annotation) + + +def TransformLocalBlob(future_blob, annotation): + if oft.OriginFrom(annotation, oft.Numpy): + return future_blob.numpy() + elif oft.OriginFrom(annotation, oft.ListNumpy): + return future_blob.numpy_list() + else: + raise NotImplementedError( + "invalid watch callback parameter annotation %s found" % annotation + ) diff --git a/oneflow/compatible_single_client_python/framework/unittest.py b/oneflow/compatible_single_client_python/framework/unittest.py new file mode 100644 index 0000000000000000000000000000000000000000..b77398e97ca1656af51256765f4a3bfc6cc1ed36 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/unittest.py @@ -0,0 +1,392 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +import os +import sys +import imp +import socket +from contextlib import closing +import uuid +import unittest +import atexit +from tempfile import NamedTemporaryFile +from google.protobuf import text_format as pbtxt +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework import env_util as env_util +from oneflow.core.job.env_pb2 import EnvProto +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from typing import Any, Dict, Callable +import subprocess + + +class _ClearDefaultSession(object): + def setUp(self): + flow.clear_default_session() + flow.enable_eager_execution(False) + + +@oneflow_export("unittest.register_test_cases") +def register_test_cases( + scope: Dict[str, Any], + directory: str, + filter_by_num_nodes: Callable[[bool], int], + base_class: unittest.TestCase = unittest.TestCase, + test_case_mixin=_ClearDefaultSession, +) -> None: + def FilterTestPyFile(f): + return ( + os.path.isfile(os.path.join(directory, f)) + and f.endswith(".py") + and f.startswith("test") + ) + + def FilterMethodName(module, name): + method = getattr(module, name) + return ( + name.startswith("test") + and callable(method) + and filter_by_num_nodes(_GetNumOfNodes(method)) + ) + + onlytest_files = [f for f in os.listdir(directory) if FilterTestPyFile(f)] + for f in onlytest_files: + class_name = f[0:-3] + module = imp.load_source(class_name, os.path.join(directory, f)) + test_func_names = [ + name for name in dir(module) if FilterMethodName(module, name) + ] + method_dict = {k: getattr(module, k) for k in test_func_names} + scope[class_name] = type(class_name, (test_case_mixin, base_class), method_dict) + + +@oneflow_export("unittest.num_nodes_required") +def num_nodes_required(num_nodes: int) -> Callable[[Callable], Callable]: + def Decorator(f): + f.__oneflow_test_case_num_nodes_required__ = num_nodes + return f + + return Decorator + + +def _GetNumOfNodes(func): + if hasattr(func, "__oneflow_test_case_num_nodes_required__") == False: + return 1 + return getattr(func, "__oneflow_test_case_num_nodes_required__") + + +@oneflow_export("unittest.env.eager_execution_enabled") +def eager_execution_enabled(): + return os.getenv("ONEFLOW_TEST_ENABLE_EAGER") == "1" + + +@oneflow_export("unittest.env.typing_check_enabled") +def typing_check_enabled(): + return os.getenv("ONEFLOW_TEST_ENABLE_TYPING_CHECK") == "1" + + +@oneflow_export("unittest.env.node_list") +def node_list(): + node_list_str = os.getenv("ONEFLOW_TEST_NODE_LIST") + assert node_list_str + return node_list_str.split(",") + + +@oneflow_export("unittest.env.has_node_list") +def has_node_list(): + if os.getenv("ONEFLOW_TEST_NODE_LIST"): + return True + else: + return False + + +@oneflow_export("unittest.env.node_size") +def node_size(): + if has_node_list(): + node_list_from_env = node_list() + return len(node_list_from_env) + else: + return 1 + + +@oneflow_export("unittest.env.has_world_size") +def has_world_size(): + if os.getenv("ONEFLOW_TEST_WORLD_SIZE"): + assert os.getenv( + "ONEFLOW_TEST_WORLD_SIZE" + ).isdigit(), "env var ONEFLOW_TEST_WORLD_SIZE must be num" + return True + else: + return False + + +@oneflow_export("unittest.env.world_size") +def world_size(): + return int(os.getenv("ONEFLOW_TEST_WORLD_SIZE")) + + +@oneflow_export("unittest.env.device_num") +def device_num(): + device_num_str = os.getenv("ONEFLOW_TEST_DEVICE_NUM") + if device_num_str: + return int(device_num_str) + else: + return 1 + + +def 
enable_init_by_host_list(): + return os.getenv("ONEFLOW_TEST_ENABLE_INIT_BY_HOST_LIST") == "1" + + +def enable_multi_process(): + return os.getenv("ONEFLOW_TEST_MULTI_PROCESS") == "1" + + +def find_free_port(): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: + s.bind(("localhost", 0)) + s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) + return s.getsockname()[1] + + +_unittest_env_initilized = False +_unittest_worker_initilized = False + + +def worker_agent_port(): + port_txt = os.getenv("ONEFLOW_TEST_WORKER_AGENT_PORT") + if port_txt: + return int(port_txt) + else: + return None + + +def worker_agent_authkey(): + key = os.getenv("ONEFLOW_TEST_WORKER_AGENT_AUTHKEY") + assert key + return key + + +def use_worker_agent(): + return worker_agent_port() is not None + + +def cast(conn=None, cmd=None, msg=None): + cmd = "cast/" + cmd + print("[unittest]", f"[{cmd}]", msg) + conn.send(cmd.encode()) + conn.send(msg.encode()) + + +def call(conn=None, cmd=None, msg=None): + cmd = "call/" + cmd + print("[unittest]", f"[{cmd}]", msg) + conn.send(cmd.encode()) + msg_ = "" + if msg is not None: + msg_ = msg + conn.send(msg_.encode()) + return conn.recv().decode() + + +def launch_worker_via_agent(host=None, env_proto=None): + print("[unittest]", "launching worker via agent at", host) + from multiprocessing.connection import Client + + address = ("localhost", worker_agent_port()) + conn = Client(address, authkey=worker_agent_authkey().encode()) + cast(conn=conn, cmd="host", msg=host) + cast(conn=conn, cmd="env_proto", msg=pbtxt.MessageToString(env_proto)) + assert call(conn=conn, cmd="start_worker") == "ok" + print("[unittest]", "worker launched via agent at", host) + conn.close() + + +@oneflow_export("unittest.TestCase") +class TestCase(unittest.TestCase): + def setUp(self): + global _unittest_env_initilized + global _unittest_worker_initilized + if has_node_list(): + assert node_size() > 1 + if _unittest_worker_initilized == False: + master_port = os.getenv("ONEFLOW_TEST_MASTER_PORT") + assert master_port, "env var ONEFLOW_TEST_MASTER_PORT not set" + flow.env.ctrl_port(int(master_port)) + data_port = os.getenv("ONEFLOW_TEST_DATA_PORT") + if data_port: + flow.env.data_port(int(data_port)) + if enable_init_by_host_list(): + flow.env.machine(node_list()) + data_port = os.getenv("ONEFLOW_TEST_DATA_PORT") + print("initializing worker...") + for machine in env_util.default_env_proto.machine: + if machine.id == 0: + pass + else: + launch_worker_via_agent( + host=machine.addr, env_proto=env_util.default_env_proto + ) + else: + ctrl_port = os.getenv("ONEFLOW_TEST_CTRL_PORT") + config_rank_ctrl_port = -1 + if ctrl_port: + config_rank_ctrl_port = int(ctrl_port) + + if has_world_size(): + config_world_size = world_size() + else: + config_world_size = 0 + + config_node_size = -1 + env_node_size = os.getenv("ONEFLOW_TEST_NODE_SIZE") + if env_node_size: + config_node_size = int(env_node_size) + + bootstrap_conf_list = flow.env.init_bootstrap_confs( + node_list(), + int(master_port), + config_world_size, + config_rank_ctrl_port, + config_node_size, + ) + worker_env_proto = EnvProto() + worker_env_proto.CopyFrom(env_util.default_env_proto) + worker_env_proto.ClearField("ctrl_bootstrap_conf") + for bootstrap_conf in bootstrap_conf_list: + if bootstrap_conf.rank == 0: + continue + # set ctrl_bootstrap_conf of worker + assert bootstrap_conf.HasField("host") + worker_env_proto.ctrl_bootstrap_conf.CopyFrom(bootstrap_conf) + launch_worker_via_agent( + host=bootstrap_conf.host, 
env_proto=worker_env_proto + ) + _unittest_worker_initilized = True + elif device_num() > 1 and enable_multi_process(): + master_port = find_free_port() + flow.env.ctrl_port(master_port) + config_world_size = device_num() + bootstrap_conf_list = flow.env.init_bootstrap_confs( + ["127.0.0.1"], master_port, config_world_size + ) + env_proto = env_util.default_env_proto + assert ( + len(env_proto.machine) == 1 + and env_proto.HasField("ctrl_bootstrap_conf") == 1 + ) + run_dir = os.getenv("HOME") + "/oneflow_temp/" + str(uuid.uuid1()) + run_dir = os.path.abspath(os.path.expanduser(run_dir)) + if not os.path.exists(run_dir): + os.makedirs(run_dir) + for rank in range(1, config_world_size): + worker_env_proto = EnvProto() + worker_env_proto.CopyFrom(env_proto) + worker_env_proto.ctrl_bootstrap_conf.rank = rank + worker_env_proto.cpp_logging_conf.log_dir = ( + run_dir + "/log_" + str(rank) + ) + env_file = NamedTemporaryFile(delete=False) + if sys.version_info >= (3, 0): + env_file.write(pbtxt.MessageToString(worker_env_proto).encode()) + else: + env_file.write(pbtxt.MessageToString(worker_env_proto)) + env_file.close() + if not os.path.exists(run_dir + "/log_" + str(rank)): + os.mkdir(run_dir + "/log_" + str(rank)) + os.system( + "cp " + + env_file.name + + " " + + run_dir + + "/log_" + + str(rank) + + "/env_proto_" + + str(rank) + + ".proto" + ) + oneflow_cmd = ( + "python3 -m oneflow.compatible.single_client --start_worker" + + " --env_proto=" + + run_dir + + "/log_" + + str(rank) + + "/" + + "env_proto_" + + str(rank) + + ".proto" + ) + subprocess.Popen( + oneflow_cmd, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + shell=True, + ) + os.remove(env_file.name) + atexit.register( + flow.deprecated.delete_worker_of_multi_process, run_dir=run_dir, + ) + + log_dir = os.getenv("ONEFLOW_TEST_LOG_DIR") + if log_dir: + flow.env.log_dir(log_dir) + + if _unittest_env_initilized == False: + flow.env.init() + _unittest_env_initilized = True + + flow.clear_default_session() + flow.enable_eager_execution(eager_execution_enabled()) + flow.experimental.enable_typing_check(typing_check_enabled()) + + +def skip_unless(n, d): + if node_size() == n and device_num() == d: + return lambda func: func + else: + return unittest.skip( + "only runs when node_size is {} and device_num is {}".format(n, d) + ) + + +@oneflow_export("unittest.skip_unless_1n1d") +def skip_unless_1n1d(): + return skip_unless(1, 1) + + +@oneflow_export("unittest.skip_unless_1n2d") +def skip_unless_1n2d(): + return skip_unless(1, 2) + + +@oneflow_export("unittest.skip_unless_1n4d") +def skip_unless_1n4d(): + return skip_unless(1, 4) + + +@oneflow_export("unittest.skip_unless_2n1d") +def skip_unless_2n1d(): + return skip_unless(2, 1) + + +@oneflow_export("unittest.skip_unless_2n2d") +def skip_unless_2n2d(): + return skip_unless(2, 2) + + +@oneflow_export("unittest.skip_unless_2n4d") +def skip_unless_2n4d(): + return skip_unless(2, 4) diff --git a/oneflow/compatible_single_client_python/framework/variable_getter_composite.py b/oneflow/compatible_single_client_python/framework/variable_getter_composite.py new file mode 100644 index 0000000000000000000000000000000000000000..889e4623ad3e573b9bb6b65ffd7e80b4aec27a2e --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/variable_getter_composite.py @@ -0,0 +1,38 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
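In test files these helpers are typically combined as in the following sketch of the conventional single-client test pattern (the test body here is hypothetical)::

    import unittest
    import numpy as np
    from oneflow.compatible import single_client as flow

    @flow.unittest.skip_unless_1n1d()
    class TestAddOne(flow.unittest.TestCase):
        def test_add_one(self):
            # ONEFLOW_TEST_DEVICE_NUM / ONEFLOW_TEST_NODE_LIST drive the
            # node/device filters consulted by skip_unless above
            self.assertTrue(np.allclose(np.ones(3), np.ones(3)))

    if __name__ == "__main__":
        unittest.main()

`register_test_cases` serves the bulk-collection case instead: it loads every `test*.py` under a directory and synthesizes one TestCase class per file, filtered by the node count each test declares via `num_nodes_required`.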
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import functools + + +class VariableGetterComposite(object): + def __init__(self): + self.getter_stack = [] + + def __call__(self, var_gen_fn, *args, **kwargs): + def make_inner(outer, inner): + @functools.wraps(inner) + def inner_fn(): + return outer(inner, *args, **kwargs) + + return inner_fn + + fn = var_gen_fn + for getter in self.getter_stack: + fn = make_inner(getter, fn) + + return fn() + + def register(self, fn): + self.getter_stack.append(fn) diff --git a/oneflow/compatible_single_client_python/framework/watcher.py b/oneflow/compatible_single_client_python/framework/watcher.py new file mode 100644 index 0000000000000000000000000000000000000000..20a1a779c6f7f8a41be8026eb0327369e023e875 --- /dev/null +++ b/oneflow/compatible_single_client_python/framework/watcher.py @@ -0,0 +1,64 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
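The composite above nests each registered getter around the variable-producing function, with the last registration becoming the outermost wrapper; a self-contained sketch::

    comp = VariableGetterComposite()
    comp.register(lambda inner: inner() + 1)   # registered first: innermost
    comp.register(lambda inner: inner() * 10)  # registered last: outermost

    comp(lambda: 5)  # (5 + 1) * 10 == 60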
+""" +from __future__ import absolute_import + +import traceback + +from oneflow.core.record import record_pb2 as record_util +from oneflow.compatible_single_client_python.framework import ( + local_blob as local_blob_util, +) +from oneflow.compatible_single_client_python.framework import ofblob as ofblob +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.framework import typing_util as oft_util +import oneflow._oneflow_internal +from google.protobuf import text_format + + +def BindUuidAndHandler(uuid, blob_watched, handler): + assert isinstance(blob_watched, oneflow._oneflow_internal.ConsistentBlob) + session_ctx.GetDefaultSession().uuid2watch_handler[uuid] = (blob_watched, handler) + + +class _Watcher(oneflow._oneflow_internal.ForeignWatcher): + def __init__(self): + oneflow._oneflow_internal.ForeignWatcher.__init__(self) + + def Call(self, handler_uuid, of_blob_ptr): + try: + _WatcherHandler(handler_uuid, of_blob_ptr) + except Exception as e: + print(traceback.format_exc()) + raise e + + +def _WatcherHandler(handler_uuid, of_blob_ptr): + uuid2handler = session_ctx.GetDefaultSession().uuid2watch_handler + assert handler_uuid in uuid2handler + blob_watched, handler = uuid2handler[handler_uuid] + assert callable(handler) + ndarray = ofblob.OfBlob(of_blob_ptr).CopyToNdarray() + local_blob = local_blob_util.LocalBlob(ndarray, blob_watched.is_dynamic) + handler(oft_util.TransformWatchedBlob(local_blob, handler)) + + +# static lifetime +_global_watcher = _Watcher() diff --git a/oneflow/compatible_single_client_python/lib/__init__.py b/oneflow/compatible_single_client_python/lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/lib/core/__init__.py b/oneflow/compatible_single_client_python/lib/core/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/lib/core/async_util.py b/oneflow/compatible_single_client_python/lib/core/async_util.py new file mode 100644 index 0000000000000000000000000000000000000000..8143316f54ae66eb5e431715bb009a4f140321f2 --- /dev/null +++ b/oneflow/compatible_single_client_python/lib/core/async_util.py @@ -0,0 +1,38 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import threading + + +def Await(counter, func): + assert counter > 0 + cond_var = threading.Condition() + counter_box = [counter] + result_list = [] + + def Yield(result=None): + result_list.append(result) + cond_var.acquire() + assert counter_box[0] > 0 + counter_box[0] -= 1 + cond_var.notify() + cond_var.release() + + func(Yield) + cond_var.acquire() + while counter_box[0] > 0: + cond_var.wait() + cond_var.release() + return result_list diff --git a/oneflow/compatible_single_client_python/lib/core/box.py b/oneflow/compatible_single_client_python/lib/core/box.py new file mode 100644 index 0000000000000000000000000000000000000000..92c3cc986592720dbd339248d75a18256d11ac67 --- /dev/null +++ b/oneflow/compatible_single_client_python/lib/core/box.py @@ -0,0 +1,43 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + + +class Box(object): + def __init__(self, *arg): + assert len(arg) <= 1 + self.has_value_ = len(arg) > 0 + self.value_ = None + if self.has_value_: + self.value_ = arg[0] + + @property + def value(self): + assert self.has_value_ + return self.value_ + + @property + def value_setter(self): + return lambda val: self.set_value(val) + + def set_value(self, val): + self.value_ = val + self.has_value_ = True + + def has_value(self): + return self.has_value_ diff --git a/oneflow/compatible_single_client_python/lib/core/enable_if.py b/oneflow/compatible_single_client_python/lib/core/enable_if.py new file mode 100644 index 0000000000000000000000000000000000000000..b86e92925af0c4eabe46f430b1592209f2447622 --- /dev/null +++ b/oneflow/compatible_single_client_python/lib/core/enable_if.py @@ -0,0 +1,105 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import inspect + +from oneflow.compatible_single_client_python.lib.core import traceinfo as traceinfo + + +def condition(hob_expr): + def Decorator(func): + func.__oneflow_condition_hob__ = hob_expr + return func + + return Decorator + + +def get_condition_hob(func): + assert hasattr(func, "__oneflow_condition_hob__") + return func.__oneflow_condition_hob__ + + +def set_condition_hob(func, hob): + func.__oneflow_condition_hob__ = hob + + +def unique(arg_funcs, context=None, default=None): + assert isinstance(arg_funcs, (list, tuple)) + conditional_functions = [] + for arg_func in arg_funcs: + if isinstance(arg_func, tuple): + func, hob_expr = arg_func + elif inspect.isfunction(arg_func): + func = arg_func + assert hasattr(func, "__oneflow_condition_hob__") + hob_expr = func.__oneflow_condition_hob__ + else: + raise NotImplementedError + debug_str = func.__name__ + if hasattr(func, "__debug_str__"): + debug_str = func.__debug_str__ + conditional_functions.append((hob_expr, func, debug_str)) + + if default is None: + + def default(get_failed_info, *args, **kwargs): + raise NotImplementedError(get_failed_info()) + + matched_func = GetMatchedFunction(default, conditional_functions, context=context) + if matched_func is not None: + return matched_func + + return MakeDefaultFunction(default, conditional_functions, context=context) + + +def GetMatchedFunction(default, conditional_functions, context=None): + select_triple = (None, None, None) + for triple in conditional_functions: + if not triple[0](context): + continue + if select_triple[1] is not None: + return _MultiMatchedErrorFunction( + default, [select_triple, triple], context=context + ) + select_triple = triple + return select_triple[1] + + +def MakeDefaultFunction(default, conditional_functions, context=None): + def get_failed_info(customized_prompt=None): + failed_info = "no avaliable function found.\n" + for bf, func, location in conditional_functions: + prompt = location if customized_prompt is None else customized_prompt + failed_info += "\n%s: \033[1;31mFAILED\033[0m\n\t%s\n" % ( + prompt, + bf.debug_str(context), + ) + return failed_info + + return lambda *args, **kwargs: default(get_failed_info, *args, **kwargs) + + +def _MultiMatchedErrorFunction(default, matched_functions, context=None): + def get_failed_info(customized_prompt=None): + failed_info = "at least two conditional functions matched.\n" + for bf, func, location in matched_functions: + prompt = location if customized_prompt is None else customized_prompt + failed_info += "\n%s: \033[1;31mPASSED\033[0m\n\t%s\n" % ( + prompt, + bf.debug_str(context), + ) + return failed_info + + return lambda *args, **kwargs: default(get_failed_info, *args, **kwargs) diff --git a/oneflow/compatible_single_client_python/lib/core/func_inspect_util.py b/oneflow/compatible_single_client_python/lib/core/func_inspect_util.py new file mode 100644 index 0000000000000000000000000000000000000000..ba358e917a58fdd1da4fb9c0d6e630978767a9d0 --- /dev/null +++ b/oneflow/compatible_single_client_python/lib/core/func_inspect_util.py @@ -0,0 +1,49 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import inspect +import sys + +if sys.version_info > (2, 7) and sys.version_info < (3, 0): + + def GetArgNameAndDefaultTuple(func): + """ + returns a dictionary of arg_name:default_values for the input function + """ + args, varargs, keywords, defaults = inspect.getargspec(func) + defaults = list(defaults) if defaults is not None else [] + while len(defaults) < len(args): + defaults.insert(0, None) + return tuple(zip(args, defaults)) + + +elif sys.version_info >= (3, 0): + + def GetArgNameAndDefaultTuple(func): + signature = inspect.signature(func) + return tuple( + [ + (k, v.default if v.default is not inspect.Parameter.empty else None) + for k, v in signature.parameters.items() + ] + ) + + +else: + raise NotImplementedError + + +def GetArgDefaults(func): + return tuple(map(lambda x: x[1], GetArgNameAndDefaultTuple(func))) diff --git a/oneflow/compatible_single_client_python/lib/core/high_order_bool.py b/oneflow/compatible_single_client_python/lib/core/high_order_bool.py new file mode 100644 index 0000000000000000000000000000000000000000..8f4bcc1d4309121546864d83dc0ba0f4f1773dbc --- /dev/null +++ b/oneflow/compatible_single_client_python/lib/core/high_order_bool.py @@ -0,0 +1,208 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal + + +def bool_functor(verbose_debug_str): + def Decorator(match_function): + return HighOrderBool(verbose_debug_str, match_function) + + return Decorator + + +def hob_context_attr(attr_name): + def Decorator(attr_getter): + return HobContextAttr(attr_name, attr_getter) + + return Decorator + + +class BoolFunctor(object): + def debug_str(self, ctx, display_result=True): + if hasattr(self, "__debug_str__"): + if display_result: + return '"%s"[%s]' % (self.__debug_str__, self(ctx)) + else: + return '"%s"' % self.__debug_str__ + return self.verbose_debug_str(ctx, display_result=display_result) + + def verbose_debug_str(self, ctx, display_result=True): + raise NotImplementedError + + def __call__(self, ctx): + raise NotImplementedError + + def __and__(self, rhs): + return _AndBoolFunctor(self, rhs) + + def __or__(self, rhs): + return _OrBoolFunctor(self, rhs) + + def __invert__(self): + return _NotBoolFunctor(self) + + +class HighOrderBool(BoolFunctor): + def __init__(self, verbose_debug_str, function): + self.verbose_debug_str_ = verbose_debug_str + self.function_ = function + + def verbose_debug_str(self, ctx, display_result=True): + if display_result: + return '"%s"[%s]' % (self.verbose_debug_str_, self.function_(ctx)) + else: + return '"%s"' % self.verbose_debug_str_ + + def __call__(self, ctx): + return self.function_(ctx) + + +always_true = HighOrderBool("Always true", lambda: True) +always_false = HighOrderBool("Always false", lambda: False) + + +class _AndBoolFunctor(BoolFunctor): + def __init__(self, lhs, rhs): + assert isinstance(lhs, BoolFunctor) + assert isinstance(rhs, BoolFunctor) + self.lhs_ = lhs + self.rhs_ = rhs + + def verbose_debug_str(self, ctx, display_result=True): + left_display = self.lhs_.debug_str(ctx, display_result) + display_result = display_result and self.lhs_(ctx) + right_display = self.rhs_.debug_str(ctx, display_result) + return "(%s and %s)" % (left_display, right_display) + + def __call__(self, ctx): + return self.lhs_(ctx) and self.rhs_(ctx) + + +class _OrBoolFunctor(BoolFunctor): + def __init__(self, lhs, rhs): + assert isinstance(lhs, BoolFunctor) + assert isinstance(rhs, BoolFunctor) + self.lhs_ = lhs + self.rhs_ = rhs + + def verbose_debug_str(self, ctx, display_result=True): + left_display = self.lhs_.debug_str(ctx, display_result) + display_result = display_result and (not self.lhs_(ctx)) + right_display = self.rhs_.debug_str(ctx, display_result) + return "(%s or %s)" % (left_display, right_display) + + def __call__(self, ctx): + return self.lhs_(ctx) or self.rhs_(ctx) + + +class _NotBoolFunctor(BoolFunctor): + def __init__(self, x): + assert isinstance(x, BoolFunctor) + self.x_ = x + + def verbose_debug_str(self, ctx, display_result=True): + return "(not %s)" % self.x_.debug_str(ctx, display_result) + + def __call__(self, ctx): + return not self.x_(ctx) + + +class HobContextGetter(object): + def __init__(self, attr_name, attr_getter): + self.attr_name_ = attr_name + self.attr_getter_ = attr_getter + + @property + def attr_name(self): + return self.attr_name_ + + @property + def attr_getter(self): + return self.attr_getter_ + + def __eq__(self, other): + if not isinstance(other, HobContextGetter): + other = HobContextConstant(other) + return self._MakeHob(other, "==", lambda a, b: a == b) + + def __ne__(self, other): + if not isinstance(other, HobContextGetter): + other = HobContextConstant(other) + return 
self._MakeHob(other, "!=", lambda a, b: a != b) + + def __gt__(self, other): + if not isinstance(other, HobContextGetter): + other = HobContextConstant(other) + return self._MakeHob(other, ">", lambda a, b: a > b) + + def __ge__(self, other): + if not isinstance(other, HobContextGetter): + other = HobContextConstant(other) + return self._MakeHob(other, ">=", lambda a, b: a >= b) + + def __lt__(self, other): + if not isinstance(other, HobContextGetter): + other = HobContextConstant(other) + return self._MakeHob(other, "<", lambda a, b: a < b) + + def __le__(self, other): + if not isinstance(other, HobContextGetter): + other = HobContextConstant(other) + return self._MakeHob(other, "<=", lambda a, b: a <= b) + + def _MakeHob(self, other, cmp_str, cmp_func): + @bool_functor("%s %s %s" % (self.attr_name, cmp_str, other.attr_name)) + def HobHob(context): + return cmp_func(self.attr_getter(context), other.attr_getter(context)) + + return HobHob + + +class HobContextConstant(HobContextGetter): + def __init__(self, value): + HobContextGetter.__init__(self, str(value), lambda ctx: value) + + +class HobContextAttr(HobContextGetter): + def __init__(self, attr_name, attr_getter): + HobContextGetter.__init__(self, attr_name, attr_getter) + + def __getattr__(self, attr_name): + @hob_context_attr("%s.%s" % (self.attr_name, attr_name)) + def HobCtxAttr(ctx): + obj = self.attr_getter(ctx) + if isinstance(obj, oneflow._oneflow_internal.CfgMessage): + return getattr(obj, attr_name)() + else: + return getattr(obj, attr_name) + + return HobCtxAttr + + def HasField(self, attr_name): + @bool_functor('%s.HasField("%s")' % (self.attr_name, attr_name)) + def BoolFunctor(ctx): + obj = self.attr_getter(ctx) + if isinstance(obj, oneflow._oneflow_internal.CfgMessage): + assert hasattr(obj, "has_" + attr_name), type(obj) + return getattr(obj, "has_" + attr_name)() + elif hasattr(obj, "HasField"): + return obj.HasField(attr_name) + else: + return hasattr(obj, attr_name) + + return BoolFunctor diff --git a/oneflow/compatible_single_client_python/lib/core/lazy.py b/oneflow/compatible_single_client_python/lib/core/lazy.py new file mode 100644 index 0000000000000000000000000000000000000000..27660fd03d5676f0c922128188101b471b82e8e1 --- /dev/null +++ b/oneflow/compatible_single_client_python/lib/core/lazy.py @@ -0,0 +1,29 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + + +class Lazy(object): + def __init__(self, get_value): + self.value_ = None + self.has_value_ = False + self.get_value_ = get_value + + @property + def value(self): + if not self.has_value_: + self.value_ = self.get_value_() + self.has_value_ = True + return self.value_ diff --git a/oneflow/compatible_single_client_python/lib/core/pb_util.py b/oneflow/compatible_single_client_python/lib/core/pb_util.py new file mode 100644 index 0000000000000000000000000000000000000000..4e8d91154a2f8cf932f2496159f2367bc063c378 --- /dev/null +++ b/oneflow/compatible_single_client_python/lib/core/pb_util.py @@ -0,0 +1,92 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + + +def PythonDict2CFG(value, msg): + def extend_dict(values, msg): + for k, v in values.items(): + if type(v) is dict: + extend_dict(v, getattr(msg, "mutable_" + k)()) + elif type(v) is list or type(v) is tuple: + extend_list_or_tuple(v, msg, k) + else: + getattr(msg, "set_" + k)(v) + + def extend_list_or_tuple(values, msg, attr): + if len(values) == 0 or type(values[0]) is dict: + msg = getattr(msg, "mutable_" + attr)() + for v in values: + cmd = msg.Add() + extend_dict(v, cmd) + else: + for v in values: + getattr(msg, "add_" + attr)(v) + + extend_dict(value, msg) + return msg + + +def PythonDict2PbMessage(value, msg): + def extend_dict(values, msg): + for k, v in values.items(): + if type(v) is dict: + extend_dict(v, getattr(msg, k)) + elif type(v) is list or type(v) is tuple: + extend_list_or_tuple(v, getattr(msg, k)) + else: + setattr(msg, k, v) + else: + msg.SetInParent() + + def extend_list_or_tuple(values, msg): + if len(values) == 0: + return + if type(values[0]) is dict: + for v in values: + cmd = msg.add() + extend_dict(v, cmd) + else: + msg.extend(values) + + extend_dict(value, msg) + return msg + + +def MergePbMessage(dst, src): + assert type(dst) is type(src) + for field in dst.DESCRIPTOR.fields: + field_name = field.name + if field.containing_oneof is not None: + if dst.WhichOneof(field.containing_oneof.name) is not None: + continue + src_field_name = src.WhichOneof(field.containing_oneof.name) + if src_field_name is None: + continue + if field_name != src_field_name: + continue + else: + if dst.HasField(field_name): + continue + if not src.HasField(field_name): + continue + _MergePbMessageField(dst, src, field) + + +def _MergePbMessageField(dst, src, field): + if field.message_type is None: + setattr(dst, field.name, getattr(src, field.name)) + else: + MergePbMessage(getattr(dst, field.name), getattr(src, field.name)) diff --git a/oneflow/compatible_single_client_python/lib/core/scope_stack.py b/oneflow/compatible_single_client_python/lib/core/scope_stack.py new file mode 100644 index 0000000000000000000000000000000000000000..9020294c557ae84b802198c9e7dd59ae880c9100 --- /dev/null +++ b/oneflow/compatible_single_client_python/lib/core/scope_stack.py @@ -0,0 +1,36 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from contextlib import contextmanager + + +class ScopeStack(object): + def __init__(self, init=None): + if init is None: + init = [] + if not isinstance(init, list): + init = [init] + assert isinstance(init, list) + self.stack_ = init + + def Current(self): + assert len(self.stack_) > 0 + return self.stack_[0] + + @contextmanager + def NewScope(self, scope): + self.stack_.insert(0, scope) + try: + yield + finally: + self.stack_.pop(0) diff --git a/oneflow/compatible_single_client_python/lib/core/traceinfo.py b/oneflow/compatible_single_client_python/lib/core/traceinfo.py new file mode 100644 index 0000000000000000000000000000000000000000..94cae3f84e6250e6cc2a1dd3a3fbe4c2450bda34 --- /dev/null +++ b/oneflow/compatible_single_client_python/lib/core/traceinfo.py @@ -0,0 +1,34 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import os +import traceback + + +def GetFrameLocationStr(depth=-1): + assert depth < 0 + frame = traceback.extract_stack()[depth - 1] + return "%s:%d" % (frame[0], frame[1]) + + +def GetStackInfoExcludeOneflowPythonFile(): + import oneflow + + dirname = os.path.dirname(oneflow.__file__) + stack_info = traceback.extract_stack() + filtered_stack_info = filter( + lambda x: not x[0].startswith(dirname), stack_info + ) + return list(filtered_stack_info) diff --git a/oneflow/compatible_single_client_python/nn/__init__.py b/oneflow/compatible_single_client_python/nn/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/nn/common_types.py b/oneflow/compatible_single_client_python/nn/common_types.py new file mode 100644 index 0000000000000000000000000000000000000000..c2be8a50b6d96a46b86710679dada7d1115f2469 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/common_types.py @@ -0,0 +1,44 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
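`ScopeStack` keeps the innermost scope at index 0, and `NewScope` pushes a scope for the duration of a `with` block; a self-contained sketch::

    stack = ScopeStack("global")
    with stack.NewScope("job-1"):
        print(stack.Current())  # job-1
    print(stack.Current())      # global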
+See the License for the specific language governing permissions and +limitations under the License. +""" +from typing import TypeVar, Union, Tuple + +# Create some useful type aliases + +# Template for arguments which can be supplied as a tuple, or which can be a scalar which OneFlow will internally +# broadcast to a tuple. +# Comes in several variants: A tuple of unknown size, and a fixed-size tuple for 1d, 2d, or 3d operations. +T = TypeVar("T") +_scalar_or_tuple_any_t = Union[T, Tuple[T, ...]] +_scalar_or_tuple_1_t = Union[T, Tuple[T]] +_scalar_or_tuple_2_t = Union[T, Tuple[T, T]] +_scalar_or_tuple_3_t = Union[T, Tuple[T, T, T]] +_scalar_or_tuple_4_t = Union[T, Tuple[T, T, T, T]] +_scalar_or_tuple_5_t = Union[T, Tuple[T, T, T, T, T]] +_scalar_or_tuple_6_t = Union[T, Tuple[T, T, T, T, T, T]] + +# For arguments which represent size parameters (e.g. kernel size, padding) +_size_any_t = _scalar_or_tuple_any_t[int] +_size_1_t = _scalar_or_tuple_1_t[int] +_size_2_t = _scalar_or_tuple_2_t[int] +_size_3_t = _scalar_or_tuple_3_t[int] +_size_4_t = _scalar_or_tuple_4_t[int] +_size_5_t = _scalar_or_tuple_5_t[int] +_size_6_t = _scalar_or_tuple_6_t[int] + +# For arguments that represent a ratio to adjust each dimension of an input with (e.g. upsampling parameters) +_ratio_2_t = _scalar_or_tuple_2_t[float] +_ratio_3_t = _scalar_or_tuple_3_t[float] +_ratio_any_t = _scalar_or_tuple_any_t[float] diff --git a/oneflow/compatible_single_client_python/nn/init.py b/oneflow/compatible_single_client_python/nn/init.py new file mode 100644 index 0000000000000000000000000000000000000000..7f4ccc83b7379cdaa3e0cddcb963b3cfda219a7c --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/init.py @@ -0,0 +1,93 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
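These aliases are consumed by layers that accept either a scalar or a tuple; a hypothetical normalization helper of the kind such layers use internally (`_pair` is illustrative, not part of this patch)::

    from typing import Tuple

    def _pair(x: _size_2_t) -> Tuple[int, int]:
        # broadcast a scalar to a 2-tuple, pass tuples through unchanged
        return x if isinstance(x, tuple) else (x, x)

    _pair(3)       # (3, 3)
    _pair((2, 4))  # (2, 4)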
+""" +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.ops.initializer_util import CalcGain + + +@oneflow_export("nn.init.calculate_gain") +def calculate_gain(nonlinearity, param=None): + return CalcGain(nonlinearity, param) + + +@oneflow_export("nn.init.uniform_") +def uniform_(tensor, a=0.0, b=1.0): + # TODO(jianhao): add with torch.no_grad() when autograd is ready + tensor.uniform_(a, b) + + +@oneflow_export("nn.init.normal_") +def normal_(tensor, mean=0.0, std=1.0): + tensor.normal_(mean, std) + + +@oneflow_export("nn.init.xavier_uniform_") +def xavier_uniform_(tensor, gain=1.0, *, data_format="NCHW"): + tensor.xavier_uniform_(gain, data_format=data_format) + + +@oneflow_export("nn.init.xavier_normal_") +def xavier_normal_(tensor, gain=1.0, *, data_format="NCHW"): + tensor.xavier_normal_(gain, data_format=data_format) + + +@oneflow_export("nn.init.kaiming_uniform_") +def kaiming_uniform_( + tensor, a=0, mode="fan_in", nonlinearity="leaky_relu", *, data_format="NCHW" +): + tensor.kaiming_uniform_(a, mode, nonlinearity, data_format=data_format) + + +@oneflow_export("nn.init.kaiming_normal_") +def kaiming_normal_( + tensor, a=0, mode="fan_in", nonlinearity="leaky_relu", *, data_format="NCHW" +): + tensor.kaiming_normal_(a, mode, nonlinearity, data_format=data_format) + + +@oneflow_export("nn.init.constant_") +def constant_(tensor, val): + tensor.fill_(val) + + +@oneflow_export("nn.init.ones_") +def ones_(tensor): + tensor.fill_(1) + + +@oneflow_export("nn.init.zeros_") +def zeros_(tensor): + tensor.fill_(0) + + +def _calculate_fan_in_and_fan_out(tensor): + dimensions = tensor.ndimension() + if dimensions < 2: + raise ValueError( + "Fan in and fan out can not be computed for tensor with fewer than 2 dimensions" + ) + + num_input_fmaps = tensor.size(1) + num_output_fmaps = tensor.size(0) + receptive_field_size = 1 + if tensor.ndimension() > 2: + # math.prod is not always available, accumulate the product manually + # we could use functools.reduce but that is not supported by TorchScript + for s in tensor.size()[2:]: + receptive_field_size *= s + fan_in = num_input_fmaps * receptive_field_size + fan_out = num_output_fmaps * receptive_field_size + + return fan_in, fan_out diff --git a/oneflow/compatible_single_client_python/nn/module.py b/oneflow/compatible_single_client_python/nn/module.py new file mode 100644 index 0000000000000000000000000000000000000000..a8d580b5c2eb8e7a3850ccb32b9c38f99b7cdfbb --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/module.py @@ -0,0 +1,535 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from __future__ import absolute_import +from collections import OrderedDict, namedtuple +from typing import Union, TypeVar, Iterator, Optional, Set, Tuple, Dict, List, Callable +import itertools + +import numpy as np + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.framework.check_point_v2 import ( + FeedValueToVariable, +) +from oneflow.compatible_single_client_python.framework.function_util import ( + global_function_or_identity, +) +from oneflow.compatible_single_client_python.framework.tensor import Tensor +from oneflow.compatible_single_client_python.nn.parameter import Parameter + + +class _IncompatibleKeys( + namedtuple("IncompatibleKeys", ["missing_keys", "unexpected_keys"]) +): + def __repr__(self): + if not self.missing_keys and not self.unexpected_keys: + return "<All keys matched successfully>" + return super(_IncompatibleKeys, self).__repr__() + + __str__ = __repr__ + + +# See https://mypy.readthedocs.io/en/latest/generics.html#generic-methods-and-generic-self for the use +# of `T` to annotate `self`. Many methods of `Module` return `self` and we want those return values to be +# the type of the subclass, not the looser type of `Module`. +T = TypeVar("T", bound="Module") + + +@oneflow_export("nn.Module") +class Module(object): + def __init__(self): + self.training = True + self._consistent = False + self._parameters = OrderedDict() + self._buffers = OrderedDict() + self._non_persistent_buffers_set = set() + self._backward_hooks = OrderedDict() + self._is_full_backward_hook = None + self._forward_hooks = OrderedDict() + self._forward_pre_hooks = OrderedDict() + self._state_dict_hooks = OrderedDict() + self._load_state_dict_pre_hooks = OrderedDict() + self._modules = OrderedDict() + + @property + def consistent(self): + return self._consistent + + def forward(self, *args): + raise NotImplementedError() + + def consistent_forward(self, *args): + return self.forward(*args) + + def force_mirrored_forward(self, *args): + raise NotImplementedError() + + def __call__(self, *args): + for hook in itertools.chain(self._forward_pre_hooks.values()): + result = hook(self, args) + if result is not None: + if not isinstance(result, tuple): + result = (result,) + args = result + + res = self.forward(*args) + + return res + + def add_module(self, name: str, module: Optional["Module"]) -> None: + r"""Adds a child module to the current module. + + The module can be accessed as an attribute using the given name. + + Args: + name (string): name of the child module. The child module can be + accessed from this module using the given name + module (Module): child module to be added to the module. + """ + if not isinstance(module, Module) and module is not None: + raise TypeError("{} is not a Module subclass".format(type(module))) + elif not isinstance(name, str): + raise TypeError("module name should be a string. Got {}".format(type(name))) + elif hasattr(self, name) and name not in self._modules: + raise KeyError("attribute '{}' already exists".format(name)) + elif "." 
in name: + raise KeyError('module name can\'t contain ".", got: {}'.format(name)) + elif name == "": + raise KeyError('module name can\'t be empty string ""') + self._modules[name] = module + + def register_buffer( + self, name: str, tensor: Optional[Tensor], persistent: bool = True + ) -> None: + if "_buffers" not in self.__dict__: + raise AttributeError("cannot assign buffer before Module.__init__() call") + elif not isinstance(name, str): + raise TypeError( + "buffer name should be a string. " "Got {}".format(type(name)) + ) + elif "." in name: + raise KeyError('buffer name can\'t contain "."') + elif name == "": + raise KeyError('buffer name can\'t be empty string ""') + elif hasattr(self, name) and name not in self._buffers: + raise KeyError("attribute '{}' already exists".format(name)) + elif tensor is not None and not isinstance(tensor, Tensor): + raise TypeError( + "cannot assign '{}' object to buffer '{}' " + "(Tensor or None required)".format(type(tensor), name) + ) + else: + self._buffers[name] = tensor + if persistent: + self._non_persistent_buffers_set.discard(name) + else: + self._non_persistent_buffers_set.add(name) + + def register_parameter(self, name: str, param: Optional[Parameter]) -> None: + if "_parameters" not in self.__dict__: + raise AttributeError( + "cannot assign parameter before Module.__init__() call" + ) + elif not isinstance(name, str): + raise TypeError( + "parameter name should be a string. " "Got {}".format(type(name)) + ) + elif "." in name: + raise KeyError('parameter name can\'t contain "."') + elif name == "": + raise KeyError('parameter name can\'t be empty string ""') + elif hasattr(self, name) and name not in self._parameters: + raise KeyError("attribute '{}' already exists".format(name)) + + if param is None: + self._parameters[name] = None + elif not isinstance(param, Parameter): + raise TypeError( + "cannot assign '{}' object to parameter '{}' " + "(nn.Parameter or None required)".format(type(param), name) + ) + # TODO: uncomment these lines when autograd is ready + # elif param.grad_fn: + # raise ValueError( + # "Cannot assign non-leaf Tensor to parameter '{0}'. Model " + # "parameters must be created explicitly. 
To express '{0}' " + # "as a function of another Tensor, compute the value in " + # "the forward() method.".format(name)) + else: + self._parameters[name] = param + + def __getattr__(self, name: str) -> Union[Tensor, "Module"]: + if "_parameters" in self.__dict__: + _parameters = self.__dict__["_parameters"] + if name in _parameters: + return _parameters[name] + if "_buffers" in self.__dict__: + _buffers = self.__dict__["_buffers"] + if name in _buffers: + return _buffers[name] + if "_modules" in self.__dict__: + modules = self.__dict__["_modules"] + if name in modules: + return modules[name] + raise AttributeError( + "'{}' object has no attribute '{}'".format(type(self).__name__, name) + ) + + def __setattr__(self, name: str, value: Union[Tensor, "Module"]) -> None: + def remove_from(*dicts_or_sets): + for d in dicts_or_sets: + if name in d: + if isinstance(d, dict): + del d[name] + else: + d.discard(name) + + params = self.__dict__.get("_parameters") + if isinstance(value, Parameter): + if params is None: + raise AttributeError( + "cannot assign parameters before Module.__init__() call" + ) + remove_from( + self.__dict__, + self._buffers, + self._modules, + self._non_persistent_buffers_set, + ) + self.register_parameter(name, value) + elif params is not None and name in params: + if value is not None: + raise TypeError( + "cannot assign '{}' as parameter '{}' " + "(nn.Parameter or None expected)".format(type(value), name) + ) + self.register_parameter(name, value) + else: + modules = self.__dict__.get("_modules") + if isinstance(value, Module): + if modules is None: + raise AttributeError( + "cannot assign module before Module.__init__() call" + ) + remove_from( + self.__dict__, + self._parameters, + self._buffers, + self._non_persistent_buffers_set, + ) + modules[name] = value + elif modules is not None and name in modules: + if value is not None: + raise TypeError( + "cannot assign '{}' as child module '{}' " + "(nn.Module or None expected)".format(type(value), name) + ) + modules[name] = value + else: + buffers = self.__dict__.get("_buffers") + if buffers is not None and name in buffers: + if value is not None and not isinstance(value, Tensor): + raise TypeError( + "cannot assign '{}' as buffer '{}' " + "(Tensor or None expected)".format(type(value), name) + ) + buffers[name] = value + else: + object.__setattr__(self, name, value) + + def _named_members(self, get_members_fn, prefix="", recurse=True): + memo = set() + modules = self.named_modules(prefix=prefix) if recurse else [(prefix, self)] + for module_prefix, module in modules: + members = get_members_fn(module) + for k, v in members: + if v is None or v in memo: + continue + memo.add(v) + name = module_prefix + ("." 
if module_prefix else "") + k + yield name, v + + def parameters(self, recurse: bool = True) -> Iterator[Parameter]: + for name, param in self.named_parameters(recurse=recurse): + yield param + + def named_parameters( + self, prefix: str = "", recurse: bool = True + ) -> Iterator[Tuple[str, Tensor]]: + gen = self._named_members( + lambda module: module._parameters.items(), prefix=prefix, recurse=recurse + ) + for elem in gen: + yield elem + + def buffers(self, recurse: bool = True) -> Iterator[Tensor]: + for name, buf in self.named_buffers(recurse=recurse): + yield buf + + def named_buffers( + self, prefix: str = "", recurse: bool = True + ) -> Iterator[Tuple[str, Tensor]]: + gen = self._named_members( + lambda module: module._buffers.items(), prefix=prefix, recurse=recurse + ) + for elem in gen: + yield elem + + def children(self) -> Iterator["Module"]: + for name, module in self.named_children(): + yield module + + def named_children(self) -> Iterator[Tuple[str, "Module"]]: + memo = set() + for name, module in self._modules.items(): + if module is not None and module not in memo: + memo.add(module) + yield name, module + + def modules(self) -> Iterator["Module"]: + for name, module in self.named_modules(): + yield module + + def named_modules(self, memo: Optional[Set["Module"]] = None, prefix: str = ""): + if memo is None: + memo = set() + if self not in memo: + memo.add(self) + yield prefix, self + for name, module in self._modules.items(): + if module is None: + continue + submodule_prefix = prefix + ("." if prefix else "") + name + for m in module.named_modules(memo, submodule_prefix): + yield m + + def train(self: T, mode: bool = True) -> T: + self.training = mode + for module in self.children(): + module.train(mode) + return self + + def eval(self: T) -> T: + return self.train(False) + + def _save_to_state_dict(self, destination, prefix, keep_vars): + for name, param in self._parameters.items(): + if param is not None: + # TODO: uncomment these lines when autograd is ready + # destination[prefix + name] = param if keep_vars else param.detach() + destination[prefix + name] = param + for name, buf in self._buffers.items(): + if buf is not None and name not in self._non_persistent_buffers_set: + # destination[prefix + name] = buf if keep_vars else buf.detach() + destination[prefix + name] = buf + + def _load_from_state_dict( + self, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ): + for hook in self._load_state_dict_pre_hooks.values(): + hook( + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ) + + persistent_buffers = { + k: v + for k, v in self._buffers.items() + if k not in self._non_persistent_buffers_set + } + local_name_params = itertools.chain( + self._parameters.items(), persistent_buffers.items() + ) + local_state = {k: v for k, v in local_name_params if v is not None} + + for name, param in local_state.items(): + key = prefix + name + if key in state_dict: + input_param = state_dict[key] + + if tuple(input_param.shape) != tuple(param.shape): + # local shape should match the one in checkpoint + error_msgs.append( + "size mismatch for {}: copying a param with shape {} from checkpoint, " + "the shape in current model is {}.".format( + key, input_param.shape, param.shape + ) + ) + continue + try: + # TODO(jianhao): uncomment this line when autograd is ready + # with torch.no_grad(): + param.copy_(input_param) + # TODO(jianhao): uncomment these lines when consistent <-> 
local conversion is ready + # with param._placement_scope(): + # FeedValueToVariable(param, input_param, None) + except Exception as ex: + error_msgs.append( + 'While copying the parameter named "{}", ' + "whose dimensions in the model are {} and " + "whose dimensions in the checkpoint are {}, " + "an exception occurred : {}.".format( + key, param.shape, input_param.shape, ex.args + ) + ) + elif strict: + missing_keys.append(key) + + if strict: + for key in state_dict.keys(): + if key.startswith(prefix): + input_name = key[len(prefix) :] + input_name = input_name.split(".", 1)[ + 0 + ] # get the name of param/buffer/child + if ( + input_name not in self._modules + and input_name not in local_state + ): + unexpected_keys.append(key) + + def load_state_dict( + self, + state_dict: Union[Dict[str, Tensor], Dict[str, Tensor]], + strict: bool = True, + ): + missing_keys = [] + unexpected_keys = [] + error_msgs = [] + + # copy state_dict so _load_from_state_dict can modify it + metadata = getattr(state_dict, "_metadata", None) + state_dict = state_dict.copy() + if metadata is not None: + state_dict._metadata = metadata + + def load(module, prefix=""): + local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {}) + module._load_from_state_dict( + state_dict, + prefix, + local_metadata, + True, + missing_keys, + unexpected_keys, + error_msgs, + ) + for name, child in module._modules.items(): + if child is not None: + load(child, prefix + name + ".") + + load(self) + load = None # break load->load reference cycle + + if strict: + if len(unexpected_keys) > 0: + error_msgs.insert( + 0, + "Unexpected key(s) in state_dict: {}. ".format( + ", ".join('"{}"'.format(k) for k in unexpected_keys) + ), + ) + if len(missing_keys) > 0: + error_msgs.insert( + 0, + "Missing key(s) in state_dict: {}. 
".format( + ", ".join('"{}"'.format(k) for k in missing_keys) + ), + ) + + if len(error_msgs) > 0: + raise RuntimeError( + "Error(s) in loading state_dict for {}:\n\t{}".format( + self.__class__.__name__, "\n\t".join(error_msgs) + ) + ) + return _IncompatibleKeys(missing_keys, unexpected_keys) + + def state_dict( + self, destination=None, prefix="", keep_vars=False + ) -> Dict[str, Tensor]: + if destination is None: + destination = OrderedDict() + destination._metadata = OrderedDict() + + self._save_to_state_dict(destination, prefix, keep_vars) + for name, module in self._modules.items(): + if module is not None: + module.state_dict(destination, prefix + name + ".", keep_vars=keep_vars) + for hook in self._state_dict_hooks.values(): + # hook_result = hook(self, destination, prefix, local_metadata) + hook_result = hook(self, destination, prefix) + if hook_result is not None: + destination = hook_result + return destination + + def register_forward_pre_hook(self, hook: Callable[..., None]) -> None: + self._forward_pre_hooks[len(self._forward_pre_hooks)] = hook + + def _apply(self, fn): + for module in self.children(): + module._apply(fn) + + for key, param in self._parameters.items(): + if param is not None: + assert isinstance(param, Parameter) + assert param.is_leaf + with flow.no_grad(): + # TODO(xuxiaoyu): remove Tensor convert after Tensor refactoring + param_applied = Tensor(fn(param)) + self._parameters[key] = Parameter(param_applied, param.requires_grad) + + if param.grad is not None: + assert param.grad.is_leaf + with flow.no_grad(): + # TODO(xuxiaoyu): remove Tensor convert after Tensor refactoring + grad_applied = Tensor(fn(param.grad)) + self._parameters[key].grad = grad_applied.requires_grad_( + param.grad.requires_grad + ) + + for key, buf in self._buffers.items(): + if buf is not None: + # TODO(xuxiaoyu): remove Tensor convert after Tensor refactoring + self._buffers[key] = Tensor(fn(buf)) + + return self + + def apply(self: T, fn: Callable[["Module"], None]) -> T: + for module in self.children(): + module.apply(fn) + fn(self) + return self + + def to(self, device: Optional[Union[str, flow.device]] = None): + def convert(t): + return t.to(device) + + return self._apply(convert) diff --git a/oneflow/compatible_single_client_python/nn/modules/__init__.py b/oneflow/compatible_single_client_python/nn/modules/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/nn/modules/abs.py b/oneflow/compatible_single_client_python/nn/modules/abs.py new file mode 100644 index 0000000000000000000000000000000000000000..19c141e1d385fe15b5202151ff1f5e2cb822d4a3 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/abs.py @@ -0,0 +1,62 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Abs(Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return flow.F.abs(x) + + +@oneflow_export("abs") +@register_tensor_op("abs") +@experimental_api +def abs_op(x): + r"""Return the absolute value of each element in input tensor:math:`y = |x|` element-wise. + + Args: + input (Tensor): the input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.array([-1, 2, -3, 4]).astype(np.float32)) + >>> flow.abs(x) + tensor([1., 2., 3., 4.], dtype=oneflow.float32) + + """ + return Abs()(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/acos.py b/oneflow/compatible_single_client_python/nn/modules/acos.py new file mode 100644 index 0000000000000000000000000000000000000000..0e8701cbd1a066cae63e91b08f876eebcd0a8478 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/acos.py @@ -0,0 +1,68 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Acos(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.acos(x) + + +@oneflow_export("acos") +@register_tensor_op("acos") +@experimental_api +def acos_op(tensor): + r""" + Returns a new tensor with the inverse cosine of the elements of :attr:`input`. + + .. math:: + \text{out}_{i} = \arccos(\text{input}_{i}) + + Args: + input (Tensor): the input tensor. + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> arr = np.array([0.5, 0.6, 0.7]) + >>> input = flow.Tensor(arr, dtype=flow.float32) + >>> output = flow.acos(input) + >>> print(output.numpy()) + [1.0471976 0.9272952 0.79539883] + + """ + + return Acos()(tensor) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/acosh.py b/oneflow/compatible_single_client_python/nn/modules/acosh.py new file mode 100644 index 0000000000000000000000000000000000000000..d86420e6b5849bc1b3b75a390a14279072011df2 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/acosh.py @@ -0,0 +1,110 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.nn.module import Module + + +class Acosh(Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return flow.F.acosh(x) + + +@oneflow_export("acosh") +@experimental_api +def acosh_op(x): + r"""Returns a new tensor with the inverse hyperbolic cosine of the elements of :attr:`input`. + + .. math:: + + \text{out}_{i} = \cosh^{-1}(\text{input}_{i}) + + Args: + input (Tensor): the input tensor. + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> x1 = flow.Tensor(np.array([2, 3, 4]).astype(np.float32)) + >>> out1 = flow.acosh(x1) + >>> out1 + tensor([1.317 , 1.7627, 2.0634], dtype=oneflow.float32) + >>> x2 = flow.Tensor(np.array([1.5, 2.6, 3.7]).astype(np.float32),device=flow.device('cuda')) + >>> out2 = flow.acosh(x2) + >>> out2 + tensor([0.9624, 1.6094, 1.9827], device='cuda:0', dtype=oneflow.float32) + + """ + + return Acosh()(x) + + +@register_tensor_op("acosh") +@experimental_api +def acosh_op_tensor(x): + r""" + + acosh() -> Tensor + + See :func:`oneflow.compatible.single_client.experimental.acosh` + + """ + + return Acosh()(x) + + +@oneflow_export("arccosh") +@experimental_api +def arccosh_op(x): + r""" + + See :func:`oneflow.compatible.single_client.experimental.acosh` + + """ + + return Acosh()(x) + + +@register_tensor_op("arccosh") +@experimental_api +def arccosh_op_tensor(x): + r""" + + arccosh() -> Tensor + + See :func:`oneflow.compatible.single_client.experimental.acosh` + + """ + + return Acosh()(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/activation.py b/oneflow/compatible_single_client_python/nn/modules/activation.py new file mode 100644 index 0000000000000000000000000000000000000000..9375be4eb035a808baf3464097e8dfd8c9a8d33f --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/activation.py @@ -0,0 +1,977 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from typing import Optional + + +def _softmax_need_transpose(x, axis): + assert type(axis) is int + dim_num = len(x.shape) + if dim_num == 1: + return False, None + if axis < 0: + axis += dim_num + assert axis >= 0 + assert axis < dim_num + + need_transpose = False + permute = list(range(dim_num)) + if axis != dim_num - 1: + need_transpose = True + permute[axis] = permute[-1] + permute[-1] = axis + return need_transpose, permute + + +@oneflow_export("nn.PReLU") +@experimental_api +class PReLU(Module): + """Applies the element-wise function: + + .. math:: + PReLU(x) = \max(0,x) + a * \min(0,x) + + Here :math:`a` is a learnable parameter. When called without arguments, `nn.PReLU()` uses a single + parameter :math:`a` across all input channels. If called with `nn.PReLU(nChannels)`, + a separate :math:`a` is used for each input channel. + + + .. note:: + weight decay should not be used when learning :math:`a` for good performance. + + .. note:: + Channel dim is the 2nd dim of input. 
When input has dims < 2, then there is + no channel dim and the number of channels = 1. + + Args: + num_parameters (int): number of :math:`a` to learn. + Although it takes an int as input, there is only two values are legitimate: + 1, or the number of channels at input. Default: 1 + init (float): the initial value of :math:`a`. Default: 0.25 + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + Attr: + - weight (Tensor): the learnable weights of shape (:attr:`num_parameters`). + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> m = flow.nn.PReLU() + >>> input = flow.Tensor(np.asarray([[[[1, -2], [3, 4]]]]), dtype=flow.float32) + >>> print(m(input).numpy()) + [[[[ 1. -0.5] + [ 3. 4. ]]]] + + """ + + def __init__(self, num_parameters: int = 1, init: float = 0.25) -> None: + super().__init__() + self.num_parameters = num_parameters + self.weight = flow.nn.Parameter(flow.Tensor(num_parameters, 1, 1).fill_(init)) + self.op = flow.builtin_op("prelu").Input("x").Input("alpha").Output("y").Build() + + def forward(self, x): + assert ( + self.num_parameters == 1 or self.num_parameters == x.shape[1] + ), f"num_parameters in prelu must be 1 or {x.shape[1]}" + return self.op(x, self.weight)[0] + + +@oneflow_export("nn.ReLU") +@experimental_api +class ReLU(Module): + r"""Applies the rectified linear unit function element-wise: + + :math:`\text{ReLU}(x) = (x)^+ = \max(0, x)` + + Args: + inplace: can optionally do the operation in-place. Default: ``False`` + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> relu = flow.nn.ReLU() + >>> ndarr = np.asarray([1, -2, 3]) + >>> x = flow.Tensor(ndarr) + >>> relu(x) + tensor([1., 0., 3.], dtype=oneflow.float32) + + """ + + def __init__(self, inplace: bool = False): + super().__init__() + + def forward(self, x): + return flow.F.relu(x) + + +@oneflow_export("nn.ReLU6") +@experimental_api +class ReLU6(Module): + r"""Applies the element-wise function: + + .. math:: + + \text{Relu6}(x) = \begin{cases} + 6 & \text{ if } x > 6 \\ + 0 & \text{ if } x < 0 \\ + x & \text{ otherwise } \\ + \end{cases} + + Args: + inplace: can optionally do the operation in-place. Default: ``False`` + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array([-0.5, 0, 0.5]).astype(np.float32) + >>> input = flow.Tensor(x) + >>> relu6 = flow.nn.ReLU6() + + >>> out = relu6(input) + >>> out + tensor([0. , 0. , 0.5], dtype=oneflow.float32) + + """ + + def __init__(self, inplace: bool = False): + super().__init__() + + def forward(self, x): + return flow.F.hardtanh(x, min_val=0.0, max_val=6.0) + + +@oneflow_export("nn.Tanh") +@experimental_api +class Tanh(Module): + r"""This operator computes the hyperbolic tangent value of Tensor. + + The equation is: + + .. 
math:: + + out = \frac{e^x-e^{-x}}{e^x+e^{-x}} + + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor + + Returns: + oneflow.compatible.single_client.Tensor: The result Tensor + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array([-1, 0, 1]).astype(np.float32) + >>> input = flow.Tensor(x) + >>> tanh = flow.nn.Tanh() + >>> out = tanh(input) + >>> out + tensor([-0.7616, 0. , 0.7616], dtype=oneflow.float32) + + """ + + def __init__(self): + super().__init__() + + def forward(self, x): + return flow.F.tanh(x) + + +@oneflow_export("tanh") +@register_tensor_op("tanh") +@experimental_api +def tanh_op(x): + r"""This operator computes the hyperbolic tangent value of Tensor. + + The equation is: + + .. math:: + + out = \frac{e^x-e^{-x}}{e^x+e^{-x}} + + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor + + Returns: + oneflow.compatible.single_client.Tensor: The result Tensor + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array([-1, 0, 1]).astype(np.float32) + >>> input = flow.Tensor(x) + >>> tanh = flow.nn.Tanh() + >>> out = tanh(input) + >>> out + tensor([-0.7616, 0. , 0.7616], dtype=oneflow.float32) + + """ + return Tanh()(x) + + +@oneflow_export("nn.ELU") +@experimental_api +class ELU(Module): + r"""Applies the element-wise function: + + .. math:: + + \text{ELU}(x) = \begin{cases} + x & \text{ if } x \gt 0 \\ + \alpha*(exp(x)-1) & \text{ if } x \le 0 \\ + \end{cases} + + Args: + alpha: the :math:`\alpha` value for the ELU formulation. Default: 1.0 + inplace: can optionally do the operation in-place. Default: ``False`` + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array([-0.5, 0, 0.5]).astype(np.float32) + >>> input = flow.Tensor(x) + >>> elu = flow.nn.ELU() + + >>> out = elu(input) + >>> out + tensor([-0.3935, 0. , 0.5 ], dtype=oneflow.float32) + + """ + + def __init__(self, alpha: float = 1.0, inplace: bool = False): + super().__init__() + self.alpha = alpha + + def forward(self, x): + return flow.F.elu(x, alpha=self.alpha) + + +@oneflow_export("nn.GELU") +@experimental_api +class GELU(Module): + r"""Gelu activation operator. + + The equation is: + + .. math:: + out = 0.5 * x * (1 + tanh(\sqrt{\frac{2}{\pi}} * (x + 0.044715x^{3}))) + + Args: + x (oneflow.compatible.single_client.Tensor): Input Tensor + + Returns: + oneflow.compatible.single_client.Tensor: A Tensor. + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array([-0.5, 0, 0.5]).astype(np.float32) + >>> input = flow.Tensor(x) + >>> gelu = flow.nn.GELU() + + >>> out = gelu(input) + >>> out + tensor([-0.1543, 0. , 0.3457], dtype=oneflow.float32) + + """ + + def __init__(self): + super().__init__() + + def forward(self, x): + return flow.F.gelu(x) + + +@oneflow_export("gelu") +@register_tensor_op("gelu") +@experimental_api +def gelu_op(x): + r"""Gelu activation operator. + + The equation is: + + .. 
math:: + out = 0.5 * x * (1 + tanh(\sqrt{\frac{2}{\pi}} * (x + 0.044715x^{3}))) + + Args: + x (oneflow.compatible.single_client.Tensor): Input Tensor + + Returns: + oneflow.compatible.single_client.Tensor: A Tensor. + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array([-0.5, 0, 0.5]).astype(np.float32) + >>> input = flow.Tensor(x) + >>> gelu = flow.nn.GELU() + + >>> out = gelu(input) + >>> out + tensor([-0.1543, 0. , 0.3457], dtype=oneflow.float32) + + """ + return GELU()(x) + + +@oneflow_export("nn.Sigmoid") +@experimental_api +class Sigmoid(Module): + r"""Applies the element-wise function: + + .. math:: + \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)} + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.array([0.81733328, 0.43621480, 0.10351428])) + >>> m = flow.nn.Sigmoid() + >>> out = m(x) + >>> out + tensor([0.6937, 0.6074, 0.5259], dtype=oneflow.float32) + """ + + def __init__(self): + super().__init__() + + def forward(self, x): + return flow.F.sigmoid(x) + + +@oneflow_export("sigmoid") +@register_tensor_op("sigmoid") +@experimental_api +def sigmoid_op(x): + r"""Applies the element-wise function: + + .. math:: + \text{Sigmoid}(x) = \sigma(x) = \frac{1}{1 + \exp(-x)} + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.array([0.81733328, 0.43621480, 0.10351428])) + >>> out = flow.sigmoid(x) + >>> out + tensor([0.6937, 0.6074, 0.5259], dtype=oneflow.float32) + + """ + return Sigmoid()(x) + + +@oneflow_export("nn.Hardsigmoid") +@experimental_api +class Hardsigmoid(Module): + r"""Applies the element-wise function: + + .. math:: + \text{Hardsigmoid}(x) = \begin{cases} + 0 & \text{ if } x \le -3 \\ + 1 & \text{ if } x \ge +3 \\ + \frac{x}{6} + \frac{1}{2} & \text{ otherwise } \\ + \end{cases} + + Args: + inplace: can optionally do the operation in-place. Default: ``False`` + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + For example: + + .. 
code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array([-0.5, 0, 0.5]).astype(np.float32) + >>> input = flow.Tensor(x) + >>> hardsigmoid = flow.nn.Hardsigmoid() + + >>> out = hardsigmoid(input) + >>> out + tensor([0.4167, 0.5 , 0.5833], dtype=oneflow.float32) + + + """ + + def __init__(self, inplace: bool = False): + super().__init__() + + def forward(self, x): + return flow.F.hardsigmoid(x) + + +@oneflow_export("nn.Softmax") +@experimental_api +class Softmax(Module): + def __init__(self, dim: Optional[int] = None): + super().__init__() + self.axis = -1 if dim is None else dim + + def forward(self, x): + need_transpose, permute = _softmax_need_transpose(x, self.axis) + if need_transpose: + x = flow.F.transpose(x, perm=permute) + + res = flow.F.softmax(x) + if need_transpose: + res = flow.F.transpose(res, perm=permute) + return res + + +@oneflow_export("softmax") +@register_tensor_op("softmax") +@experimental_api +def softmax_op(tensor, dim=None): + r"""Applies the Softmax function to an n-dimensional input Tensor + rescaling them so that the elements of the n-dimensional output Tensor + lie in the range [0,1] and sum to 1. + + Softmax is defined as: + + .. math:: + \text{Softmax}(x_{i}) = \frac{\exp(x_i)}{\sum_j \exp(x_j)} + + When the input Tensor is a sparse tensor then the unspecifed + values are treated as ``-inf``. + + Shape: + - Input: :math:`(*)` where `*` means, any number of additional + dimensions + - Output: :math:`(*)`, same shape as the input + + Returns: + a Tensor of the same dimension and shape as the input with + values in the range [0, 1] + + Args: + dim (int): A dimension along which Softmax will be computed (so every slice + along dim will sum to 1). + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> m = flow.nn.Softmax(dim = 2) + >>> x = flow.Tensor( + ... np.array( + ... [[[-0.46716809, 0.40112534, 0.61984003], + ... [-1.31244969, -0.42528763, 1.47953856]]] + ... ) + ... ) + >>> out = m(x) + >>> out + tensor([[[0.1575, 0.3754, 0.4671], + [0.0507, 0.123 , 0.8263]]], dtype=oneflow.float32) + """ + return Softmax(dim)(tensor) + + +@oneflow_export("nn.LogSoftmax") +@experimental_api +class LogSoftmax(Module): + r"""Applies the :math:`\log(\text{Softmax}(x))` function to an n-dimensional + input Tensor. + The LogSoftmax formulation can be simplified as: + + .. math:: + \text{LogSoftmax}(x_{i}) = \log\left(\frac{\exp(x_i) }{ \sum_j \exp(x_j)} \right) + + Args: + dim (int): A dimension along which LogSoftmax will be computed. + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> m = flow.nn.LogSoftmax(dim=1) + >>> x = flow.Tensor( + ... np.array( + ... [[ 0.4296, -1.1957, 2.5463], + ... [ 1.2552, -1.5747, 0.6923]] + ... ) + ... 
) + >>> out = m(x) + >>> out + tensor([[-2.2513, -3.8766, -0.1346], + [-0.4877, -3.3176, -1.0506]], dtype=oneflow.float32) + """ + + def __init__( + self, dim: Optional[int] = 1, + ): + super().__init__() + self.dim = dim + + def __setstate__(self, state): + self.__dict__.update(state) + if not hasattr(self, "dim"): + self.dim = None + + def forward(self, x): + need_transpose, permute = _softmax_need_transpose(x, self.dim) + if need_transpose: + x = flow.F.transpose(x, perm=permute) + + x = x.softmax() + res = x.log() + + if need_transpose: + res = flow.F.transpose(res, perm=permute) + + return res + + def extra_repr(self): + return "dim={dim}".format(dim=self.dim) + + +@oneflow_export("nn.LogSigmoid") +@experimental_api +class LogSigmoid(Module): + r"""Applies the element-wise function: + + .. math:: + \text{LogSigmoid}(x) = \log\left(\frac{ 1 }{ 1 + \exp(-x)}\right) + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array([-0.5, 0, 0.5]).astype(np.float32) + >>> input = flow.Tensor(x) + >>> logsigmoid = flow.nn.LogSigmoid() + + >>> out = logsigmoid(input) + >>> out + tensor([-0.9741, -0.6931, -0.4741], dtype=oneflow.float32) + + """ + + def __init__(self): + super().__init__() + + def forward(self, x): + sigmoid_res = flow.experimental.sigmoid(x) + res = flow.experimental.log(sigmoid_res) + return res + + +@oneflow_export("nn.Softplus") +@experimental_api +class Softplus(Module): + r"""Applies the element-wise function: + + .. math:: + \text{Softplus}(x) = \frac{1}{\beta} * \log(1 + \exp(\beta * x)) + + SoftPlus is a smooth approximation to the ReLU function and can be used + to constrain the output of a machine to always be positive. + + For numerical stability the implementation reverts to the linear function + when :math:`input \times \beta > threshold`. + + Args: + beta: the :math:`\beta` value for the Softplus formulation. Default: 1 + threshold: values above this revert to a linear function. Default: 20 + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array([-0.5, 0, 0.5]).astype(np.float32) + >>> input = flow.Tensor(x) + >>> softplus = flow.nn.Softplus() + + >>> out = softplus(input) + >>> out + tensor([0.4741, 0.6931, 0.9741], dtype=oneflow.float32) + """ + + def __init__(self, beta: int = 1, threshold: int = 20): + super().__init__() + self.beta = beta + self.threshold = threshold + + def forward(self, x): + return flow.experimental.where( + x * self.beta > self.threshold, + x, + 1 + / self.beta + * flow.experimental.log(1.0 + flow.experimental.exp(self.beta * x)), + ) + + +@oneflow_export("nn.Hardswish") +@experimental_api +class Hardswish(Module): + r"""Applies the hardswish function, element-wise, as described in the paper: + `Searching for MobileNetV3`_. + + .. math:: + \text{Hardswish}(x) = \begin{cases} + 0 & \text{ if } x \le -3 \\ + x & \text{ if } x \ge +3 \\ + x*(x+3)/6 & \text{ otherwise } \\ + \end{cases} + + Args: + inplace: can optionally do the operation in-place. 
Default: ``False`` + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array([-0.5, 0, 0.5]).astype(np.float32) + >>> input = flow.Tensor(x) + >>> hardswish = flow.nn.Hardswish() + + >>> out = hardswish(input) + >>> out + tensor([-0.2083, 0. , 0.2917], dtype=oneflow.float32) + + .. _`Searching for MobileNetV3`: + https://arxiv.org/abs/1905.02244 + """ + + def __init__(self, inplace: bool = False): + super().__init__() + + def forward(self, x): + return flow.F.hardswish(x) + + +@oneflow_export("nn.Hardtanh") +@experimental_api +class Hardtanh(Module): + r""" + Applies the HardTanh function element-wise + + HardTanh is defined as: + + .. math:: + \text{HardTanh}(x) = \begin{cases} + 1 & \text{ if } x > 1 \\ + -1 & \text{ if } x < -1 \\ + x & \text{ otherwise } \\ + \end{cases} + + The range of the linear region :math:`[-1, 1]` can be adjusted using + :attr:`min_val` and :attr:`max_val`. + + Args: + min_val: minimum value of the linear region range. Default: -1 + max_val: maximum value of the linear region range. Default: 1 + inplace: can optionally do the operation in-place. Default: ``False`` + + Keyword arguments :attr:`min_value` and :attr:`max_value` + have been deprecated in favor of :attr:`min_val` and :attr:`max_val`. + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> m = flow.nn.Hardtanh() + >>> arr = np.array([0.2, 0.3, 3.0, 4.0]) + >>> x = flow.Tensor(arr) + >>> out = m(x) + >>> out + tensor([0.2, 0.3, 1. , 1. ], dtype=oneflow.float32) + + """ + + def __init__( + self, + min_val: float = -1, + max_val: float = 1, + inplace: bool = False, + min_value: Optional[float] = None, + max_value: Optional[float] = None, + ): + super().__init__() + if min_value is not None: + warnings.warn( + "keyword argument min_value is deprecated and rename to min_val" + ) + min_val = min_value + if max_value is not None: + warnings.warn( + "keyword argument max_value is deprecated and rename to max_val" + ) + max_val = max_value + + self.min_val = min_val + self.max_val = max_val + + def forward(self, x): + return flow.F.hardtanh(x, min_val=self.min_val, max_val=self.max_val) + + +@oneflow_export("nn.LeakyReLU") +@experimental_api +class LeakyReLU(Module): + r"""Applies the element-wise function: + + .. math:: + \text{LeakyRELU}(x) = \begin{cases} + x, & \text{ if } x \geq 0 \\ + \text{negative_slope} \times x, & \text{ otherwise } + \end{cases} + + Args: + negative_slope: Controls the angle of the negative slope. Default: 1e-2 + inplace: can optionally do the operation in-place. Default: ``False`` + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> m = flow.nn.LeakyReLU(0.1) + >>> arr = np.array([0.2, 0.3, 3.0, 4.0]) + >>> x = flow.Tensor(arr) + >>> out = m(x) + >>> out + tensor([0.2, 0.3, 3. , 4. 
], dtype=oneflow.float32) + """ + + def __init__(self, negative_slope: float = 1e-2, inplace: bool = False): + super().__init__() + self.negative_slope = negative_slope + + def forward(self, x): + return flow.F.leaky_relu(x, alpha=self.negative_slope) + + +@oneflow_export("nn.Mish") +@experimental_api +class Mish(Module): + r"""Applies the element-wise function: + + .. math:: + \text{Mish}(x) = x * \text{Tanh}(\text{Softplus}(x)) + + .. note:: + See `Mish: A Self Regularized Non-Monotonic Neural Activation Function <https://arxiv.org/abs/1908.08681>`_ + + Shape: + - Input: :math:`(N, *)` where `*` means, any number of additional + dimensions + - Output: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array([1, 2, 3]).astype(np.float32) + >>> input = flow.Tensor(x) + >>> mish = flow.nn.Mish() + + >>> out = mish(input) + >>> out + tensor([0.8651, 1.944 , 2.9865], dtype=oneflow.float32) + """ + + def __init__(self, inplace: bool = False): + assert not inplace, "In-place operation is not currently supported" + super().__init__() + + def forward(self, x): + return x * flow.experimental.tanh(flow.experimental.softplus(x)) + + +@oneflow_export("mish") +@experimental_api +def mish_op(x): + r"""Applies the element-wise function: + + .. math:: + \text{Mish}(x) = x * \text{Tanh}(\text{Softplus}(x)) + + .. note:: + See `Mish: A Self Regularized Non-Monotonic Neural Activation Function <https://arxiv.org/abs/1908.08681>`_ + + See :mod:`oneflow.compatible.single_client.experimental.nn.Mish` + """ + + return Mish()(x) + + +@register_tensor_op("mish") +@experimental_api +def mish_op_tensor(x): + r""" + mish() -> Tensor + See :func:`oneflow.compatible.single_client.experimental.mish` + """ + + return Mish()(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/adaptive_pool.py b/oneflow/compatible_single_client_python/nn/modules/adaptive_pool.py new file mode 100644 index 0000000000000000000000000000000000000000..425e72a7913716389d17faafa14fced468a0b2bc --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/adaptive_pool.py @@ -0,0 +1,109 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) + + +@oneflow_export("nn.AdaptiveAvgPool2d") +@experimental_api +class AdaptiveAvgPool2d(Module): + r"""Applies a 2D adaptive average pooling over an input signal composed of several input planes. + + The output is of size H x W, for any input size. + The number of output features is equal to the number of input planes. 
+
+    Args:
+        output_size: the target output size of the image of the form H x W.
+            Can be a tuple (H, W) or a single H for a square image H x H.
+            H and W can be either an ``int``, or ``None``, which means the size
+            will be the same as that of the input.
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import oneflow.compatible.single_client.experimental.nn as nn
+        >>> flow.enable_eager_execution()
+
+        >>> m = nn.AdaptiveAvgPool2d((5,7))
+        >>> input = flow.Tensor(np.random.randn(1, 64, 8, 9))
+        >>> output = m(input)
+        >>> output.size()
+        flow.Size([1, 64, 5, 7])
+
+        >>> m = nn.AdaptiveAvgPool2d(7)
+        >>> input = flow.Tensor(np.random.randn(1, 64, 10, 9))
+        >>> output = m(input)
+        >>> output.size()
+        flow.Size([1, 64, 7, 7])
+
+        >>> m = nn.AdaptiveAvgPool2d((None, 7))
+        >>> input = flow.Tensor(np.random.randn(1, 64, 10, 9))
+        >>> output = m(input)
+        >>> output.size()
+        flow.Size([1, 64, 10, 7])
+
+    """
+
+    def __init__(self, output_size) -> None:
+        super().__init__()
+        self.output_size = output_size
+
+        self._op = (
+            flow.builtin_op("adaptive_avg_pool2d")
+            .Input("x")
+            .Attr("output_size", [])
+            .Output("y")
+            .Build()
+        )
+
+    def forward(self, x):
+        new_output_size = []
+        assert len(x.shape) == 4
+
+        if isinstance(self.output_size, int):
+            new_output_size.append(self.output_size)
+            new_output_size.append(self.output_size)
+        elif isinstance(self.output_size, tuple):
+            new_output_size = list(self.output_size)
+            if self.output_size[0] is None:
+                new_output_size[0] = x.shape[2]
+            if self.output_size[1] is None:
+                new_output_size[1] = x.shape[3]
+        else:
+            raise NotImplementedError(
+                "output_size should be an int or a tuple of two ints"
+            )
+
+        new_output_size = tuple(new_output_size)
+        assert (
+            new_output_size[0] <= x.shape[2]
+        ), "output_size should be no larger than the input size in the height dimension"
+        assert (
+            new_output_size[1] <= x.shape[3]
+        ), "output_size should be no larger than the input size in the width dimension"
+
+        return self._op(x, output_size=new_output_size)[0]
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/arange.py b/oneflow/compatible_single_client_python/nn/modules/arange.py
new file mode 100644
index 0000000000000000000000000000000000000000..21ccd4742ed960df1aa57c3b4d0652917e7afece
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/arange.py
@@ -0,0 +1,112 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from typing import Union
+
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op
+
+
+class Arange(Module):
+    def __init__(
+        self,
+        start: int = 0,
+        end: int = None,
+        step: int = 1,
+        dtype: flow.dtype = None,
+        device: Union[str, flow.device] = "cpu",
+        requires_grad: bool = False,
+    ) -> None:
+        super().__init__()
+        assert end > start, "end should be larger than start"
+        assert step <= end - start, "step should be no larger than end - start"
+
+        self.start = start
+        self.end = end
+        self.step = step
+        self.dtype = dtype
+        self.device = device
+        self.requires_grad = requires_grad
+
+    def forward(self):
+        tmp = flow.F.range(
+            start=self.start, limit=self.end, delta=self.step, dtype=flow.int64
+        )
+        tmp.requires_grad = self.requires_grad
+
+        if isinstance(self.device, str):
+            device = flow.device(self.device)
+        else:
+            device = self.device
+
+        res = tmp.to(device, dtype=self.dtype)
+        return res
+
+
+@oneflow_export("arange")
+@experimental_api
+def arange_op(
+    start: int = 0,
+    end: int = None,
+    step: int = 1,
+    dtype: flow.dtype = flow.int64,
+    device: Union[str, flow.device] = "cpu",
+    requires_grad: bool = False,
+):
+    r"""
+    Returns a 1-D tensor of size :math:`\left\lceil \frac{\text{end} - \text{start}}{\text{step}} \right\rceil`
+    with values from :attr:`start` to :attr:`end` (exclusive) taken with step :attr:`step`, where step is
+    the gap between two adjacent values in the tensor.
+
+    .. math::
+        \text{out}_{i+1} = \text{out}_i + \text{step}.
+
+    Args:
+        start (int): the starting value for the set of points. Default: ``0``.
+        end (int): the ending value for the set of points
+        step (int): the gap between each pair of adjacent points. Default: ``1``.
+
+    Keyword args:
+        dtype(flow.dtype, optional): If `dtype` is not given, the `dtype` is inferred to be `flow.int64`.
+        device(flow.device, optional): the desired device of returned tensor. Default: if None, uses the current device for the default tensor.
+        requires_grad(bool, optional): If autograd should record operations on the returned tensor. Default: `False`.
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> y = flow.arange(0, 5)
+        >>> y
+        tensor([0, 1, 2, 3, 4], dtype=oneflow.int64)
+
+    """
+    if end is None:
+        end = start
+        start = 0
+    return Arange(start, end, step, dtype, device, requires_grad)()
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
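Worth noting in arange_op above: a single positional argument is treated as end, with start reset to 0, mirroring the `if end is None` branch. A small usage sketch, assuming eager mode as in the doctests:

import oneflow.compatible.single_client.experimental as flow

flow.enable_eager_execution()

# Single-argument form: interpreted as end=5, start=0 via the
# `if end is None` branch of arange_op.
y = flow.arange(5)
print(y)  # tensor([0, 1, 2, 3, 4], dtype=oneflow.int64)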
diff --git a/oneflow/compatible_single_client_python/nn/modules/argmax.py b/oneflow/compatible_single_client_python/nn/modules/argmax.py
new file mode 100644
index 0000000000000000000000000000000000000000..d2ca40f5242e4b645ac223e86368c687194e0db9
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/argmax.py
@@ -0,0 +1,98 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op
+from oneflow.compatible_single_client_python.ops.transpose_util import (
+    get_perm_when_transpose_axis_to_last_dim,
+    get_inversed_perm,
+)
+
+
+class Argmax(Module):
+    def __init__(self, dim: int = None, keepdim: bool = False) -> None:
+        super().__init__()
+        self.dim = dim
+        self.keepdim = keepdim
+
+    def forward(self, input):
+        # Use a local copy of `dim` so that repeated calls with dim=None do not
+        # leak the flattened axis back into the module state.
+        dim = self.dim
+        if dim is None:
+            input = flow.F.flatten(input)
+            dim = 0
+
+        num_axes = len(input.shape)
+        axis = dim if dim >= 0 else dim + num_axes
+        assert 0 <= axis < num_axes, "axis out of range"
+        if axis == num_axes - 1:
+            x = flow.F.argmax(input)
+            if self.keepdim:
+                x = flow.experimental.unsqueeze(x, -1)
+            return x
+        else:
+            perm = get_perm_when_transpose_axis_to_last_dim(num_axes, axis)
+            x = flow.F.transpose(input, perm=perm)
+            x = flow.F.argmax(x)
+            x = flow.experimental.unsqueeze(x, -1)
+            x = flow.F.transpose(x, perm=get_inversed_perm(perm))
+            if not self.keepdim:
+                x = x.squeeze(dim=[axis])
+            return x
+
+
+@oneflow_export("argmax")
+@register_tensor_op("argmax")
+@experimental_api
+def argmax_op(input, dim: int = None, keepdim: bool = False):
+    """This operator computes the index of the largest value of a Tensor along the specified axis.
+
+    Args:
+        input (oneflow.compatible.single_client.Tensor): Input Tensor
+        dim (int, optional): the dimension to reduce. Defaults to None, in which
+            case the input is flattened and the index of the global maximum is returned.
+        keepdim (bool, optional): whether the output tensor has dim retained or not. Ignored if dim=None.
+
+    Returns:
+        oneflow.compatible.single_client.Tensor: A Tensor (dtype=int32) containing the index of the largest value of `input`
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> x = np.array([[1, 3, 8, 7, 2],
+        ...               [1, 9, 4, 3, 2]], dtype=np.float32)
+
+        >>> out = flow.argmax(flow.Tensor(x))
+        >>> out
+        tensor([6], dtype=oneflow.int32)
+        >>> out = flow.argmax(flow.Tensor(x), dim=1)
+        >>> out
+        tensor([2, 1], dtype=oneflow.int32)
+
+    """
+    return Argmax(dim=dim, keepdim=keepdim)(input)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
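Argmax above and Argsort below share one trick: a kernel that only handles the last dimension is applied to any dim by permuting that axis to the end and undoing the permutation afterwards. The sketch below re-implements the two helpers from ops/transpose_util.py in plain NumPy purely as an illustration; the actual permutation the real helpers produce may differ, but any permutation paired with its inverse works the same way.

import numpy as np


def perm_axis_to_last(num_axes, axis):
    # Move `axis` to the end, keeping the other axes in order.
    return [i for i in range(num_axes) if i != axis] + [axis]


def inverse_perm(perm):
    # inv[p] = i undoes perm: transpose(perm) followed by transpose(inv) is identity.
    inv = [0] * len(perm)
    for i, p in enumerate(perm):
        inv[p] = i
    return inv


x = np.random.rand(2, 3, 4)
perm = perm_axis_to_last(x.ndim, 1)          # move axis 1 to the end
y = np.argmax(x.transpose(perm), axis=-1)    # kernel only handles the last dim
y = np.expand_dims(y, -1).transpose(inverse_perm(perm))
assert np.array_equal(y.squeeze(1), np.argmax(x, axis=1))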
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op
+from oneflow.compatible_single_client_python.ops.transpose_util import (
+    get_perm_when_transpose_axis_to_last_dim,
+    get_inversed_perm,
+)
+
+
+class Argsort(Module):
+    def __init__(self, dim: int = -1, descending: bool = False) -> None:
+        super().__init__()
+        self.dim = dim
+        direction = "DESCENDING" if descending else "ASCENDING"
+        self._argsort_op = (
+            flow.builtin_op("arg_sort")
+            .Input("in")
+            .Output("out")
+            .Attr("direction", direction)
+            .Build()
+        )
+
+    def forward(self, input):
+        num_dims = len(input.shape)
+        dim = self.dim if self.dim >= 0 else self.dim + num_dims
+        assert 0 <= dim < num_dims, "dim out of range"
+        if dim == num_dims - 1:
+            return self._argsort_op(input)[0]
+        else:
+            perm = get_perm_when_transpose_axis_to_last_dim(num_dims, dim)
+            x = flow.F.transpose(input, perm=perm)
+            x = self._argsort_op(x)[0]
+            return flow.F.transpose(x, perm=get_inversed_perm(perm))
+
+
+@oneflow_export("argsort")
+@register_tensor_op("argsort")
+@experimental_api
+def argsort_op(input, dim: int = -1, descending: bool = False):
+    """This operator sorts the input Tensor along the specified dim and returns the indices of the sorted elements.
+
+    Args:
+        input (oneflow.compatible.single_client.Tensor): The input Tensor.
+        dim (int, optional): dimension to be sorted. Defaults to the last dim (-1).
+        descending (bool, optional): controls the sorting order (ascending or descending). Defaults to ``False`` (ascending).
+
+    Returns:
+        oneflow.compatible.single_client.Tensor: The indices of the sorted Tensor.
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> x = np.array([[10, 2, 9, 3, 7],
+        ...               [1, 9, 4, 3, 2]]).astype("float32")
+        >>> input = flow.Tensor(x)
+        >>> output = flow.argsort(input)
+        >>> output
+        tensor([[1, 3, 4, 2, 0],
+                [0, 4, 3, 2, 1]], dtype=oneflow.int32)
+        >>> output = flow.argsort(input, descending=True)
+        >>> output
+        tensor([[0, 2, 4, 3, 1],
+                [1, 2, 3, 4, 0]], dtype=oneflow.int32)
+        >>> output = flow.argsort(input, dim=0)
+        >>> output
+        tensor([[1, 0, 1, 0, 1],
+                [0, 1, 0, 1, 0]], dtype=oneflow.int32)
+
+    """
+    return Argsort(dim=dim, descending=descending)(input)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
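+# Usage sketch (illustrative, assumes eager mode): argsort returns indices, so
+# the sorted values themselves can be recovered with numpy's take_along_axis.
+#
+#   >>> x = np.array([[10, 2, 9], [1, 9, 4]], dtype=np.float32)
+#   >>> idx = flow.argsort(flow.Tensor(x)).numpy()
+#   >>> np.take_along_axis(x, idx, axis=-1)
+#   array([[ 2.,  9., 10.],
+#          [ 1.,  4.,  9.]], dtype=float32)
diff --git a/oneflow/compatible_single_client_python/nn/modules/argwhere.py b/oneflow/compatible_single_client_python/nn/modules/argwhere.py
new file mode 100644
index 0000000000000000000000000000000000000000..72167a45ff6df77b9d427d83cbc9b3ed56312ea4
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/argwhere.py
@@ -0,0 +1,93 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.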
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from typing import Optional
+
+from oneflow.compatible import single_client as flow
+import numpy as np
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op
+
+
+class Argwhere(Module):
+    def __init__(self, dtype) -> None:
+        super().__init__()
+        if dtype is None:
+            dtype = flow.int32
+        self.dtype = dtype
+
+    def forward(self, x):
+        res, size = flow.F.argwhere(x, dtype=self.dtype)
+        slice_tup_list = [[0, int(size.numpy()), 1]]
+        return flow.experimental.slice(res, slice_tup_list=slice_tup_list)
+
+
+@oneflow_export("argwhere")
+@experimental_api
+def argwhere_op(x, dtype: Optional[flow.dtype] = None):
+    """This operator finds the indices of input Tensor `x` elements that are non-zero.
+
+    It returns a Tensor in which each row is the coordinate of a non-zero element of `x`.
+
+    Args:
+        x (oneflow.compatible.single_client.Tensor): The input Tensor.
+        dtype (Optional[flow.dtype], optional): The data type of output. Defaults to None.
+
+    Returns:
+        oneflow.compatible.single_client.Tensor: The result Tensor.
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> x = np.array([[0, 1, 0],
+        ...               [2, 0, 2]]).astype(np.float32)
+
+        >>> input = flow.Tensor(x)
+        >>> output = flow.argwhere(input)
+        >>> output
+        tensor([[0, 1],
+                [1, 0],
+                [1, 2]], dtype=oneflow.int32)
+
+    """
+    return Argwhere(dtype=dtype)(x)
+
+
+@register_tensor_op("argwhere")
+@experimental_api
+def argwhere_tensor_op(x, dtype: Optional[flow.dtype] = None):
+    """
+
+    argwhere() -> Tensor
+
+    See :func:`oneflow.compatible.single_client.experimental.argwhere`
+
+    """
+    return Argwhere(dtype=dtype)(x)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
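+# Usage sketch (illustrative, assumes eager mode): the result agrees with
+# np.argwhere on the same array, modulo the requested index dtype.
+#
+#   >>> arr = np.array([[0, 1, 0], [2, 0, 2]], dtype=np.float32)
+#   >>> np.array_equal(flow.argwhere(flow.Tensor(arr)).numpy(), np.argwhere(arr))
+#   True
diff --git a/oneflow/compatible_single_client_python/nn/modules/atan2.py b/oneflow/compatible_single_client_python/nn/modules/atan2.py
new file mode 100644
index 0000000000000000000000000000000000000000..7870612fa99d01e9f8fcdfb05814e36a6ef4b706
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/atan2.py
@@ -0,0 +1,92 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.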
+""" + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.nn.module import Module + + +class Atan2(Module): + def __init__(self) -> None: + super().__init__() + self.atan2_op = ( + flow.builtin_op("atan2").Input("x").Input("y").Output("z").Build() + ) + + def forward(self, x, y): + return self.atan2_op(x, y)[0] + + +@oneflow_export("atan2") +@experimental_api +def atan2_op(input, other): + r"""Element-wise arctangent of input{i}/other{i} + with consideration of the quadrant. Returns a new tensor with the signed + angles in radians between vector (other{i},input{i}) and vector (1, 0). + + The shapes of input and other must be broadcastable. + + Args: + input (Tensor): the first input tensor. + + other (Tensor): the second input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + + >>> x1 = flow.Tensor(np.array([1,2,3])) + >>> y1 = flow.Tensor(np.array([3,2,1])) + >>> x2 = flow.Tensor(np.array([1.53123589,0.54242598,0.15117185])) + >>> y2 = flow.Tensor(np.array([-0.21906378,0.09467151,-0.75562878])) + >>> x3 = flow.Tensor(np.array([1,0,-1])) + >>> y3 = flow.Tensor(np.array([0,1,0])) + + >>> flow.enable_eager_execution() + >>> flow.atan2(x1,y1).numpy() + array([0.32175055, 0.7853982 , 1.2490457 ], dtype=float32) + >>> flow.atan2(x2,y2).numpy() + array([1.7128955, 1.3980033, 2.9441385], dtype=float32) + >>> flow.atan2(x3,y3).numpy() + array([ 1.5707964, 0. , -1.5707964], dtype=float32) + + """ + return Atan2()(input, other) + + +@register_tensor_op("atan2") +@experimental_api +def atan2_op_tensor(input, other): + r""" + + atan2(other) -> Tensor + + See :func:`oneflow.compatible.single_client.experimental.atan2` + """ + return Atan2()(input, other) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/atanh.py b/oneflow/compatible_single_client_python/nn/modules/atanh.py new file mode 100644 index 0000000000000000000000000000000000000000..141b39c50c3a4b7ef06570c47a9a6e43a92edb04 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/atanh.py @@ -0,0 +1,99 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Atanh(Module): + def __init__(self): + super().__init__() + + def forward(self, x): + return flow.F.atanh(x) + + +@oneflow_export("atanh") +@experimental_api +def atanh_op(input): + r"""Returns a new tensor with the inverse hyperbolic tangent of the elements of :attr:`input`. + + .. math:: + \text{out}_{i} = \tanh^{-1}(\text{input}_{i}) + + Args: + input (Tensor): the input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> np_arr = np.array([0.5, 0.6, 0.7]).astype(np.float32) + >>> input = flow.Tensor(np_arr) + >>> output = flow.atanh(input) + >>> output + tensor([0.5493, 0.6931, 0.8673], dtype=oneflow.float32) + + """ + + return Atanh()(input) + + +@register_tensor_op("atanh") +@experimental_api +def atanh_op_tensor(x): + r""" + atanh() -> Tensor + See :func:`oneflow.compatible.single_client.experimental.atanh` + + """ + + return Atanh()(x) + + +@oneflow_export("arctanh") +@experimental_api +def arctanh_op(input): + r""" + + Alias for :func:`oneflow.compatible.single_client.experimental.atanh` + """ + + return Atanh()(input) + + +@register_tensor_op("arctanh") +@experimental_api +def arctanh_op_tensor(input): + r""" + + Alias for :func:`oneflow.compatible.single_client.experimental.atanh` + """ + + return Atanh()(input) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/batchnorm.py b/oneflow/compatible_single_client_python/nn/modules/batchnorm.py new file mode 100644 index 0000000000000000000000000000000000000000..a860108b4e1702cb7f6b8290879c450956cb2a1c --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/batchnorm.py @@ -0,0 +1,357 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from typing import Union + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module + + +class _NormBase(Module): + """Common base of _InstanceNorm and _BatchNorm""" + + def __init__( + self, + num_features: int, + eps: float = 1e-5, + momentum: float = 0.1, + affine: bool = True, + track_running_stats: bool = True, + device: Union[str, flow.device] = None, + dtype: flow.dtype = None, + ) -> None: + super().__init__() + self.num_features = num_features + self.eps = eps + self.momentum = momentum + self.affine = affine + self.track_running_stats = track_running_stats + self.device = device + self.dtype = dtype + + if self.affine: + self.weight = flow.nn.Parameter( + flow.Tensor(num_features, device=self.device) + ) + self.bias = flow.nn.Parameter(flow.Tensor(num_features, device=self.device)) + else: + self.register_parameter("weight", None) + self.register_parameter("bias", None) + if self.track_running_stats: + self.register_buffer( + "running_mean", flow.Tensor(num_features, device=self.device), + ) + self.register_buffer( + "running_var", flow.Tensor(num_features, device=self.device), + ) + else: + self.register_parameter("running_mean", None) + self.register_parameter("running_var", None) + + self.reset_parameters() + + def reset_running_stats(self) -> None: + if self.track_running_stats: + self.running_mean.fill_(0) + self.running_var.fill_(1) + + def reset_parameters(self) -> None: + self.reset_running_stats() + if self.affine: + flow.nn.init.ones_(self.weight) + flow.nn.init.zeros_(self.bias) + + def _check_input_dim(self, input): + raise NotImplementedError + + def _load_from_state_dict( + self, + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ): + super(_NormBase, self)._load_from_state_dict( + state_dict, + prefix, + local_metadata, + strict, + missing_keys, + unexpected_keys, + error_msgs, + ) + + +class _BatchNorm(_NormBase): + def __init__( + self, + num_features, + eps=1e-5, + momentum=0.1, + affine=True, + track_running_stats=True, + device=None, + dtype=None, + ): + super().__init__( + num_features, eps, momentum, affine, track_running_stats, device, dtype + ) + + def forward(self, x): + if self.dtype is None: + self.dtype = x.dtype + if self.device is None: + self.device = x.device + + self._check_input_dim(x) + reduce_axis = [] + for dim in range(len(x.shape)): + if dim != 1: + reduce_axis.append(dim) + mean = x.mean(dim=reduce_axis, keepdim=False) + variance = x.var(dim=reduce_axis, keepdim=False) + + if x.device == flow.device("cpu"): + if self.training and self.track_running_stats: + running_mean = ( + self.momentum * self.running_mean + (1 - self.momentum) * mean + ) + running_var = ( + self.momentum * self.running_var + (1 - self.momentum) * variance + ) + # update training buffers + self.__setattr__("running_mean", flow.Tensor(running_mean)) + self.__setattr__("running_var", flow.Tensor(running_var)) + + else: + mean = mean if self.running_mean is None else self.running_mean + variance = variance if self.running_var is None else self.running_var + + axis = 1 + params_shape = [x.shape[axis]] + weight = self.weight + bias = self.bias + + if len(mean.shape) == 1: + nd_params_shape = [1] * len(x.shape) + nd_params_shape[axis] = params_shape[0] + mean = mean.reshape(shape=nd_params_shape) + variance = 
variance.reshape(shape=nd_params_shape) + + if self.weight and params_shape[0] == self.weight.nelement(): + weight = self.weight.reshape(shape=nd_params_shape) + if self.bias and params_shape[0] == self.bias.nelement(): + bias = self.bias.reshape(shape=nd_params_shape) + elif len(mean.shape) == len(x.shape): + pass + else: + raise ValueError( + "shape of mean and variance should be 1D or has number of axes and x's" + ) + + variance += self.eps + normalized = (x - mean) * variance.rsqrt() + affined = normalized + + if self.weight: + affined = affined * weight + if self.bias: + affined = affined + bias + return affined.to(dtype=self.dtype) + + else: + res = flow.F.normalization( + x, + self.running_mean if self.track_running_stats else mean, + self.running_var if self.track_running_stats else variance, + self.weight, + self.bias, + axis=1, + epsilon=self.eps, + momentum=self.momentum, + is_training=self.training, + ) + return res.to(dtype=self.dtype, device=self.device) + + +@oneflow_export("nn.BatchNorm1d") +@experimental_api +class BatchNorm1d(_BatchNorm): + r"""Applies Batch Normalization over a 2D or 3D input (a mini-batch of 1D + inputs with optional additional channel dimension) as described in the paper + `Batch Normalization: Accelerating Deep Network Training by Reducing + Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`__ . + + .. math:: + + y = \frac{x - \mathrm{E}[x]}{\sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta + + The mean and standard-deviation are calculated per-dimension over + the mini-batches and :math:`\gamma` and :math:`\beta` are learnable parameter vectors + of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are set + to 1 and the elements of :math:`\beta` are set to 0. The standard-deviation is calculated + via the biased estimator, equivalent to `torch.var(input, unbiased=False)`. + + Also by default, during training this layer keeps running estimates of its + computed mean and variance, which are then used for normalization during + evaluation. The running estimates are kept with a default :attr:`momentum` + of 0.1. + + If :attr:`track_running_stats` is set to ``False``, this layer then does not + keep running estimates, and batch statistics are instead used during + evaluation time as well. + + .. note:: + This :attr:`momentum` argument is different from one used in optimizer + classes and the conventional notion of momentum. Mathematically, the + update rule for running statistics here is + :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`, + where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the + new observed value. + + Because the Batch Normalization is done over the `C` dimension, computing statistics + on `(N, L)` slices, it's common terminology to call this Temporal Batch Normalization. + + Args: + num_features: :math:`C` from an expected input of size + :math:`(N, C, L)` or :math:`L` from input of size :math:`(N, L)` + eps: a value added to the denominator for numerical stability. + Default: 1e-5 + momentum: the value used for the running_mean and running_var + computation. Can be set to ``None`` for cumulative moving average + (i.e. simple average). Default: 0.1 + affine: a boolean value that when set to ``True``, this module has + learnable affine parameters. 
Default: ``True`` + track_running_stats: a boolean value that when set to ``True``, this + module tracks the running mean and variance, and when set to ``False``, + this module does not track such statistics, and initializes statistics + buffers :attr:`running_mean` and :attr:`running_var` as ``None``. + When these buffers are ``None``, this module always uses batch statistics. + in both training and eval modes. Default: ``True`` + + Shape: + - Input: :math:`(N, C)` or :math:`(N, C, L)` + - Output: :math:`(N, C)` or :math:`(N, C, L)` (same shape as input) + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.random.randn(20, 100)) + >>> m = flow.nn.BatchNorm1d(100) + >>> y = m(x) + + """ + + def _check_input_dim(self, input): + if input.ndim != 2 and input.ndim != 3: + raise ValueError( + "expected 2D or 3D input (got {}D input)".format(input.ndim) + ) + + +@oneflow_export("nn.BatchNorm2d") +@experimental_api +class BatchNorm2d(_BatchNorm): + r"""Applies Batch Normalization over a 4D input (a mini-batch of 2D inputs + with additional channel dimension) as described in the paper + `Batch Normalization: Accelerating Deep Network Training by Reducing + Internal Covariate Shift <https://arxiv.org/abs/1502.03167>`__ . + + .. math:: + + y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta + + The mean and standard-deviation are calculated per-dimension over + the mini-batches and :math:`\gamma` and :math:`\beta` are learnable parameter vectors + of size `C` (where `C` is the input size). By default, the elements of :math:`\gamma` are set + to 1 and the elements of :math:`\beta` are set to 0. The standard-deviation is calculated + via the biased estimator, equivalent to `torch.var(input, unbiased=False)`. + + Also by default, during training this layer keeps running estimates of its + computed mean and variance, which are then used for normalization during + evaluation. The running estimates are kept with a default :attr:`momentum` + of 0.1. + + If :attr:`track_running_stats` is set to ``False``, this layer then does not + keep running estimates, and batch statistics are instead used during + evaluation time as well. + + .. note:: + This :attr:`momentum` argument is different from one used in optimizer + classes and the conventional notion of momentum. Mathematically, the + update rule for running statistics here is + :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`, + where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the + new observed value. + + Because the Batch Normalization is done over the `C` dimension, computing statistics + on `(N, H, W)` slices, it's common terminology to call this Spatial Batch Normalization. + + Args: + num_features: :math:`C` from an expected input of size + :math:`(N, C, H, W)` + eps: a value added to the denominator for numerical stability. + Default: 1e-5 + momentum: the value used for the running_mean and running_var + computation. Can be set to ``None`` for cumulative moving average + (i.e. simple average). Default: 0.1 + affine: a boolean value that when set to ``True``, this module has + learnable affine parameters. 
+        track_running_stats: a boolean value that when set to ``True``, this
+            module tracks the running mean and variance, and when set to ``False``,
+            this module does not track such statistics, and initializes statistics
+            buffers :attr:`running_mean` and :attr:`running_var` as ``None``.
+            When these buffers are ``None``, this module always uses batch statistics
+            in both training and eval modes. Default: ``True``
+
+    Shape:
+        - Input: :math:`(N, C, H, W)`
+        - Output: :math:`(N, C, H, W)` (same shape as input)
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import numpy as np
+        >>> flow.enable_eager_execution()
+
+        >>> x = flow.Tensor(np.random.randn(4, 2, 8, 3))
+        >>> m = flow.nn.BatchNorm2d(num_features=2, eps=1e-5, momentum=0.1)
+        >>> y = m(x)
+
+    """
+
+    def _check_input_dim(self, input):
+        if input.ndim != 4:
+            raise ValueError("expected 4D input (got {}D input)".format(input.ndim))
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
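+# Usage sketch (illustrative, assumes eager mode): after switching to eval(),
+# normalization uses the tracked running statistics rather than the current
+# batch's statistics.
+#
+#   >>> m = flow.nn.BatchNorm2d(2)
+#   >>> m = m.eval()
+#   >>> y = m(flow.Tensor(np.random.randn(1, 2, 4, 4)))  # uses running_mean/var
diff --git a/oneflow/compatible_single_client_python/nn/modules/bmm.py b/oneflow/compatible_single_client_python/nn/modules/bmm.py
new file mode 100644
index 0000000000000000000000000000000000000000..b552e73c8d4995cacf9336931bccc962d8d196b3
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/bmm.py
@@ -0,0 +1,82 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op
+
+
+class BMM(Module):
+    def __init__(self) -> None:
+        super().__init__()
+
+    def forward(self, input, mat2):
+        assert (
+            input.shape[0] == mat2.shape[0] and input.shape[2] == mat2.shape[1]
+        ), "batch dim or matmul dim not match, please check input!"
+        return flow.F.batch_matmul(input, mat2)
+
+
+@oneflow_export("bmm")
+@experimental_api
+def bmm_op(x, y):
+    """
+    Performs a batch matrix-matrix product of matrices stored in input and mat2.
+
+    `input` and `mat2` must be 3-D tensors each containing the same number of matrices.
+
+    If input is a (b x n x m) tensor, mat2 is a (b x m x p) tensor, out will be a (b x n x p) tensor.
+
+    Args:
+        input(oneflow.compatible.single_client.Tensor): the first batch of matrices to be multiplied
+        mat2(oneflow.compatible.single_client.Tensor): the second batch of matrices to be multiplied
+
+    For example:
+
+    .. code-block:: python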
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import numpy as np
+        >>> flow.enable_eager_execution()
+        >>> input1 = flow.Tensor(np.random.randn(10, 3, 4), dtype=flow.float32)
+        >>> input2 = flow.Tensor(np.random.randn(10, 4, 5), dtype=flow.float32)
+        >>> of_out = flow.bmm(input1, input2)
+        >>> of_out.shape
+        flow.Size([10, 3, 5])
+    """
+    return BMM()(x, y)
+
+
+@register_tensor_op("bmm")
+@experimental_api
+def bmm_op_tensor(x, y):
+    r"""
+
+    bmm() -> Tensor
+
+    See :func:`oneflow.compatible.single_client.experimental.bmm`
+
+    """
+    return BMM()(x, y)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/broadcast_like.py b/oneflow/compatible_single_client_python/nn/modules/broadcast_like.py
new file mode 100644
index 0000000000000000000000000000000000000000..e386112e8b09b8a56e5ef8e74638c7c8540e0da9
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/broadcast_like.py
@@ -0,0 +1,36 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from typing import Sequence
+
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+
+
+class BroadCastLike(Module):
+    def __init__(self, broadcast_axes: Sequence[int]) -> None:
+        super().__init__()
+        self.broadcast_axes = broadcast_axes
+
+    def forward(self, x, like_tensor):
+        return flow.F.broadcast_like(x, like_tensor, broadcast_axes=self.broadcast_axes)
+
+
+@oneflow_export("broadcast_like")
+@experimental_api
+def broadcast_like_op(x, like_tensor, broadcast_axes: Sequence[int]):
+    """Broadcasts `x` along `broadcast_axes` to the shape of `like_tensor`."""
+    return BroadCastLike(broadcast_axes=broadcast_axes)(x, like_tensor)
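+# Usage sketch (illustrative, assumes eager mode and that broadcast_axes marks
+# the axes expanded to match like_tensor): expand a (3, 1) tensor to (3, 4).
+#
+#   >>> x = flow.Tensor(np.ones((3, 1), dtype=np.float32))
+#   >>> like = flow.Tensor(np.zeros((3, 4), dtype=np.float32))
+#   >>> flow.broadcast_like(x, like, broadcast_axes=(1,)).shape
+#   flow.Size([3, 4])
diff --git a/oneflow/compatible_single_client_python/nn/modules/cast.py b/oneflow/compatible_single_client_python/nn/modules/cast.py
new file mode 100644
index 0000000000000000000000000000000000000000..b6ea4bf634022920bfdaded83cfedec1779f8e93
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/cast.py
@@ -0,0 +1,68 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.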
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Cast(Module): + def __init__(self, dtype: flow.dtype) -> None: + super().__init__() + self.dtype = dtype + + def forward(self, x): + return flow.F.cast(x, dtype=self.dtype) + + +@oneflow_export("cast") +@register_tensor_op("cast") +@experimental_api +def cast_op(x, dtype): + r"""The operation takes input tensor `x` and casts it to the output with `dtype` + + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor + dtype (flow.dtype): Data type of the output tensor + + Returns: + oneflow.compatible.single_client.Tensor: A Tensor with specific dtype. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> np_arr = np.random.randn(2, 3, 4, 5).astype(np.float32) + >>> input = flow.Tensor(np_arr, dtype=flow.float32) + >>> output = flow.cast(input, flow.int8) + >>> np.array_equal(output.numpy(), np_arr.astype(np.int8)) + True + + """ + return Cast(dtype)(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/chunk.py b/oneflow/compatible_single_client_python/nn/modules/chunk.py new file mode 100644 index 0000000000000000000000000000000000000000..eed0e2b6dd5cdaf2cc74820b774ec8f323aa9a99 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/chunk.py @@ -0,0 +1,142 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from typing import Optional + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework.tensor import Tensor +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.ops.array_ops import check_slice_tup_list + + +class Chunk(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, input, chunks, dim): + if dim is not None: + assert input.shape[dim] > 0, "chunk expects at least a 1-dimensional tensor" + + assert chunks > 0, "chunk expects `chunks` to be greater than 0" + + channel = input.dim() + dim_size = input.shape[dim] + chunk_size = ( + dim_size / chunks + if dim_size % chunks == 0 + else (int)(dim_size / chunks) + ) + last_chunk_size = ( + dim_size / chunks + if dim_size % chunks == 0 + else dim_size - (chunk_size * (chunks - 1)) + ) + + chunk_dim_dict = {} + tup_ndim = [] + splits = [] + + for chunk in range(0, chunks): + if dim_size % chunks == 0: + start = chunk * chunk_size + stop = (chunk + 1) * chunk_size + else: + start = ( + chunk * chunk_size + if chunk < chunks - 1 + else chunk_size * (chunks - 1) + ) + stop = (chunk + 1) * chunk_size if chunk < chunks - 1 else dim_size + step = 1 + chunk_dim_dict.setdefault(dim, []).append( + [int(start), int(stop), int(step)] + ) + + for k, v in chunk_dim_dict.items(): + for v_chunk in v: + tup_list = [] + for i in range(0, channel): + if i != dim: + tup_list.append([None, None, None]) + else: + tup_list.append(v_chunk) + start_tup, stop_tup, step_tup = check_slice_tup_list( + tup_list, input.shape + ) + splits.append( + flow.F.slice( + input, start=start_tup, stop=stop_tup, step=step_tup + ) + ) + return splits + + +@oneflow_export("chunk") +@register_tensor_op("chunk") +@experimental_api +def chunk_op(input, chunks, dim): + r"""Splits a tensor into a specific number of chunks. Each chunk is a view of the input tensor. Last chunk will be smaller if the tensor size along the given dimension dim is not divisible by chunks. + + Args: + input (oneflow.compatible.single_client.experimental.Tensor): The tensor to split. + chunks (int): Number of chunks to return. + dim (int): Dimension along which to split the tensor. + + Returns: + List of Tensors. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> np_arr = np.random.randn(5, 3, 6, 9).astype(np.float32) + >>> input = flow.Tensor(np_arr) + >>> of_out = [] + >>> of_out = flow.chunk(input, chunks=3, dim=2) + >>> chunks = 3 + >>> of_out_shape = [] + >>> for i in range(0, chunks): + ... of_out_shape.append(of_out[i].numpy().shape) + >>> of_out_shape + [(5, 3, 2, 9), (5, 3, 2, 9), (5, 3, 2, 9)] + + >>> np_arr = np.random.randn(5, 3, 6, 9).astype(np.float32) + >>> input = flow.Tensor(np_arr) + >>> of_out = [] + >>> of_out = flow.chunk(input, chunks=4, dim=3) + >>> chunks = 4 + >>> of_out_shape = [] + >>> for i in range(0, chunks): + ... 
of_out_shape.append(of_out[i].numpy().shape) + >>> of_out_shape + [(5, 3, 6, 2), (5, 3, 6, 2), (5, 3, 6, 2), (5, 3, 6, 3)] + + """ + return Chunk()(input, chunks, dim) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/concat.py b/oneflow/compatible_single_client_python/nn/modules/concat.py new file mode 100644 index 0000000000000000000000000000000000000000..2e17e5cf348d417d96d02b22794e2bbafa009a9b --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/concat.py @@ -0,0 +1,94 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import Tensor +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from typing import Optional, Sequence + + +class Cat(Module): + def __init__(self, dim=0) -> None: + super().__init__() + self.axis = dim + + def forward(self, inputs): + if len(inputs) == 1: + return inputs[0] + + axis = self.axis + assert len(inputs) >= 2 + if axis < 0: + axis += len(inputs[0].shape) + assert axis >= 0 and axis < len( + inputs[0].shape + ), "axis must be in range [0, num_axes of inputs)" + + first_input_shape = inputs[0].shape + dynamic_dim_size = 0 + for input in inputs: + assert len(input.shape) == len(first_input_shape) + for i in range(len(input.shape)): + if i == axis: + dynamic_dim_size += input.shape[i] + else: + assert input.shape[i] == first_input_shape[i] + + return flow.F.concat(inputs, axis=axis, max_dim_size=dynamic_dim_size) + + +@oneflow_export("cat") +@experimental_api +def concat_op(inputs, dim=0): + r"""Concatenate two or more `Tensor` s at specified axis. + + Analogous to `numpy.concatenate <https://docs.scipy.org/doc/numpy/reference/generated/numpy.concatenate.html>`_ + + Args: + inputs: a `list` of `Tensor` + dim: a `int`. + + Returns: + A `Tensor` + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + >>> import numpy as np + + >>> input1 = flow.Tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32) + >>> input2 = flow.Tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32) + >>> input3 = flow.Tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32) + + >>> out = flow.cat([input1, input2, input3], dim=1) + >>> out.shape + flow.Size([2, 18, 5, 3]) + + """ + return Cat(dim=dim)(inputs) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/constant.py b/oneflow/compatible_single_client_python/nn/modules/constant.py new file mode 100644 index 0000000000000000000000000000000000000000..51e8126e483bc5356c3f36ab092cb129e54d26ea --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/constant.py @@ -0,0 +1,294 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.common_types import _size_any_t +from oneflow.compatible_single_client_python.nn.modules.utils import _single + +from typing import Optional, Union + + +class _ConstantBase(Module): + def __init__( + self, + size: Union[_size_any_t, flow.Size], + value: Union[float, int], + dtype: Optional[flow.dtype], + device: Union[flow.device, str] = None, + requires_grad: bool = False, + ) -> None: + super().__init__() + assert size is not None, "shape must not be None!" + assert isinstance( + size, (int, tuple, flow.Size) + ), "shape should be int or tuple int!" + + self.device = device + self.requires_grad = requires_grad + size = _single(size) + if dtype is None: + dtype = flow.float32 + + if device is None: + self.device = flow.device("cpu") + + self.shape = size + self.value = value + self.dtype = dtype + + def forward(self): + res = flow.F.constant(self.shape, self.value, self.dtype) + res = res.to(device=self.device) + res.requires_grad = self.requires_grad + return res + + +class Ones(_ConstantBase): + def __init__(self, size, dtype=None, device=None, requires_grad=False): + super().__init__(size, 1, dtype, device, requires_grad) + + +@oneflow_export("ones") +@experimental_api +def ones_op( + size: Union[_size_any_t, flow.Size], + dtype: Optional[flow.dtype] = None, + device: Union[flow.device, str, None] = None, + requires_grad: bool = False, +): + r""" + Returns a tensor filled with the scalar value 1, + with the shape defined by the variable argument `size`. + + Args: + size (an integer or tuple of integer values) – defining the shape of the output tensor. 
Can be \
+            a variable number of arguments or a collection like a list or tuple.
+        dtype (flow.dtype, optional) – the desired data type of returned tensor.
+        device (flow.device, optional) – the desired device of returned tensor. Default: if None, uses the current device for the default tensor type
+        requires_grad (bool, optional) – If autograd should record operations on the returned tensor. Default: False.
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> y = flow.ones(5)
+        >>> y
+        tensor([1., 1., 1., 1., 1.], dtype=oneflow.float32)
+
+    """
+    return Ones(size, dtype, device, requires_grad)()
+
+
+class Zeros(_ConstantBase):
+    def __init__(self, size, dtype=None, device=None, requires_grad=False):
+        super().__init__(size, 0, dtype, device, requires_grad)
+
+
+@oneflow_export("zeros")
+@experimental_api
+def zeros_op(
+    size: Union[_size_any_t, flow.Size],
+    dtype: Optional[flow.dtype] = None,
+    device: Union[flow.device, str, None] = None,
+    requires_grad: bool = False,
+):
+    r"""
+    Returns a tensor filled with the scalar value 0,
+    with the shape defined by the variable argument `size`.
+
+    Args:
+        size(an integer or tuple of integer values) - defining the shape of the output tensor. Can be \
+            a variable number of arguments or a collection like a list or tuple.
+        dtype (flow.dtype, optional) – the desired data type of returned tensor.
+        device (flow.device, optional) – the desired device of returned tensor. Default: if None, uses the current device for the default tensor type
+        requires_grad (bool, optional) – If autograd should record operations on the returned tensor. Default: False.
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> y = flow.zeros(5)
+        >>> y
+        tensor([0., 0., 0., 0., 0.], dtype=oneflow.float32)
+
+    """
+    return Zeros(size, dtype, device, requires_grad)()
+
+
+class ZerosLike(Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, other):
+        return flow.F.zeros_like(other)
+
+
+@oneflow_export("zeros_like")
+@experimental_api
+def zeros_like_op(other):
+    r"""
+    Returns a tensor filled with the scalar value 0, with the same size as input.
+    flow.zeros_like(input) is equivalent to flow.zeros(input.shape, dtype=input.dtype)
+
+    Args:
+        other(Tensor): The size of input will determine size of the output tensor.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client.experimental as flow
+        import numpy as np
+
+        x = flow.Tensor(np.random.rand(5))
+        y = flow.zeros_like(x)
+        # [0. 0. 0. 0. 0.]
+
+    """
+    return ZerosLike()(other)
+
+
+class OnesLike(Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, other):
+        return flow.F.ones_like(other)
+
+
+@oneflow_export("ones_like")
+@experimental_api
+def ones_like_op(other):
+    r"""
+    Returns a tensor filled with the scalar value 1, with the same size as input.
+    flow.ones_like(input) is equivalent to flow.ones(input.shape, dtype=input.dtype)
+
+    Args:
+        other(Tensor): The size of input will determine size of the output tensor.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client.experimental as flow
+        import numpy as np
+
+        x = flow.Tensor(np.random.rand(5))
+        y = flow.ones_like(x)
+        # [1. 1. 1. 1. 1.]
+
+    """
+    return OnesLike()(other)
+
+
+class NewOnes(Module):
+    def __init__(
+        self,
+        size: Union[_size_any_t, flow.Size] = None,
+        dtype: Optional[flow.dtype] = None,
+        device: Union[flow.device, str] = None,
+        requires_grad: bool = False,
+    ):
+        super().__init__()
+
+        self.device = device
+        self.requires_grad = requires_grad
+        if size is not None:
+            size = _single(size)
+        self.size = size
+        self.dtype = dtype
+
+    def forward(self, x):
+        new_size = self.size
+        new_dtype = self.dtype
+        new_device = self.device
+        new_requires_grad = self.requires_grad
+
+        if self.size is None:
+            new_size = x.shape
+
+        if self.dtype is None:
+            new_dtype = x.dtype
+
+        if self.device is None:
+            new_device = x.device
+
+        assert isinstance(
+            new_size, (int, tuple, flow.Size)
+        ), "size parameter not correct, please check!"
+        assert isinstance(
+            new_dtype, flow.dtype
+        ), "dtype parameter not correct, please check!"
+        assert isinstance(
+            new_device, (str, flow.device)
+        ), "device parameter not correct, please check!"
+        assert isinstance(
+            new_requires_grad, bool
+        ), "requires_grad parameter not correct, please check!"
+
+        res = flow.F.constant(new_size, 1.0, new_dtype)
+        res = res.to(new_device)
+        res.requires_grad = new_requires_grad
+        return res
+
+
+@register_tensor_op("new_ones")
+@experimental_api
+def new_ones_op(x, size=None, dtype=None, device=None, requires_grad=False):
+    r"""
+
+    Returns a Tensor of size ``size`` filled with 1. By default, the returned Tensor has the same flow.dtype and flow.device as this tensor.
+
+    Args:
+        size (int...): a list, tuple, or flow.Size of integers defining the shape of the output tensor.
+        dtype (flow.dtype, optional): the desired type of returned tensor. Default: if None, same flow.dtype as this tensor.
+        device (flow.device, optional): the desired device of returned tensor. Default: if None, same flow.device as this tensor.
+        requires_grad (bool, optional): If autograd should record operations on the returned tensor. Default: False.
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> x = flow.Tensor(np.ones((1, 2, 3)))
+        >>> y = x.new_ones((2, 2))
+        >>> y
+        tensor([[1., 1.],
+                [1., 1.]], dtype=oneflow.float32)
+    """
+    return NewOnes(size=size, dtype=dtype, device=device, requires_grad=requires_grad)(
+        x
+    )
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
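+# Usage sketch (illustrative, assumes eager mode): the *_like and new_* helpers
+# adopt shape, dtype and device from an existing tensor unless overridden.
+#
+#   >>> x = flow.Tensor(np.zeros((2, 3), dtype=np.float32))
+#   >>> flow.zeros_like(x).shape
+#   flow.Size([2, 3])
+#   >>> y = x.new_ones((4,), dtype=flow.int64)  # same device as x, int64 ones
diff --git a/oneflow/compatible_single_client_python/nn/modules/constantpad2d.py b/oneflow/compatible_single_client_python/nn/modules/constantpad2d.py
new file mode 100644
index 0000000000000000000000000000000000000000..afb99226799bf70752bb50d28776dc6ed71ecd94
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/constantpad2d.py
@@ -0,0 +1,130 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.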
+""" +from __future__ import absolute_import + +from typing import Union + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module + + +@oneflow_export("nn.ConstantPad2d") +@experimental_api +class ConstantPad2d(Module): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: + https://pytorch.org/docs/stable/generated/torch.nn.ConstantPad2d.html?highlight=constantpad2d#torch.nn.ConstantPad2d + + This operator pads the input with constant value that user specifies. User can set the amount of padding by setting the parameter `paddings`. + + Args: + padding (Union[int, tuple, list]): the size of the padding. If is `int`, uses the same padding in all boundaries. If a 4-`tuple`, uses (:math:`\mathrm{padding_{left}}`, :math:`\mathrm{padding_{right}}`, :math:`\mathrm{padding_{top}}`, :math:`\mathrm{padding_{bottom}}`) + + value (Union[int, float]): The constant value used for padding. Defaults to 0. + + Shape: + - Input: :math:`(N, C, H_{in}, W_{in})` + - Output: :math:`(N, C, H_{out}, W_{out})` where + + :math:`H_{out} = H_{in} + \mathrm{padding_{top}} + \mathrm{padding_{bottom}}` + + :math:`W_{out} = W_{in} + \mathrm{padding_{left}} + \mathrm{padding_{right}}` + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> constantpad_layer_0 = flow.nn.ConstantPad2d((2, 2, 1, 1), 1) + >>> input = flow.Tensor(np.arange(18).reshape((1, 2, 3, 3)).astype(np.float32)) + >>> input_int = flow.Tensor(np.arange(18).reshape((1, 2, 3, 3)).astype(np.int32)) + >>> output = constantpad_layer_0(input) + >>> output.shape + flow.Size([1, 2, 5, 7]) + >>> output + tensor([[[[ 1., 1., 1., 1., 1., 1., 1.], + [ 1., 1., 0., 1., 2., 1., 1.], + [ 1., 1., 3., 4., 5., 1., 1.], + [ 1., 1., 6., 7., 8., 1., 1.], + [ 1., 1., 1., 1., 1., 1., 1.]], + <BLANKLINE> + [[ 1., 1., 1., 1., 1., 1., 1.], + [ 1., 1., 9., 10., 11., 1., 1.], + [ 1., 1., 12., 13., 14., 1., 1.], + [ 1., 1., 15., 16., 17., 1., 1.], + [ 1., 1., 1., 1., 1., 1., 1.]]]], dtype=oneflow.float32) + >>> output_int = constantpad_layer_0(input_int) + >>> output_int + tensor([[[[ 1., 1., 1., 1., 1., 1., 1.], + [ 1., 1., 0., 1., 2., 1., 1.], + [ 1., 1., 3., 4., 5., 1., 1.], + [ 1., 1., 6., 7., 8., 1., 1.], + [ 1., 1., 1., 1., 1., 1., 1.]], + <BLANKLINE> + [[ 1., 1., 1., 1., 1., 1., 1.], + [ 1., 1., 9., 10., 11., 1., 1.], + [ 1., 1., 12., 13., 14., 1., 1.], + [ 1., 1., 15., 16., 17., 1., 1.], + [ 1., 1., 1., 1., 1., 1., 1.]]]], dtype=oneflow.float32) + """ + + def __init__(self, padding: Union[int, tuple, list], value: Union[int, float] = 0): + super().__init__() + if isinstance(padding, (tuple, list)): + assert len(padding) == 4, ValueError("Length of padding must be 4") + boundary = [padding[0], padding[1], padding[2], padding[3]] + elif isinstance(padding, int): + boundary = [padding, padding, padding, padding] + else: + raise ValueError("padding must be int or list or tuple!") + + self.padding = boundary + self.value = value + + def forward(self, x): + _, _, h, w = x.shape + + if x.dtype in [flow.float32, flow.float16, flow.float64]: + floating_value = float(self.value) + integral_value = int(0) + else: + floating_value = float(0) + integral_value = int(self.value) + + self._op = ( + flow.builtin_op("constant_pad2d") + .Input("x") + .Output("y") 
+            .Attr("padding", self.padding)
+            .Attr("floating_value", floating_value)
+            .Attr("integral_value", integral_value)
+            .Build()
+        )
+
+        res = self._op(x)[0]
+        return res
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/container.py b/oneflow/compatible_single_client_python/nn/modules/container.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e342abb4a16227b408813d613ef653b03d4b223
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/container.py
@@ -0,0 +1,540 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from collections import OrderedDict
+import collections.abc
+import warnings  # used by ParameterList/ParameterDict below
+
+from itertools import islice
+import operator
+
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.nn.module import Module
+
+from typing import (
+    Any,
+    Iterable,
+    Iterator,
+    Mapping,
+    Optional,
+    overload,
+    Tuple,
+    TypeVar,
+    Union,
+)
+
+
+T = TypeVar("T")
+
+
+@oneflow_export("nn.Sequential")
+@experimental_api
+class Sequential(Module):
+    r"""A sequential container.
+    Modules will be added to it in the order they are passed in the constructor.
+    Alternatively, an ordered dict of modules can also be passed in.
+
+    To make it easier to understand, here is a small example:
+
+    .. code-block:: python
+
+        >>> from collections import OrderedDict
+        >>> import oneflow.compatible.single_client.experimental.nn as nn
+        >>> nn.Sequential(nn.Conv2d(1,20,5), nn.ReLU(), nn.Conv2d(20,64,5), nn.ReLU()) #doctest: +ELLIPSIS
+        <oneflow.compatible_single_client_python.nn.modules.container.Sequential object at 0x...>
+        >>> nn.Sequential(OrderedDict([
+        ...     ('conv1', nn.Conv2d(1,20,5)),
+        ...     ('relu1', nn.ReLU()),
+        ...     ('conv2', nn.Conv2d(20,64,5)),
+        ...     ('relu2', nn.ReLU())
+        ... ])) #doctest: +ELLIPSIS
+        <oneflow.compatible_single_client_python.nn.modules.container.Sequential object at 0x...>
+
+    """
+
+    @overload
+    def __init__(self, *args: Module) -> None:
+        ...
+
+    @overload
+    def __init__(self, arg: "OrderedDict[str, Module]") -> None:
+        ...
+ + def __init__(self, *args: Any): + super(Sequential, self).__init__() + if len(args) == 1 and isinstance(args[0], OrderedDict): + for key, module in args[0].items(): + self.add_module(key, module) + else: + for idx, module in enumerate(args): + self.add_module(str(idx), module) + + def _get_item_by_idx(self, iterator, idx): + """Get the idx-th item of the iterator""" + size = len(self) + idx = operator.index(idx) + if not -size <= idx < size: + raise IndexError("index {} is out of range".format(idx)) + idx %= size + return next(islice(iterator, idx, None)) + + def __getitem__(self: T, idx) -> T: + if isinstance(idx, slice): + return self.__class__(OrderedDict(list(self._modules.items())[idx])) + else: + return self._get_item_by_idx(self._modules.values(), idx) + + def __setitem__(self, idx: int, module: Module) -> None: + key = self._get_item_by_idx(self._modules.keys(), idx) + return setattr(self, key, module) + + def __delitem__(self, idx: Union[slice, int]) -> None: + if isinstance(idx, slice): + for key in list(self._modules.keys())[idx]: + delattr(self, key) + else: + key = self._get_item_by_idx(self._modules.keys(), idx) + delattr(self, key) + + def __len__(self) -> int: + return len(self._modules) + + def __dir__(self): + keys = super(Sequential, self).__dir__() + keys = [key for key in keys if not key.isdigit()] + return keys + + def __iter__(self) -> Iterator[Module]: + return iter(self._modules.values()) + + def forward(self, input): + for module in self: + input = module(input) + return input + + +@oneflow_export("nn.ParameterList") +@experimental_api +class ParameterList(Module): + def __init__(self, parameters: Optional[Iterable["Parameter"]] = None) -> None: + super(ParameterList, self).__init__() + self._initialized = True + if parameters is not None: + self += parameters + + def __setstate__(self, state): + state["_initialized"] = False + super(ParameterList, self).__setstate__(state) + self._initialized = True + + def _get_abs_string_index(self, idx): + """Get the absolute index for the list of modules""" + idx = operator.index(idx) + if not (-len(self) <= idx < len(self)): + raise IndexError("index {} is out of range".format(idx)) + if idx < 0: + idx += len(self) + return str(idx) + + @overload + def __getitem__(self, idx: int) -> "Parameter": + ... + + @overload + def __getitem__(self: T, idx: slice) -> T: + ... + + def __getitem__(self, idx): + if isinstance(idx, slice): + return self.__class__(list(self._parameters.values())[idx]) + else: + idx = self._get_abs_string_index(idx) + return self._parameters[str(idx)] + + def __setitem__(self, idx: int, param: "Parameter") -> None: + idx = self._get_abs_string_index(idx) + return self.register_parameter(str(idx), param) + + def __setattr__(self, key: Any, value: Any) -> None: + if getattr(self, "_initialized", False): + if not hasattr(self, key) and not isinstance(value, flow.nn.Parameter): + warnings.warn("Setting attributes on ParameterList is not supported.") + super(ParameterList, self).__setattr__(key, value) + + def __len__(self) -> int: + return len(self._parameters) + + def __iter__(self) -> Iterator["Parameter"]: + return iter(self._parameters.values()) + + def __iadd__(self: T, parameters: Iterable["Parameter"]) -> T: + return self.extend(parameters) + + def __dir__(self): + keys = super(ParameterList, self).__dir__() + keys = [key for key in keys if not key.isdigit()] + return keys + + def append(self: T, parameter: "Parameter") -> T: + """Appends a given parameter at the end of the list. 
+ + Arguments: + parameter (nn.Parameter): parameter to append + """ + self.register_parameter(str(len(self)), parameter) + return self + + def extend(self: T, parameters: Iterable["Parameter"]) -> T: + """Appends parameters from a Python iterable to the end of the list. + + Arguments: + parameters (iterable): iterable of parameters to append + """ + if not isinstance(parameters, collections.abc.Iterable): + raise TypeError( + "ParameterList.extend should be called with an " + "iterable, but got " + type(parameters).__name__ + ) + offset = len(self) + for i, param in enumerate(parameters): + self.register_parameter(str(offset + i), param) + return self + + def extra_repr(self) -> str: + child_lines = [] + for k, p in self._parameters.items(): + size_str = "x".join(str(size) for size in p.size()) + device_str = "" if not p.is_cuda else " (GPU {})".format(p.get_device()) + parastr = "Parameter containing: [{} of size {}{}]".format( + type(p), size_str, device_str + ) + child_lines.append(" (" + str(k) + "): " + parastr) + tmpstr = "\n".join(child_lines) + return tmpstr + + def __call__(self, input): + raise RuntimeError("ParameterList should not be called.") + + def _replicate_for_data_parallel(self): + warnings.warn( + "nn.ParameterList is being used with DataParallel but this is not " + "supported. This list will appear empty for the models replicated " + "on each GPU except the original one." + ) + + return super(ParameterList, self)._replicate_for_data_parallel() + + +@oneflow_export("nn.ParameterDict") +@experimental_api +class ParameterDict(Module): + def __init__(self, parameters: Optional[Mapping[str, "Parameter"]] = None) -> None: + super(ParameterDict, self).__init__() + self._initialized = True + if parameters is not None: + self.update(parameters) + + def __setstate__(self, state): + state["_initialized"] = False + super(ParameterDict, self).__setstate__(state) + self._initialized = True + + def __getitem__(self, key: str) -> "Parameter": + return self._parameters[key] + + def __setitem__(self, key: str, parameter: "Parameter") -> None: + self.register_parameter(key, parameter) + + def __delitem__(self, key: str) -> None: + del self._parameters[key] + + def __setattr__(self, key: Any, value: Any) -> None: + if getattr(self, "_initialized", False): + if not hasattr(self, key) and not isinstance(value, flow.nn.Parameter): + warnings.warn("Setting attributes on ParameterDict is not supported.") + super(ParameterDict, self).__setattr__(key, value) + + def __len__(self) -> int: + return len(self._parameters) + + def __iter__(self) -> Iterator[str]: + return iter(self._parameters.keys()) + + def __contains__(self, key: str) -> bool: + return key in self._parameters + + def clear(self) -> None: + """Remove all items from the ParameterDict. 
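A small sketch of the ParameterList behavior defined above (illustrative only; it assumes flow.nn.Parameter and eager mode, both used elsewhere in this patch):

.. code-block:: python

    import numpy as np
    import oneflow.compatible.single_client.experimental as flow

    flow.enable_eager_execution()

    plist = flow.nn.ParameterList(
        [flow.nn.Parameter(flow.Tensor(np.ones((2, 2)))) for _ in range(3)]
    )
    plist.append(flow.nn.Parameter(flow.Tensor(np.zeros(4))))  # key "3"
    assert len(plist) == 4
    first, last = plist[0], plist[-1]  # negative indices are normalized
    sub = plist[1:3]                   # a slice returns a new ParameterList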
+ """ + self._parameters.clear() + + +@oneflow_export("nn.ModuleList") +@experimental_api +class ModuleList(Module): + def __init__(self, modules: Optional[Iterable[Module]] = None) -> None: + super(ModuleList, self).__init__() + if modules is not None: + self += modules + + def _get_abs_string_index(self, idx): + """Get the absolute index for the list of modules""" + idx = operator.index(idx) + if not (-len(self) <= idx < len(self)): + raise IndexError("index {} is out of range".format(idx)) + if idx < 0: + idx += len(self) + return str(idx) + + def __getitem__(self, idx: int) -> Module: + if isinstance(idx, slice): + return self.__class__(list(self._modules.values())[idx]) + else: + return self._modules[self._get_abs_string_index(idx)] + + def __setitem__(self, idx: int, module: Module) -> None: + idx = self._get_abs_string_index(idx) + return setattr(self, str(idx), module) + + def __delitem__(self, idx: Union[int, slice]) -> None: + if isinstance(idx, slice): + for k in range(len(self._modules))[idx]: + delattr(self, str(k)) + else: + delattr(self, self._get_abs_string_index(idx)) + # To preserve numbering, self._modules is being reconstructed with modules after deletion + str_indices = [str(i) for i in range(len(self._modules))] + self._modules = OrderedDict(list(zip(str_indices, self._modules.values()))) + + def __len__(self) -> int: + return len(self._modules) + + def __iter__(self) -> Iterator[Module]: + return iter(self._modules.values()) + + def __iadd__(self: T, modules: Iterable[Module]) -> T: + return self.extend(modules) + + def __dir__(self): + keys = super(ModuleList, self).__dir__() + keys = [key for key in keys if not key.isdigit()] + return keys + + def insert(self, index: int, module: Module) -> None: + r"""Insert a given module before a given index in the list. + + Arguments: + index (int): index to insert. + module (nn.Module): module to insert + """ + for i in range(len(self._modules), index, -1): + self._modules[str(i)] = self._modules[str(i - 1)] + self._modules[str(index)] = module + + def append(self: T, module: Module) -> T: + r"""Appends a given module to the end of the list. + + Arguments: + module (nn.Module): module to append + """ + self.add_module(str(len(self)), module) + return self + + def extend(self: T, modules: Iterable[Module]) -> T: + r"""Appends modules from a Python iterable to the end of the list. 
+ + Arguments: + modules (iterable): iterable of modules to append + """ + if not isinstance(modules, collections.abc.Iterable): + raise TypeError( + "ModuleList.extend should be called with an " + "iterable, but got " + type(modules).__name__ + ) + offset = len(self) + for i, module in enumerate(modules): + self.add_module(str(offset + i), module) + return self + + def forward(self): + raise NotImplementedError() + + +@oneflow_export("nn.ModuleDict") +@experimental_api +class ModuleDict(Module): + def __init__(self, modules: Optional[Mapping[str, Module]] = None) -> None: + super(ModuleDict, self).__init__() + if modules is not None: + self.update(modules) + + def __getitem__(self, key: str) -> Module: + return self._modules[key] + + def __setitem__(self, key: str, module: Module) -> None: + self.add_module(key, module) + + def __delitem__(self, key: str) -> None: + del self._modules[key] + + def __len__(self) -> int: + return len(self._modules) + + def __iter__(self) -> Iterator[str]: + return iter(self._modules) + + def __contains__(self, key: str) -> bool: + return key in self._modules + + def clear(self) -> None: + """Remove all items from the ModuleDict. + """ + self._modules.clear() + + def pop(self, key: str) -> Module: + r"""Remove key from the ModuleDict and return its module. + + Arguments: + key (string): key to pop from the ModuleDict + """ + v = self[key] + del self[key] + return v + + def keys(self) -> Iterable[str]: + r"""Return an iterable of the ModuleDict keys. + """ + return self._modules.keys() + + def items(self) -> Iterable[Tuple[str, Module]]: + r"""Return an iterable of the ModuleDict key/value pairs. + """ + return self._modules.items() + + def values(self) -> Iterable[Module]: + r"""Return an iterable of the ModuleDict values. + """ + return self._modules.values() + + def update(self, modules: Mapping[str, Module]) -> None: + if not isinstance(modules, collections.abc.Iterable): + raise TypeError( + "ModuleDict.update should be called with an " + "iterable of key/value pairs, but got " + type(modules).__name__ + ) + + if isinstance(modules, (OrderedDict, ModuleDict, collections.abc.Mapping)): + for key, module in modules.items(): + self[key] = module + else: + for j, m in enumerate(modules): + if not isinstance(m, collections.abc.Iterable): + raise TypeError( + "ModuleDict update sequence element " + "#" + str(j) + " should be Iterable; is" + type(m).__name__ + ) + if not len(m) == 2: + raise ValueError( + "ModuleDict update sequence element " + "#" + str(j) + " has length " + str(len(m)) + "; 2 is required" + ) + self[m[0]] = m[1] + + def forward(self): + raise NotImplementedError() + + def pop(self, key: str) -> "Parameter": + r"""Remove key from the ParameterDict and return its parameter. + + Arguments: + key (string): key to pop from the ParameterDict + """ + v = self[key] + del self[key] + return v + + def keys(self) -> Iterable[str]: + r"""Return an iterable of the ParameterDict keys. + """ + return self._parameters.keys() + + def items(self) -> Iterable[Tuple[str, "Parameter"]]: + r"""Return an iterable of the ParameterDict key/value pairs. + """ + return self._parameters.items() + + def values(self) -> Iterable["Parameter"]: + r"""Return an iterable of the ParameterDict values. 
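A sketch of the ModuleDict API above. Note that, as the file stands, the ParameterDict-style helpers that follow ModuleDict.forward() sit inside ModuleDict's body, so its update() ends up being the later parameter-flavored definition; the calls below route through __setitem__ either way (nn.ReLU/nn.Tanh assumed available):

.. code-block:: python

    import oneflow.compatible.single_client.experimental.nn as nn

    heads = nn.ModuleDict({"act": nn.ReLU()})
    heads["gate"] = nn.Tanh()            # __setitem__ -> add_module
    heads.update([("act2", nn.ReLU())])  # iterable of (key, module) pairs
    assert "gate" in heads and len(heads) == 3
    removed = heads.pop("act")           # pop returns the removed module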
+ """ + return self._parameters.values() + + def update(self, parameters: Mapping[str, "Parameter"]) -> None: + if not isinstance(parameters, collections.abc.Iterable): + raise TypeError( + "ParametersDict.update should be called with an " + "iterable of key/value pairs, but got " + type(parameters).__name__ + ) + + if isinstance(parameters, (OrderedDict, ParameterDict)): + for key, parameter in parameters.items(): + self[key] = parameter + elif isinstance(parameters, collections.abc.Mapping): + for key, parameter in sorted(parameters.items()): + self[key] = parameter + else: + for j, p in enumerate(parameters): + if not isinstance(p, collections.abc.Iterable): + raise TypeError( + "ParameterDict update sequence element " + "#" + str(j) + " should be Iterable; is" + type(p).__name__ + ) + if not len(p) == 2: + raise ValueError( + "ParameterDict update sequence element " + "#" + str(j) + " has length " + str(len(p)) + "; 2 is required" + ) + self[p[0]] = p[1] + + def extra_repr(self) -> str: + child_lines = [] + for k, p in self._parameters.items(): + size_str = "x".join(str(size) for size in p.size()) + device_str = "" if not p.is_cuda else " (GPU {})".format(p.get_device()) + parastr = "Parameter containing: [{} of size {}{}]".format( + type(p), size_str, device_str + ) + child_lines.append(" (" + k + "): " + parastr) + tmpstr = "\n".join(child_lines) + return tmpstr + + def __call__(self, input): + raise RuntimeError("ParameterDict should not be called.") + + def _replicate_for_data_parallel(self): + warnings.warn( + "nn.ParameterDict is being used with DataParallel but this is not " + "supported. This dict will appear empty for the models replicated " + "on each GPU except the original one." + ) + + return super(ParameterDict, self)._replicate_for_data_parallel() + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/conv.py b/oneflow/compatible_single_client_python/nn/modules/conv.py new file mode 100644 index 0000000000000000000000000000000000000000..e27e77ab75a9cde518c8b81dd220f0fcdc40b806 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/conv.py @@ -0,0 +1,476 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import math +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.nn.modules.utils import _single, _pair +from oneflow.compatible_single_client_python.nn.common_types import _size_1_t, _size_2_t +from oneflow.compatible_single_client_python.nn import init + + +def slice(x, begin, size): + ndim = len(x.shape) + if not isinstance(begin, (list, tuple)) or len(begin) != ndim: + raise ValueError( + "begin must be a list/tuple with the same length as input tensor's number of dimensions" + ) + + if not all(isinstance(b, int) or b is None for b in begin): + raise ValueError("element of begin must be a int or None") + + if not isinstance(size, (list, tuple)) or len(size) != ndim: + raise ValueError( + "size must be a list/tuple with the same length as input tensor's number of dimensions." + ) + + if not all(isinstance(s, int) or s is None for s in size): + raise ValueError("element of size must be a int or None") + + slice_tup_list = [] + for b, s, dim_size in zip(begin, size, x.shape): + start, stop, step = (None, None, 1) + if b is not None: + if b < -dim_size or b >= dim_size: + raise ValueError("element of begin is out of range") + start = b + + if s is not None: + if s == -1: + stop = dim_size + else: + if s <= 0 or s > dim_size: + raise ValueError("element of size is invalid") + if b + s < dim_size: + stop = b + s + + slice_tup_list.append((start, stop, step)) + return flow.experimental.slice(x, slice_tup_list) + + +class ConvUtil(object): + @classmethod + def split(cls, x, axis, split_num): + split_len = x.shape[axis] // split_num + result_list = [] + slice_begin = [0] * len(x.shape) + slice_size = [-1] * len(x.shape) + slice_size[axis] = split_len + for i in range(split_num): + slice_begin[axis] = i * split_len + result = slice(x, slice_begin, slice_size) + result_list.append(result) + return result_list + + +@oneflow_export("nn.Conv1d") +@experimental_api +class Conv1d(Module): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: https://pytorch.org/docs/master/generated/torch.nn.Conv1d.html#conv1d + + Applies a 1D convolution over an input signal composed of several input + planes. + + In the simplest case, the output value of the layer with input size + :math:`(N, C_{\text{in}}, L)` and output :math:`(N, C_{\text{out}}, L_{\text{out}})` can be + precisely described as: + + .. math:: + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{in} - 1} \text{weight}(C_{\text{out}_j}, k) + \star \text{input}(N_i, k) + + where :math:`\star` is the valid `cross-correlation`_ operator, + :math:`N` is a batch size, :math:`C` denotes a number of channels, + :math:`L` is a length of signal sequence. + + * :attr:`stride` controls the stride for the cross-correlation, a single + number or a one-element tuple. + + * :attr:`padding` controls the amount of padding applied to the input. It + can be either a string {{'valid', 'same'}} or a tuple of ints giving the + amount of implicit padding applied on both sides. + + * :attr:`dilation` controls the spacing between the kernel points; also + known as the à trous algorithm. It is harder to describe, but this `link`_ + has a nice visualization of what :attr:`dilation` does. + + Note: + ``padding='valid'`` is the same as no padding. 
``padding='same'`` pads
+        the input so the output has the same shape as the input. However, this mode
+        doesn't support any stride values other than 1.
+
+    Args:
+        in_channels (int): Number of channels in the input image
+        out_channels (int): Number of channels produced by the convolution
+        kernel_size (int or tuple): Size of the convolving kernel
+        stride (int or tuple, optional): Stride of the convolution. Default: 1
+        padding (int, tuple or str, optional): Padding added to both sides of
+            the input. Default: 0
+        padding_mode (string, optional): ``'zeros'``, ``'reflect'``,
+            ``'replicate'`` or ``'circular'``. Default: ``'zeros'``
+        dilation (int or tuple, optional): Spacing between kernel
+            elements. Default: 1
+        groups (int, optional): Number of blocked connections from input
+            channels to output channels. Default: 1
+        bias (bool, optional): If ``True``, adds a learnable bias to the
+            output. Default: ``True``
+
+    Shape:
+        - Input: :math:`(N, C_{in}, L_{in})`
+        - Output: :math:`(N, C_{out}, L_{out})` where
+
+          .. math::
+              L_{out} = \left\lfloor\frac{L_{in} + 2 \times \text{padding} - \text{dilation}
+                        \times (\text{kernel\_size} - 1) - 1}{\text{stride}} + 1\right\rfloor
+
+    Attributes:
+        weight (Tensor): the learnable weights of the module of shape
+            :math:`(\text{out\_channels},
+            \frac{\text{in\_channels}}{\text{groups}}, \text{kernel\_size})`.
+            The values of these weights are sampled from
+            :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
+            :math:`k = \frac{groups}{C_\text{in} * \text{kernel\_size}}`
+        bias (Tensor): the learnable bias of the module of shape
+            (out_channels). If :attr:`bias` is ``True``, then the values of these weights are
+            sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where
+            :math:`k = \frac{groups}{C_\text{in} * \text{kernel\_size}}`
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import oneflow.compatible.single_client.experimental.nn as nn
+        >>> flow.enable_eager_execution()
+
+        >>> arr = np.random.randn(20, 16, 50)
+        >>> input = flow.Tensor(arr)
+        >>> m = nn.Conv1d(16, 33, 3, stride=2)
+        >>> output = m(input)
+
+    .. _cross-correlation:
+        https://en.wikipedia.org/wiki/Cross-correlation
+
+    ..
_link: + https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: _size_1_t, + stride: _size_1_t = 1, + padding: _size_1_t = 0, + dilation: _size_1_t = 1, + groups: int = 1, + bias: bool = True, + padding_mode: str = "zeros", # TODO: refine this type + ): + super().__init__() + + assert padding_mode == "zeros" + self.kernel_size = _single(kernel_size) + self.stride = _single(stride) + self.padding = _single(padding) + self.dilation = _single(dilation) + self.groups = groups + assert in_channels % groups == 0 + assert out_channels % groups == 0 + self.in_channels = in_channels + self.out_channels = out_channels + self.weight = flow.nn.Parameter( + flow.Tensor(out_channels, in_channels // groups, *self.kernel_size) + ) + self.out_channel_groups = out_channels // groups + self.bias = None + if bias: + self.bias = flow.nn.Parameter(flow.Tensor(out_channels)) + self.reset_parameters() + + def reset_parameters(self) -> None: + init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + if self.bias is not None: + fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight) + bound = 1 / math.sqrt(fan_in) + init.uniform_(self.bias, -bound, bound) + + def forward(self, x): + if x.device.type == "cpu" and self.groups > 1: + in_channel_axis = 1 + weight_channel_axis = 0 + bias_channel_axis = 0 + in_split_list = ConvUtil.split( + x, axis=in_channel_axis, split_num=self.groups + ) + out_list = [] + for i in range(len(in_split_list)): + out_list.append( + flow.F.conv1d( + in_split_list[i], + self.weight[ + i + * self.out_channel_groups : (i + 1) + * self.out_channel_groups, + :, + :, + ], + self.bias[ + i + * self.out_channel_groups : (i + 1) + * self.out_channel_groups + ] + if self.bias + else None, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=1, + ) + ) + res = flow.experimental.cat(out_list, dim=in_channel_axis) + else: + res = flow.F.conv1d( + x, + self.weight, + self.bias, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + ) + return res + + +@oneflow_export("nn.Conv2d") +@experimental_api +class Conv2d(Module): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: https://pytorch.org/docs/master/generated/torch.nn.Conv2d.html#conv2d + + Applies a 2D convolution over an input signal composed of several input + planes. + + In the simplest case, the output value of the layer with input size + :math:`(N, C_{\text{in}}, H, W)` and output :math:`(N, C_{\text{out}}, H_{\text{out}}, W_{\text{out}})` + can be precisely described as: + + .. math:: + \text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) + + \sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k) + + + where :math:`\star` is the valid 2D `cross-correlation`_ operator, + :math:`N` is a batch size, :math:`C` denotes a number of channels, + :math:`H` is a height of input planes in pixels, and :math:`W` is + width in pixels. + + + * :attr:`stride` controls the stride for the cross-correlation, a single + number or a tuple. + * :attr:`padding` controls the amount of implicit padding on both + sides for :attr:`padding` number of points for each dimension. + * :attr:`dilation` controls the spacing between the kernel points; also + known as the à trous algorithm. It is harder to describe, but this `link`_ + has a nice visualization of what :attr:`dilation` does. 
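Returning to Conv1d above: on CPU with groups > 1, forward() emulates grouped convolution by slicing the input channels with ConvUtil.split, convolving each chunk against its slice of the weight, and concatenating the results. A hedged sketch of the shapes involved (sizes are illustrative):

.. code-block:: python

    import numpy as np
    import oneflow.compatible.single_client.experimental as flow
    import oneflow.compatible.single_client.experimental.nn as nn

    flow.enable_eager_execution()

    m = nn.Conv1d(4, 6, kernel_size=3, groups=2)  # two groups of 2 channels
    x = flow.Tensor(np.random.randn(1, 4, 10))
    y = m(x)  # CPU path: ConvUtil.split -> per-group conv1d -> cat
    assert y.shape == flow.Size([1, 6, 8])  # L_out = 10 - (3 - 1) = 8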
+ * :attr:`groups` controls the connections between inputs and outputs. + :attr:`in_channels` and :attr:`out_channels` must both be divisible by + :attr:`groups`. For example, + + * At groups=1, all inputs are convolved to all outputs. + * At groups=2, the operation becomes equivalent to having two conv + layers side by side, each seeing half the input channels + and producing half the output channels, and both subsequently + concatenated. + * At groups= :attr:`in_channels`, each input channel is convolved with + its own set of filters (of size + :math:`\frac{\text{out_channels}}{\text{in_channels}}`)., + + The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be: + + - a single ``int`` -- in which case the same value is used for the height and width dimension + - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension, + and the second `int` for the width dimension + + Note: + When `groups == in_channels` and `out_channels == K * in_channels`, + where `K` is a positive integer, this operation is also known as a "depthwise convolution". + + In other words, for an input of size :math:`(N, C_{in}, L_{in})`, + a depthwise convolution with a depthwise multiplier `K` can be performed with the arguments + :math:`(C_\text{in}=C_\text{in}, C_\text{out}=C_\text{in} \times \text{K}, ..., \text{groups}=C_\text{in})`. + + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): Zero-padding added to both sides of + the input. Default: 0 + padding_mode (string, optional): ``'zeros'``, ``'reflect'``, + ``'replicate'`` or ``'circular'``. Default: ``'zeros'`` + dilation (int or tuple, optional): Spacing between kernel elements. Default: 1 + groups (int, optional): Number of blocked connections from input + channels to output channels. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the + output. Default: ``True`` + + Shape: + - Input: :math:`(N, C_{in}, H_{in}, W_{in})` + - Output: :math:`(N, C_{out}, H_{out}, W_{out})` where + + .. math:: + H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] + \times (\text{kernel_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor + + .. math:: + W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] + \times (\text{kernel_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor + + Attr: + - weight (Tensor): the learnable weights of the module of shape + :math:`(\text{out_channels}, \frac{\text{in_channels}}{\text{groups}},` + :math:`\text{kernel_size[0]}, \text{kernel_size[1]})`. + The values of these weights are sampled from + :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where + :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel_size}[i]}` + + - bias (Tensor): the learnable bias of the module of shape + (out_channels). If :attr:`bias` is ``True``, + then the values of these weights are + sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where + :math:`k = \frac{groups}{C_\text{in} * \prod_{i=0}^{1}\text{kernel_size}[i]}` + + For example: + + .. 
code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> import oneflow.compatible.single_client.experimental.nn as nn + >>> flow.enable_eager_execution() + + >>> arr = np.random.randn(20, 16, 50, 100) + >>> input = flow.Tensor(arr) + >>> m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1)) + >>> output = m(input) + + .. _cross-correlation: + https://en.wikipedia.org/wiki/Cross-correlation + + .. _link: + https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: _size_2_t, + stride: _size_2_t = 1, + padding: _size_2_t = 0, + dilation: _size_2_t = 1, + groups: int = 1, + bias: bool = True, + padding_mode: str = "zeros", # TODO: refine this type + ): + super().__init__() + + assert padding_mode == "zeros" + self.kernel_size = _pair(kernel_size) + self.stride = _pair(stride) + self.padding = _pair(padding) + self.dilation = _pair(dilation) + self.groups = groups + assert in_channels % groups == 0 + assert out_channels % groups == 0 + self.in_channels = in_channels + self.out_channels = out_channels + self.weight = flow.nn.Parameter( + flow.Tensor(out_channels, in_channels // groups, *self.kernel_size) + ) + self.out_channel_groups = out_channels // groups + self.bias = None + if bias: + self.bias = flow.nn.Parameter(flow.Tensor(out_channels)) + self.reset_parameters() + + def reset_parameters(self) -> None: + init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + if self.bias is not None: + fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight) + bound = 1 / math.sqrt(fan_in) + init.uniform_(self.bias, -bound, bound) + + def forward(self, x): + if x.shape[1] != self.in_channels: + raise ValueError("The input channels should be equal to self.in_channels") + if x.device.type == "cpu" and self.groups > 1: + in_channel_axis = 1 + in_split_list = ConvUtil.split( + x, axis=in_channel_axis, split_num=self.groups + ) + out_list = [] + for i in range(len(in_split_list)): + out_list.append( + flow.F.conv2d( + in_split_list[i], + self.weight[ + i + * self.out_channel_groups : (i + 1) + * self.out_channel_groups, + :, + :, + :, + ], + self.bias[ + i + * self.out_channel_groups : (i + 1) + * self.out_channel_groups + ] + if self.bias + else None, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=1, + ) + ) + res = flow.experimental.cat(out_list, dim=in_channel_axis) + else: + res = flow.F.conv2d( + x, + self.weight, + self.bias, + stride=self.stride, + padding=self.padding, + dilation=self.dilation, + groups=self.groups, + ) + return res + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/dataset.py b/oneflow/compatible_single_client_python/nn/modules/dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..f7ac2d38710c687c0dbc98ff425b6ba445a4c344 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/dataset.py @@ -0,0 +1,582 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
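To make the H_out/W_out formula above concrete, here is the arithmetic for the shapes used in the Conv2d doctest, in plain Python with no oneflow dependency:

.. code-block:: python

    import math

    def conv_out(size, pad, dil, k, stride):
        # floor((size + 2*pad - dil*(k - 1) - 1) / stride + 1)
        return math.floor((size + 2 * pad - dil * (k - 1) - 1) / stride + 1)

    h = conv_out(50, pad=4, dil=3, k=3, stride=2)   # -> 26
    w = conv_out(100, pad=2, dil=1, k=5, stride=1)  # -> 100
    assert (h, w) == (26, 100)  # i.e. output shape (20, 33, 26, 100)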
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow + +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.nn.modules.utils import ( + _single, + _pair, + _triple, + _reverse_repeat_tuple, +) +from oneflow.compatible_single_client_python.nn.common_types import ( + _size_1_t, + _size_2_t, + _size_3_t, + _size_any_t, +) +from typing import Optional, List, Tuple, Sequence, Union +import random +import sys +import traceback + + +def mirrored_gen_random_seed(seed=None): + if seed is None: + seed = -1 + has_seed = False + else: + has_seed = True + + return seed, has_seed + + +@oneflow_export("nn.OfrecordReader") +@experimental_api +class OfrecordReader(Module): + def __init__( + self, + ofrecord_dir: str, + batch_size: int = 1, + data_part_num: int = 1, + part_name_prefix: str = "part-", + part_name_suffix_length: int = -1, + random_shuffle: bool = False, + shuffle_buffer_size: int = 1024, + shuffle_after_epoch: bool = False, + random_seed: int = -1, + name: Optional[str] = None, + ): + super().__init__() + seed, has_seed = mirrored_gen_random_seed(random_seed) + self._op = ( + flow.builtin_op("OFRecordReader", name) + .Output("out") + .Attr("data_dir", ofrecord_dir) + .Attr("data_part_num", data_part_num) + .Attr("batch_size", batch_size) + .Attr("part_name_prefix", part_name_prefix) + .Attr("random_shuffle", random_shuffle) + .Attr("shuffle_buffer_size", shuffle_buffer_size) + .Attr("shuffle_after_epoch", shuffle_after_epoch) + .Attr("part_name_suffix_length", part_name_suffix_length) + .Attr("seed", seed) + .Build() + ) + + def forward(self): + res = self._op()[0] + return res + + +@oneflow_export("nn.OfrecordRawDecoder") +@experimental_api +class OfrecordRawDecoder(Module): + def __init__( + self, + blob_name: str, + shape: Sequence[int], + dtype: flow.dtype, + dim1_varying_length: bool = False, + truncate: bool = False, + auto_zero_padding: bool = False, + name: Optional[str] = None, + ): + super().__init__() + if auto_zero_padding: + print( + """WARNING: auto_zero_padding has been deprecated, Please use truncate instead. 
+ """ + ) + self._op = ( + flow.builtin_op("ofrecord_raw_decoder", name) + .Input("in") + .Output("out") + .Attr("name", blob_name) + .Attr("shape", shape) + .Attr("data_type", dtype) + .Attr("dim1_varying_length", dim1_varying_length) + .Attr("truncate", truncate or auto_zero_padding) + .Build() + ) + + def forward(self, input): + res = self._op(input)[0] + return res + + +@oneflow_export("nn.CoinFlip") +@experimental_api +class CoinFlip(Module): + def __init__( + self, + batch_size: int = 1, + random_seed: Optional[int] = None, + probability: float = 0.5, + ): + super().__init__() + seed, has_seed = mirrored_gen_random_seed(random_seed) + self._op = ( + flow.builtin_op("coin_flip") + .Output("out") + .Attr("batch_size", batch_size) + .Attr("probability", probability) + .Attr("has_seed", has_seed) + .Attr("seed", seed) + .Build() + ) + + def forward(self): + res = self._op()[0] + return res + + +@oneflow_export("nn.CropMirrorNormalize") +@experimental_api +class CropMirrorNormalize(Module): + def __init__( + self, + color_space: str = "BGR", + output_layout: str = "NCHW", + crop_h: int = 0, + crop_w: int = 0, + crop_pos_y: float = 0.5, + crop_pos_x: float = 0.5, + mean: Sequence[float] = [0.0], + std: Sequence[float] = [1.0], + output_dtype: flow.dtype = flow.float, + ): + super().__init__() + self._op = ( + flow.builtin_op("crop_mirror_normalize_from_uint8") + .Input("in") + .Input("mirror") + .Output("out") + .Attr("color_space", color_space) + .Attr("output_layout", output_layout) + .Attr("mean", mean) + .Attr("std", std) + .Attr("crop_h", crop_h) + .Attr("crop_w", crop_w) + .Attr("crop_pos_y", crop_pos_y) + .Attr("crop_pos_x", crop_pos_x) + .Attr("output_dtype", output_dtype) + .Build() + ) + + self._val_op = ( + flow.builtin_op("crop_mirror_normalize_from_tensorbuffer") + .Input("in") + .Output("out") + .Attr("color_space", color_space) + .Attr("output_layout", output_layout) + .Attr("mean", mean) + .Attr("std", std) + .Attr("crop_h", crop_h) + .Attr("crop_w", crop_w) + .Attr("crop_pos_y", crop_pos_y) + .Attr("crop_pos_x", crop_pos_x) + .Attr("output_dtype", output_dtype) + .Build() + ) + + def forward(self, input, mirror=None): + if mirror != None: + res = self._op(input, mirror)[0] + else: + res = self._val_op(input)[0] + return res + + +@oneflow_export("nn.OFRecordImageDecoderRandomCrop") +@experimental_api +class OFRecordImageDecoderRandomCrop(Module): + def __init__( + self, + blob_name: str, + color_space: str = "BGR", + num_attempts: int = 10, + random_seed: Optional[int] = None, + random_area: Sequence[float] = [0.08, 1.0], + random_aspect_ratio: Sequence[float] = [0.75, 1.333333], + ): + super().__init__() + seed, has_seed = mirrored_gen_random_seed(random_seed) + self._op = ( + flow.builtin_op("ofrecord_image_decoder_random_crop") + .Input("in") + .Output("out") + .Attr("name", blob_name) + .Attr("color_space", color_space) + .Attr("num_attempts", num_attempts) + .Attr("random_area", random_area) + .Attr("random_aspect_ratio", random_aspect_ratio) + .Attr("has_seed", has_seed) + .Attr("seed", seed) + .Build() + ) + + def forward(self, input): + res = self._op(input)[0] + return res + + +@oneflow_export("nn.OFRecordImageDecoder") +@experimental_api +class OFRecordImageDecoder(Module): + def __init__( + self, blob_name: str, color_space: str = "BGR", + ): + super().__init__() + self._op = ( + flow.builtin_op("ofrecord_image_decoder") + .Input("in") + .Output("out") + .Attr("name", blob_name) + .Attr("color_space", color_space) + .Build() + ) + + def forward(self, input): + 
res = self._op(input)[0] + return res + + +class TensorBufferToListOfTensors(Module): + def __init__( + self, out_shapes, out_dtypes, out_num: int = 1, dynamic_out: bool = False + ): + super().__init__() + self._op = ( + flow.builtin_op("tensor_buffer_to_list_of_tensors_v2") + .Input("in") + .Output("out", out_num) + .Attr("out_shapes", out_shapes) + .Attr("out_dtypes", out_dtypes) + .Attr("dynamic_out", dynamic_out) + .Build() + ) + + def forward(self, input): + return self._op(input) + + +@oneflow_export("tensor_buffer_to_list_of_tensors") +@experimental_api +def tensor_buffer_to_list_of_tensors(tensor, out_shapes, out_dtypes): + return TensorBufferToListOfTensors( + [list(out_shape) for out_shape in out_shapes], out_dtypes, len(out_shapes) + )(tensor) + + +@oneflow_export("nn.image.Resize") +@experimental_api +class ImageResize(Module): + def __init__( + self, + target_size: Union[int, Sequence[int]] = None, + min_size: Optional[int] = None, + max_size: Optional[int] = None, + keep_aspect_ratio: bool = False, + resize_side: str = "shorter", + channels: int = 3, + dtype: Optional[flow.dtype] = None, + interpolation_type: str = "auto", + name: Optional[str] = None, + # deprecated params, reserve for backward compatible + color_space: Optional[str] = None, + interp_type: Optional[str] = None, + resize_shorter: int = 0, + resize_x: int = 0, + resize_y: int = 0, + ): + super().__init__() + # process deprecated params + deprecated_param_used = False + if color_space is not None: + print( + "WARNING: color_space has been deprecated. Please use channels instead." + ) + print(traceback.format_stack()[-2]) + deprecated_param_used = True + assert isinstance(color_space, str) + if color_space.upper() == "RGB" or color_space.upper() == "BGR": + channels = 3 + elif color_space.upper() == "GRAY": + channels = 1 + else: + raise ValueError("invalid color_space") + if interp_type is not None: + print( + "WARNING: interp_type has been deprecated. Please use interpolation_type instead." + ) + print(traceback.format_stack()[-2]) + deprecated_param_used = True + assert isinstance(interp_type, str) + if interp_type == "Linear": + interpolation_type = "bilinear" + elif interp_type == "NN": + interpolation_type = "nearest_neighbor" + elif interp_type == "Cubic": + interpolation_type = "bicubic" + else: + raise ValueError("invalid interp_type") + + if resize_x > 0 and resize_y > 0: + print( + "WARNING: resize_x and resize_y has been deprecated. Please use target_size instead." + ) + print(traceback.format_stack()[-2]) + deprecated_param_used = True + target_size = (resize_x, resize_y) + keep_aspect_ratio = False + + if resize_shorter > 0: + print( + "WARNING: resize_shorter has been deprecated. Please use target_size instead." 
+ ) + print(traceback.format_stack()[-2]) + deprecated_param_used = True + target_size = resize_shorter + keep_aspect_ratio = True + resize_side = "shorter" + + if keep_aspect_ratio: + if not isinstance(target_size, int): + raise ValueError( + "target_size must be an int when keep_aspect_ratio is True" + ) + + if min_size is None: + min_size = 0 + + if max_size is None: + max_size = 0 + + if resize_side == "shorter": + resize_longer = False + elif resize_side == "longer": + resize_longer = True + else: + raise ValueError('resize_side must be "shorter" or "longer"') + + self._op = ( + flow.builtin_op("image_resize_keep_aspect_ratio") + .Input("in") + .Output("out") + .Output("size") + .Output("scale") + .Attr("target_size", target_size) + .Attr("min_size", min_size) + .Attr("max_size", max_size) + .Attr("resize_longer", resize_longer) + .Attr("interpolation_type", interpolation_type) + .Build() + ) + # TODO(Liang Depeng) + # scale = flow.tensor_buffer_to_tensor( + # scale, dtype=flow.float32, instance_shape=(2,) + # ) + # new_size = flow.tensor_buffer_to_tensor( + # new_size, dtype=flow.int32, instance_shape=(2,) + # ) + else: + if ( + not isinstance(target_size, (list, tuple)) + or len(target_size) != 2 + or not all(isinstance(size, int) for size in target_size) + ): + raise ValueError( + "target_size must be a form like (width, height) when keep_aspect_ratio is False" + ) + + if dtype is None: + dtype = flow.uint8 + + target_w, target_h = target_size + self._op = ( + flow.builtin_op("image_resize_to_fixed") + .Input("in") + .Output("out") + .Output("scale") + .Attr("target_width", target_w) + .Attr("target_height", target_h) + .Attr("channels", channels) + .Attr("data_type", dtype) + .Attr("interpolation_type", interpolation_type) + .Build() + ) + + def forward(self, input): + res = self._op(input)[0] + return res + + +@oneflow_export("tmp.RawDecoder") +@experimental_api +def raw_decoder( + input_record, + blob_name: str, + shape: Sequence[int], + dtype: flow.dtype, + dim1_varying_length: bool = False, + truncate: bool = False, + auto_zero_padding: bool = False, + name: Optional[str] = None, +): + if auto_zero_padding: + print( + """WARNING: auto_zero_padding has been deprecated, Please use truncate instead. 
+ """ + ) + return OfrecordRawDecoder( + blob_name, + shape, + dtype, + dim1_varying_length, + truncate or auto_zero_padding, + name, + ).forward(input_record) + + +@oneflow_export("tmp.OfrecordReader") +@experimental_api +def get_ofrecord_handle( + ofrecord_dir: str, + batch_size: int = 1, + data_part_num: int = 1, + part_name_prefix: str = "part-", + part_name_suffix_length: int = -1, + random_shuffle: bool = False, + shuffle_buffer_size: int = 1024, + shuffle_after_epoch: bool = False, + name: Optional[str] = None, +): + return OfrecordReader( + ofrecord_dir, + batch_size, + data_part_num, + part_name_prefix, + part_name_suffix_length, + random_shuffle, + shuffle_buffer_size, + shuffle_after_epoch, + name, + )() + + +@oneflow_export("nn.image.decode") +@experimental_api +class ImageDecode(Module): + def __init__(self, dtype: flow.dtype = flow.uint8, color_space: str = "BGR"): + super().__init__() + self._op = ( + flow.builtin_op("image_decode") + .Input("in") + .Output("out") + .Attr("color_space", color_space) + .Attr("data_type", dtype) + .Build() + ) + + def forward(self, input): + return self._op(input)[0] + + +@oneflow_export("nn.image.normalize") +@experimental_api +class ImageNormalize(Module): + def __init__(self, std: Sequence[float], mean: Sequence[float]): + super().__init__() + self._op = ( + flow.builtin_op("image_normalize") + .Input("in") + .Output("out") + .Attr("std", std) + .Attr("mean", mean) + .Build() + ) + + def forward(self, input): + return self._op(input)[0] + + +@oneflow_export("nn.COCOReader") +@experimental_api +class COCOReader(Module): + def __init__( + self, + annotation_file: str, + image_dir: str, + batch_size: int, + shuffle: bool = True, + random_seed: Optional[int] = None, + group_by_aspect_ratio: bool = True, + remove_images_without_annotations: bool = True, + stride_partition: bool = True, + ): + super().__init__() + if random_seed is None: + random_seed = random.randrange(sys.maxsize) + self._op = ( + flow.builtin_op("COCOReader") + .Output("image") + .Output("image_id") + .Output("image_size") + .Output("gt_bbox") + .Output("gt_label") + .Output("gt_segm") + .Output("gt_segm_index") + .Attr("session_id", flow.current_scope().session_id) + .Attr("annotation_file", annotation_file) + .Attr("image_dir", image_dir) + .Attr("batch_size", batch_size) + .Attr("shuffle_after_epoch", shuffle) + .Attr("random_seed", random_seed) + .Attr("group_by_ratio", group_by_aspect_ratio) + .Attr( + "remove_images_without_annotations", remove_images_without_annotations + ) + .Attr("stride_partition", stride_partition) + .Build() + ) + + def forward(self): + res = self._op() + return res + + +@oneflow_export("nn.image.batch_align") +@experimental_api +class ImageBatchAlign(Module): + def __init__(self, shape: Sequence[int], dtype: flow.dtype, alignment: int): + super().__init__() + self._op = ( + flow.builtin_op("image_batch_align") + .Input("in") + .Output("out") + .Attr("shape", shape) + .Attr("data_type", dtype) + .Attr("alignment", alignment) + .Build() + ) + + def forward(self, input): + return self._op(input)[0] diff --git a/oneflow/compatible_single_client_python/nn/modules/deconv.py b/oneflow/compatible_single_client_python/nn/modules/deconv.py new file mode 100644 index 0000000000000000000000000000000000000000..f12a4a7f9a523d05f83238c575f044ccd22a4cde --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/deconv.py @@ -0,0 +1,251 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import math +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.modules.utils import _pair +from oneflow.compatible_single_client_python.nn.common_types import _size_2_t +from oneflow.compatible_single_client_python.nn import init + + +def slice(x, begin, size): + ndim = len(x.shape) + if not isinstance(begin, (list, tuple)) or len(begin) != ndim: + raise ValueError( + "begin must be a list/tuple with the same length as input tensor's number of dimensions" + ) + + if not all(isinstance(b, int) or b is None for b in begin): + raise ValueError("element of begin must be a int or None") + + if not isinstance(size, (list, tuple)) or len(size) != ndim: + raise ValueError( + "size must be a list/tuple with the same length as input tensor's number of dimensions." + ) + + if not all(isinstance(s, int) or s is None for s in size): + raise ValueError("element of size must be a int or None") + + slice_tup_list = [] + for b, s, dim_size in zip(begin, size, x.shape): + start, stop, step = (None, None, 1) + if b is not None: + if b < -dim_size or b >= dim_size: + raise ValueError("element of begin is out of range") + start = b + + if s is not None: + if s == -1: + stop = dim_size + else: + if s <= 0 or s > dim_size: + raise ValueError("element of size is invalid") + if b + s < dim_size: + stop = b + s + + slice_tup_list.append((start, stop, step)) + return flow.experimental.slice(x, slice_tup_list) + + +class ConvUtil(object): + @classmethod + def split(cls, x, axis, split_num): + split_len = x.shape[axis] // split_num + result_list = [] + slice_begin = [0] * len(x.shape) + slice_size = [-1] * len(x.shape) + slice_size[axis] = split_len + for i in range(split_num): + slice_begin[axis] = i * split_len + result = slice(x, slice_begin, slice_size) + result_list.append(result) + return result_list + + +@oneflow_export("nn.ConvTranspose2d") +@experimental_api +class ConvTranspose2d(Module): + r""" + + Applies a 2D transposed convolution operator over an input image composed of several input planes. + + This module can be seen as the gradient of Conv2d with respect to its input. + It is also known as a fractionally-strided convolution or + a deconvolution (although it is not an actual deconvolution operation). + + Args: + in_channels (int): Number of channels in the input image + out_channels (int): Number of channels produced by the convolution + kernel_size (int or tuple): Size of the convolving kernel + stride (int or tuple, optional): Stride of the convolution. Default: 1 + padding (int or tuple, optional): ``dilation * (kernel_size - 1) - padding`` zero-padding + will be added to both sides of each dimension in the input. Default: 0 + output_padding (int or tuple, optional): Additional size added to one side + of each dimension in the output shape. 
Default: 0 + groups (int, optional): Number of blocked connections from input channels to output channels. Default: 1 + bias (bool, optional): If ``True``, adds a learnable bias to the output. Default: ``True`` + dilation (int or tuple, optional): Spacing between kernel elements. Default: 1 + + Shape: + - Input: :math:`(N, C_{in}, H_{in}, W_{in})` + - Output: :math:`(N, C_{out}, H_{out}, W_{out})` where + + .. math:: + H_{out} = (H_{in} - 1) \times \text{stride}[0] - 2 \times \text{padding}[0] + \text{dilation}[0] + + \times (\text{kernel_size}[0] - 1) + \text{output_padding}[0] + 1 + .. math:: + W_{out} = (W_{in} - 1) \times \text{stride}[1] - 2 \times \text{padding}[1] + \text{dilation}[1] + + \times (\text{kernel_size}[1] - 1) + \text{output_padding}[1] + 1 + + Attributes: + ConvTranspose2d.weight (Tensor): the learnable weights of the module of shape + :math:`(\text{in_channels}, \frac{\text{out_channels}}{\text{groups}},` + :math:`\text{kernel_size[0]}, \text{kernel_size[1]})`. + The values of these weights are sampled from + :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where + :math:`k = \frac{groups}{C_\text{out} * \prod_{i=0}^{1}\text{kernel_size}[i]}` + ConvTranspose2d.bias (Tensor): the learnable bias of the module of shape (out_channels) + If :attr:`bias` is ``True``, then the values of these weights are + sampled from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where + :math:`k = \frac{groups}{C_\text{out} * \prod_{i=0}^{1}\text{kernel_size}[i]}` + + Examples:: + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> import oneflow.compatible.single_client.experimental.nn as nn + >>> flow.enable_eager_execution() + + >>> m = nn.ConvTranspose2d(16, 33, 3, stride=2) + >>> # non-square kernels and unequal stride and with padding + >>> m = nn.ConvTranspose2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2)) + >>> m = m.to("cuda") + >>> input = flow.Tensor(np.random.randn(20, 16, 50, 100), device=flow.device("cuda")) + >>> output = m(input) + >>> output.size() + flow.Size([20, 33, 93, 100]) + + .. _cross-correlation: + https://en.wikipedia.org/wiki/Cross-correlation + + .. 
_link: + https://github.com/vdumoulin/conv_arithmetic/blob/master/README.md + """ + + def __init__( + self, + in_channels: int, + out_channels: int, + kernel_size: _size_2_t, + stride: _size_2_t = 1, + padding: _size_2_t = 0, + output_padding: _size_2_t = 0, + groups: int = 1, + bias: bool = True, + dilation: int = 1, + padding_mode: str = "zeros", + ) -> None: + super().__init__() + + assert padding_mode == "zeros" + kernel_size = _pair(kernel_size) + stride = _pair(stride) + padding = _pair(padding) + output_padding = _pair(output_padding) + dilation = _pair(dilation) + self.groups = groups + assert in_channels % groups == 0 + assert out_channels % groups == 0 + self.weight = flow.nn.Parameter( + flow.Tensor(in_channels, out_channels // groups, *kernel_size) + ) + self.in_channel_groups = in_channels // groups + self.bias = None + self._bias_add_op = None + if bias: + self.bias = flow.nn.Parameter(flow.Tensor(out_channels)) + self._bias_add_op = ( + flow.builtin_op("bias_add") + .Input("a") + .Input("b") + .Output("out") + .Attr("axis", 1) + .Build() + ) + + self._op = ( + flow.builtin_op("deconv2d") + .Input("in") + .Input("weight") + .Attr("filters", out_channels // groups) + .Attr("padding_before", padding) + .Attr("data_format", "channels_first") + .Attr("kernel_size", kernel_size) + .Attr("strides", stride) + .Attr("dilation_rate", dilation) + .Attr("output_padding", output_padding) + .Attr("groups", 1) + .Output("out") + .Build() + ) + self.reset_parameters() + + def reset_parameters(self) -> None: + init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + if self.bias is not None: + fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight) + bound = 1 / math.sqrt(fan_in) + init.uniform_(self.bias, -bound, bound) + + def forward(self, x): + if self.groups > 1: + in_channel_axis = 1 + in_split_list = ConvUtil.split( + x, axis=in_channel_axis, split_num=self.groups + ) + out_list = [] + for i in range(len(in_split_list)): + out_list.append( + self._op( + in_split_list[i], + self.weight[ + i + * self.in_channel_groups : (i + 1) + * self.in_channel_groups, + :, + :, + :, + ], + )[0] + ) + res = flow.experimental.cat(out_list, dim=in_channel_axis) + else: + res = self._op(x, self.weight)[0] + if self._bias_add_op is not None: + res = self._bias_add_op(res, self.bias)[0] + return res + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/dropout.py b/oneflow/compatible_single_client_python/nn/modules/dropout.py new file mode 100644 index 0000000000000000000000000000000000000000..4de6cd8dc66fc63a843a7d3b89fa19c8b6015a73 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/dropout.py @@ -0,0 +1,113 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
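As a sanity check on the ConvTranspose2d shape formula above, the doctest's flow.Size([20, 33, 93, 100]) can be reproduced by hand:

.. code-block:: python

    def deconv_out(size, stride, pad, dil, k, out_pad=0):
        # (size - 1)*stride - 2*pad + dil*(k - 1) + out_pad + 1
        return (size - 1) * stride - 2 * pad + dil * (k - 1) + out_pad + 1

    h = deconv_out(50, stride=2, pad=4, dil=1, k=3)   # -> 93
    w = deconv_out(100, stride=1, pad=2, dil=1, k=5)  # -> 100
    assert (h, w) == (93, 100)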
+""" +import sys +import random +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util + + +class _DropoutNd(Module): + __constants__ = ["p", "inplace"] + p: float + inplace: bool + + def __init__(self, p: float = 0.5, inplace: bool = False) -> None: + super(_DropoutNd, self).__init__() + if p < 0 or p > 1: + raise ValueError( + "dropout probability has to be between 0 and 1, " "but got {}".format(p) + ) + self.p = p + self.inplace = inplace + + def extra_repr(self) -> str: + return "p={}, inplace={}".format(self.p, self.inplace) + + +@oneflow_export("nn.Dropout") +@experimental_api +class Dropout(_DropoutNd): + r"""During training, randomly zeroes some of the elements of the input + tensor with probability :attr:`p` using samples from a Bernoulli + distribution. Each channel will be zeroed out independently on every forward + call. + + This has proven to be an effective technique for regularization and + preventing the co-adaptation of neurons as described in the paper + "Improving neural networks by preventing co-adaptation of feature + detectors". + + Furthermore, the outputs are scaled by a factor of :math:`\frac{1}{1-p}` during + training. This means that during evaluation the module simply computes an + identity function. + + Args: + p: probability of an element to be zeroed. Default: 0.5 + inplace: If set to ``True``, will do this operation in-place. Default: ``False`` + + Shape: + - Input: :math:`(*)`. Input can be of any shape + - Output: :math:`(*)`. Output is of the same shape as input + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> m = flow.nn.Dropout(p=0) + >>> arr = np.array( + ... [ + ... [-0.7797, 0.2264, 0.2458, 0.4163], + ... [0.4299, 0.3626, -0.4892, 0.4141], + ... [-1.4115, 1.2183, -0.5503, 0.6520], + ... ] + ... ) + >>> x = flow.Tensor(arr) + >>> y = m(x) + >>> y #doctest: +ELLIPSIS + tensor([[-0.7797, 0.2264, 0.2458, 0.4163], + ... + [-1.4115, 1.2183, -0.5503, 0.652 ]], dtype=oneflow.float32) + + + """ + + def __init__(self, p: float = 0.5, inplace: bool = False, generator=None): + _DropoutNd.__init__(self, p, inplace) + + self.p = p + if generator is None: + generator = flow.Generator() + self.generator = generator + + def forward(self, x): + if self.p == 0.0 or not self.training: + return x + return flow.F.dropout(x, self.p, self.generator) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/eq.py b/oneflow/compatible_single_client_python/nn/modules/eq.py new file mode 100644 index 0000000000000000000000000000000000000000..d2045b8512392bb90d73df6e1e34e9c747d12f3c --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/eq.py @@ -0,0 +1,86 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import oneflow
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op
+
+
+class Eq(Module):
+    def __init__(self) -> None:
+        super().__init__()
+
+    def forward(self, input, other):
+        if isinstance(other, flow.Tensor) or isinstance(
+            other, oneflow._oneflow_internal.Tensor
+        ):
+            for i in range(len(input.size())):
+                assert (
+                    input.shape[i] >= other.shape[i]
+                ), "The second tensor's shape should be broadcastable with the first argument."
+            if input.dtype != other.dtype:
+                other = other.to(dtype=input.dtype)
+        elif isinstance(other, int) or isinstance(other, float):
+            other = flow.Tensor([other], dtype=input.dtype, device=input.device)
+        else:
+            raise NotImplementedError(
+                "Unsupported data type. The second argument can be a number or a tensor whose shape is broadcastable with the first argument."
+            )
+        return flow.F.broadcast_equal(input, other)
+
+
+@oneflow_export("eq", "equal")
+@register_tensor_op("eq")
+@experimental_api
+def eq_op(input, other):
+    r"""
+    Computes element-wise equality.
+    The second argument can be a number or a tensor whose shape is broadcastable with the first argument.
+
+    Args:
+        input (oneflow.compatible.single_client.Tensor): the tensor to compare
+        other (oneflow.compatible.single_client.Tensor, float or int): the target to compare
+
+    Returns:
+
+        - A boolean tensor that is True where :attr:`input` is equal to :attr:`other` and False elsewhere
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import numpy as np
+        >>> flow.enable_eager_execution()
+
+        >>> input = flow.Tensor(np.array([2, 3, 4, 5]), dtype=flow.float32)
+        >>> other = flow.Tensor(np.array([2, 3, 4, 1]), dtype=flow.float32)
+
+        >>> y = flow.eq(input, other)
+        >>> y
+        tensor([1, 1, 1, 0], dtype=oneflow.int8)
+
+    """
+    return Eq()(input, other)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/exp.py b/oneflow/compatible_single_client_python/nn/modules/exp.py
new file mode 100644
index 0000000000000000000000000000000000000000..b80a4bebffc818fcd87891f41aa57e858b470ba0
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/exp.py
@@ -0,0 +1,71 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Exp(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.exp(x) + + +@oneflow_export("exp") +@register_tensor_op("exp") +@experimental_api +def exp_op(x): + """This operator computes the exponential of Tensor. + + The equation is: + + .. math:: + + out = e^x + + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor + + Returns: + oneflow.compatible.single_client.Tensor: The result Tensor + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.array([1, 2, 3]).astype(np.float32)) + >>> y = x.exp() + >>> y + tensor([ 2.7183, 7.3891, 20.0855], dtype=oneflow.float32) + + """ + return Exp()(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/expand.py b/oneflow/compatible_single_client_python/nn/modules/expand.py new file mode 100644 index 0000000000000000000000000000000000000000..c4ad53187dc49b092b269e53f62746d4af078b51 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/expand.py @@ -0,0 +1,113 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from typing import Optional + + +class Expand(Module): + def __init__(self, *sizes) -> None: + super().__init__() + self.expand_size = list(*sizes) + + def forward(self, x): + if x.dtype == flow.int8: + x = flow.experimental.cast(x, flow.int32) + expand_size = self.expand_size + assert len(expand_size) >= len( + x.shape + ), "The desired expanded dims should not be less than the input dims." 
+        # calculate the original stride
+        original_stride = [1]
+        for i in range(len(x.shape) - 2, -1, -1):
+            original_stride.insert(0, original_stride[0] * x.shape[i + 1])
+
+        # calculate the output shape and stride
+        new_size = []
+        new_stride = []
+        diff = len(expand_size) - len(x.shape)
+        for i in range(len(expand_size) - 1, -1, -1):
+            if i >= diff:
+                if expand_size[i] == -1 or expand_size[i] == x.shape[i - diff]:
+                    new_size.insert(0, x.shape[i - diff])
+                    new_stride.insert(0, original_stride[i - diff])
+                else:
+                    assert expand_size[i] >= 1 and x.shape[i - diff] == 1
+                    new_size.insert(0, expand_size[i])
+                    new_stride.insert(0, 0)
+            else:
+                assert expand_size[i] >= 1
+                new_size.insert(0, expand_size[i])
+                if expand_size[i] == 1:
+                    new_stride.insert(0, new_stride[0])
+                else:
+                    new_stride.insert(0, 0)
+
+        return flow.F.expand(
+            x, in_shape=list(x.shape), out_shape=new_size, stride=new_stride
+        )
+
+
+@oneflow_export("expand")
+@register_tensor_op("expand")
+@experimental_api
+def expand_op(x, *sizes):
+    """This operator expands the input tensor to a larger size.
+
+    Passing -1 as the size for a dimension means not changing the size of that dimension.
+
+    The tensor can also be expanded to a larger number of dimensions, and the new ones will be added at the front.
+
+    For the new dimensions, the size cannot be set to -1.
+
+    Args:
+        x (oneflow.compatible.single_client.Tensor): The input Tensor.
+        *sizes (flow.Size or int): The desired expanded size.
+
+    Returns:
+        oneflow.compatible.single_client.Tensor: The result Tensor.
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import numpy as np
+        >>> flow.enable_eager_execution()
+
+        >>> x = np.array([[[[0, 1]],
+        ...               [[2, 3]],
+        ...               [[4, 5]]]]).astype(np.int32)
+
+        >>> input = flow.Tensor(x)
+
+        >>> out = input.expand(1, 3, 2, 2)
+        >>> out.shape
+        flow.Size([1, 3, 2, 2])
+
+    """
+    return Expand(sizes)(x)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/flatten.py b/oneflow/compatible_single_client_python/nn/modules/flatten.py
new file mode 100644
index 0000000000000000000000000000000000000000..94d8c7e385df853f4eeb580c9152f0014d1e36e2
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/flatten.py
@@ -0,0 +1,88 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op
+
+
+@oneflow_export("nn.Flatten")
+@experimental_api
+class Flatten(Module):
+    """Flattens a contiguous range of dims into a tensor. For use with nn.Sequential.
+
+    Args:
+        start_dim: first dim to flatten (default = 1).
+        end_dim: last dim to flatten (default = -1).
+
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client.experimental as flow
+
+        input = flow.Tensor(32, 1, 5, 5)
+        m = flow.nn.Flatten()
+        output = m(input)
+        output.size()
+        # out flow.Size([32, 25])
+
+    """
+
+    def __init__(self, start_dim: int = 1, end_dim: int = -1) -> None:
+        super().__init__()
+        self.start_dim = start_dim
+        self.end_dim = end_dim
+
+    def forward(self, input):
+        return flow.F.flatten(input, start_dim=self.start_dim, end_dim=self.end_dim)
+
+
+@oneflow_export("flatten")
+@register_tensor_op("flatten")
+@experimental_api
+def _flow_flatten(input, start_dim: int = 0, end_dim: int = -1):
+    """Flattens a contiguous range of dims into a tensor.
+
+    Args:
+        start_dim: first dim to flatten (default = 0).
+        end_dim: last dim to flatten (default = -1).
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> input = flow.Tensor(32, 1, 5, 5)
+        >>> output = input.flatten(start_dim=1)
+        >>> output.size()
+        flow.Size([32, 25])
+
+    """
+    return Flatten(start_dim=start_dim, end_dim=end_dim)(input)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/floor.py b/oneflow/compatible_single_client_python/nn/modules/floor.py
new file mode 100644
index 0000000000000000000000000000000000000000..a6c6670a2a29b089e31ec6e1e2bcb64015a2876a
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/floor.py
@@ -0,0 +1,90 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import collections
+from typing import Optional, Sequence, Union
+
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op
+from oneflow.compatible_single_client_python.nn.modules.utils import _check_axis
+
+
+class Floor(Module):
+    def __init__(self) -> None:
+        super().__init__()
+
+    def forward(self, x):
+        return flow.F.floor(x)
+
+
+@oneflow_export("floor")
+@experimental_api
+def floor_op(x):
+
+    r"""
+    Returns a new tensor with the floor of the elements of :attr:`input`, the largest integer less than or equal to each element.
+
+    .. math::
+        \text{out}_{i} = \lfloor \text{input}_{i} \rfloor
+
+    Args:
+        input (Tensor): the input tensor.
+
+    For example:
+
+    .. 
code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import numpy as np
+        >>> flow.enable_eager_execution()
+        >>> input = flow.Tensor(np.array([-0.5, 1.5, 0, 0.8]), dtype=flow.float32)
+        >>> output = flow.floor(input)
+        >>> output.shape
+        flow.Size([4])
+        >>> output.numpy()
+        array([-1., 1., 0., 0.], dtype=float32)
+
+        >>> input1 = flow.Tensor(np.array([[0.8, 1.0], [-0.6, 2.5]]), dtype=flow.float32)
+        >>> output1 = input1.floor()
+        >>> output1.shape
+        flow.Size([2, 2])
+        >>> output1.numpy()
+        array([[ 0., 1.],
+               [-1., 2.]], dtype=float32)
+
+    """
+
+    return Floor()(x)
+
+
+@register_tensor_op("floor")
+@experimental_api
+def floor_op_tensor(input):
+    r"""
+    See :func:`oneflow.compatible.single_client.experimental.floor`
+    """
+    return Floor()(input)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/gather.py b/oneflow/compatible_single_client_python/nn/modules/gather.py
new file mode 100644
index 0000000000000000000000000000000000000000..a7f020a12343a9f06895e97fa44f2616a952d229
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/gather.py
@@ -0,0 +1,99 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from oneflow.compatible import single_client as flow
+
+from oneflow.compatible_single_client_python.framework.tensor import Tensor
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op
+from oneflow.compatible_single_client_python.nn.module import Module
+
+from typing import Optional, List, Tuple
+
+
+class Gather(Module):
+    def __init__(
+        self, dim: int = 0, sparse_grad: bool = False,
+    ):
+        super().__init__()
+        assert sparse_grad is False, "Only sparse_grad=False is supported for now!"
+        self.dim = dim
+
+    def forward(self, input, index):
+        assert self.dim < len(
+            index.shape
+        ), "Value of dim is out of range (dim should be less than len(index.shape))"
+        assert len(input.shape) == len(
+            index.shape
+        ), "Dimensions of input and index should be equal"
+
+        for i in range(0, len(input.shape)):
+            if self.dim == i:
+                continue
+            else:
+                assert (
+                    input.shape[i] == index.shape[i]
+                ), "Dimensions of input and index should be the same except at dim"
+
+        return flow.F.dim_gather(input, index, dim=self.dim)
+
+
+@oneflow_export("gather")
+@register_tensor_op("gather")
+@experimental_api
+def gather_op(input, index, dim=0, sparse_grad=False):
+    r"""Gathers values along an axis specified by `dim`.
+
+    For a 3-D tensor the output is specified by:
+
+        out[i][j][k] = input[index[i][j][k]][j][k]  # if dim == 0
+        out[i][j][k] = input[i][index[i][j][k]][k]  # if dim == 1
+        out[i][j][k] = input[i][j][index[i][j][k]]  # if dim == 2
+
+    :attr:`input` and :attr:`index` must have the same number of dimensions.
+ It is also required that ``index.size(d) <= input.size(d)`` for all + dimensions ``d != dim``. :attr:`out` will have the same shape as :attr:`index`. + Note that ``input`` and ``index`` do not broadcast against each other. + + Args: + input (Tensor): the source tensor + dim (int): the axis along which to index + index (LongTensor): the indices of elements to gather + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> input = np.random.randn(3, 4, 3, 5) + >>> index = np.random.choice(np.arange(3), size=180, replace=True).reshape((3, 4, 3, 5)) + >>> output = flow.gather(flow.Tensor(input), flow.Tensor(index, dtype=flow.int), dim=1) + >>> output.shape + flow.Size([3, 4, 3, 5]) + + """ + return Gather(dim, sparse_grad)(input, index) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/greater.py b/oneflow/compatible_single_client_python/nn/modules/greater.py new file mode 100644 index 0000000000000000000000000000000000000000..b2896eb8b5b31d65f134f75c64a07a52be2ae8b3 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/greater.py @@ -0,0 +1,88 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Greater(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + if x.dtype != flow.float32: + x = flow.experimental.cast(x, flow.float32) + if isinstance(y, int) or isinstance(y, float): + y = flow.Tensor( + [float(y)], dtype=flow.float32, device=flow.device(x.device.type) + ) + if y.dtype != flow.float32: + y = flow.experimental.cast(y, flow.float32) + return flow.F.broadcast_greater(x, y) + + +@oneflow_export("gt") +@experimental_api +def greater_op(x, y): + r"""Returns the truth value of :math:`x > y` element-wise. + + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor + y (oneflow.compatible.single_client.Tensor): A Tensor + + Returns: + oneflow.compatible.single_client.Tensor: A Tensor with int8 type. + + For example: + + .. 
code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> input1 = flow.Tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32) + >>> input2 = flow.Tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32) + + >>> out = flow.gt(input1, input2).shape + >>> out + flow.Size([2, 6, 5, 3]) + + """ + return Greater()(x, y) + + +@register_tensor_op("gt") +@experimental_api +def greater_op_tensor(x, y): + r""" + + gt() -> Tensor + + See :func:`oneflow.compatible.single_client.experimental.gt` + + """ + return Greater()(x, y) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/greater_equal.py b/oneflow/compatible_single_client_python/nn/modules/greater_equal.py new file mode 100644 index 0000000000000000000000000000000000000000..780faf307e8b520cc05ed043a371f49b65bbbcd1 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/greater_equal.py @@ -0,0 +1,88 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class GreaterEqual(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + if x.dtype != flow.float32: + x = flow.experimental.cast(x, flow.float32) + if isinstance(y, int) or isinstance(y, float): + y = flow.Tensor( + [float(y)], dtype=flow.float32, device=flow.device(x.device.type) + ) + if y.dtype != flow.float32: + y = flow.experimental.cast(y, flow.float32) + return flow.F.broadcast_greater_equal(x, y) + + +@oneflow_export("ge") +@experimental_api +def greater_equal_op(x, y): + r"""Returns the truth value of :math:`x >= y` element-wise. + + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor + y (oneflow.compatible.single_client.Tensor): A Tensor + + Returns: + oneflow.compatible.single_client.Tensor: A Tensor with int8 type. + + For example: + + .. 
code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> input1 = flow.Tensor(np.array([1, 2, 3]).astype(np.float32), dtype=flow.float32)
+        >>> input2 = flow.Tensor(np.array([1, 1, 4]).astype(np.float32), dtype=flow.float32)
+
+        >>> out = flow.ge(input1, input2)
+        >>> out
+        tensor([1, 1, 0], dtype=oneflow.int8)
+
+    """
+    return GreaterEqual()(x, y)
+
+
+@register_tensor_op("ge")
+@experimental_api
+def greater_equal_op_tensor(x, y):
+    r"""
+
+    ge() -> Tensor
+
+    See :func:`oneflow.compatible.single_client.experimental.ge`
+
+    """
+    return GreaterEqual()(x, y)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/instancenorm.py b/oneflow/compatible_single_client_python/nn/modules/instancenorm.py
new file mode 100644
index 0000000000000000000000000000000000000000..30ddf0a9ee1f60c4b937c48d8e07682688f48af8
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/instancenorm.py
@@ -0,0 +1,339 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from oneflow.compatible import single_client as flow
+
+from oneflow.compatible_single_client_python.nn.modules.batchnorm import _NormBase
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+
+
+class _InstanceNorm(_NormBase):
+    def __init__(
+        self,
+        num_features: int,
+        eps: float = 1e-5,
+        momentum: float = 0.1,
+        affine: bool = False,
+        track_running_stats: bool = False,
+    ):
+        super().__init__(num_features, eps, momentum, affine, track_running_stats)
+
+    def _forward(self, x):
+        axis = 1
+        params_shape = [x.shape[axis]]
+        weight = self.weight
+        bias = self.bias
+
+        nd_params_shape = [1] * len(x.shape)
+        nd_params_shape[axis] = params_shape[0]
+
+        mean = x.mean(2, keepdim=True)
+        variance = x.var(2, keepdim=True)
+        normalized = (x - mean) / flow.experimental.sqrt(variance + self.eps)
+
+        # Use `is not None`: truth-testing a multi-element Tensor is ambiguous.
+        if self.weight is not None and params_shape[0] == self.weight.nelement():
+            weight = self.weight.reshape(shape=nd_params_shape)
+        if self.bias is not None and params_shape[0] == self.bias.nelement():
+            bias = self.bias.reshape(shape=nd_params_shape)
+
+        if self.weight is not None:
+            normalized = normalized * weight
+        if self.bias is not None:
+            normalized = normalized + bias
+
+        return normalized
+
+    def forward(self, x):
+        self._check_input_dim(x)
+
+        reshape_to_1d = x.reshape([x.shape[0], x.shape[1], -1])
+        normalized_1d_out = self._forward(reshape_to_1d)
+        reshape_back_to_nd = normalized_1d_out.reshape(list(x.shape))
+        return reshape_back_to_nd
+
+
+@oneflow_export("nn.InstanceNorm1d")
+@experimental_api
+class InstanceNorm1d(_InstanceNorm):
+    r"""The interface is consistent with PyTorch.
+    The documentation is referenced from: https://pytorch.org/docs/stable/generated/torch.nn.InstanceNorm1d.html
+
+    Applies Instance Normalization over a 3D input (a mini-batch of 1D
+    inputs with optional additional channel dimension) as described in the paper
+    `Instance Normalization: The Missing Ingredient for Fast Stylization
+    <https://arxiv.org/abs/1607.08022>`__.
+
+    .. math::
+
+        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
+
+    The mean and standard-deviation are calculated per-dimension separately
+    for each object in a mini-batch. :math:`\gamma` and :math:`\beta` are learnable parameter vectors
+    of size `C` (where `C` is the input size) if :attr:`affine` is ``True``.
+    The standard-deviation is calculated via the biased estimator, equivalent to
+    `torch.var(input, unbiased=False)`.
+
+    By default, this layer uses instance statistics computed from input data in
+    both training and evaluation modes.
+
+    If :attr:`track_running_stats` is set to ``True``, during training this
+    layer keeps running estimates of its computed mean and variance, which are
+    then used for normalization during evaluation. The running estimates are
+    kept with a default :attr:`momentum` of 0.1.
+
+    .. note::
+        This :attr:`momentum` argument is different from one used in optimizer
+        classes and the conventional notion of momentum. Mathematically, the
+        update rule for running statistics here is
+        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
+        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
+        new observed value.
+
+    .. note::
+        :class:`InstanceNorm1d` and :class:`LayerNorm` are very similar, but
+        have some subtle differences. :class:`InstanceNorm1d` is applied
+        on each channel of channeled data like multidimensional time series, but
+        :class:`LayerNorm` is usually applied on entire sample and often in NLP
+        tasks. Additionally, :class:`LayerNorm` applies elementwise affine
+        transform, while :class:`InstanceNorm1d` usually does not apply affine
+        transform.
+
+    Args:
+        num_features: :math:`C` from an expected input of size
+            :math:`(N, C, L)` or :math:`L` from input of size :math:`(N, L)`
+        eps: a value added to the denominator for numerical stability. Default: 1e-5
+        momentum: the value used for the running_mean and running_var computation. Default: 0.1
+        affine: a boolean value that when set to ``True``, this module has
+            learnable affine parameters, initialized the same way as done for batch normalization.
+            Default: ``False``.
+        track_running_stats: a boolean value that when set to ``True``, this
+            module tracks the running mean and variance, and when set to ``False``,
+            this module does not track such statistics and always uses batch
+            statistics in both training and eval modes. Default: ``False``
+
+    Shape:
+        - Input: :math:`(N, C, L)`
+        - Output: :math:`(N, C, L)` (same shape as input)
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import numpy as np
+        >>> flow.enable_eager_execution()
+
+
+        >>> # Without Learnable Parameters
+        >>> m = flow.nn.InstanceNorm1d(100)
+        >>> # With Learnable Parameters
+        >>> m = flow.nn.InstanceNorm1d(100, affine=True)
+        >>> x = flow.Tensor(np.random.randn(20, 100, 40))
+        >>> output = m(x)
+
+    """
+
+    def _check_input_dim(self, input):
+        if input.dim() == 2:
+            raise ValueError(
+                "InstanceNorm1d returns 0-filled tensor to 2D tensor. "
+                "This is because InstanceNorm1d reshapes inputs to "
+                "(1, N * C, ...) from (N, C, ...) and this makes "
+                "variances 0."
+            )
+        if input.dim() != 3:
+            raise ValueError("expected 3D input (got {}D input)".format(input.dim()))
+
+
+@oneflow_export("nn.InstanceNorm2d")
+@experimental_api
+class InstanceNorm2d(_InstanceNorm):
+    r"""The interface is consistent with PyTorch.
+    The documentation is referenced from: https://pytorch.org/docs/stable/generated/torch.nn.InstanceNorm2d.html
+
+    Applies Instance Normalization over a 4D input (a mini-batch of 2D inputs
+    with additional channel dimension) as described in the paper
+    `Instance Normalization: The Missing Ingredient for Fast Stylization
+    <https://arxiv.org/abs/1607.08022>`__.
+
+    .. math::
+
+        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
+
+    The mean and standard-deviation are calculated per-dimension separately
+    for each object in a mini-batch. :math:`\gamma` and :math:`\beta` are learnable parameter vectors
+    of size `C` (where `C` is the input size) if :attr:`affine` is ``True``.
+    The standard-deviation is calculated via the biased estimator, equivalent to
+    `torch.var(input, unbiased=False)`.
+
+    By default, this layer uses instance statistics computed from input data in
+    both training and evaluation modes.
+
+    If :attr:`track_running_stats` is set to ``True``, during training this
+    layer keeps running estimates of its computed mean and variance, which are
+    then used for normalization during evaluation. The running estimates are
+    kept with a default :attr:`momentum` of 0.1.
+
+    .. note::
+        This :attr:`momentum` argument is different from one used in optimizer
+        classes and the conventional notion of momentum. Mathematically, the
+        update rule for running statistics here is
+        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
+        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
+        new observed value.
+
+    .. note::
+        :class:`InstanceNorm2d` and :class:`LayerNorm` are very similar, but
+        have some subtle differences. :class:`InstanceNorm2d` is applied
+        on each channel of channeled data like RGB images, but
+        :class:`LayerNorm` is usually applied on entire sample and often in NLP
+        tasks. Additionally, :class:`LayerNorm` applies elementwise affine
+        transform, while :class:`InstanceNorm2d` usually does not apply affine
+        transform.
+
+    Args:
+        num_features: :math:`C` from an expected input of size
+            :math:`(N, C, H, W)`
+        eps: a value added to the denominator for numerical stability. Default: 1e-5
+        momentum: the value used for the running_mean and running_var computation. Default: 0.1
+        affine: a boolean value that when set to ``True``, this module has
+            learnable affine parameters, initialized the same way as done for batch normalization.
+            Default: ``False``.
+        track_running_stats: a boolean value that when set to ``True``, this
+            module tracks the running mean and variance, and when set to ``False``,
+            this module does not track such statistics and always uses batch
+            statistics in both training and eval modes. Default: ``False``
+
+    Shape:
+        - Input: :math:`(N, C, H, W)`
+        - Output: :math:`(N, C, H, W)` (same shape as input)
+
+    For example:
+
+    .. 
code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import numpy as np
+        >>> flow.enable_eager_execution()
+
+
+        >>> # Without Learnable Parameters
+        >>> m = flow.nn.InstanceNorm2d(100)
+        >>> # With Learnable Parameters
+        >>> m = flow.nn.InstanceNorm2d(100, affine=True)
+        >>> x = flow.Tensor(np.random.randn(20, 100, 35, 45))
+        >>> output = m(x)
+
+    """
+
+    def _check_input_dim(self, input):
+        if input.dim() != 4:
+            raise ValueError("expected 4D input (got {}D input)".format(input.dim()))
+
+
+@oneflow_export("nn.InstanceNorm3d")
+@experimental_api
+class InstanceNorm3d(_InstanceNorm):
+    r"""The interface is consistent with PyTorch.
+    The documentation is referenced from: https://pytorch.org/docs/stable/generated/torch.nn.InstanceNorm3d.html
+
+    Applies Instance Normalization over a 5D input (a mini-batch of 3D inputs
+    with additional channel dimension) as described in the paper
+    `Instance Normalization: The Missing Ingredient for Fast Stylization
+    <https://arxiv.org/abs/1607.08022>`__.
+
+    .. math::
+
+        y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta
+
+    The mean and standard-deviation are calculated per-dimension separately
+    for each object in a mini-batch. :math:`\gamma` and :math:`\beta` are learnable parameter vectors
+    of size C (where C is the input size) if :attr:`affine` is ``True``.
+    The standard-deviation is calculated via the biased estimator, equivalent to
+    `torch.var(input, unbiased=False)`.
+
+    By default, this layer uses instance statistics computed from input data in
+    both training and evaluation modes.
+
+    If :attr:`track_running_stats` is set to ``True``, during training this
+    layer keeps running estimates of its computed mean and variance, which are
+    then used for normalization during evaluation. The running estimates are
+    kept with a default :attr:`momentum` of 0.1.
+
+    .. note::
+        This :attr:`momentum` argument is different from one used in optimizer
+        classes and the conventional notion of momentum. Mathematically, the
+        update rule for running statistics here is
+        :math:`\hat{x}_\text{new} = (1 - \text{momentum}) \times \hat{x} + \text{momentum} \times x_t`,
+        where :math:`\hat{x}` is the estimated statistic and :math:`x_t` is the
+        new observed value.
+
+    .. note::
+        :class:`InstanceNorm3d` and :class:`LayerNorm` are very similar, but
+        have some subtle differences. :class:`InstanceNorm3d` is applied
+        on each channel of channeled data like 3D models with RGB color, but
+        :class:`LayerNorm` is usually applied on entire sample and often in NLP
+        tasks. Additionally, :class:`LayerNorm` applies elementwise affine
+        transform, while :class:`InstanceNorm3d` usually does not apply affine
+        transform.
+
+    Args:
+        num_features: :math:`C` from an expected input of size
+            :math:`(N, C, D, H, W)`
+        eps: a value added to the denominator for numerical stability. Default: 1e-5
+        momentum: the value used for the running_mean and running_var computation. Default: 0.1
+        affine: a boolean value that when set to ``True``, this module has
+            learnable affine parameters, initialized the same way as done for batch normalization.
+            Default: ``False``.
+        track_running_stats: a boolean value that when set to ``True``, this
+            module tracks the running mean and variance, and when set to ``False``,
+            this module does not track such statistics and always uses batch
+            statistics in both training and eval modes. 
Default: ``False`` + + Shape: + - Input: :math:`(N, C, D, H, W)` + - Output: :math:`(N, C, D, H, W)` (same shape as input) + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + + >>> # Without Learnable Parameters + >>> m = flow.nn.InstanceNorm3d(100) + >>> # With Learnable Parameters + >>> m = flow.nn.InstanceNorm3d(100, affine=True) + >>> x = flow.Tensor(np.random.randn(20, 100, 35, 45, 10)) + >>> output = m(x) + + """ + + def _check_input_dim(self, input): + if input.dim() != 5: + raise ValueError("expected 5D input (got {}D input)".format(input.dim())) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/less.py b/oneflow/compatible_single_client_python/nn/modules/less.py new file mode 100644 index 0000000000000000000000000000000000000000..5e0ef753bccf0cb39444eda337e0b05e531a76fd --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/less.py @@ -0,0 +1,76 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Less(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + if x.dtype != flow.float32: + x = flow.experimental.cast(x, flow.float32) + if isinstance(y, int) or isinstance(y, float): + y = flow.Tensor( + [float(y)], dtype=flow.float32, device=flow.device(x.device.type) + ) + if y.dtype != flow.float32: + y = flow.experimental.cast(y, flow.float32) + return flow.F.broadcast_less(x, y) + + +@oneflow_export("lt") +@register_tensor_op("lt") +@experimental_api +def less_op(x, y): + r"""Returns the truth value of :math:`x < y` element-wise. + + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor + y (oneflow.compatible.single_client.Tensor): A Tensor + + Returns: + oneflow.compatible.single_client.Tensor: A Tensor with int8 type. + + For example: + + .. 
code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> input1 = flow.Tensor(np.array([1, 2, 3]).astype(np.float32), dtype=flow.float32) + >>> input2 = flow.Tensor(np.array([1, 2, 4]).astype(np.float32), dtype=flow.float32) + + >>> out = flow.lt(input1, input2) + >>> out + tensor([0, 0, 1], dtype=oneflow.int8) + + """ + return Less()(x, y) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/less_equal.py b/oneflow/compatible_single_client_python/nn/modules/less_equal.py new file mode 100644 index 0000000000000000000000000000000000000000..57287dbf33bd01c96d2ae327cc8722ab7d8a7f10 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/less_equal.py @@ -0,0 +1,76 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class LessEqual(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + if x.dtype != flow.float32: + x = flow.experimental.cast(x, flow.float32) + if isinstance(y, int) or isinstance(y, float): + y = flow.Tensor( + [float(y)], dtype=flow.float32, device=flow.device(x.device.type) + ) + if y.dtype != flow.float32: + y = flow.experimental.cast(y, flow.float32) + return flow.F.broadcast_less_equal(x, y) + + +@oneflow_export("le") +@register_tensor_op("le") +@experimental_api +def less_equal_op(x, y): + r"""Returns the truth value of :math:`x <= y` element-wise. + + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor + y (oneflow.compatible.single_client.Tensor): A Tensor + + Returns: + oneflow.compatible.single_client.Tensor: A Tensor with int8 type. + + For example: + + .. 
code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> input1 = flow.Tensor(np.array([1, 2, 3]).astype(np.float32), dtype=flow.float32)
+        >>> input2 = flow.Tensor(np.array([1, 1, 4]).astype(np.float32), dtype=flow.float32)
+
+        >>> out = flow.le(input1, input2)
+        >>> out
+        tensor([1, 0, 1], dtype=oneflow.int8)
+
+    """
+    return LessEqual()(x, y)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/linear.py b/oneflow/compatible_single_client_python/nn/modules/linear.py
new file mode 100644
index 0000000000000000000000000000000000000000..28994db2349a35ae212b1672d58e22367651d488
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/linear.py
@@ -0,0 +1,143 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.framework.tensor import Tensor
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.nn.init import (
+    _calculate_fan_in_and_fan_out,
+)
+from typing import Optional, List, Tuple
+import math
+
+
+@oneflow_export("nn.Identity")
+@experimental_api
+class Identity(Module):
+    """A placeholder identity operator that is argument-insensitive.
+
+    Args:
+        args: any argument (unused)
+        kwargs: any keyword argument (unused)
+
+    For example:
+
+    .. code-block:: python
+
+        import numpy as np
+        import oneflow.compatible.single_client as flow
+
+        m = flow.nn.Identity()
+        input = flow.Tensor(np.random.rand(2, 3, 4, 5))
+
+        output = m(input)
+
+        # output = input
+
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__()
+
+    def forward(self, input: Tensor) -> Tensor:
+        return input
+
+
+@oneflow_export("nn.Linear")
+@experimental_api
+class Linear(Module):
+    r"""Applies a linear transformation to the incoming data: :math:`y = xA^T + b`
+
+    Args:
+
+    - in_features: size of each input sample
+
+    - out_features: size of each output sample
+
+    - bias: If set to ``False``, the layer will not learn an additive bias. Default: ``True``
+
+    Shape:
+        - Input: :math:`(N, *, H_{in})` where :math:`*` means any number of
+          additional dimensions and :math:`H_{in} = {in\_features}`
+
+        - Output: :math:`(N, *, H_{out})` where all but the last dimension
+          are the same shape as the input and :math:`H_{out} = {out\_features}`.
+
+    Attr:
+    - :attr:`weight`: the learnable weights of the module of shape :math:`({out\_features}, {in\_features})`. The values are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})`, where :math:`(k = 1 / {in\_features})`
+
+    - :attr:`bias`: the learnable bias of the module of shape :math:`({out\_features})`. 
If :attr:`bias` is ``True``, the values are initialized from :math:`\mathcal{U}(-\sqrt{k}, \sqrt{k})` where :math:`(k = 1 / {in\_features})` + + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + + >>> m = flow.nn.Linear(20, 30, False) + >>> input = flow.Tensor(np.random.randn(128, 20)) + >>> output = m(input) + >>> output.size() + flow.Size([128, 30]) + + """ + + def __init__(self, in_features: int, out_features: int, bias: bool = True) -> None: + super().__init__() + + self.use_bias = bias + self.weight = flow.nn.Parameter(flow.Tensor(out_features, in_features)) + self.bias = None + + if bias: + self.bias = flow.nn.Parameter(flow.Tensor(out_features)) + self.reset_parameters() + + def reset_parameters(self) -> None: + flow.nn.init.kaiming_uniform_(self.weight, a=math.sqrt(5)) + + if self.bias is not None: + fan_in, _ = _calculate_fan_in_and_fan_out(self.weight) + bound = 1 / math.sqrt(fan_in) + flow.nn.init.uniform_(self.bias, -bound, bound) + + def forward(self, x): + assert len(x.shape) >= 2, "Tensor x's dim should >=2" + + if len(x.shape) == 2: + res = flow.F.matmul(x, self.weight, transpose_a=False, transpose_b=True) + else: + res = flow.F.broadcast_matmul( + x, self.weight, transpose_a=False, transpose_b=True + ) + + if self.use_bias: + res += self.bias + + return res + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/log1p.py b/oneflow/compatible_single_client_python/nn/modules/log1p.py new file mode 100644 index 0000000000000000000000000000000000000000..76e0248686151c13a52d3e1cbd0f2570f363db4e --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/log1p.py @@ -0,0 +1,64 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Log1p(Module): + def __init__(self) -> None: + super().__init__() + self._op = flow.builtin_op("log1p").Input("x").Output("y").Build() + + def forward(self, x): + return self._op(x)[0] + + +@oneflow_export("log1p") +@register_tensor_op("log1p") +@experimental_api +def log1p_op(input): + r"""Returns a new tensor with the natural logarithm of (1 + input). + + .. math:: + \text{out}_{i}=\log_e(1+\text{input}_{i}) + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> x = flow.Tensor(np.array([1.3, 1.5, 2.7])) + >>> out = flow.log1p(x).numpy() + >>> out + array([0.8329091 , 0.91629076, 1.3083328 ], dtype=float32) + + """ + return Log1p()(input) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/loss.py b/oneflow/compatible_single_client_python/nn/modules/loss.py new file mode 100644 index 0000000000000000000000000000000000000000..0ba45f3cc4f247c219ba97575646be0bb9851722 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/loss.py @@ -0,0 +1,1115 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from typing import Optional + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework.tensor import Tensor +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.nn.modules.constant import _ConstantBase + + +@oneflow_export("nn.L1Loss") +@experimental_api +class L1Loss(Module): + r"""This operator computes the L1 Loss between each element in `input` and `target`. + + The equation is: + + if reduction = "none": + + .. math:: + + output = |Target - Input| + + if reduction = "mean": + + .. math:: + + output = \frac{1}{n}\sum_{i=1}^n|Target_i - Input_i| + + if reduction = "sum": + + .. math:: + + output = \sum_{i=1}^n|Target_i - Input_i| + + Args: + input (oneflow.compatible.single_client.experimental.Tensor): The input Tensor. + target (oneflow.compatible.single_client.experimental.Tensor): The target Tensor. + reduction (str): The reduce type, it can be one of "none", "mean", "sum". Defaults to "mean". + + Returns: + oneflow.compatible.single_client.experimental.Tensor: The result Tensor. + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor([[1, 1, 1], [2, 2, 2], [7, 7, 7]], dtype = flow.float32) + >>> target = flow.Tensor([[4, 4, 4], [4, 4, 4], [4, 4, 4]], dtype = flow.float32) + >>> m = flow.nn.L1Loss(reduction="none") + >>> out = m(input, target) + >>> out + tensor([[3., 3., 3.], + [2., 2., 2.], + [3., 3., 3.]], dtype=oneflow.float32) + >>> m_mean = flow.nn.L1Loss(reduction="mean") + >>> out = m_mean(input, target) + >>> out + tensor([2.6667], dtype=oneflow.float32) + >>> m_mean = flow.nn.L1Loss(reduction="sum") + >>> out = m_mean(input, target) + >>> out + tensor([24.], dtype=oneflow.float32) + """ + + def __init__(self, reduction: str = "mean", reduce=True) -> None: + super().__init__() + if reduce is not None and not reduce: + raise ValueError("Argument reduce is not supported yet") + assert reduction in [ + "none", + "mean", + "sum", + None, + ], "only 'sum', 'mean' and 'none' supported by now" + + self.reduction = reduction + + def forward(self, input, target): + assert ( + input.shape == target.shape + ), "The Input shape must be the same as Target shape" + + l1_value = flow.experimental.abs(flow.experimental.sub(input, target)) + if self.reduction == "mean": + return flow.experimental.mean(l1_value) + elif self.reduction == "sum": + return flow.experimental.sum(l1_value) + else: + return l1_value + + +@oneflow_export("nn.CrossEntropyLoss") +@experimental_api +class CrossEntropyLoss(Module): + r"""This criterion combines :class:`~flow.nn.LogSoftmax` and :class:`~flow.nn.NLLLoss` in one single class. + + It is useful when training a classification problem with `C` classes. + + The `input` is expected to contain raw, unnormalized scores for each class. + + `input` has to be a Tensor of size either :math:`(minibatch, C)` or + :math:`(minibatch, C, d_1, d_2, ..., d_K)` + with :math:`K \geq 1` for the `K`-dimensional case (described later). + + This criterion expects a class index in the range :math:`[0, C-1]` as the + `target` for each value of a 1D tensor of size `minibatch`; + + The loss can be described as: + + .. math:: + \text{loss}(x, class) = -\log\left(\frac{\exp(x[class])}{\sum_j \exp(x[j])}\right) + = -x[class] + \log\left(\sum_j \exp(x[j])\right) + + Can also be used for higher dimension inputs, such as 2D images, by providing + an input of size :math:`(minibatch, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1`, + where :math:`K` is the number of dimensions, and a target of appropriate shape + (see below). + + Args: + reduction (string, optional): Specifies the reduction to apply to the output: + ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will + be applied, ``'mean'``: the weighted mean of the output is taken, + ``'sum'``: the output will be summed. Default: ``'mean'`` + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor( + ... [[-0.1664078, -1.7256707, -0.14690138], + ... [-0.21474946, 0.53737473, 0.99684894], + ... 
[-1.135804, -0.50371903, 0.7645404]], dtype=flow.float32)
+        >>> target = flow.Tensor(np.array([0, 1, 2]), dtype=flow.int32)
+        >>> out = flow.nn.CrossEntropyLoss(reduction="none")(input, target)
+        >>> out
+        tensor([0.802 , 1.1167, 0.3583], dtype=oneflow.float32)
+        >>> out_sum = flow.nn.CrossEntropyLoss(reduction="sum")(input, target)
+        >>> out_sum
+        tensor([2.2769], dtype=oneflow.float32)
+        >>> out_mean = flow.nn.CrossEntropyLoss(reduction="mean")(input, target)
+        >>> out_mean
+        tensor([0.759], dtype=oneflow.float32)
+
+    """
+
+    def __init__(
+        self,
+        weight=None,
+        ignore_index: Optional[int] = None,
+        reduction: Optional[str] = "mean",
+    ) -> None:
+        super().__init__()
+        if weight is not None:
+            raise ValueError("Argument weight is not supported yet")
+        assert reduction in [
+            "sum",
+            "none",
+            "mean",
+            None,
+        ], "only 'sum', 'mean' and 'none' supported by now"
+
+        self.ignore_index = ignore_index
+        self.reduction = reduction
+
+    def forward(self, input, target):
+        assert len(input.shape) <= 4
+        assert len(target.shape) == len(input.shape) - 1
+        input_shape_len = len(input.shape)
+        if input_shape_len == 3:
+            b, c, h = input.shape[0], input.shape[1], input.shape[2]
+            input = flow.F.transpose(input, perm=(0, 2, 1))
+            input = input.reshape(shape=[-1, input.shape[2]])
+            target = target.flatten()
+        elif input_shape_len == 4:
+            b, c, h, w = input.shape[0], input.shape[1], input.shape[2], input.shape[3]
+            input = flow.F.transpose(input, perm=(0, 2, 3, 1))
+            input = input.reshape(shape=[-1, input.shape[3]])
+            target = target.flatten()
+        elif input_shape_len >= 5:
+            raise NotImplementedError
+
+        out = flow.F.sparse_softmax_cross_entropy(
+            input, target, depth=input.shape[len(input.shape) - 1]
+        )
+        if self.ignore_index is not None:
+            zeros = flow.experimental.zeros(
+                size=out.shape, dtype=out.dtype, device=out.device
+            )
+            condition = flow.experimental.eq(target, self.ignore_index)
+            ones = flow.experimental.ones(
+                size=condition.shape, dtype=condition.dtype, device=condition.device
+            )
+            condition = ones.sub(condition).reshape(tuple(out.shape))
+            out = flow.experimental.where(condition, out, zeros)
+            if self.reduction == "mean":
+                reduce_sum = out.sum()
+                reduce_count = condition.argwhere().shape[0]
+                out = flow.experimental.mul(reduce_sum, 1.0 / reduce_count)
+
+        if self.reduction == "mean":
+            return out.mean()
+        elif self.reduction == "sum":
+            return out.sum()
+        else:
+            if input_shape_len == 4:
+                out = out.reshape((b, h, w))
+            return out
+
+
+@oneflow_export("nn.BCELoss")
+@experimental_api
+class BCELoss(Module):
+    r"""This operator computes the binary cross entropy loss.
+
+    The equation is:
+
+    if reduction = "none":
+
+    .. math::
+
+        out = -(Target_i*log(Input_i) + (1-Target_i)*log(1-Input_i))
+
+    if reduction = "mean":
+
+    .. math::
+
+        out = -\frac{1}{n}\sum_{i=1}^n(Target_i*log(Input_i) + (1-Target_i)*log(1-Input_i))
+
+    if reduction = "sum":
+
+    .. math::
+
+        out = -\sum_{i=1}^n(Target_i*log(Input_i) + (1-Target_i)*log(1-Input_i))
+
+    Args:
+        weight (oneflow.compatible.single_client.experimental.Tensor, optional): The manual rescaling weight to the loss. Defaults to None, whose corresponding weight value is 1.
+        reduction (str, optional): The reduce type, it can be one of "none", "mean", "sum". Defaults to "mean".
+
+    Attention:
+        The input value must be in the range (0, 1); otherwise the loss function may return a `nan` value.
+
+    Returns:
+        oneflow.compatible.single_client.experimental.Tensor: The result Tensor.
+
+    For example:
+
+    .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor(np.array([[1.2, 0.2, -0.3], [0.7, 0.6, -2]]).astype(np.float32)) + >>> target = flow.Tensor(np.array([[0, 1, 0], [1, 0, 1]]).astype(np.float32)) + >>> weight = flow.Tensor(np.array([[2, 2, 2], [2, 2, 2]]).astype(np.float32)) + >>> activation = flow.nn.Sigmoid() + >>> sigmoid_input = activation(input) + >>> m = flow.nn.BCELoss(weight, reduction="none") + >>> out = m(sigmoid_input, target) + >>> out + tensor([[2.9266, 1.1963, 1.1087], + [0.8064, 2.075 , 4.2539]], dtype=oneflow.float32) + >>> m_sum = flow.nn.BCELoss(weight, reduction="sum") + >>> out = m_sum(sigmoid_input, target) + >>> out + tensor([12.3668], dtype=oneflow.float32) + >>> m_mean = flow.nn.BCELoss(weight, reduction="mean") + >>> out = m_mean(sigmoid_input, target) + >>> out + tensor([2.0611], dtype=oneflow.float32) + >>> m_none = flow.nn.BCELoss() + >>> out = m_none(sigmoid_input, target) + >>> out + tensor([1.0306], dtype=oneflow.float32) + + """ + + def __init__(self, weight: Tensor = None, reduction: str = "mean") -> None: + super().__init__() + assert reduction in [ + "none", + "sum", + "mean", + None, + ], "only 'sum', 'mean' and 'none' supported by now" + + self.weight = weight + self.reduction = reduction + + def forward(self, input, target): + assert ( + input.shape == target.shape + ), "The Input shape must be the same as Target shape" + + _cross_entropy_loss = flow.experimental.negative( + target * flow.experimental.log(input) + + (1 - target) * flow.experimental.log(1 - input) + ) + + if self.weight is not None: + assert ( + self.weight.shape == input.shape + ), "The weight shape must be the same as Input shape" + _weighted_loss = self.weight * _cross_entropy_loss + else: + _weighted_loss = _cross_entropy_loss + + if self.reduction == "mean": + return flow.experimental.mean(_weighted_loss) + elif self.reduction == "sum": + return flow.experimental.sum(_weighted_loss) + else: + return _weighted_loss + + +@oneflow_export("nn.NLLLoss") +@experimental_api +class NLLLoss(Module): + r""" The negative log likelihood loss. It is useful to train a classification + problem with `C` classes. + + The `input` given through a forward call is expected to contain + log-probabilities of each class. `input` has to be a Tensor of size either + :math:`(minibatch, C)` or :math:`(minibatch, C, d_1, d_2, ..., d_K)` + with :math:`K \geq 1` for the `K`-dimensional case (described later). + + Obtaining log-probabilities in a neural network is easily achieved by + adding a `LogSoftmax` layer in the last layer of your network. + You may use `CrossEntropyLoss` instead, if you prefer not to add an extra + layer. + + The `target` that this loss expects should be a class index in the range :math:`[0, C-1]` + where `C = number of classes`; + + The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as: + + .. math:: + \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad + l_n = - w_{y_n} x_{n,y_n}, \quad + w_{c} = \mathbb{1}, + + where :math:`x` is the input, :math:`y` is the target, :math:`w` is the weight, and + :math:`N` is the batch size. If :attr:`reduction` is not ``'none'`` + (default ``'mean'``), then + + .. 
math:: + \ell(x, y) = \begin{cases} + \sum_{n=1}^N \frac{1}{N} l_n, & + \text{if reduction} = \text{`mean';}\\ + \sum_{n=1}^N l_n, & + \text{if reduction} = \text{`sum'.} + \end{cases} + + Can also be used for higher dimension inputs, such as 2D images, by providing + an input of size :math:`(minibatch, C, d_1, d_2, ..., d_K)` with :math:`K \geq 1`, + where :math:`K` is the number of dimensions, and a target of appropriate shape + (see below). In the case of images, it computes NLL loss per-pixel. + + Args: + reduction (string, optional): Specifies the reduction to apply to the output: + ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will + be applied, ``'mean'``: the weighted mean of the output is taken, + ``'sum'``: the output will be summed. Default: ``'mean'`` + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + >>> import numpy as np + + >>> input = flow.Tensor( + ... [[-0.1664078, -1.7256707, -0.14690138], + ... [-0.21474946, 0.53737473, 0.99684894], + ... [-1.135804, -0.50371903, 0.7645404]], dtype=flow.float32) + >>> target = flow.Tensor(np.array([0, 1, 2]), dtype=flow.int32) + >>> m = flow.nn.NLLLoss(reduction="none") + >>> out = m(input, target) + >>> out + tensor([ 0.1664, -0.5374, -0.7645], dtype=oneflow.float32) + + >>> m = flow.nn.NLLLoss(reduction="sum") + >>> out = m(input, target) + >>> out + tensor([-1.1355], dtype=oneflow.float32) + + >>> m = flow.nn.NLLLoss(reduction="mean") + >>> out = m(input, target) + >>> out + tensor([-0.3785], dtype=oneflow.float32) + + """ + + def __init__( + self, weight=None, ignore_index: Optional[int] = None, reduction: str = "mean", + ) -> None: + super().__init__() + if weight is not None: + raise ValueError("Argument weight is not supported yet") + assert reduction in [ + "sum", + "none", + "mean", + None, + ], "only 'sum', 'mean' and None supported by now" + + self.ignore_index = ignore_index + self.reduction = reduction + + def nllloss_1d(self, input, target): + target = flow.F.reshape(target, shape=(target.shape[0], 1)) + res = flow.F.dim_gather(input, target, dim=1) + res = flow.F.squeeze(res, dim=[1]) + return res + + def forward(self, input, target): + assert len(input.shape) <= 4 + assert len(target.shape) == len(input.shape) - 1 + input = input.negative() + if len(input.shape) == 2: + res = self.nllloss_1d(input, target) + elif len(input.shape) == 3: + b, c, h = input.shape[0], input.shape[1], input.shape[2] + input = flow.F.transpose(input, perm=(0, 2, 1)) + input = input.reshape(shape=[-1, input.shape[2]]) + target = target.flatten() + res = self.nllloss_1d(input, target) + res = res.reshape((b, h)) + elif len(input.shape) == 4: + b, c, h, w = input.shape[0], input.shape[1], input.shape[2], input.shape[3] + input = flow.F.transpose(input, perm=(0, 2, 3, 1)) + input = input.reshape(shape=[-1, input.shape[3]]) + target = target.flatten() + res = self.nllloss_1d(input, target) + res = res.reshape((b, h, w)) + else: + raise NotImplementedError + + if self.ignore_index is not None: + zeros = flow.experimental.zeros( + size=res.shape, dtype=res.dtype, device=res.device + ) + condition = flow.experimental.eq(target, self.ignore_index) + ones = flow.experimental.ones( + size=condition.shape, dtype=condition.dtype, device=condition.device + ) + condition = ones.sub(condition).reshape(tuple(res.shape)) + res = flow.experimental.where(condition, res, zeros)
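+ # With ignore_index set, masked entries contribute zero to the sum, so + # the mean below is taken over the number of kept targets (argwhere + # counts the nonzero mask entries) rather than the full batch. + if self.reduction == "mean": + res = res.sum() + reduce_count = 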
condition.argwhere().shape[0] + res = flow.experimental.mul(res, 1.0 / reduce_count) + + if self.reduction == "none": + return res + elif self.reduction == "sum": + return res.sum() + else: + return res.mean() + + +@oneflow_export("nn.KLDivLoss") +@experimental_api +class KLDivLoss(Module): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: + https://pytorch.org/docs/stable/generated/torch.nn.KLDivLoss.html?highlight=kldivloss#torch.nn.KLDivLoss + + The Kullback-Leibler divergence loss measure + + `Kullback-Leibler divergence`_ is a useful distance measure for continuous + distributions and is often useful when performing direct regression over + the space of (discretely sampled) continuous output distributions. + + As with :class:`~torch.nn.NLLLoss`, the `input` given is expected to contain + *log-probabilities* and is not restricted to a 2D Tensor. + The targets are interpreted as *probabilities* by default, but could be considered + as *log-probabilities* with :attr:`log_target` set to ``True``. + + This criterion expects a `target` `Tensor` of the same size as the + `input` `Tensor`. + + The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as: + + .. math:: + l(x,y) = L = \{ l_1,\dots,l_N \}, \quad + l_n = y_n \cdot \left( \log y_n - x_n \right) + + where the index :math:`N` spans all dimensions of ``input`` and :math:`L` has the same + shape as ``input``. If :attr:`reduction` is not ``'none'`` (default ``'mean'``), then: + + .. math:: + \ell(x, y) = \begin{cases} + \operatorname{mean}(L), & \text{if reduction} = \text{`mean';} \\ + \operatorname{sum}(L), & \text{if reduction} = \text{`sum'.} + \end{cases} + + In default :attr:`reduction` mode ``'mean'``, the losses are averaged for each minibatch over observations + **as well as** over dimensions. ``'batchmean'`` mode gives the correct KL divergence where losses + are averaged over batch dimension only. ``'mean'`` mode's behavior will be changed to the same as + ``'batchmean'`` in the next major release. + + .. _`kullback-leibler divergence`: https://en.wikipedia.org/wiki/Kullback-Leibler_divergence + + Args: + reduction (string, optional): Specifies the reduction to apply to the output: + ``'none'`` | ``'batchmean'`` | ``'sum'`` | ``'mean'``. + ``'none'``: no reduction will be applied. + ``'batchmean'``: the sum of the output will be divided by batchsize. + (``'batchmean'`` is documented here for parity with PyTorch but is + not accepted by this module yet.) + ``'sum'``: the output will be summed. + ``'mean'``: the output will be divided by the number of elements in the output. + Default: ``'mean'`` + log_target (bool, optional): Specifies whether `target` is passed in the log space. + Default: ``False`` + + .. note:: + :attr:`reduction` = ``'mean'`` doesn't return the true kl divergence value, please use + :attr:`reduction` = ``'batchmean'`` which aligns with KL math definition. + In the next major release, ``'mean'`` will be changed to be the same as ``'batchmean'``. + + Shape: + - Input: :math:`(N, *)` where :math:`*` means any number of additional + dimensions + - Target: :math:`(N, *)`, same shape as the input + - Output: scalar by default. If :attr:`reduction` is ``'none'``, then :math:`(N, *)`, + the same shape as the input + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor([-0.9021705, 0.08798598, 1.04686249], dtype=flow.float32) + >>> target = flow.Tensor([1.22386942, -0.89729659, 0.01615712], dtype=flow.float32) + >>> m = flow.nn.KLDivLoss(reduction="none", log_target=False) + >>> out = m(input, target) + >>> out + tensor([ 1.3514, 0. , -0.0836], dtype=oneflow.float32) + >>> m = flow.nn.KLDivLoss(reduction="mean", log_target=False) + >>> out = m(input, target) + >>> out + tensor([0.4226], dtype=oneflow.float32) + >>> m = flow.nn.KLDivLoss(reduction="sum", log_target=True) + >>> out = m(input, target) + >>> out + tensor([5.7801], dtype=oneflow.float32) + + """ + + def __init__(self, reduction: str = "mean", log_target: bool = False,) -> None: + super().__init__() + assert reduction in [ + "sum", + "none", + "mean", + None, + ], "Argument reduction only support 'sum'/'mean'/'none'/None for now!" + self.reduction = reduction + self.log_target = log_target + + def forward(self, input: Tensor, target: Tensor) -> Tensor: + if self.log_target: + _kl_div_loss = flow.experimental.exp(target) * (target - input) + else: + _kl_div_out_loss = target * (flow.experimental.log(target) - input) + _zeros = flow.experimental.zeros( + size=_kl_div_out_loss.shape, + dtype=_kl_div_out_loss.dtype, + device=_kl_div_out_loss.device, + ) + # when target < 0, we set to `0`, when target > 0, we set to `1`. + _condition = flow.experimental.gt(target, 0) + # To avoid the `nan` value in log operation + # We set those positions which `target` is less than zero as `0` + _kl_div_loss = flow.experimental.where(_condition, _kl_div_out_loss, _zeros) + + if self.reduction == "mean": + return flow.experimental.mean(_kl_div_loss) + elif self.reduction == "sum": + return flow.experimental.sum(_kl_div_loss) + else: + return _kl_div_loss + + +@oneflow_export("nn.MSELoss") +@experimental_api +class MSELoss(Module): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: + https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html?highlight=mseloss#torch.nn.MSELoss + + Creates a criterion that measures the mean squared error (squared L2 norm) between + each element in the input :math:`x` and target :math:`y`. + + The unreduced (i.e. with :attr:`reduction` set to ``'none'``) loss can be described as: + + .. math:: + \ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad + l_n = \left( x_n - y_n \right)^2, + + where :math:`N` is the batch size. If :attr:`reduction` is not ``'none'`` + (default ``'mean'``), then: + + .. math:: + \ell(x, y) = + \begin{cases} + \operatorname{mean}(L), & \text{if reduction} = \text{`mean';}\\ + \operatorname{sum}(L), & \text{if reduction} = \text{`sum'.} + \end{cases} + + :math:`x` and :math:`y` are tensors of arbitrary shapes with a total + of :math:`n` elements each. + + The mean operation still operates over all the elements, and divides by :math:`n`. + + The division by :math:`n` can be avoided if one sets ``reduction = 'sum'``. + + Args: + reduction (string, optional): Specifies the reduction to apply to the output: + ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, + ``'mean'``: the sum of the output will be divided by the number of + elements in the output, ``'sum'``: the output will be summed. 
Default: ``'mean'`` + + Shape: + - Input: :math:`(N, *)` where :math:`*` means any number of additional + dimensions + - Target: :math:`(N, *)`, same shape as the input + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor( + ... [[-0.02557137, 0.03101675, 1.37493674], + ... [0.25599439, -1.08372561, -0.21006816]], dtype=flow.float32) + >>> target = flow.Tensor( + ... [[-1.53105064, -0.68137555, 0.5931354], + ... [-0.49158347, 0.93673637, 0.1324141]], dtype=flow.float32) + >>> m = flow.nn.MSELoss(reduction="none") + >>> out = m(input, target) + >>> out + tensor([[2.2665, 0.5075, 0.6112], + [0.5589, 4.0823, 0.1173]], dtype=oneflow.float32) + >>> m = flow.nn.MSELoss(reduction="mean") + >>> out = m(input, target) + >>> out + tensor([1.3573], dtype=oneflow.float32) + >>> m = flow.nn.MSELoss(reduction="sum") + >>> out = m(input, target) + >>> out + tensor([8.1436], dtype=oneflow.float32) + + """ + + def __init__(self, reduction: str = "mean") -> None: + super().__init__() + assert reduction in [ + "sum", + "none", + "mean", + None, + ], "Argument reduction only support 'sum'/'mean'/'none'/None for now!" + + self.reduction = reduction + + def forward(self, input: Tensor, target: Tensor) -> Tensor: + mean_squared_difference = flow.experimental.square( + flow.experimental.sub(input, target) + ) + if self.reduction == "mean": + return flow.experimental.mean(mean_squared_difference) + elif self.reduction == "sum": + return flow.experimental.sum(mean_squared_difference) + else: + # no reduction + return mean_squared_difference + + +@oneflow_export("nn.MarginRankingLoss") +@experimental_api +class MarginRankingLoss(Module): + r"""Creates a criterion that measures the loss given + inputs :math:`x1`, :math:`x2`, two 1D mini-batch `Tensors`, + and a label 1D mini-batch tensor :math:`y` (containing 1 or -1). + + If :math:`y = 1` then it is assumed the first input should be ranked higher + (have a larger value) than the second input, and vice-versa for :math:`y = -1`. + + The loss function for each sample in the mini-batch is: + + .. math:: + \text{loss}(x1, x2, y) = \max(0, -y * (x1 - x2) + \text{margin}) + + Args: + margin (float, optional): Has a default value of :math:`0`. + reduction (string, optional): Specifies the reduction to apply to the output: + ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, + ``'mean'``: the sum of the output will be divided by the number of + elements in the output, ``'sum'``: the output will be summed. Default: ``'mean'`` + + Shape: + - `x1` : :math:`(N, D)` where `N` is the batch size and `D` is the size of a sample. + - `x2` : :math:`(N, D)` where `N` is the batch size and `D` is the size of a sample. + - Target: :math:`(N)` + - Output: scalar. If :attr:`reduction` is ``'none'``, then :math:`(N)`. + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + >>> import numpy as np + + >>> x1 = flow.Tensor(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), dtype=flow.float32) + >>> x2 = flow.Tensor(np.array([[2, 2, 2], [2, 2, 2], [2, 2, 2]]), dtype=flow.float32) + >>> target = flow.Tensor(np.array([[1, -1, 1],[-1, 1, -1], [1, 1, 1]]), dtype=flow.float32) + >>> m = flow.nn.MarginRankingLoss(margin=1.0, reduction="none") + >>> out = m(x1, x2, target) + >>> out + tensor([[2., 1., 0.], + [3., 0., 5.], + [0., 0., 0.]], dtype=oneflow.float32) + + >>> m = flow.nn.MarginRankingLoss(margin=0.3, reduction="sum") + >>> out = m(x1, x2, target) + >>> out + tensor([8.2], dtype=oneflow.float32) + + >>> m = flow.nn.MarginRankingLoss(margin=10, reduction="mean") + >>> out = m(x1, x2, target) + >>> out + tensor([8.3333], dtype=oneflow.float32) + + + """ + + def __init__(self, margin=0.0, reduction: str = "mean") -> None: + super().__init__() + self.margin = margin + assert reduction in [ + "sum", + "none", + "mean", + None, + ], "only 'sum', 'mean' and None supported by now" + + self.reduction = reduction + + def forward(self, input1, input2, target): + res = flow.experimental.clip( + flow.experimental.add( + self.margin, + flow.experimental.mul( + target, + flow.experimental.mul(-1, flow.experimental.sub(input1, input2)), + ), + ), + min=0.0, + ) + + if self.reduction == "none": + return res + elif self.reduction == "sum": + return res.sum() + else: + return res.mean() + + +@oneflow_export("nn.CTCLoss") +@experimental_api +class CTCLoss(Module): + r"""The Connectionist Temporal Classification loss. + The interface is consistent with PyTorch. + The documentation is referenced from: + https://pytorch.org/docs/stable/generated/torch.nn.CTCLoss.html#torch.nn.CTCLoss + + Calculates loss between a continuous (unsegmented) time series and a target sequence. CTCLoss sums over the + probability of possible alignments of input to target, producing a loss value which is differentiable + with respect to each input node. The alignment of input to target is assumed to be "many-to-one", which + limits the length of the target sequence such that it must be :math:`\leq` the input length. + + Args: + blank (int, optional): blank label. Default :math:`0`. + reduction (string, optional): Specifies the reduction to apply to the output: + ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be applied, + ``'mean'``: the output losses will be divided by the target lengths and + then the mean over the batch is taken. Default: ``'mean'`` + zero_infinity (bool, optional): + Whether to zero infinite losses and the associated gradients. + Default: ``False`` + Infinite losses mainly occur when the inputs are too short + to be aligned to the targets. + + Shape: + - Log_probs: Tensor of size :math:`(T, N, C)`, + where :math:`T = \text{input length}`, + :math:`N = \text{batch size}`, and + :math:`C = \text{number of classes (including blank)}`. + - Targets: Tensor of size :math:`(N, S)` or + :math:`(\operatorname{sum}(\text{target\_lengths}))`, + where :math:`N = \text{batch size}` and + :math:`S = \text{max target length, if shape is } (N, S)`. + It represents the target sequences. Each element in the target + sequence is a class index. The target index cannot be blank (default=0). + In the :math:`(N, S)` form, targets are padded to the + length of the longest sequence, and stacked. 
+ In the :math:`(\operatorname{sum}(\text{target\_lengths}))` form, + the targets are assumed to be un-padded and + concatenated within 1 dimension. + - Input_lengths: Tuple or tensor of size :math:`(N)`, + where :math:`N = \text{batch size}`. They represent the lengths of the + inputs (each must be :math:`\leq T`). The lengths are specified + for each sequence to achieve masking under the assumption that sequences + are padded to equal lengths. + - Target_lengths: Tuple or tensor of size :math:`(N)`, + where :math:`N = \text{batch size}`. They represent the lengths of the targets. + Lengths are specified for each sequence to achieve masking under the + assumption that sequences are padded to equal lengths. If target shape is + :math:`(N,S)`, target_lengths are effectively the stop index + :math:`s_n` for each target sequence, such that ``target_n = targets[n,0:s_n]`` for + each target in a batch. Lengths must each be :math:`\leq S`. + If the targets are given as a 1d tensor that is the concatenation of individual + targets, the target_lengths must add up to the total length of the tensor. + + Reference: + A. Graves et al.: Connectionist Temporal Classification: + Labelling Unsegmented Sequence Data with Recurrent Neural Networks: + https://www.cs.toronto.edu/~graves/icml_2006.pdf + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + >>> import numpy as np + >>> log_probs = np.array( + ... [ + ... [[-1.1031, -0.7998, -1.5200], [-0.9808, -1.1363, -1.1908]], + ... [[-1.2258, -1.0665, -1.0153], [-1.1135, -1.2331, -0.9671]], + ... [[-1.3348, -0.6611, -1.5118], [-0.9823, -1.2355, -1.0941]], + ... [[-1.3850, -1.3273, -0.7247], [-0.8235, -1.4783, -1.0994]], + ... [[-0.9049, -0.8867, -1.6962], [-1.4938, -1.3630, -0.6547]], + ... ] + ... 
).astype(np.float32) + >>> log_probs = flow.Tensor(log_probs, dtype=flow.float32) + >>> targets = flow.Tensor(np.array([[1, 2, 2], [1, 2, 2]]).astype("int32"), dtype=flow.int32) + >>> input_lengths = flow.Tensor(np.array([5, 5]).astype("int32"), dtype=flow.int32) + >>> target_lengths = flow.Tensor(np.array([3, 3]).astype("int32"), dtype=flow.int32) + >>> loss_mean = flow.nn.CTCLoss() + >>> out = loss_mean(log_probs, targets, input_lengths, target_lengths) + >>> out + tensor([1.1376], dtype=oneflow.float32) + >>> loss_sum = flow.nn.CTCLoss(blank=0, reduction="sum") + >>> out = loss_sum(log_probs, targets, input_lengths, target_lengths) + >>> out + tensor([6.8257], dtype=oneflow.float32) + >>> + + """ + + def __init__( + self, blank: int = 0, reduction: str = "mean", zero_infinity: bool = False, + ) -> None: + super().__init__() + assert reduction in [ + "sum", + "none", + "mean", + None, + ], "only 'sum', 'mean' and None supported by now" + + self.reduction = reduction + self.zero_infinity = zero_infinity + + self._op = ( + flow.builtin_op("ctc_loss") + .Input("log_probs") + .Input("targets") + .Input("input_lengths") + .Input("target_lengths") + .Output("loss") + .Output("alpha") + .Attr("blank", int(blank)) + .Attr("zero_infinity", zero_infinity) + .Build() + ) + self._xdivy_op = ( + flow.builtin_op("xdivy").Input("x").Input("y").Output("z").Build() + ) + self.constant = _ConstantBase + + def forward( + self, + log_probs: Tensor, + targets: Tensor, + input_lengths: Tensor, + target_lengths: Tensor, + ) -> Tensor: + loss, _ = self._op(log_probs, targets, input_lengths, target_lengths) + if self.zero_infinity: + cond = flow.experimental.eq( + loss, + self.constant( + size=loss.shape, + value=float("inf"), + dtype=loss.dtype, + device=loss.device, + )(), + ) + loss = flow.experimental.where( + cond, + flow.experimental.zeros( + size=loss.shape, dtype=loss.dtype, device=loss.device + ), + loss, + ) + + if self.reduction == "mean": + + return flow.experimental.mean( + self._xdivy_op( + loss, + flow.experimental.cast( + flow.experimental.clamp(target_lengths, min=1), + dtype=log_probs.dtype, + ), + )[0] + ) + elif self.reduction == "sum": + return flow.experimental.sum(loss) + else: + return loss + + +@oneflow_export("nn.BCEWithLogitsLoss") +@experimental_api +class BCEWithLogitsLoss(Module): + r"""This operator combines the `Sigmoid` and `BCELoss` together. For numerical stability, + we apply some math tricks instead of using `Sigmoid` layer with `BCELoss`. + + The equation is: + + if :attr:`reduction` = ``"none"``: + + .. math:: + + out = -weight*[Pos\_weight*y*log\sigma({x}) + (1-y)*log(1-\sigma(x))] + + if :attr:`reduction` = ``"mean"``: + + .. math:: + + out = -\frac{weight}{n}\sum_{i=1}^n[Pos\_weight*y*log\sigma({x}) + (1-y)*log(1-\sigma(x))] + + if :attr:`reduction` = ``"sum"``: + + .. math:: + + out = -weight*\sum_{i=1}^n[Pos\_weight*y*log\sigma({x}) + (1-y)*log(1-\sigma(x))] + + Args: + weight (Tensor, optional): The manual rescaling weight to the loss. Default: ``None`` + size_average (bool, optional) – Deprecated (see :attr:`reduction`). Default: ``True`` + reduce (bool, optional) – Deprecated (see :attr:`reduction`). Default: ``True`` + reduction (str, optional): The reduce type, it can be one of ``"none"``, ``"mean"``, ``"sum"``. + ``'none'``: no reduction will be applied, ``'mean'``: the sum of the output will be divided + by the number of elements in the output, ``'sum'``: the output will be summed. 
Default: ``"mean"`` + pos_weight (Tensor, optional): The manual rescaling weight to the positive examples. + Default: ``None`` + + Shape: + - Input: :math:`(N,*)` where `*` means, any number of additional dimensions + - Target: :math:`(N,*)`, same shape as the input + - Output: scalar. If :attr:`reduction` is ``"none"``, then :math:`(N,*)`, same shape as input. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + >>> import oneflow.compatible.single_client.typing as tp + + >>> input = flow.Tensor([[1.2, 0.2, -0.3], [0.7, 0.6, -2], [0.7, 0.6, -2]], dtype=flow.float32) + >>> target = flow.Tensor([[0, 1, 0], [1, 0, 1], [1, 0, 1]], dtype=flow.float32) + >>> weight = flow.Tensor([[2, 2, 2], [2, 2, 2], [2, 2, 2]], dtype=flow.float32) + >>> pos_weight = flow.Tensor([1.2, 1.3, 1.4], dtype=flow.float32) + + >>> m = flow.nn.BCEWithLogitsLoss(weight=weight, pos_weight=pos_weight, reduction="none") + >>> out = m(input, target) + >>> out + tensor([[2.9266, 1.5552, 1.1087], + [0.9676, 2.075 , 5.9554], + [0.9676, 2.075 , 5.9554]], dtype=oneflow.float32) + + >>> m = flow.nn.BCEWithLogitsLoss(weight=weight, pos_weight=pos_weight, reduction="mean") + >>> out = m(input, target) + >>> out + tensor([2.6207], dtype=oneflow.float32) + + >>> m = flow.nn.BCEWithLogitsLoss(weight=weight, pos_weight=pos_weight, reduction="sum") + >>> out = m(input, target) + >>> out + tensor([23.5865], dtype=oneflow.float32) + + + """ + + def __init__( + self, + weight=None, + size_average: bool = True, + reduce: bool = True, + reduction: Optional[str] = "mean", + pos_weight=None, + ) -> None: + super().__init__() + assert reduction in [ + "sum", + "none", + "mean", + None, + ], "only 'sum', 'mean' and None supported by now" + + self.weight = weight + self.size_average = size_average + self.reduce = reduce + self.reduction = reduction + self.pos_weight = pos_weight + + def forward(self, input, target): + if not (target.shape == input.shape): + raise ValueError( + "Target size ({}) must be the same as input size ({})".format( + target.size(), input.size() + ) + ) + + _neg_input = flow.experimental.negative(input) + _max_val = flow.experimental.clip(_neg_input, 0) + _neg_max_val = flow.experimental.negative(_max_val) + + if self.pos_weight: + _log_weight = ((self.pos_weight - 1) * target) + 1 + _loss = (1 - target) * input + _log_weight * ( + flow.experimental.log( + flow.experimental.exp(_neg_max_val) + + flow.experimental.exp(_neg_input - _max_val) + ) + + _max_val + ) + else: + _loss = (1 - target) * input + _max_val + _loss += flow.experimental.log( + flow.experimental.exp(_neg_max_val) + + flow.experimental.exp(_neg_input - _max_val) + ) + + if self.weight is not None: + assert ( + self.weight.shape == input.shape + ), "The weight shape must be the same as Input shape" + _weighted_loss = self.weight * _loss + else: + _weighted_loss = _loss + + if self.reduction == "mean": + return flow.experimental.mean(_weighted_loss) + elif self.reduction == "sum": + return flow.experimental.sum(_weighted_loss) + else: + # Do no reduction + return _weighted_loss + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/masked_fill.py b/oneflow/compatible_single_client_python/nn/modules/masked_fill.py new file mode 100644 index 0000000000000000000000000000000000000000..155d69ef0524301e16ad24ce196f852826c77722 --- /dev/null +++ 
b/oneflow/compatible_single_client_python/nn/modules/masked_fill.py @@ -0,0 +1,84 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class MaskedFill(Module): + def __init__(self, value) -> None: + super().__init__() + self.value = value + + def forward(self, input, mask): + in_shape = tuple(input.shape) + value_like_x = flow.Tensor(*in_shape, device=input.device) + value_like_x.fill_(self.value) + return flow.F.where(mask, value_like_x, input) + + +@oneflow_export("masked_fill") +@register_tensor_op("masked_fill") +@experimental_api +def masked_fill_op(tensor, mask, value): + r""" + Fills elements of :attr:`self` tensor with :attr:`value` where :attr:`mask` is True. + The shape of :attr:`mask` must be broadcastable with the shape of the underlying tensor. + + Args: + mask (BoolTensor): the boolean mask + value (float): the value to fill in with + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> in_arr = np.array( + ... [[[-0.13169311, 0.97277078, 1.23305363, 1.56752789], + ... [-1.51954275, 1.87629473, -0.53301206, 0.53006478], + ... [-1.38244183, -2.63448052, 1.30845795, -0.67144869]], + ... [[ 0.41502161, 0.14452418, 0.38968 , -1.76905653], + ... [ 0.34675095, -0.7050969 , -0.7647731 , -0.73233418], + ... [-1.90089858, 0.01262963, 0.74693893, 0.57132389]]] + ... ) + >>> fill_value = 8.7654321 # random value e.g. -1e9 3.1415 + >>> input = flow.Tensor(in_arr, dtype=flow.float32) + >>> mask = flow.Tensor((in_arr > 0).astype(np.int8), dtype=flow.int) + >>> output = flow.masked_fill(input, mask, fill_value) + + # tensor([[[-0.1317, 8.7654, 8.7654, 8.7654], + # [-1.5195, 8.7654, -0.533 , 8.7654], + # [-1.3824, -2.6345, 8.7654, -0.6714]], + + # [[ 8.7654, 8.7654, 8.7654, -1.7691], + # [ 8.7654, -0.7051, -0.7648, -0.7323], + # [-1.9009, 8.7654, 8.7654, 8.7654]]], dtype=oneflow.float32) + + """ + return MaskedFill(value)(tensor, mask) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/masked_select.py b/oneflow/compatible_single_client_python/nn/modules/masked_select.py new file mode 100644 index 0000000000000000000000000000000000000000..98c856f38a19dab44947130732f9403a91a7c294 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/masked_select.py @@ -0,0 +1,114 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.ops.array_ops import ( + argwhere, + gather, + gather_nd, +) + + +class MaskedSelect(Module): + def __init__(self): + super().__init__() + + def forward(self, x, mask): + assert len(x.shape) == len( + mask.shape + ), "The number of dims of masked_select module's inputs does not match, please check!" + broadcast_like_shape = [] + broadcast_x_axes = [] + broadcast_mask_axes = [] + for i in range(len(x.shape)): + max_dim = max(x.shape[i], mask.shape[i]) + broadcast_like_shape.append(max_dim) + if max_dim != x.shape[i]: + broadcast_x_axes.append(i) + if max_dim != mask.shape[i]: + broadcast_mask_axes.append(i) + broadcast_like_tensor = flow.experimental.zeros( + tuple(broadcast_like_shape), dtype=flow.float32, device=x.device, + ) + broadcast_like_tensor.requires_grad = x.requires_grad or mask.requires_grad + if len(broadcast_x_axes) != 0: + x = flow.experimental.broadcast_like( + x, broadcast_like_tensor, broadcast_axes=tuple(broadcast_x_axes) + ) + + if len(broadcast_mask_axes) != 0: + mask = flow.experimental.broadcast_like( + mask, broadcast_like_tensor, broadcast_axes=tuple(broadcast_mask_axes) + ) + mask = mask.to(dtype=x.dtype) + + res = flow.F.mul(x, mask) + indices = flow.experimental.argwhere(res) + gather_res = flow.F.gather_nd(res, indices) + return gather_res.flatten() + + +@oneflow_export("masked_select") +@experimental_api +def masked_select_op(x, mask): + r""" + + Returns a new 1-D tensor which indexes the input tensor according to the boolean mask `mask`, which is a BoolTensor (in OneFlow, BoolTensor is replaced by Int8Tensor). + + The shapes of the mask tensor and the input tensor don’t need to match, but they must be broadcastable. + + Args: + input (Tensor): the input tensor. + mask (Tensor): the tensor containing the binary mask to index with
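+ + Conceptually this mirrors boolean indexing in NumPy (a sketch, with + ``x`` and ``mask`` as NumPy arrays; it is not executed as a doctest): + + .. code-block:: python + + import numpy as np + + x_b, mask_b = np.broadcast_arrays(x, mask) + out = x_b[mask_b.astype(bool)] + + For example: + + .. 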
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.array([[-0.4620, 0.3139], [0.3898, -0.7197], [0.0478, -0.1657]]), dtype=flow.float32) + >>> mask = x.gt(0.05) + >>> out = flow.masked_select(x, mask) + >>> out + tensor([0.3139, 0.3898], dtype=oneflow.float32) + """ + return MaskedSelect()(x, mask) + + +@register_tensor_op("masked_select") +@experimental_api +def tensor_masked_select_op(x, mask): + r""" + + See :func:`oneflow.compatible.single_client.experimental.masked_select` + + """ + return MaskedSelect()(x, mask) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/math_ops.py b/oneflow/compatible_single_client_python/nn/modules/math_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..a65a18ca395ce578a152a3028f7c7e3fed3d6591 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/math_ops.py @@ -0,0 +1,1704 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import collections +from typing import Optional, Sequence, Union + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.nn.modules.utils import _check_axis +from oneflow.compatible_single_client_python.ops.transpose_util import ( + get_perm_when_transpose_axis_to_last_dim, + get_inversed_perm, +) + + +class ScalarMul(Module): + def __init__(self, alpha) -> None: + super().__init__() + if not isinstance(alpha, (int, float)): + raise ValueError("alpha type can only be int or float") + self.alpha = alpha + + def forward(self, x): + return flow.F.mul_scalar(x, self.alpha) + + +class ScalarMulByTensor(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + return flow.F.mul_scalar_by_tensor(x, y) + + +class ElementwiseMul(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + return flow.F.mul(x, y) + + +class BroadcastMul(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + return flow.F.broadcast_mul(x, y) + + +@oneflow_export("mul") +@register_tensor_op("mul") +@experimental_api +def _mul(x, y): + r"""Computes the multiplication of x by y for each element, scalar and broadcast promotion are supported. + + The formula is: + + .. math:: + out = x \times y + + For example: + + .. 
code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + # element-wise multiply + >>> x = flow.Tensor(np.random.randn(2,3)) + >>> y = flow.Tensor(np.random.randn(2,3)) + >>> out = flow.mul(x,y).numpy() + >>> out.shape + (2, 3) + + # scalar multiply + >>> x = 5 + >>> y = flow.Tensor(np.random.randn(2,3)) + >>> out = flow.mul(x,y).numpy() + >>> out.shape + (2, 3) + + # broadcast multiply + >>> x = flow.Tensor(np.random.randn(1,1)) + >>> y = flow.Tensor(np.random.randn(2,3)) + >>> out = flow.mul(x,y).numpy() + >>> out.shape + (2, 3) + + """ + + if isinstance(x, (int, float)): + return ScalarMul(x)(y) + elif isinstance(y, (int, float)): + return ScalarMul(y)(x) + elif x.shape == y.shape: + return ElementwiseMul()(x, y) + elif x.shape == (1,): + return ScalarMulByTensor()(y, x) + elif y.shape == (1,): + return ScalarMulByTensor()(x, y) + else: + return BroadcastMul()(x, y) + + +class Variance(Module): + def __init__(self, dim: Optional[int] = None, keepdim: bool = False) -> None: + super().__init__() + self.dim = dim + self.keepdim = keepdim + + def forward(self, input): + axis = _check_axis(self.dim, input.shape) + if isinstance(axis, list) and len(axis) == 0: + return flow.experimental.zeros(size=input.shape) + else: + return flow.experimental.sub( + flow.experimental.mean( + flow.experimental.square(input), axis, self.keepdim + ), + flow.experimental.square( + flow.experimental.mean(input, axis, self.keepdim) + ), + ) + + +@oneflow_export("var") +@register_tensor_op("var") +@experimental_api +def variance_op(input, dim=None, keepdim=False): + r"""Returns the variance of each row of the `input` tensor in the given dimension `dim`. + + If `keepdim` is `True`, the output tensor is of the same size as `input` except in the dimension(s) `dim` + where it is of size 1. Otherwise, dim is squeezed (see `flow.squeeze()`), resulting in the output + tensor having 1 (or `len(dim)`) fewer dimension(s). + + Args: + input (Tensor): the input tensor. + dim (int or tuple of python:ints): the dimension or dimensions to reduce. Defaults to None. + keepdim (bool, optional): whether the output tensor has dim retained or not. Defaults to False. + + Returns: + Tensor: The result of variance on the specified axis of input Tensor + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> np_arr = np.random.randn(2,3,4,5) + >>> input = flow.Tensor(np_arr) + >>> output = flow.var(input, 1, True) + + """ + return Variance(dim, keepdim)(input) + + +class ScalarSubByTensor(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + return flow.F.sub_scalar_by_tensor(x, y) + + +class BroadcastSub(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + return flow.F.broadcast_sub(x, y) + + +class ScalarAdd(Module): + def __init__(self, alpha) -> None: + super().__init__() + if not isinstance(alpha, int) and not isinstance(alpha, float): + raise ValueError("scalar type can only be int or float") + self.alpha = alpha + + def forward(self, x): + return flow.F.add_scalar(x, self.alpha) + + +@oneflow_export("sub") +@register_tensor_op("sub") +@experimental_api +def _sub(x, y): + r"""Computes the subtraction of x by y for each element, scalar and broadcast promotion are supported. + The formula is: + + .. math:: + out = x - y
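+ + Internally, subtraction is composed from the primitives above; a sketch of + the dispatch (private helper Modules, not public API): + + .. code-block:: python + + # 5 - y -> ScalarAdd(5)(ScalarMul(-1)(y)) + # x - 2.0 -> ScalarAdd(-2.0)(x) + # x - y, same or broadcastable shapes -> BroadcastSub()(x, y) + # x - y, y of shape (1,) -> ScalarSubByTensor()(x, y) + + For example: + + .. 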
code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + # element-wise subtract + >>> x = flow.Tensor(np.random.randn(2,3)) + >>> y = flow.Tensor(np.random.randn(2,3)) + >>> out = flow.sub(x,y).numpy() + >>> out.shape + (2, 3) + + # scalar subtract + >>> x = 5 + >>> y = flow.Tensor(np.random.randn(2,3)) + >>> out = flow.sub(x,y).numpy() + >>> out.shape + (2, 3) + + # broadcast subtract + >>> x = flow.Tensor(np.random.randn(1,1)) + >>> y = flow.Tensor(np.random.randn(2,3)) + >>> out = flow.sub(x,y).numpy() + >>> out.shape + (2, 3) + + """ + + if isinstance(x, (int, float)): + return ScalarAdd(x)(ScalarMul(-1)(y)) + elif isinstance(y, (int, float)): + return ScalarAdd(-1 * y)(x) + elif x.shape == y.shape: + # TODO: add element-wise op + return BroadcastSub()(x, y) + elif y.shape == (1,): + return ScalarSubByTensor()(x, y) + else: + return BroadcastSub()(x, y) + + +class BroadcastDiv(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + return flow.F.broadcast_div(x, y) + + +class ScalarDivByTensor(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, scalar): + return flow.F.div_scalar_by_tensor(x, scalar) + + +@oneflow_export("div") +@register_tensor_op("div") +@experimental_api +def _div(x, y): + r"""Computes the division of x by y for each element, scalar and broadcast promotion are supported. + The formula is: + + .. math:: + out = \frac{X}{Y} + + Args: + x (Union[int, float, flow.Tensor]): X. + y (Union[int, float, flow.Tensor]): Y. + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + # element-wise divide + >>> x = flow.Tensor(np.random.randn(2,3)) + >>> y = flow.Tensor(np.random.randn(2,3)) + >>> out = flow.div(x,y).numpy() + >>> out.shape + (2, 3) + + # scalar divide + >>> x = 5 + >>> y = flow.Tensor(np.random.randn(2,3)) + >>> out = flow.div(x,y).numpy() + >>> out.shape + (2, 3) + + # broadcast divide + >>> x = flow.Tensor(np.random.randn(1,1)) + >>> y = flow.Tensor(np.random.randn(2,3)) + >>> out = flow.div(x,y).numpy() + >>> out.shape + (2, 3) + + """ + + if isinstance(x, (int, float)): + return ScalarMul(x)(flow.experimental.reciprocal(y)) + elif isinstance(y, (int, float)): + # mirror the safe behavior of flow.reciprocal (reciprocal_no_nan): + # dividing by a scalar zero yields zero rather than inf + if y == 0 or y == 0.0: + y = 0.0 + else: + y = 1.0 / (float(y)) + return ScalarMul(y)(x) + elif x.shape == y.shape: + return BroadcastDiv()(x, y) + elif y.shape == (1,): + return ScalarDivByTensor()(x, y) + else: + return BroadcastDiv()(x, y) + + +class Reciprocal(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.reciprocal_no_nan(x) + + +@oneflow_export("reciprocal") +@register_tensor_op("reciprocal") +@experimental_api +def _reciprocal(x): + r"""Computes the safe reciprocal of x. If x is zero, the reciprocal will + be also set to zero. + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.array([[1, 2, 3], [4, 5, 6]])) + >>> out = flow.reciprocal(x) + >>> out.numpy() + array([[1. 
, 0.5 , 0.33333334], + [0.25 , 0.2 , 0.16666667]], dtype=float32) + """ + + return Reciprocal()(x) + + +class ScalarAddByTensor(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + return flow.F.add_scalar_by_tensor(x, y) + + +class ElementwiseAdd(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + return flow.F.add(x, y) + + +class BroadcastAdd(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + return flow.F.broadcast_add(x, y) + + +@oneflow_export("add") +@register_tensor_op("add") +@experimental_api +def _add(x, y): + r"""Computes the addition of x by y for each element, scalar and broadcast promotion are supported. + The formula is: + + .. math:: + out = x + y + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + # element-wise add + >>> x = flow.Tensor(np.random.randn(2,3)) + >>> y = flow.Tensor(np.random.randn(2,3)) + >>> out = flow.add(x, y).numpy() + >>> out.shape + (2, 3) + + # scalar add + >>> x = 5 + >>> y = flow.Tensor(np.random.randn(2,3)) + >>> out = flow.add(x, y).numpy() + >>> out.shape + (2, 3) + + # broadcast add + >>> x = flow.Tensor(np.random.randn(1,1)) + >>> y = flow.Tensor(np.random.randn(2,3)) + >>> out = flow.add(x, y).numpy() + >>> out.shape + (2, 3) + + """ + + if isinstance(x, (int, float)): + return ScalarAdd(x)(y) + elif isinstance(y, (int, float)): + return ScalarAdd(y)(x) + elif x.shape == y.shape: + return ElementwiseAdd()(x, y) + elif x.shape == (1,): + return ScalarAddByTensor()(y, x) + elif y.shape == (1,): + return ScalarAddByTensor()(x, y) + else: + return BroadcastAdd()(x, y) + + +class Asin(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.asin(x) + + +@oneflow_export("asin") +@experimental_api +def asin_op(input): + r""" + Returns a new tensor with the arcsine of the elements of :attr:`input`. + + .. math:: + \text{out}_{i} = \sin^{-1}(\text{input}_{i}) + + Args: + input (Tensor): the input tensor. + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> input = flow.Tensor(np.array([-0.5, 0.8, 1.0, -0.8]), dtype=flow.float32) + >>> output = flow.asin(input) + >>> output.shape + flow.Size([4]) + >>> output + tensor([-0.5236, 0.9273, 1.5708, -0.9273], dtype=oneflow.float32) + >>> input1 = flow.Tensor(np.array([[0.8, 1.0], [-0.6, -1.0]]), dtype=flow.float32) + >>> output1 = input1.asin() + >>> output1.shape + flow.Size([2, 2]) + >>> output1 + tensor([[ 0.9273, 1.5708], + [-0.6435, -1.5708]], dtype=oneflow.float32) + """ + return Asin()(input) + + +@register_tensor_op("asin") +@experimental_api +def asin_op_tensor(input): + r""" + + See :func:`oneflow.compatible.single_client.experimental.asin` + """ + return Asin()(input) + + +@oneflow_export("arcsin") +@experimental_api +def arcsin_op(input): + r""" + + Alias for :func:`oneflow.compatible.single_client.experimental.asin` + """ + return Asin()(input) + + +@register_tensor_op("arcsin") +@experimental_api +def arcsin_op_tensor(input): + r""" + + See :func:`oneflow.compatible.single_client.experimental.asin` + """ + return Asin()(input) + + +class Asinh(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.asinh(x) + + +@oneflow_export("asinh") +@experimental_api +def asinh_op(input): + r""" + Returns a new tensor with the inverse hyperbolic sine of the elements of :attr:`input`. + + .. math:: + \text{out}_{i} = \sinh^{-1}(\text{input}_{i}) + + Args: + input (Tensor): the input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> input = flow.Tensor(np.array([2, 3, 4]), dtype=flow.float32) + >>> output = flow.asinh(input) + >>> output.shape + flow.Size([3]) + >>> output + tensor([1.4436, 1.8184, 2.0947], dtype=oneflow.float32) + + >>> input1 = flow.Tensor(np.array([[-1, 0, -0.4], [5, 7, 0.8]]), dtype=flow.float32) + >>> output1 = input1.asinh() + >>> output1.shape + flow.Size([2, 3]) + >>> output1 + tensor([[-0.8814, 0. , -0.39 ], + [ 2.3124, 2.6441, 0.7327]], dtype=oneflow.float32) + + """ + return Asinh()(input) + + +@oneflow_export("arcsinh") +@experimental_api +def arcsinh_op(input): + r""" + + Alias for :func:`oneflow.compatible.single_client.experimental.asinh` + """ + return Asinh()(input) + + +@register_tensor_op("asinh") +@experimental_api +def asinh_op_tensor(input): + r""" + + See :func:`oneflow.compatible.single_client.experimental.asinh` + """ + return Asinh()(input) + + +@register_tensor_op("arcsinh") +@experimental_api +def arcsinh_op_tensor(input): + r""" + + See :func:`oneflow.compatible.single_client.experimental.asinh` + """ + return Asinh()(input) + + +class Sin(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.sin(x) + + +@oneflow_export("sin") +@experimental_api +def sin_op(tensor): + r""" + Returns a new tensor with the sine of the elements of :attr:`input`. + + .. math:: + + \text{out}_{i} = \sin(\text{input}_{i}) + + Args: + input (Tensor): the input tensor. + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> x1 = flow.Tensor(np.array([-0.5461, 0.1347, -2.7266, -0.2746]).astype(np.float32)) + >>> out1 = flow.sin(x1) + >>> out1 + tensor([-0.5194, 0.1343, -0.4032, -0.2712], dtype=oneflow.float32) + >>> x2 = flow.Tensor(np.array([-1.4, 2.6, 3.7]).astype(np.float32),device=flow.device('cuda')) + >>> out2 = flow.sin(x2) + >>> out2 + tensor([-0.9854, 0.5155, -0.5298], device='cuda:0', dtype=oneflow.float32) + + """ + + return Sin()(tensor) + + +@register_tensor_op("sin") +@experimental_api +def sin_op_tensor(tensor): + r""" + + sin() -> Tensor + + See :func:`oneflow.compatible.single_client.experimental.sin` + + """ + + return Sin()(tensor) + + +class Cos(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.cos(x) + + +@oneflow_export("cos") +@register_tensor_op("cos") +@experimental_api +def cos_op(tensor): + r""" + Returns a new tensor with the cosine of the elements of :attr:`input`. + + .. math:: + \text{out}_{i} = \cos(\text{input}_{i}) + + Args: + input (Tensor): the input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> arr = np.array([1.4309, 1.2706, -0.8562, 0.9796]) + >>> input = flow.Tensor(arr, dtype=flow.float32) + >>> output = flow.cos(input).numpy() + + """ + return Cos()(tensor) + + +class Atan(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.atan(x) + + +@oneflow_export("atan") +@experimental_api +def atan_op(tensor): + r""" + Returns a new tensor with the arctangent of the elements of :attr:`input`. + + .. math:: + \text{out}_{i} = \tan^{-1}(\text{input}_{i}) + + Args: + input (Tensor): the input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> input = flow.Tensor(np.array([0.5, 0.6, 0.7]), dtype=flow.float32) + >>> output = flow.atan(input) + >>> output.shape + flow.Size([3]) + + """ + return Atan()(tensor) + + +@register_tensor_op("atan") +@experimental_api +def atan_op_tensor(tensor): + r""" + + See :func:`oneflow.compatible.single_client.experimental.atan` + + """ + return Atan()(tensor) + + +@oneflow_export("arctan") +@experimental_api +def arctan_op(tensor): + r""" + Alias for :func:`oneflow.compatible.single_client.experimental.atan` + + """ + return Atan()(tensor) + + +@register_tensor_op("arctan") +@experimental_api +def arctan_op_tensor(tensor): + r""" + + See :func:`oneflow.compatible.single_client.experimental.arctan` + + """ + return Atan()(tensor) + + +class Log(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.log(x) + + +@oneflow_export("log") +@register_tensor_op("log") +@experimental_api +def log_op(tensor): + r""" + Returns a new tensor with the natural logarithm of the elements of :attr:`input`. + + .. math:: + y_{i} = \log_{e} (x_{i}) + + Args: + input (Tensor): the input tensor. + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> arr = np.random.randn(2, 3, 4, 5) + >>> input = flow.Tensor(arr, dtype=flow.float32) + >>> output = flow.log(input) + + + """ + return Log()(tensor) + + +class Subtract(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + if isinstance(x, (int, float)): + return ScalarAdd(x)(-1 * y) + elif isinstance(y, (int, float)): + return ScalarAdd(-1 * y)(x) + elif x.shape == y.shape: + # TODO: add element-wise op + return BroadcastSub()(x, y) + elif x.shape == (1,): + return ScalarSubByTensor()(y, x) + elif y.shape == (1,): + return ScalarSubByTensor()(x, y) + else: + return BroadcastSub()(x, y) + + +class Sqrt(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, input): + return flow.F.sqrt(input) + + +@oneflow_export("rsqrt") +@register_tensor_op("rsqrt") +@experimental_api +def rsqrt_op(input): + r"""Returns a new tensor with the reciprocal of the square-root of each of + the elements of :attr:`input`. + + .. math:: + \text{out}_{i} = \frac{1}{\sqrt{\text{input}_{i}}} + + Args: + input (Tensor): the input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> a = flow.Tensor(np.array([1.0, 2.0, 3.0])) + >>> out = flow.rsqrt(a).numpy() + >>> out + array([1. , 0.70710677, 0.57735026], dtype=float32) + """ + return Rsqrt()(input) + + +class Rsqrt(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, input): + return flow.F.rsqrt(input) + + +@oneflow_export("sqrt") +@register_tensor_op("sqrt") +@experimental_api +def sqrt_op(input): + r"""Returns a new tensor with the square-root of the elements of :attr:`input`. + + .. math:: + \text{out}_{i} = \sqrt{\text{input}_{i}} + + Args: + input (Tensor): the input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> arr = np.array([1.0, 2.0, 3.0]) + >>> input = flow.Tensor(arr) + >>> output = flow.sqrt(input).numpy() + >>> output + array([1. , 1.4142135, 1.7320508], dtype=float32) + """ + return Sqrt()(input) + + +class Square(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, input): + return flow.F.square(input) + + +@oneflow_export("square") +@register_tensor_op("square") +@experimental_api +def square_op(input): + r"""Returns a new tensor with the square of the elements of :attr:`input`. + + .. math:: + \text{out}_{i} = \text{input}_{i} ^ 2 + + Args: + input (Tensor): the input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> arr = np.array([1.0, 2.0, 3.0]) + >>> input = flow.Tensor(arr) + >>> output = flow.square(input).numpy() + >>> output + array([1., 4., 9.], dtype=float32) + """ + return Square()(input) + + +class Std(Module): + def __init__(self, dim=None, unbiased=True, keepdim=False) -> None: + super().__init__() + assert unbiased == True, "Only support 'unbiased=True' for now!"
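+ # NOTE: forward() below uses the mean-of-squares formula and divides by + # N (the population estimator); Bessel's correction (N - 1) is not + # applied, even though only `unbiased=True` is accepted for now.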
+ self.unbiased = unbiased + self.keepdim = keepdim + self.dim = dim + self.reduce_count = 1 + self.square_op = Square() + self.sqrt_op = Sqrt() + self.subtract_op = Subtract() + + def forward(self, x): + self.axis = _check_axis(self.dim, x.shape) + if isinstance(self.axis, list) and len(self.axis) == 0: + return flow.experimental.zeros(size=x.shape) + else: + # reset between calls so repeated forward() does not compound the divisor + self.reduce_count = 1 + if len(self.axis) == 0: + self.reduce_count = x.nelement() + else: + for i in self.axis: + self.reduce_count *= x.shape[i] + + sum = ( + flow.experimental.sum(self.square_op(x), self.axis, self.keepdim) + / self.reduce_count + ) + square = self.square_op( + flow.experimental.sum(x, self.axis, self.keepdim) / self.reduce_count + ) + subtract = self.subtract_op(sum, square) + res = self.sqrt_op(subtract) + return res + + +@oneflow_export("std") +@register_tensor_op("std") +@experimental_api +def std_op(tensor, dim, unbiased=True, keepdim=False): + r""" + Returns the standard-deviation of each row of the :attr:`input` tensor in the + dimension :attr:`dim`. If :attr:`dim` is a list of dimensions, + reduce over all of them. + + If keepdim is True, the output tensor is of the same size as input except in + the dimension(s) dim where it is of size 1. Otherwise, dim is squeezed, + resulting in the output tensor having 1 (or len(dim)) fewer dimension(s). + + If :attr:`unbiased` is ``False``, then the standard-deviation will be calculated + via the biased estimator. Otherwise, Bessel's correction will be used. + (Note: the current implementation always divides by ``N``; see the example below.) + + Args: + input (Tensor): the input tensor. + dim (int or tuple of python:ints): the dimension or dimensions to reduce. + unbiased (bool): whether to use the unbiased estimation or not + keepdim (bool): whether the output tensor has `dim` retained or not. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> arr = np.array([1.0, 2.0, 3.0]) + >>> input = flow.Tensor(arr) + >>> output = flow.std(input, dim=0).numpy() + >>> output + array([0.8164968], dtype=float32) + + """ + return Std(dim, unbiased, keepdim)(tensor) + + +class Pow(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x, y): + if isinstance(y, (int, float)): + return flow.F.pow_scalar(x, alpha=y) + else: + return flow.F.pow(x, y) + + +@oneflow_export("pow") +@register_tensor_op("pow") +@experimental_api +def pow_op(tensor, exponent): + r"""Takes the power of each element in input with exponent and returns a tensor with the result. Exponent can be either a single float number, a single int number, or a tensor with the same shape as input. + When exponent is a scalar value, the operation applied is: + + .. math:: + \text{out}_i = x_i ^ \text{exponent} + + When exponent is a tensor, the operation applied is: + + .. math:: + \text{out}_i = x_i ^ {\text{exponent}_i} + + Args: + input (Tensor): the input tensor. + exponent (int, float, Tensor): the exponent. + + Returns: + Tensor: The result of the exponentiation + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])) + >>> out = flow.pow(x, 2).numpy() + >>> out + array([ 1., 4., 9., 16., 25., 36.], dtype=float32) + + >>> x = flow.Tensor(np.array([1.0, 2.0, 3.0, 4.0])) + >>> y = flow.Tensor(np.array([1.0, 2.0, 3.0, 4.0])) + >>> out = flow.pow(x, y).numpy() + >>> out + array([ 1., 4., 27., 256.], dtype=float32) + + """ + return Pow()(tensor, exponent) + + +class Addmm(Module): + def __init__(self) -> None: + super().__init__() + self._matmul_op = ( + flow.builtin_op("matmul") + .Input("a") + .Input("b") + .Output("out") + .Attr("transpose_a", False) + .Attr("transpose_b", False) + .Attr("alpha", 1.0) + .Build() + ) + + def forward(self, x, mat1, mat2, alpha=1, beta=1): + if len(x.shape) > 2 or len(mat1.shape) > 2 or len(mat2.shape) > 2: + raise ValueError("input matrices must not have more than 2 dimensions") + else: + return _mul(x, beta) + _mul(self._matmul_op(mat1, mat2)[0], alpha) + + +@oneflow_export("addmm") +@experimental_api +def addmm_op(input, mat1, mat2, alpha=1, beta=1): + r"""addmm(input, mat1, mat2, alpha=1, beta=1) -> Tensor + + Performs a matrix multiplication of the matrices :attr:`mat1` and :attr:`mat2`. + The matrix :attr:`input` is added to the final result. + + If :attr:`mat1` is a :math:`(n \times m)` tensor, :attr:`mat2` is a + :math:`(m \times p)` tensor, then :attr:`input` must be + broadcastable with a :math:`(n \times p)` tensor + and :attr:`out` will be a :math:`(n \times p)` tensor. + + :attr:`alpha` and :attr:`beta` are scaling factors on the matrix-matrix product between + :attr:`mat1` and :attr:`mat2` and the added matrix :attr:`input` respectively. + + .. math:: + \text{out} = \beta\ \text{input} + \alpha\ (\text{mat1} \mathbin{@} \text{mat2}) + + For inputs of type `FloatTensor` or `DoubleTensor`, arguments :attr:`beta` and + :attr:`alpha` must be real numbers, otherwise they should be integers. + + Args: + input (Tensor): matrix to be added + mat1 (Tensor): the first matrix to be multiplied + mat2 (Tensor): the second matrix to be multiplied + alpha (Number, optional): multiplier for :math:`mat1 @ mat2` (:math:`\alpha`) + beta (Number, optional): multiplier for :attr:`input` (:math:`\beta`) + out (Tensor, optional): the output tensor.
+ + For example: + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + >>> input = flow.tensor(np.array([[1,2,4],[5,11,9.1]])) + >>> mat1 = flow.tensor(np.array([[7.3,1.9,7.3],[10.2,1,5.5]])) + >>> mat2 = flow.tensor(np.array([[7.3,1.9,7.3],[10.2,1,5.5],[3.7,2.2,8.1]])) + >>> output = flow.addmm(input, mat1, mat2) + >>> output + tensor([[100.68, 33.83, 126.87], + [110.01, 43.48, 133.61]], dtype=oneflow.float64) + >>> output.shape + flow.Size([2, 3]) + + >>> input2 = flow.tensor(np.array([1.7])) + >>> mat1 = flow.tensor(np.array([[1,2],[5,9.1],[7.7,1.4]])) + >>> mat2 = flow.tensor(np.array([[1,2,3.7],[5,9.1,6.8]])) + >>> output2 = flow.addmm(input2, mat1, mat2, alpha=1, beta=2) + >>> output2 + tensor([[14.4 , 23.6 , 20.7 ], + [53.9 , 96.21, 83.78], + [18.1 , 31.54, 41.41]], dtype=oneflow.float64) + >>> output2.shape + flow.Size([3, 3]) + """ + return Addmm()(input, mat1, mat2, alpha, beta) + + +@register_tensor_op("addmm") +@experimental_api +def addmm_op_tensor(input, mat1, mat2, alpha=1, beta=1): + r""" + See :func:`oneflow.compatible.single_client.experimental.addmm` + """ + return Addmm()(input, mat1, mat2, alpha, beta) + + +class Clamp(Module): + def __init__(self, min_value=None, max_value=None) -> None: + super().__init__() + if min_value is not None: + floating_min_value = float(min_value) + integral_min_value = int(min_value) + if max_value is not None: + floating_max_value = float(max_value) + integral_max_value = int(max_value) + + if min_value is not None and max_value is not None: + self._op = ( + flow.builtin_op("clip_by_scalar") + .Input("x") + .Output("y") + .Attr("floating_min", floating_min_value) + .Attr("integral_min", integral_min_value) + .Attr("floating_max", floating_max_value) + .Attr("integral_max", integral_max_value) + .Build() + ) + elif min_value is not None: + self._op = ( + flow.builtin_op("clip_by_scalar_min") + .Input("x") + .Output("y") + .Attr("floating_min", floating_min_value) + .Attr("integral_min", integral_min_value) + .Build() + ) + elif max_value is not None: + self._op = ( + flow.builtin_op("clip_by_scalar_max") + .Input("x") + .Output("y") + .Attr("floating_max", floating_max_value) + .Attr("integral_max", integral_max_value) + .Build() + ) + else: + raise ValueError("min_value and max_value cannot be None at the same time") + + def forward(self, x): + return self._op(x)[0] + + +@oneflow_export("clamp") +@experimental_api +def clamp_op(tensor, min=None, max=None): + r""" + Clamp all elements in :attr:`input` into the range `[` :attr:`min`, :attr:`max` `]` and return + a resulting tensor: + + .. math:: + y_i = \begin{cases} + \text{min} & \text{if } x_i < \text{min} \\ + x_i & \text{if } \text{min} \leq x_i \leq \text{max} \\ + \text{max} & \text{if } x_i > \text{max} + \end{cases} + + If :attr:`input` is of type `FloatTensor` or `DoubleTensor`, args :attr:`min` + and :attr:`max` must be real numbers, otherwise they should be integers. + + Args: + input (Tensor): the input tensor. + min (Number): lower-bound of the range to be clamped to. Defaults to None. + max (Number): upper-bound of the range to be clamped to. Defaults to None. + out (Tensor, optional): the output tensor. + + For example: + + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> arr = np.array([0.2, 0.6, -1.5, -0.3]) + >>> input = flow.Tensor(arr) + >>> output = flow.clamp(input, min=-0.5, max=0.5) + >>> output + tensor([ 0.2, 0.5, -0.5, -0.3], dtype=oneflow.float32) + + >>> arr = np.array([0.2, 0.6, -1.5, -0.3]) + >>> input = flow.Tensor(arr) + >>> output = flow.clamp(input, min=None, max=0.5) + >>> output + tensor([ 0.2, 0.5, -1.5, -0.3], dtype=oneflow.float32) + + >>> arr = np.array([0.2, 0.6, -1.5, -0.3]) + >>> input = flow.Tensor(arr) + >>> output = flow.clamp(input, min=-0.5, max=None) + >>> output + tensor([ 0.2, 0.6, -0.5, -0.3], dtype=oneflow.float32) + + """ + return Clamp(min, max)(tensor) + + +@register_tensor_op("clamp") +@experimental_api +def clamp_op_tensor(tensor, min=None, max=None): + r""" + See :func:`oneflow.compatible.single_client.experimental.clamp` + """ + return Clamp(min, max)(tensor) + + +@oneflow_export("clip") +@experimental_api +def clip_op(tensor, min=None, max=None): + r""" + Alias for :func:`oneflow.compatible.single_client.experimental.clamp` + """ + return Clamp(min, max)(tensor) + + +@register_tensor_op("clip") +@experimental_api +def clip_op_tensor(tensor, min=None, max=None): + r""" + See :func:`oneflow.compatible.single_client.experimental.clamp` + """ + return Clamp(min, max)(tensor) + + +class Cosh(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.cosh(x) + + +@oneflow_export("cosh") +@register_tensor_op("cosh") +@experimental_api +def cosh_op(tensor): + r""" + Returns a new tensor with the hyperbolic cosine of the elements of :attr:`input`. + + .. math:: + \text{out}_{i} = \cosh(\text{input}_{i}) + + Args: + input (Tensor): the input tensor. + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> arr = np.array([ 0.1632, 1.1835, -0.6979, -0.7325]) + >>> input = flow.Tensor(arr, dtype=flow.float32) + >>> output = flow.cosh(input).numpy() + >>> output + array([1.0133467, 1.7859949, 1.2535787, 1.2804903], dtype=float32) + + """ + return Cosh()(tensor) + + +class Erf(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, input): + return flow.F.erf(input) + + +@oneflow_export("erf") +@register_tensor_op("erf") +@experimental_api +def erf_op(input): + r"""Computes the error function of each element. The error function is defined as follows: + + .. math:: + \operatorname{erf}(x)=\frac{2}{\sqrt{\pi}} \int_{0}^{x} e^{-t^{2}} d t + + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor + + Returns: + oneflow.compatible.single_client.Tensor: The result Tensor + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.array([0, -1., 10.]), dtype=flow.float32) + >>> out = flow.erf(x) + >>> out.shape + flow.Size([3]) + >>> out.numpy() + array([ 0. , -0.8427008, 1. ], dtype=float32) + + >>> x = flow.Tensor(np.array([[0, -1., 10.], [5, 7, 0.8]]), dtype=flow.float32) + >>> out = flow.erf(x) + >>> out.shape + flow.Size([2, 3]) + >>> out.numpy() + array([[ 0. , -0.8427008 , 1. ], + [ 1. , 1. 
, 0.74210095]], dtype=float32) + + >>> x = flow.Tensor(np.array([[0, -1., 10.], [5, 7, 0.8], [2, 3, 4]]), dtype=flow.float32) + >>> out = x.erf() + >>> out.shape + flow.Size([3, 3]) + >>> out.numpy() + array([[ 0. , -0.8427008 , 1. ], + [ 1. , 1. , 0.74210095], + [ 0.9953223 , 0.9999779 , 1. ]], dtype=float32) + + """ + return Erf()(input) + + +@register_tensor_op("erf") +@experimental_api +def erf_op_tensor(input): + r""" + See :func:`oneflow.compatible.single_client.experimental.erf` + """ + return Erf()(input) + + +class Erfc(Module): + def __init__(self) -> None: + super().__init__() + self.erfc_op = flow.builtin_op("erfc").Input("x").Output("y").Build() + + def forward(self, input): + return self.erfc_op(input)[0] + + +@oneflow_export("erfc") +@register_tensor_op("erfc") +@experimental_api +def erfc_op(input): + r"""Computes the complementary error function of each element of input. The complementary error + function is defined as follows: + + .. math:: + \operatorname{erfc}(x)=1-\frac{2}{\sqrt{\pi}} \int_{0}^{x} e^{-t^{2}} d t + + Args: + input (oneflow.compatible.single_client.Tensor): A Tensor + + Returns: + oneflow.compatible.single_client.Tensor: The result Tensor + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.array([0, -1., 10.]), dtype=flow.float32) + >>> out = flow.erfc(x) + >>> out.shape + flow.Size([3]) + >>> out.numpy() + array([1.0000000e+00, 1.8427007e+00, 2.8025969e-45], dtype=float32) + + >>> x = flow.Tensor(np.array([[0, -1., 10.], [5, 7, 0.8]]), dtype=flow.float32) + >>> out = flow.erfc(x) + >>> out.shape + flow.Size([2, 3]) + >>> out.numpy() + array([[1.0000000e+00, 1.8427007e+00, 2.8025969e-45], + [1.5374597e-12, 4.1838257e-23, 2.5789905e-01]], dtype=float32) + + >>> x = flow.Tensor(np.array([[0, -1., 10.], [5, 7, 0.8], [2, 3, 4]]), dtype=flow.float32) + >>> out = x.erfc() + >>> out.shape + flow.Size([3, 3]) + >>> out.numpy() + array([[1.0000000e+00, 1.8427007e+00, 2.8025969e-45], + [1.5374597e-12, 4.1838257e-23, 2.5789905e-01], + [4.6777348e-03, 2.2090499e-05, 1.5417259e-08]], dtype=float32) + + """ + return Erfc()(input) + + +@register_tensor_op("erfc") +@experimental_api +def erfc_op_tensor(input): + r""" + See :func:`oneflow.compatible.single_client.experimental.erfc` + """ + return Erfc()(input) + + +class Ceil(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.ceil(x) + + +@oneflow_export("ceil") +@experimental_api +def ceil_op(x): + r"""Returns a new tensor with the ceil of the elements of :attr:`x`, + the smallest integer greater than or equal to each element. + + The equation is: + + .. math:: + \text{out}_{i} = \left\lceil \text{input}_{i} \right\rceil + + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor. + + Returns: + oneflow.compatible.single_client.Tensor: The result Tensor + + For example: + + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.array([0.1, -2, 3.4]).astype(np.float32)) + >>> y = flow.ceil(x) + >>> print(y.shape) + flow.Size([3]) + >>> print(y.numpy()) + [ 1. -2. 4.] + + + >>> x = flow.Tensor(np.array([[2.5, 4.6, 0.6],[7.8, 8.3, 9.2]]).astype(np.float32)) + >>> y = x.ceil() + >>> print(y.shape) + flow.Size([2, 3]) + >>> print(y.numpy()) + [[ 3. 5.
1.] + [ 8. 9. 10.]] + + + + + >>> x = flow.Tensor(np.array([[[2.2, 4.4, 6.5],[7.1, 8.2, 9.3]],[[10.6,11.2,12.2],[13.5,14.8,15.9]]]).astype(np.float32)) + >>> y = flow.ceil(x) + >>> print(y.shape) + flow.Size([2, 2, 3]) + >>> print(y.numpy()) + [[[ 3. 5. 7.] + [ 8. 9. 10.]] + <BLANKLINE> + [[11. 12. 13.] + [14. 15. 16.]]] + + """ + + return Ceil()(x) + + +@register_tensor_op("ceil") +@experimental_api +def ceil_op_tensor(x): + r""" + See :func:`oneflow.compatible.single_client.experimental.ceil` + """ + + return Ceil()(x) + + +class Expm1(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.expm1(x) + + +@oneflow_export("expm1") +@experimental_api +def expm1_op(x): + """Returns a new tensor with the exponential of the elements minus 1 + of :attr:`x`. + + + The equation is: + + .. math:: + y_{i} = e^{x_{i}} - 1 + + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor. + + Returns: + oneflow.compatible.single_client.Tensor: The result Tensor + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.array([1, 2, 3]).astype(np.float32)) + >>> y = flow.expm1(x) + >>> print(y.shape) + flow.Size([3]) + >>> print(y.numpy()) + [ 1.7182817 6.389056 19.085537 ] + + + >>> x = flow.Tensor(np.array([[2, 4, 6],[7, 8, 9]]).astype(np.float32)) + >>> y = x.expm1() + >>> print(y.shape) + flow.Size([2, 3]) + >>> print(y.numpy()) + [[6.3890562e+00 5.3598152e+01 4.0242880e+02] + [1.0956332e+03 2.9799580e+03 8.1020840e+03]] + + + + >>> x = flow.Tensor(np.array([[[2, 4, 6],[7, 8, 9]],[[10,11,12],[13,14,15]]]).astype(np.float32)) + >>> y = flow.expm1(x) + >>> print(y.shape) + flow.Size([2, 2, 3]) + >>> print(y.numpy()) + [[[6.3890562e+00 5.3598152e+01 4.0242880e+02] + [1.0956332e+03 2.9799580e+03 8.1020840e+03]] + <BLANKLINE> + [[2.2025465e+04 5.9873141e+04 1.6275380e+05] + [4.4241238e+05 1.2026032e+06 3.2690165e+06]]] + + + """ + return Expm1()(x) + + +@register_tensor_op("expm1") +@experimental_api +def expm1_op_tensor(x): + r""" + See :func:`oneflow.compatible.single_client.experimental.expm1` + """ + + return Expm1()(x) + + +class Topk(Module): + def __init__( + self, k, dim: int = None, largest: bool = True, sorted: bool = True + ) -> None: + super().__init__() + self._op_topk_last_dim = ( + flow.builtin_op("top_k") + .Input("in") + .Output("out") + .Attr("k", k) + .Attr("sorted", sorted) + .Build() + ) + self.dim = dim + self.largest = largest + + def forward(self, input): + if self.dim == None: + self.dim = -1 + + num_axes = len(input.shape) + axis = self.dim if self.dim >= 0 else self.dim + num_axes + assert 0 <= axis < num_axes, "axis out of range" + if axis == num_axes - 1: + if self.largest: + indices = self._op_topk_last_dim(input)[0] + else: + neg_input = flow.experimental.mul(input, -1) + indices = self._op_topk_last_dim(neg_input)[0] + return (flow.experimental.gather(input, indices, dim=axis), indices) + else: + perm = get_perm_when_transpose_axis_to_last_dim(num_axes, axis) + x = flow.F.transpose(input, perm=perm) + if self.largest: + indices = self._op_topk_last_dim(x)[0] + else: + neg_input = flow.experimental.mul(x, -1) + indices = self._op_topk_last_dim(neg_input)[0] + indices = flow.F.transpose(indices, perm=get_inversed_perm(perm)) + return (flow.experimental.gather(input, indices, dim=axis), indices) + + +@oneflow_export("topk") +@register_tensor_op("topk") +@experimental_api +def 
topk_op(input, k, dim: int = None, largest: bool = True, sorted: bool = True): + r"""Finds the values and indices of the k largest entries along the specified axis. + + Args: + input (oneflow.compatible.single_client.Tensor): Input Tensor + k (int): the number of top entries to return + dim (int, optional): the dimension to sort along. Defaults to the last dim (-1) + largest (bool, optional): controls whether to return largest or smallest elements + sorted (bool, optional): controls whether to return the elements in sorted order + + Returns: + Tuple(oneflow.compatible.single_client.Tensor, oneflow.compatible.single_client.Tensor(dtype=int32)): A tuple of (values, indices), where + the indices are the indices of the elements in the original input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> x = np.array([[1, 3, 8, 7, 2], [1, 9, 4, 3, 2]], dtype=np.float32) + >>> (values, indices) = flow.topk(flow.Tensor(x), k=3, dim=1) + >>> values + tensor([[8., 7., 3.], + [9., 4., 3.]], dtype=oneflow.float32) + >>> indices + tensor([[2, 3, 1], + [1, 2, 3]], dtype=oneflow.int32) + >>> values.shape + flow.Size([2, 3]) + >>> indices.shape + flow.Size([2, 3]) + >>> (values, indices) = flow.topk(flow.Tensor(x), k=2, dim=1, largest=False) + >>> values + tensor([[1., 2.], + [1., 2.]], dtype=oneflow.float32) + >>> indices + tensor([[0, 4], + [0, 4]], dtype=oneflow.int32) + >>> values.shape + flow.Size([2, 2]) + >>> indices.shape + flow.Size([2, 2]) + + """ + return Topk(k=k, dim=dim, largest=largest, sorted=sorted)(input) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/matmul.py b/oneflow/compatible_single_client_python/nn/modules/matmul.py new file mode 100644 index 0000000000000000000000000000000000000000..c9815f894ba5501341682623e710b1d899b0070d --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/matmul.py @@ -0,0 +1,83 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.framework import id_util as id_util +from typing import Optional, Sequence + + +class MatMul(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, a, b): + assert len(a.shape) >= 2, "Tensor a's dim should >=2" + assert len(b.shape) >= 2, "Tensor b's dim should >=2" + + if len(a.shape) == len(b.shape): + if len(a.shape) == 2: + res = flow.F.matmul(a, b) + else: + res = flow.F.batch_matmul(a, b) + else: + # NOTE: support broadcast b to a only for now + assert ( + len(b.shape) == 2 + ), "Not support number of dimensions of a being less than number of dimensions of b!" + res = flow.F.broadcast_matmul(a, b) + + return res + + +@oneflow_export("matmul") +@register_tensor_op("matmul") +@experimental_api +def matmul_op(a, b): + r"""This operator applies matrix multiplication to two Tensor. + + Args: + a (oneflow.compatible.single_client.Tensor): A Tensor + b (oneflow.compatible.single_client.Tensor): A Tensor + + Returns: + oneflow.compatible.single_client.Tensor: The result Tensor + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> input1 = flow.Tensor(np.random.randn(2, 6), dtype=flow.float32) + >>> input2 = flow.Tensor(np.random.randn(6, 5), dtype=flow.float32) + >>> of_out = flow.matmul(input1, input2) + >>> of_out.shape + flow.Size([2, 5]) + + """ + return MatMul()(a, b) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/meshgrid.py b/oneflow/compatible_single_client_python/nn/modules/meshgrid.py new file mode 100644 index 0000000000000000000000000000000000000000..66c98fc376d1670794aa81f52f949b1499f05659 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/meshgrid.py @@ -0,0 +1,100 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) + + +class MeshGrid(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, inputs): + size = len(inputs) + assert size > 0, f"meshgrid expects a non-empty TensorList" + shape = list() + for i in range(size): + assert inputs[i].dim() <= 1, f( + "Expected scalar or 1D tensor in the tensor list but got: ", inputs[i] + ) + if inputs[i].dim() == 0: + shape.append(1) + else: + shape.append(inputs[i].shape[0]) + for i in range(size - 1): + assert ( + inputs[i].dtype == inputs[i + 1].dtype + and inputs[i].device == inputs[i + 1].device + ), f"meshgrid expects all tensors to have the same dtype and device" + outputs = [] + for i in range(size): + view_shape = [1] * size + view_shape[i] = -1 + # TODO(BBuf) change reshape to view + outputs.append(inputs[i].reshape(view_shape).expand(*shape)) + return outputs + + +@oneflow_export("meshgrid") +@experimental_api +def meshgrid_op(*inputs): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: + https://pytorch.org/docs/stable/_modules/torch/functional.html#meshgrid + + Take :math:`N` tensors, each of which can be either scalar or 1-dimensional + vector, and create :math:`N` N-dimensional grids, where the :math:`i` :sup:`th` grid is defined by + expanding the :math:`i` :sup:`th` input over dimensions defined by other inputs. + + Args: + tensors (list of Tensor): list of scalars or 1 dimensional tensors. Scalars will be + treated as tensors of size :math:`(1,)` automatically + + Returns: + seq (sequence of Tensors): If the input has :math:`k` tensors of size + :math:`(N_1,), (N_2,), \ldots , (N_k,)`, then the output would also have :math:`k` tensors, + where all tensors are of size :math:`(N_1, N_2, \ldots , N_k)`. + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> input1 = flow.Tensor(np.array([1, 2, 3]), dtype=flow.float32) + >>> input2 = flow.Tensor(np.array([4, 5, 6]), dtype=flow.float32) + >>> of_x, of_y = flow.meshgrid(input1, input2) + >>> of_x + tensor([[1., 1., 1.], + [2., 2., 2.], + [3., 3., 3.]], dtype=oneflow.float32) + >>> of_y + tensor([[4., 5., 6.], + [4., 5., 6.], + [4., 5., 6.]], dtype=oneflow.float32) + """ + return MeshGrid()(inputs) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/ne.py b/oneflow/compatible_single_client_python/nn/modules/ne.py new file mode 100644 index 0000000000000000000000000000000000000000..a79c8be3cdec247938a880ae8f8a6d50dc924fa0 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/ne.py @@ -0,0 +1,86 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +""" +import oneflow +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Ne(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, input, other): + if isinstance(other, flow.Tensor) or isinstance( + other, oneflow._oneflow_internal.Tensor + ): + for i in range(len(input.size())): + assert ( + input.shape[i] >= other.shape[i] + ), "The second tensor's shape should be broadcastable with the first argument." + if input.dtype != other.dtype: + other = other.to(dtype=input.dtype) + elif isinstance(other, int) or isinstance(other, float): + other = flow.Tensor([other], dtype=input.dtype, device=input.device) + else: + raise NotImplementedError( + "Unsupported data type. The second argument must be a number or a tensor whose shape is broadcastable with the first argument." + ) + return flow.F.broadcast_not_equal(input, other) + + +@oneflow_export("ne", "not_equal") +@register_tensor_op("ne") +@experimental_api +def ne_op(input, other): + r""" + Computes element-wise inequality. + The second argument can be a number or a tensor whose shape is broadcastable with the first argument. + + Args: + input (oneflow.compatible.single_client.Tensor): the tensor to compare + other (oneflow.compatible.single_client.Tensor, float or int): the target to compare + + Returns: + + - A boolean tensor that is True where :attr:`input` is not equal to :attr:`other` and False elsewhere + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor(np.array([2, 3, 4, 5]), dtype=flow.float32) + >>> other = flow.Tensor(np.array([2, 3, 4, 1]), dtype=flow.float32) + + >>> y = flow.ne(input, other) + >>> y + tensor([0, 0, 0, 1], dtype=oneflow.int8) + + """ + return Ne()(input, other) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/negative.py b/oneflow/compatible_single_client_python/nn/modules/negative.py new file mode 100644 index 0000000000000000000000000000000000000000..55f0d07001e38292406a01f5e62877a561eb2333 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/negative.py @@ -0,0 +1,67 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Negative(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.negative(x) + + +@oneflow_export("negative", "neg") +@register_tensor_op("negative") +@experimental_api +def negative_op(x): + """This operator computes the negative value of Tensor. + + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor + + Returns: + oneflow.compatible.single_client.Tensor: The result Tensor + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor( + ... np.array([1.0, -1.0, 2.3]).astype(np.float32), dtype=flow.float32 + ... ) + >>> out = flow.negative(input) + >>> out + tensor([-1. , 1. , -2.3], dtype=oneflow.float32) + + """ + return Negative()(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/norm.py b/oneflow/compatible_single_client_python/nn/modules/norm.py new file mode 100644 index 0000000000000000000000000000000000000000..dacdf9fc43be7ce699736c154836a6372a7c752c --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/norm.py @@ -0,0 +1,267 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Norm(Module): + def __init__(self, ord=None, dim=None, keepdim=False) -> None: + super().__init__() + + self.ord = ord + self.dim = dim + self.keepdim = keepdim + + def _vector_norm(self, x, ord, dim): + if isinstance(ord, str) and ord in ["fro", "nuc"]: + raise ValueError("Norm order {} is not supported for vectors".format(ord)) + elif isinstance(ord, float) and ord in [float("inf"), float("-inf")]: + if ord == float("inf"): + return flow.experimental.max(flow.experimental.abs(x), dim=dim) + else: + return flow.experimental.min(flow.experimental.abs(x), dim=dim) + elif isinstance(ord, int): + if ord == 0: + # TODO: fix error when input are all zero vector + return flow.tensor([flow.experimental.argwhere(x).shape[0]]) + else: + return flow.experimental.pow( + flow.experimental.sum( + flow.experimental.pow(flow.experimental.abs(x), ord), dim=dim + ), + 1.0 / ord, + ) + else: + raise ValueError("Invalid norm order: {}".format(ord)) + + def _matrix_norm(self, x, ord, dim): + if isinstance(ord, str) and ord in ["fro", "nuc"]: + if ord == "nuc": + raise NotImplementedError + else: + return flow.experimental.sqrt( + flow.experimental.sum(flow.experimental.square(x), dim=dim) + ) + elif isinstance(ord, float) and ord in [float("inf"), float("-inf")]: + if ord == float("inf"): + return flow.experimental.max( + flow.experimental.sum(flow.experimental.abs(x), dim=1) + ) + else: + return flow.experimental.min( + flow.experimental.sum(flow.experimental.abs(x), dim=1) + ) + elif isinstance(ord, int): + if ord == 1: + return flow.experimental.max( + flow.experimental.sum(flow.experimental.abs(x), dim=0) + ) + elif ord == -1: + return flow.experimental.min( + flow.experimental.sum(flow.experimental.abs(x), dim=0) + ) + elif ord == 2: + raise NotImplementedError + elif ord == -2: + raise NotImplementedError + else: + raise ValueError( + "Norm order {} is not supported for matrices".format(ord) + ) + else: + raise ValueError("Invalid norm order: {}".format(ord)) + + def _whether_keepdim(self, x): + if self.keepdim == True and self.dim != None: + return flow.experimental.unsqueeze(x, self.dim) + else: + return x + + def forward(self, x): + num_axes = len(x.shape) + if self.dim == None and self.ord == None: + res = self._vector_norm(x.reshape((1, -1))[0], ord=2, dim=self.dim) + elif self.dim == None and self.ord != None: + assert ( + num_axes <= 2 + ), "input must be 1-D or 2-D when dim is None and ord is not None" + res = ( + self._vector_norm(x, self.ord, self.dim) + if num_axes == 1 + else self._matrix_norm(x, self.ord, self.dim) + ) + elif isinstance(self.dim, (int, tuple, list)): + if isinstance(self.dim, int): + self.dim = self.dim if self.dim >= 0 else self.dim + num_axes + assert 0 <= self.dim < num_axes, "dim out of range" + res = self._vector_norm( + x, ord=2 if self.ord == None else self.ord, dim=self.dim + ) + else: + temp = list(self.dim) if isinstance(self.dim, tuple) else self.dim + for i in range(len(temp)): + temp[i] = temp[i] if temp[i] >= 0 else temp[i] + num_axes + assert 0 <= temp[i] < num_axes, "dim out of range" + self.dim = temp + res = self._matrix_norm( + x, ord="fro" if self.ord == None else self.ord, dim=self.dim + ) + else: + raise 
ValueError("Invalid dimension: {}".format(self.dim)) + return self._whether_keepdim(res) + + +@oneflow_export("linalg.norm") +@experimental_api +def norm_op(input, ord=None, dim=None, keepdim=False): + r"""linalg.norm(input, ord=None, dim=None, keepdim=False, *, out=None) -> Tensor + + Returns the matrix norm or vector norm of a given tensor. + + This function can calculate one of eight different types of matrix norms, or one + of an infinite number of vector norms, depending on both the number of reduction + dimensions and the value of the `ord` parameter. + + Args: + input (Tensor): The input tensor. If dim is None, input must be 1-D or 2-D, unless :attr:`ord` + is None. If both :attr:`dim` and :attr:`ord` are None, the 2-norm of the input flattened to 1-D + will be returned. Its data type must be either a floating point or complex type. For complex + inputs, the norm is calculated on of the absolute values of each element. If the input is + complex and neither :attr:`dtype` nor :attr:`out` is specified, the result's data type will + be the corresponding floating point type (e.g. float if :attr:`input` is complexfloat). + + ord (int, float, inf, -inf, 'fro', 'nuc', optional): The order of norm. + inf refers to :attr:`float('inf')`, numpy's :attr:`inf` object, or any equivalent object. + The following norms can be calculated: + + ===== ============================ ========================== + ord norm for matrices norm for vectors + ===== ============================ ========================== + None Frobenius norm 2-norm + 'fro' Frobenius norm -- not supported -- + 'nuc' -- not supported yet -- -- not supported -- + inf max(sum(abs(x), dim=1)) max(abs(x)) + -inf min(sum(abs(x), dim=1)) min(abs(x)) + 0 -- not supported -- sum(x != 0) + 1 max(sum(abs(x), dim=0)) as below + -1 min(sum(abs(x), dim=0)) as below + 2 -- not supported yet -- as below + -2 -- not supported yet -- as below + other -- not supported -- sum(abs(x)**ord)**(1./ord) + ===== ============================ ========================== + + Default: ``None`` + + dim (int, 2-tuple of ints, 2-list of ints, optional): If :attr:`dim` is an int, + vector norm will be calculated over the specified dimension. If :attr:`dim` + is a 2-tuple of ints, matrix norm will be calculated over the specified + dimensions. If :attr:`dim` is None, matrix norm will be calculated + when the input tensor has two dimensions, and vector norm will be + calculated when the input tensor has one dimension. Default: ``None`` + + keepdim (bool, optional): If set to True, the reduced dimensions are retained + in the result as dimensions with size one. Default: ``False`` + + out (Tensor, optional): The output tensor. 
+ + Examples:: + + >>> import oneflow.compatible.single_client.experimental as flow + >>> from oneflow.compatible.single_client.experimental import linalg as LA + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> a = flow.tensor(np.arange(9, dtype=np.float32) - 4) + >>> a + tensor([-4., -3., -2., -1., 0., 1., 2., 3., 4.], dtype=oneflow.float32) + >>> b = a.reshape((3, 3)) + >>> b + tensor([[-4., -3., -2.], + [-1., 0., 1.], + [ 2., 3., 4.]], dtype=oneflow.float32) + + >>> LA.norm(a) + tensor([7.746], dtype=oneflow.float32) + >>> LA.norm(b) + tensor([7.746], dtype=oneflow.float32) + >>> LA.norm(b, 'fro') + tensor([7.746], dtype=oneflow.float32) + >>> LA.norm(a, float('inf')) + tensor([4.], dtype=oneflow.float32) + >>> LA.norm(b, float('inf')) + tensor([9.], dtype=oneflow.float32) + >>> LA.norm(a, -float('inf')) + tensor([0.], dtype=oneflow.float32) + >>> LA.norm(b, -float('inf')) + tensor([2.], dtype=oneflow.float32) + + >>> LA.norm(a, 1) + tensor([20.], dtype=oneflow.float32) + >>> LA.norm(b, 1) + tensor([7.], dtype=oneflow.float32) + >>> LA.norm(a, -1) + tensor([0.], dtype=oneflow.float32) + >>> LA.norm(b, -1) + tensor([6.], dtype=oneflow.float32) + >>> LA.norm(a, 2) + tensor([7.746], dtype=oneflow.float32) + >>> LA.norm(a, -2) + tensor([0.], dtype=oneflow.float32) + >>> LA.norm(a, 3) + tensor([5.848], dtype=oneflow.float32) + >>> LA.norm(a, -3) + tensor([0.], dtype=oneflow.float32) + + Using the :attr:`dim` argument to compute vector norms:: + + >>> c = flow.tensor([[1., 2., 3.], + ... [-1, 1, 4]]) + >>> LA.norm(c, dim=0) + tensor([1.4142, 2.2361, 5. ], dtype=oneflow.float32) + >>> LA.norm(c, dim=1, keepdim = True) + tensor([[3.7417], + [4.2426]], dtype=oneflow.float32) + >>> LA.norm(c, ord=1, dim=1) + tensor([6., 6.], dtype=oneflow.float32) + + Using the :attr:`dim` argument to compute matrix norms:: + + >>> m = flow.tensor(np.arange(8, dtype=np.float32)).reshape((2, 2, 2)) + >>> LA.norm(m, dim=(1,2)) + tensor([ 3.7417, 11.225 ], dtype=oneflow.float32) + """ + return Norm(ord, dim, keepdim)(input) + + +@register_tensor_op("norm") +@experimental_api +def norm_tensor_op(input, ord=None, dim=None, keepdim=False): + r""" + See :func:`oneflow.compatible.single_client.experimental.linalg.norm.` + """ + return Norm(ord, dim, keepdim)(input) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/normalization.py b/oneflow/compatible_single_client_python/nn/modules/normalization.py new file mode 100644 index 0000000000000000000000000000000000000000..7ffa7cb8e9ab5330dd246092c4f7f1d33a532eaa --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/normalization.py @@ -0,0 +1,342 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import oneflow +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn import init +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import Tensor +from typing import Tuple, Union + +_shape_t = Union[int, Tuple[int], oneflow._oneflow_internal.Size] + + +@oneflow_export("nn.GroupNorm") +@experimental_api +class GroupNorm(Module): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: + https://pytorch.org/docs/stable/generated/torch.nn.GroupNorm.html + + Applies Group Normalization over a mini-batch of inputs as described in + the paper `Group Normalization <https://arxiv.org/abs/1803.08494>`__ + + .. math:: + + y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta + + The input channels are separated into :attr:`num_groups` groups, each containing + ``num_channels / num_groups`` channels. The mean and standard-deviation are calculated + separately over the each group. :math:`\gamma` and :math:`\beta` are learnable + per-channel affine transform parameter vectors of size :attr:`num_channels` if + :attr:`affine` is ``True``. + The standard-deviation is calculated via the biased estimator, equivalent to + `torch.var(input, unbiased=False)`. + + This layer uses statistics computed from input data in both training and + evaluation modes. + + Args: + num_groups (int): number of groups to separate the channels into + num_channels (int): number of channels expected in input + eps: a value added to the denominator for numerical stability. Default: 1e-5 + affine: a boolean value that when set to ``True``, this module + has learnable per-channel affine parameters initialized to ones (for weights) + and zeros (for biases). Default: ``True``. + + Shape: + - Input: :math:`(N, C, *)` where :math:`C=\text{num_channels}` + - Output: :math:`(N, C, *)` (same shape as input) + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor(np.random.randn(20, 6, 10, 10)) + >>> # Separate 6 channels into 3 groups + >>> m = flow.nn.GroupNorm(3, 6) + >>> # Separate 6 channels into 6 groups (equivalent with InstanceNorm) + >>> m = flow.nn.GroupNorm(6, 6) + >>> # Put all 6 channels into a single group (equivalent with LayerNorm) + >>> m = flow.nn.GroupNorm(1, 6) + >>> # Activating the module + >>> output = m(input) + +""" + + def __init__( + self, num_groups: int, num_channels: int, eps: float = 1e-5, affine: bool = True + ) -> None: + super().__init__() + assert num_groups > 0, "num_groups must be larger than zero" + assert num_channels > 0, "num_channels must be larger than zero" + self.num_groups = num_groups + self.num_channels = num_channels + self.eps = eps + self.affine = affine + if self.affine: + self.weight = flow.nn.Parameter(flow.Tensor(1, num_channels, 1)) + self.bias = flow.nn.Parameter(flow.Tensor(1, num_channels, 1)) + else: + self.register_parameter("weight", None) + self.register_parameter("bias", None) + + self.reset_parameters() + + def reset_parameters(self) -> None: + if self.affine: + flow.nn.init.ones_(self.weight) + flow.nn.init.zeros_(self.bias) + + def forward(self, input: Tensor) -> Tensor: + assert ( + len(input.shape) >= 3 + ), "The number of dimensions of the input tensor must be larger than 2" + assert ( + input.shape[1] == self.num_channels + ), "The channels of input tensor must equal num_channels" + origin_shape = input.shape + reshape_to_1d = flow.experimental.reshape( + input, shape=[origin_shape[0], self.num_groups, -1] + ) + mean = flow.experimental.mean(reshape_to_1d, dim=2, keepdim=True) + variance = flow.experimental.var(reshape_to_1d, dim=2, keepdim=True) + normalized = (reshape_to_1d - mean) / flow.experimental.sqrt( + variance + self.eps + ) + normalized = flow.experimental.reshape( + normalized, shape=[origin_shape[0], self.num_channels, -1] + ) + if self.weight is not None: + normalized = normalized * self.weight + if self.bias is not None: + normalized = normalized + self.bias + res = flow.experimental.reshape(normalized, shape=tuple(input.shape)) + + return res + + +@oneflow_export("nn.LayerNorm") +@experimental_api +class LayerNorm(Module): + r"""Applies Layer Normalization over a mini-batch of inputs as described in + the paper `Layer Normalization <https://arxiv.org/abs/1607.06450>`__ + + .. math:: + y = \frac{x - \mathrm{E}[x]}{ \sqrt{\mathrm{Var}[x] + \epsilon}} * \gamma + \beta + + The mean and standard-deviation are calculated separately over the last + certain number dimensions which have to be of the shape specified by + :attr:`normalized_shape`. + :math:`\gamma` and :math:`\beta` are learnable affine transform parameters of + :attr:`normalized_shape` if :attr:`elementwise_affine` is ``True``. + The standard-deviation is calculated via the biased estimator. + + .. note:: + Unlike Batch Normalization and Instance Normalization, which applies + scalar scale and bias for each entire channel/plane with the + :attr:`affine` option, Layer Normalization applies per-element scale and + bias with :attr:`elementwise_affine`. + + This layer uses statistics computed from input data in both training and + evaluation modes. + + Args: + normalized_shape (int or list or oneflow.compatible.single_client.Size): input shape from an expected input of size + + ..
math:: + [* \times \text{normalized_shape}[0] \times \text{normalized_shape}[1] \times \ldots \times \text{normalized_shape}[-1]] + + If a single integer is used, it is treated as a singleton list, and this module will + normalize over the last dimension which is expected to be of that specific size. + + eps: a value added to the denominator for numerical stability. Default: 1e-5 + elementwise_affine: a boolean value that when set to ``True``, this module + has learnable per-element affine parameters initialized to ones (for weights) + and zeros (for biases). Default: ``True``. + + Shape: + - Input: :math:`(N, *)` + - Output: :math:`(N, *)` (same shape as input) + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> input_arr = np.array( + ... [ + ... [ + ... [[-0.16046895, -1.03667831], [-0.34974465, 0.26505867]], + ... [[-1.24111986, -0.53806001], [1.72426331, 0.43572459]], + ... ], + ... [ + ... [[-0.77390957, -0.42610624], [0.16398858, -1.35760343]], + ... [[1.07541728, 0.11008703], [0.26361224, -0.48663723]], + ... ], + ... ], + ... dtype=np.float32, + ... ) + + >>> x = flow.Tensor(input_arr) + >>> m = flow.nn.LayerNorm(2) + >>> y = m(x).numpy() + >>> y + array([[[[ 0.99997395, -0.99997395], + [-0.999947 , 0.999947 ]], + <BLANKLINE> + [[-0.9999596 , 0.9999594 ], + [ 0.999988 , -0.999988 ]]], + <BLANKLINE> + <BLANKLINE> + [[[-0.9998343 , 0.9998341 ], + [ 0.9999914 , -0.9999914 ]], + <BLANKLINE> + [[ 0.99997866, -0.99997866], + [ 0.9999646 , -0.9999646 ]]]], dtype=float32) + + """ + __constants__ = ["normalized_shape", "eps", "elementwise_affine"] + normalized_shape: Tuple[int, ...] + eps: float + elementwise_affine: bool + + def __init__( + self, + normalized_shape: _shape_t, + eps: float = 1e-5, + elementwise_affine: bool = True, + ) -> None: + super(LayerNorm, self).__init__() + if isinstance(normalized_shape, int): + # mypy error: incompatible types in assignment + normalized_shape = (normalized_shape,) # type: ignore[assignment] + self.normalized_shape = tuple(normalized_shape) # type: ignore[arg-type] + + self.epsilon = eps + self.elementwise_affine = elementwise_affine + if self.elementwise_affine: + self.weight = flow.nn.Parameter(flow.Tensor(*self.normalized_shape)) + self.bias = flow.nn.Parameter(flow.Tensor(*self.normalized_shape)) + else: + self.register_parameter("weight", None) + self.register_parameter("bias", None) + self.reset_parameters() + # An integer specifies which axis to normalize at first, defaults to 1. + self.begin_norm_axis = 1 + # An integer specifies the first axis the affine params apply to, defaults to 1 in 'NCHW' format + self.begin_params_axis = 1 + + def reset_parameters(self) -> None: + if self.elementwise_affine: + init.ones_(self.weight) + init.zeros_(self.bias) + + def forward(self, x): + assert len(x.shape) > len( + self.normalized_shape + ), "Input tensor's number of dimensions must be greater than the length of normalized_shape!"
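+ # Everything before begin_norm_axis is treated as batch dimensions; the + # trailing len(self.normalized_shape) axes are the ones being normalized.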
+ self.begin_norm_axis = len(x.shape) - len(self.normalized_shape) + self.begin_params_axis = len(x.shape) - len(self.normalized_shape) + + if x.device == flow.device("cpu"): + reduce_axis = [] + for dim in range(len(x.shape)): + if dim >= self.begin_norm_axis: + reduce_axis.append(dim) + + mean = x.mean(dim=reduce_axis, keepdim=True) + variance = x.var(dim=reduce_axis, keepdim=True) + + axis = self.begin_norm_axis + + params_shape = x.shape[self.begin_params_axis :] + weight = self.weight + bias = self.bias + if len(mean.shape) == 1: + nd_params_shape = [1] * len(x.shape) + nd_params_shape[self.begin_norm_axis] = params_shape[0] + mean = mean.reshape(shape=nd_params_shape) + variance = variance.reshape(shape=nd_params_shape) + + if self.weight is not None and params_shape[0] == self.weight.nelement(): + weight = self.weight.reshape(shape=nd_params_shape) + if self.bias is not None and params_shape[0] == self.bias.nelement(): + bias = self.bias.reshape(shape=nd_params_shape) + elif len(mean.shape) == len(x.shape): + pass + else: + raise ValueError( + "shape of mean and variance should be 1-D or have the same number of axes as x" + ) + + variance += self.epsilon + normalized = (x - mean) * variance.rsqrt() + + # weight and bias (reshaped above when necessary) implement the + # elementwise affine transform; they must be applied exactly once. + if self.weight is not None: + normalized = normalized * weight + if self.bias is not None: + normalized = normalized + bias + return normalized + else: + if self.elementwise_affine: + res = flow.F.layer_norm_affine( + x, + self.weight, + self.bias, + begin_norm_axis=self.begin_norm_axis, + begin_params_axis=self.begin_params_axis, + epsilon=self.epsilon, + ) + else: + res = flow.F.layer_norm( + x, + begin_norm_axis=self.begin_norm_axis, + begin_params_axis=self.begin_params_axis, + epsilon=self.epsilon, + ) + return res + + def extra_repr(self) -> str: + return ( + "{normalized_shape}, eps={epsilon}, " + "elementwise_affine={elementwise_affine}".format(**self.__dict__) + ) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/padding.py b/oneflow/compatible_single_client_python/nn/modules/padding.py new file mode 100644 index 0000000000000000000000000000000000000000..ae2a7b4200e48f8b347fbdc3e3a6efb30d8fab1f --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/padding.py @@ -0,0 +1,193 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from typing import Union + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module + + +@oneflow_export("nn.ReplicationPad2d") +@experimental_api +class ReplicationPad2d(Module): + r"""The interface is consistent with PyTorch.
+ The documentation is referenced from: + https://pytorch.org/docs/stable/generated/torch.nn.ReplicationPad2d.html?highlight=replicationpad2d#torch.nn.ReplicationPad2d + + Pads the input tensor using the replication of the input boundary. + + Args: + padding (Union[int, tuple, list]): the size of the padding. If is `int`, uses the same padding in all boundaries. If a 4-`tuple`, uses (:math:`\mathrm{padding_{left}}`, :math:`\mathrm{padding_{right}}`, :math:`\mathrm{padding_{top}}`, :math:`\mathrm{padding_{bottom}}`) + + Shape: + - Input: :math:`(N, C, H_{in}, W_{in})` + - Output: :math:`(N, C, H_{out}, W_{out})` where + + :math:`H_{out} = H_{in} + \mathrm{padding_{top}} + \mathrm{padding_{bottom}}` + + :math:`W_{out} = W_{in} + \mathrm{padding_{left}} + \mathrm{padding_{right}}` + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> replicationpad_layer_0 = flow.nn.ReplicationPad2d((2, 2, 1, 1)) + >>> input = flow.Tensor(np.arange(18).reshape((1, 2, 3, 3)).astype(np.float32)) + >>> input_int = flow.Tensor(np.arange(18).reshape((1, 2, 3, 3)).astype(np.int32)) + >>> output = replicationpad_layer_0(input) + >>> output.shape + flow.Size([1, 2, 5, 7]) + >>> output + tensor([[[[ 0., 0., 0., 1., 2., 2., 2.], + [ 0., 0., 0., 1., 2., 2., 2.], + [ 3., 3., 3., 4., 5., 5., 5.], + [ 6., 6., 6., 7., 8., 8., 8.], + [ 6., 6., 6., 7., 8., 8., 8.]], + <BLANKLINE> + [[ 9., 9., 9., 10., 11., 11., 11.], + [ 9., 9., 9., 10., 11., 11., 11.], + [12., 12., 12., 13., 14., 14., 14.], + [15., 15., 15., 16., 17., 17., 17.], + [15., 15., 15., 16., 17., 17., 17.]]]], dtype=oneflow.float32) + >>> output_int = replicationpad_layer_0(input_int) + >>> output_int + tensor([[[[ 0., 0., 0., 1., 2., 2., 2.], + [ 0., 0., 0., 1., 2., 2., 2.], + [ 3., 3., 3., 4., 5., 5., 5.], + [ 6., 6., 6., 7., 8., 8., 8.], + [ 6., 6., 6., 7., 8., 8., 8.]], + <BLANKLINE> + [[ 9., 9., 9., 10., 11., 11., 11.], + [ 9., 9., 9., 10., 11., 11., 11.], + [12., 12., 12., 13., 14., 14., 14.], + [15., 15., 15., 16., 17., 17., 17.], + [15., 15., 15., 16., 17., 17., 17.]]]], dtype=oneflow.float32) + + """ + + def __init__(self, padding: Union[int, tuple, list]): + super().__init__() + + if isinstance(padding, (tuple, list)): + assert len(padding) == 4, ValueError("Length of padding must be 4") + boundary = [padding[0], padding[1], padding[2], padding[3]] + elif isinstance(padding, int): + boundary = [padding, padding, padding, padding] + else: + raise ValueError("padding must be int or list or tuple!") + + self.padding = boundary + + def forward(self, x): + _, _, h, w = x.shape + if ( + self.padding[2] < h + and self.padding[3] < h + and self.padding[0] < w + and self.padding[1] < w + ): + return flow.F.pad(x, pad=self.padding, mode="replicate") + else: + raise AssertionError( + "Padding size should be less than the corresponding input dimension. Please check." + ) + + +@oneflow_export("nn.ReflectionPad2d") +@experimental_api +class ReflectionPad2d(Module): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: + https://pytorch.org/docs/stable/generated/torch.nn.ReflectionPad2d.html + + + This operator pads the input tensor using the reflection of the input boundary. 
+ + Args: + padding (Union[int,tuple]): The size or boundary of padding. If it is an `int`, uses the same padding in all dimensions; if a 4-dims `tuple`, uses :math:`(\text{padding}_{\text{left}}, \text{padding}_{\text{right}}, \text{padding}_{\text{top}}, \text{padding}_{\text{bottom}} )` + + Returns: + Tensor: Returns a new tensor which is result of the reflection padding of the input tensor. + + Shape: + - Input: :math:`(N, C, H_{\text{in}}, W_{\text{in}})` + - Output: :math:`(N, C, H_{\text{out}}, W_{\text{out}})` where + + :math:`H_{\text{out}} = H_{\text{in}} + \text{padding}_{\text{top}} + \text{padding}_{\text{bottom}}` + + :math:`W_{\text{out}} = W_{\text{in}} + \text{padding}_{\text{left}} + \text{padding}_{\text{right}}` + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> input = flow.Tensor(np.arange(18).reshape((1, 2, 3, 3)), dtype=flow.float32) + >>> m = flow.nn.ReflectionPad2d((2, 2, 1, 1)) + >>> out = m(input) + >>> out + tensor([[[[ 5., 4., 3., 4., 5., 4., 3.], + [ 2., 1., 0., 1., 2., 1., 0.], + [ 5., 4., 3., 4., 5., 4., 3.], + [ 8., 7., 6., 7., 8., 7., 6.], + [ 5., 4., 3., 4., 5., 4., 3.]], + <BLANKLINE> + [[14., 13., 12., 13., 14., 13., 12.], + [11., 10., 9., 10., 11., 10., 9.], + [14., 13., 12., 13., 14., 13., 12.], + [17., 16., 15., 16., 17., 16., 15.], + [14., 13., 12., 13., 14., 13., 12.]]]], dtype=oneflow.float32) + + """ + + def __init__(self, padding: Union[int, tuple]) -> None: + super().__init__() + if isinstance(padding, tuple): + assert len(padding) == 4, ValueError("Padding length must be 4") + boundary = [padding[0], padding[1], padding[2], padding[3]] + elif isinstance(padding, int): + boundary = [padding, padding, padding, padding] + else: + raise ValueError("padding must be int or tuple!") + self.padding = boundary + + def forward(self, x): + H, W = x.shape[2], x.shape[3] + if ( + self.padding[2] < H + and self.padding[3] < H + and self.padding[0] < W + and self.padding[1] < W + ): + return flow.F.pad(x, pad=self.padding, mode="reflect") + else: + raise ValueError( + "padding size should be less than the corresponding input dimension!" + ) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/permute.py b/oneflow/compatible_single_client_python/nn/modules/permute.py new file mode 100644 index 0000000000000000000000000000000000000000..4373398eb2efd7e4b16a8837e5bfcc47e2b98d5b --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/permute.py @@ -0,0 +1,72 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from typing import Optional, Sequence + + +class Permute(Module): + def __init__(self, *dims) -> None: + super().__init__() + self.perm = list(*dims) + + def forward(self, x): + assert len(self.perm) == len(x.shape) + new_perm = [] + for dim in self.perm: + if dim < 0: + dim += len(self.perm) + assert dim >= 0 and dim < len( + x.shape + ), "Invalid dim0 {}, len(shape): {}".format(dim, len(x.shape)) + new_perm.append(dim) + return flow.F.transpose(x, perm=new_perm) + + +@register_tensor_op("permute") +@experimental_api +def permute_op(tensor, *dims): + r"""Returns a view of the original tensor with its dimensions permuted. + + Args: + *dims (int...): The desired ordering of dimensions + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32) + >>> out = input.permute(1, 0, 2, 3).shape + >>> out + flow.Size([6, 2, 5, 3]) + + """ + return Permute(dims)(tensor) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/pixelshuffle.py b/oneflow/compatible_single_client_python/nn/modules/pixelshuffle.py new file mode 100644 index 0000000000000000000000000000000000000000..55b501cd69b5d37b03f88cad0e48ab35e1bbd2bf --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/pixelshuffle.py @@ -0,0 +1,118 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible_single_client_python.framework.tensor import Tensor +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module + + +@oneflow_export("nn.PixelShuffle") +@experimental_api +class PixelShuffle(Module): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: + https://pytorch.org/docs/stable/generated/torch.nn.PixelShuffle.html#torch.nn.PixelShuffle + + Rearranges elements in a tensor of shape :math:`(*, C \times r^2, H, W)` + to a tensor of shape :math:`(*, C, H \times r, W \times r)`, where r is an upscale factor. + + This is useful for implementing efficient sub-pixel convolution + with a stride of :math:`1/r`. + + See the paper: + `Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network`_ + by Shi et. al (2016) for more details. 
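+
+    Concretely, the output pixel at :math:`(c,\ h \times r + i,\ w \times r + j)`
+    is taken from input channel :math:`c \times r^2 + i \times r + j` at spatial
+    position :math:`(h, w)`, for :math:`0 \le i, j < r`.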
+
+    Args:
+        upscale_factor (int): factor to increase spatial resolution by
+
+    Shape:
+        - Input: :math:`(*, C_{in}, H_{in}, W_{in})`, where * is zero or more batch dimensions
+        - Output: :math:`(*, C_{out}, H_{out}, W_{out})`, where
+
+    .. math::
+        C_{out} = C_{in} \div \text{upscale_factor}^2
+
+    .. math::
+        H_{out} = H_{in} \times \text{upscale_factor}
+
+    .. math::
+        W_{out} = W_{in} \times \text{upscale_factor}
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import numpy as np
+        >>> flow.enable_eager_execution()
+
+        >>> m = flow.nn.PixelShuffle(upscale_factor=2)
+        >>> x = flow.Tensor(np.random.randn(3, 4, 5, 5))
+        >>> y = m(x)
+        >>> print(y.size())
+        flow.Size([3, 1, 10, 10])
+
+        >>> m = flow.nn.PixelShuffle(upscale_factor=3)
+        >>> x = flow.Tensor(np.random.randn(1, 18, 2, 2))
+        >>> y = m(x)
+        >>> print(y.size())
+        flow.Size([1, 2, 6, 6])
+
+    .. _Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network:
+        https://arxiv.org/abs/1609.05158
+    """
+
+    def __init__(self, upscale_factor: int) -> None:
+        super().__init__()
+        assert upscale_factor > 0, "The upscale factor must be larger than zero"
+        self.upscale_factor = upscale_factor
+
+    def forward(self, input: Tensor) -> Tensor:
+        assert len(input.shape) == 4, "Only 4D tensors are accepted"
+
+        _batch, _channel, _height, _width = input.shape
+        assert (
+            _channel % (self.upscale_factor ** 2) == 0
+        ), "The channels of input tensor must be divisible by (upscale_factor * upscale_factor)"
+
+        _new_c = int(_channel / (self.upscale_factor ** 2))
+
+        # Split the channel dimension into (C_out, r, r), interleave the two
+        # r factors with the spatial dimensions, then merge them into H and W.
+        out = input.reshape(
+            [_batch, _new_c, self.upscale_factor, self.upscale_factor, _height, _width]
+        )
+        out = out.permute(0, 1, 4, 2, 5, 3)
+        out = out.reshape(
+            [
+                _batch,
+                _new_c,
+                _height * self.upscale_factor,
+                _width * self.upscale_factor,
+            ]
+        )
+
+        return out
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/pooling.py b/oneflow/compatible_single_client_python/nn/modules/pooling.py
new file mode 100644
index 0000000000000000000000000000000000000000..e6ed5091adf385386662d92634e51e9c13607d63
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/pooling.py
@@ -0,0 +1,600 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from typing import Optional
+
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.nn.modules.utils import (
+    _single,
+    _pair,
+    _triple,
+)
+from oneflow.compatible_single_client_python.nn.common_types import (
+    _size_1_t,
+    _size_2_t,
+    _size_3_t,
+)
+from oneflow.compatible_single_client_python.ops.nn_ops import (
+    calc_pool_padding,
+    get_dhw_offset,
+)
+
+
+@oneflow_export("nn.AvgPool1d")
+@experimental_api
+class AvgPool1d(Module):
+    r"""Applies a 1D average pooling over an input signal composed of several input planes.
+
+    In the simplest case, the output value of the layer with input size :math:`(N, C, L)`,
+    output :math:`(N, C, L_{out})` and `kernel_size` :math:`k`
+    can be precisely described as:
+
+    .. math::
+
+        out(N_i, C_j, l) = \frac{1}{k} \sum_{m=0}^{k-1}
+        input(N_i, C_j, stride \times l + m)
+
+    If padding is non-zero, then the input is implicitly zero-padded on both sides for padding number of points.
+    The parameters kernel_size, stride, padding can each be an int or a one-element tuple.
+
+    Note:
+        When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding or the
+        input. Sliding windows that would start in the right padded region are ignored.
+
+    Args:
+        kernel_size: the size of the window.
+        stride: the stride of the window. Default value is kernel_size.
+        padding: implicit zero padding to be added on both sides.
+        ceil_mode: when True, will use ceil instead of floor to compute the output shape.
+        count_include_pad: when True, will include the zero-padding in the averaging calculation.
+
+
+    # TODO: fix cuDNN bugs in pooling_1d
+
+    """
+
+    def __init__(
+        self,
+        kernel_size: _size_1_t,
+        stride: Optional[_size_1_t] = None,
+        padding: _size_1_t = 0,
+        ceil_mode: bool = False,
+        count_include_pad: Optional[bool] = None,
+        name: Optional[str] = None,
+    ):
+        # TODO: fix cuDNN bugs in pooling_1d
+        raise NotImplementedError
+
+
+@oneflow_export("nn.AvgPool2d")
+@experimental_api
+class AvgPool2d(Module):
+    r"""Performs 2D average pooling on the input.
+
+    In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`,
+    output :math:`(N, C, H_{out}, W_{out})` and `kernel_size` :math:`(kH, kW)`
+    can be precisely described as:
+
+    .. math::
+
+        out(N_i, C_j, h, w) = \frac{1}{kH * kW} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
+        input(N_i, C_j, stride[0] \times h + m, stride[1] \times w + n)
+
+    Args:
+        kernel_size (Union[int, Tuple[int, int]]): An int or list of ints that has length 1, 2. The size of the window for each dimension of the input Tensor.
+        stride (Union[int, Tuple[int, int]]): An int or list of ints that has length 1, 2. The stride of the sliding window for each dimension of the input Tensor.
+        padding (Tuple[int, int]): An int or list of ints that has length 1, 2. Implicit zero padding to be added on both sides.
+        ceil_mode (bool, defaults to False): When True, will use ceil instead of floor to compute the output shape.
+
+    For example:
+
+    .. 
code-block:: python
+
+        import oneflow.compatible.single_client.experimental as flow
+        import numpy as np
+
+
+        of_avgpool2d = flow.nn.AvgPool2d(
+            kernel_size=(3, 2),
+            padding=0,
+            stride=(2, 1),
+        )
+        x = flow.Tensor(np.random.randn(1, 1, 10, 10))
+        of_y = of_avgpool2d(x)
+
+    """
+
+    def __init__(
+        self,
+        kernel_size: _size_2_t,
+        stride: Optional[_size_2_t] = None,
+        padding: _size_2_t = 0,
+        ceil_mode: bool = False,
+        count_include_pad: Optional[bool] = None,
+        divisor_override: Optional[int] = None,
+        name: Optional[str] = None,
+    ):
+        super().__init__()
+        self.kernel_size = _pair(kernel_size)
+        self.stride = _pair(stride) if (stride is not None) else self.kernel_size
+
+        assert isinstance(padding, int) or isinstance(
+            padding, tuple
+        ), "padding must be an int or a tuple of 2 ints."
+        padding = _pair(padding)
+        padding = [0, 0, *padding]
+
+        assert count_include_pad is None, "count_include_pad not supported yet"
+        assert divisor_override is None, "divisor_override not supported yet"
+
+        self._channel_pos = "channels_first"
+        # TODO(yaochi): align with pytorch when padding is asymmetric
+        self._padding_type, _pads_list = calc_pool_padding(
+            padding, get_dhw_offset(self._channel_pos), 2
+        )
+        self._padding_before = [pad[0] for pad in _pads_list]
+        self._padding_after = [pad[1] for pad in _pads_list]
+        self.ceil_mode = ceil_mode
+
+    def forward(self, x):
+        res = flow.F.avg_pool_2d(
+            x,
+            kernel_size=self.kernel_size,
+            stride=self.stride,
+            padding=self._padding_type,
+            padding_before=self._padding_before,
+            padding_after=self._padding_after,
+            ceil_mode=self.ceil_mode,
+            data_format=self._channel_pos,
+        )
+        return res
+
+
+@oneflow_export("nn.AvgPool3d")
+@experimental_api
+class AvgPool3d(Module):
+    r"""Applies a 3D average pooling over an input signal composed of several input planes.
+
+    In the simplest case, the output value of the layer with input size :math:`(N, C, D, H, W)`,
+    output :math:`(N, C, D_{out}, H_{out}, W_{out})` and `kernel_size` :math:`(kD, kH, kW)`
+    can be precisely described as:
+
+    .. math::
+
+        out(N_i, C_j, d, h, w) = \frac{1}{kD * kH * kW} \sum_{k=0}^{kD-1} \sum_{m=0}^{kH-1} \sum_{n=0}^{kW-1}
+        input(N_i, C_j, stride[0] \times d + k, stride[1] \times h + m, stride[2] \times w + n)
+
+    If padding is non-zero, then the input is implicitly zero-padded on all three sides for padding number of points.
+
+    Note:
+        When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding or the
+        input. Sliding windows that would start in the right padded region are ignored.
+
+    Args:
+        kernel_size: the size of the window.
+        stride: the stride of the window. Default value is kernel_size.
+        padding: implicit zero padding to be added on all three sides.
+        ceil_mode: when True, will use ceil instead of floor to compute the output shape.
+        count_include_pad: when True, will include the zero-padding in the averaging calculation.
+        divisor_override: if specified, it will be used as divisor, otherwise kernel_size will be used.
+
+    Shape:
+        - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})`
+        - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})`, where
+
+          .. math::
+              D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{kernel_size}[0]}{\text{stride}[0]} + 1\right\rfloor
+
+          .. math::
+              H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{kernel_size}[1]}{\text{stride}[1]} + 1\right\rfloor
+
+          .. 
math::
+              W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{kernel_size}[2]}{\text{stride}[2]} + 1\right\rfloor
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import numpy as np
+
+        >>> flow.enable_eager_execution()
+        >>> inputarr = np.random.randn(9, 7, 11, 32, 20)
+        >>> of_avgpool3d = flow.nn.AvgPool3d(kernel_size=(2, 2, 2), padding=(0, 0, 0), stride=(1, 1, 1))
+        >>> x = flow.Tensor(inputarr)
+        >>> y = of_avgpool3d(x)
+
+    """
+
+    def __init__(
+        self,
+        kernel_size: _size_3_t,
+        stride: Optional[_size_3_t] = None,
+        padding: _size_3_t = 0,
+        ceil_mode: bool = False,
+        count_include_pad: Optional[bool] = None,
+        divisor_override: Optional[int] = None,
+    ):
+        super().__init__()
+        kernel_size = _triple(kernel_size)
+        stride = _triple(stride) if (stride is not None) else kernel_size
+
+        assert isinstance(padding, int) or isinstance(
+            padding, tuple
+        ), "padding must be an int or a tuple of 3 ints."
+        padding = _triple(padding)
+        assert padding == (0, 0, 0), "padding > 0 not supported yet"
+        padding = [0, 0, *padding]
+
+        assert count_include_pad is None, "count_include_pad not supported yet"
+        assert divisor_override is None, "divisor_override not supported yet"
+
+        _channel_pos = "channels_first"
+        # TODO(yaochi): align with pytorch when padding is asymmetric
+        _padding_type, _pads_list = calc_pool_padding(
+            padding, get_dhw_offset(_channel_pos), 3
+        )
+        _padding_before = [pad[0] for pad in _pads_list]
+        _padding_after = [pad[1] for pad in _pads_list]
+
+        self._op = (
+            flow.builtin_op("avg_pool_3d")
+            .Attr("data_format", _channel_pos)
+            .Attr("pool_size", kernel_size)
+            .Attr("strides", stride)
+            .Attr("ceil_mode", ceil_mode)
+            .Attr("padding", _padding_type)
+            .Attr("padding_before", _padding_before)
+            .Attr("padding_after", _padding_after)
+            .Input("x")
+            .Output("y")
+            .Build()
+        )
+
+    def forward(self, x):
+        res = self._op(x)[0]
+        return res
+
+
+@oneflow_export("nn.MaxPool1d")
+@experimental_api
+class MaxPool1d(Module):
+    r"""The interface is consistent with PyTorch.
+    The documentation is referenced from: https://pytorch.org/docs/stable/generated/torch.nn.MaxPool1d.html#torch.nn.MaxPool1d
+
+    Applies a 1D max pooling over an input signal composed of several input planes.
+
+    In the simplest case, the output value of the layer with input size :math:`(N, C, L)`
+    and output :math:`(N, C, L_{out})` can be precisely described as:
+
+    .. math::
+        out(N_i, C_j, k) = \max_{m=0, \ldots, \text{kernel\_size} - 1}
+        input(N_i, C_j, stride \times k + m)
+
+    If :attr:`padding` is non-zero, then the input is implicitly padded with minimum value on both sides
+    for :attr:`padding` number of points. :attr:`dilation` is the stride between the elements within the
+    sliding window. This `link`_ has a nice visualization of the pooling parameters.
+
+    Note:
+        When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
+        or the input. Sliding windows that would start in the right padded region are ignored.
+
+    Args:
+        kernel_size: The size of the sliding window, must be > 0.
+        stride: The stride of the sliding window, must be > 0. Default value is :attr:`kernel_size`.
+        padding: Implicit negative infinity padding to be added on both sides, must be >= 0 and <= kernel_size / 2.
+        dilation: The stride between elements within a sliding window, must be > 0.
+        return_indices: If ``True``, will return the argmax along with the max values.
+ Useful for :class:`torch.nn.MaxUnpool1d` later + ceil_mode: If ``True``, will use `ceil` instead of `floor` to compute the output shape. This + ensures that every element in the input tensor is covered by a sliding window. + + Shape: + - Input: :math:`(N, C, L_{in})` + - Output: :math:`(N, C, L_{out})`, where + + .. math:: + L_{out} = \left\lfloor \frac{L_{in} + 2 \times \text{padding} - \text{dilation} + \times (\text{kernel_size} - 1) - 1}{\text{stride}} + 1\right\rfloor + + """ + + def __init__( + self, + kernel_size: _size_1_t, + stride: Optional[_size_1_t] = None, + padding: _size_1_t = 0, + dilation: _size_1_t = 1, + return_indices: bool = False, + ceil_mode: bool = False, + ): + # TODO: fix cuDNN bugs in pooling_1d + raise NotImplementedError + + +@oneflow_export("nn.MaxPool2d") +@experimental_api +class MaxPool2d(Module): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html#torch.nn.MaxPool2d + + Applies a 2D max pooling over an input signal composed of several input planes. + + In the simplest case, the output value of the layer with input size :math:`(N, C, H, W)`, + output :math:`(N, C, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kH, kW)` + can be precisely described as: + + .. math:: + \begin{aligned} + out(N_i, C_j, h, w) ={} & \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\ + & \text{input}(N_i, C_j, \text{stride[0]} \times h + m, + \text{stride[1]} \times w + n) + \end{aligned} + + If :attr:`padding` is non-zero, then the input is implicitly minimum value padded on both sides + for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the kernel points. + It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does. + + Note: + When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding + or the input. Sliding windows that would start in the right padded region are ignored. + The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be: + - a single ``int`` -- in which case the same value is used for the height and width dimension + - a ``tuple`` of two ints -- in which case, the first `int` is used for the height dimension, + and the second `int` for the width dimension + + Args: + kernel_size: the size of the window to take a max over + stride: the stride of the window. Default value is :attr:`kernel_size` + padding: implicit minimum value padding to be added on both sides + dilation: a parameter that controls the stride of elements in the window + return_indices: if ``True``, will return the max indices along with the outputs. + Useful for :class:`torch.nn.MaxUnpool2d` later + ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape + + Shape: + - Input: :math:`(N, C, H_{in}, W_{in})` + - Output: :math:`(N, C, H_{out}, W_{out})`, where + + .. math:: + H_{out} = \left\lfloor\frac{H_{in} + 2 * \text{padding[0]} - \text{dilation[0]} + \times (\text{kernel_size[0]} - 1) - 1}{\text{stride[0]}} + 1\right\rfloor + .. math:: + W_{out} = \left\lfloor\frac{W_{in} + 2 * \text{padding[1]} - \text{dilation[1]} + \times (\text{kernel_size[1]} - 1) - 1}{\text{stride[1]}} + 1\right\rfloor + + For example: + + .. 
code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import numpy as np
+        >>> flow.enable_eager_execution()
+
+        >>> kernel_size, stride, padding = (3, 3), (1, 1), (1, 2)
+        >>> m = flow.nn.MaxPool2d(kernel_size, stride, padding)
+        >>> np.random.seed(0)
+        >>> x = flow.Tensor(np.random.rand(1, 1, 5, 3))
+        >>> y = m(x)
+        >>> y #doctest: +ELLIPSIS
+        tensor([[[[0.5488, 0.7152, 0.7152, 0.7152, 0.6459],
+                  ...
+                  [0.568 , 0.9256, 0.9256, 0.9256, 0.5289]]]], dtype=oneflow.float32)
+
+        >>> kernel_size, stride, padding = (2, 3), (4, 5), (1, 2)
+        >>> m = flow.nn.MaxPool2d(kernel_size, stride, padding)
+        >>> x = flow.Tensor(np.random.randn(9, 7, 32, 20))
+        >>> y = m(x)
+        >>> y.size()
+        flow.Size([9, 7, 9, 5])
+
+    """
+
+    def __init__(
+        self,
+        kernel_size: _size_2_t,
+        stride: Optional[_size_2_t] = None,
+        padding: _size_2_t = 0,
+        dilation: _size_2_t = 1,
+        return_indices: bool = False,
+        ceil_mode: bool = False,
+    ):
+        super().__init__()
+        self.kernel_size = _pair(kernel_size)
+        self.strides = _pair(stride) if (stride is not None) else self.kernel_size
+        data_format = "NCHW"
+        self.channel_pos = (
+            "channels_last" if data_format == "NHWC" else "channels_first"
+        )
+
+        assert return_indices is False, "Only support return_indices==False for now!"
+        assert dilation == 1 or dilation == (1, 1), "Only support dilation==1 for now!"
+
+        padding = _pair(padding)
+        if len(padding) == 2:
+            if data_format == "NCHW":
+                padding = (0, 0, padding[0], padding[1])
+            else:
+                raise ValueError("Only NCHW data_format is supported!")
+        else:
+            raise ValueError("padding must be an int or a tuple of 2 ints!")
+        self.padding_type, pads_list = calc_pool_padding(
+            padding, get_dhw_offset(self.channel_pos), 2
+        )
+        self.padding_before = [pad[0] for pad in pads_list]
+        self.padding_after = [pad[1] for pad in pads_list]
+        self.ceil_mode = ceil_mode
+
+    def forward(self, x):
+        return flow.F.max_pool_2d(
+            x,
+            kernel_size=self.kernel_size,
+            stride=self.strides,
+            padding=self.padding_type,
+            padding_before=self.padding_before,
+            padding_after=self.padding_after,
+            ceil_mode=self.ceil_mode,
+            data_format=self.channel_pos,
+        )
+
+
+@oneflow_export("nn.MaxPool3d")
+@experimental_api
+class MaxPool3d(Module):
+    r"""The interface is consistent with PyTorch.
+    The documentation is referenced from: https://pytorch.org/docs/stable/generated/torch.nn.MaxPool3d.html#torch.nn.MaxPool3d
+
+    Applies a 3D max pooling over an input signal composed of several input planes.
+
+    In the simplest case, the output value of the layer with input size :math:`(N, C, D, H, W)`,
+    output :math:`(N, C, D_{out}, H_{out}, W_{out})` and :attr:`kernel_size` :math:`(kD, kH, kW)`
+    can be precisely described as:
+
+    .. math::
+        \begin{aligned}
+            \text{out}(N_i, C_j, d, h, w) ={} & \max_{k=0, \ldots, kD-1} \max_{m=0, \ldots, kH-1} \max_{n=0, \ldots, kW-1} \\
+                                              & \text{input}(N_i, C_j, \text{stride[0]} \times d + k,
+                                                             \text{stride[1]} \times h + m, \text{stride[2]} \times w + n)
+        \end{aligned}
+
+    If :attr:`padding` is non-zero, then the input is implicitly padded with the minimum value on both sides
+    for :attr:`padding` number of points. :attr:`dilation` controls the spacing between the kernel points.
+    It is harder to describe, but this `link`_ has a nice visualization of what :attr:`dilation` does.
+
+    Note:
+        When ceil_mode=True, sliding windows are allowed to go off-bounds if they start within the left padding
+        or the input. Sliding windows that would start in the right padded region are ignored.
+ + The parameters :attr:`kernel_size`, :attr:`stride`, :attr:`padding`, :attr:`dilation` can either be: + + - a single ``int`` -- in which case the same value is used for the depth, height and width dimension + - a ``tuple`` of three ints -- in which case, the first `int` is used for the depth dimension, + the second `int` for the height dimension and the third `int` for the width dimension + + Args: + kernel_size: the size of the window to take a max over + stride: the stride of the window. Default value is :attr:`kernel_size` + padding: implicit minimum value padding to be added on all three sides + dilation: a parameter that controls the stride of elements in the window + return_indices: if ``True``, will return the max indices along with the outputs. + Useful for :class:`torch.nn.MaxUnpool3d` later + ceil_mode: when True, will use `ceil` instead of `floor` to compute the output shape + + Shape: + - Input: :math:`(N, C, D_{in}, H_{in}, W_{in})` + - Output: :math:`(N, C, D_{out}, H_{out}, W_{out})`, where + + .. math:: + D_{out} = \left\lfloor\frac{D_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times + (\text{kernel_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor + + .. math:: + H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times + (\text{kernel_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor + + .. math:: + W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[2] - \text{dilation}[2] \times + (\text{kernel_size}[2] - 1) - 1}{\text{stride}[2]} + 1\right\rfloor + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> kernel_size, stride, padding = (3, 3, 3), (1, 1, 1), (1, 1, 2) + >>> m = flow.nn.MaxPool3d(kernel_size, stride, padding) + >>> np.random.seed(0) + >>> x = flow.Tensor(np.random.rand(1, 1, 3, 5, 3)) + >>> y = m(x) + >>> y #doctest: +ELLIPSIS + tensor([[[[[0.7782, 0.87 , 0.9786, 0.9786, 0.9786], + ... + [0.9447, 0.9447, 0.9447, 0.6668, 0.6668]]]]], dtype=oneflow.float32) + >>> kernel_size, stride, padding = (2, 2, 3), (3, 4, 5), (2, 1, 2) + >>> m = flow.nn.MaxPool3d(kernel_size, stride, padding) + >>> x = flow.Tensor(np.random.randn(9, 7, 11, 32, 20)) + >>> y = m(x) + >>> y.size() + flow.Size([9, 7, 5, 9, 5]) + + """ + + def __init__( + self, + kernel_size: _size_3_t, + stride: Optional[_size_3_t] = None, + padding: _size_3_t = 0, + dilation: _size_3_t = 1, + return_indices: bool = False, + ceil_mode: bool = False, + ): + super().__init__() + kernel_size = _triple(kernel_size) + strides = _triple(stride) if (stride is not None) else kernel_size + data_format = "NCDHW" + channel_pos = "channels_last" if data_format == "NDHWC" else "channels_first" + + assert return_indices is False, "Only support return_indices==False for now!" + assert dilation == 1 or dilation == ( + 1, + 1, + 1, + ), "Only support dilation==1 for now!" 
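+        # The padding handling below expands the 3-element (D, H, W) tuple to
+        # the 5-element NCDHW layout expected by calc_pool_padding; the batch
+        # and channel dimensions receive no padding.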
+ + padding = _triple(padding) + if len(padding) == 3: + if data_format == "NCDHW": + padding = (0, 0, padding[0], padding[1], padding[2]) + else: + raise ValueError("error padding param!") + else: + raise ValueError("error padding param!") + + padding_type, pads_list = calc_pool_padding( + padding, get_dhw_offset(channel_pos), 3 + ) + padding_before = [pad[0] for pad in pads_list] + padding_after = [pad[1] for pad in pads_list] + + self._op = ( + flow.builtin_op("max_pool_3d") + .Attr("data_format", channel_pos) + .Attr("pool_size", kernel_size) + .Attr("strides", strides) + .Attr("ceil_mode", ceil_mode) + .Attr("padding", padding_type) + .Attr("padding_before", padding_before) + .Attr("padding_after", padding_after) + .Input("x") + .Output("y") + .Build() + ) + + def forward(self, x): + return self._op(x)[0] + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/reduce_ops.py b/oneflow/compatible_single_client_python/nn/modules/reduce_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..ee76a364f9dfe8d824fcfe81170fff515af6a600 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/reduce_ops.py @@ -0,0 +1,222 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import collections +from typing import Optional, Sequence, Union + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.nn.modules.utils import _check_axis + + +def _build_reduce_op(op_type_name, keepdims): + return ( + flow.builtin_op(op_type_name) + .Input("input_tensor") + .Output("output_tensor") + .Attr("keepdims", keepdims) + .Build() + ) + + +class Sum(Module): + def __init__( + self, axis: Optional[Union[int, Sequence[int]]] = None, keepdims: bool = False + ) -> None: + super().__init__() + + self.axis = axis + self.keepdims = keepdims + self._op = _build_reduce_op("reduce_sum", keepdims) + + def forward(self, input): + axis_checked = _check_axis(self.axis, input.shape) + if len(axis_checked) == 0: + return input + return self._op(input, axis=axis_checked)[0] + + +@oneflow_export("sum") +@register_tensor_op("sum") +@experimental_api +def _sum(input, dim=None, keepdim=False): + r"""Computes the sum of row of elements in a tensor in the given axis, if the axis is None, sum of all elements will be caculated. + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + >>> input = flow.Tensor([[1, 2, 3], [4, 5, 6]]) + >>> flow.sum(input) + tensor([21.], dtype=oneflow.float32) + >>> flow.sum(input, dim=0) + tensor([5., 7., 9.], dtype=oneflow.float32) + >>> flow.sum(input, dim=1) + tensor([ 6., 15.], dtype=oneflow.float32) + + """ + + return Sum(dim, keepdim)(input) + + +class Mean(Module): + def __init__( + self, axis: Optional[Union[int, Sequence[int]]] = None, keepdims: bool = False + ) -> None: + super().__init__() + + self.axis = axis + self.keepdims = keepdims + if axis is None: + self.axes = [] + else: + self.axes = list(axis) if isinstance(axis, collections.Sized) else [axis] + + def forward(self, input): + axis_checked = _check_axis(self.axis, input.shape) + if len(axis_checked) == 0: + return input + reduce_sum = flow.experimental.sum(input, dim=self.axis, keepdim=self.keepdims) + reduce_count = 1 + if len(self.axes) == 0: + for dim in input.shape: + reduce_count *= dim + else: + for i in self.axes: + reduce_count *= input.shape[i] + return flow.experimental.mul(reduce_sum, 1.0 / reduce_count) + + +@oneflow_export("mean") +@register_tensor_op("mean") +@experimental_api +def _mean(input, dim=None, keepdim=False): + r"""Computes the mean of row of elements in a tensor in the given axis, if the axis is None, mean of all elements will be caculated. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + >>> input = flow.Tensor([[1, 2, 3], [4, 5, 6]]) + >>> flow.mean(input) + tensor([3.5], dtype=oneflow.float32) + >>> flow.mean(input, dim=0) + tensor([2.5, 3.5, 4.5], dtype=oneflow.float32) + >>> flow.mean(input, dim=1) + tensor([2., 5.], dtype=oneflow.float32) + + """ + + return Mean(dim, keepdim)(input) + + +class Min(Module): + def __init__( + self, axis: Optional[Union[int, Sequence[int]]] = None, keepdims: bool = False + ) -> None: + super().__init__() + + self.axis = axis + self.keepdims = keepdims + self._op = _build_reduce_op("reduce_min", keepdims) + + def forward(self, input): + axis_checked = _check_axis(self.axis, input.shape) + if len(axis_checked) == 0: + return input + return self._op(input, axis=axis_checked)[0] + + +@oneflow_export("min") +@register_tensor_op("min") +@experimental_api +def _min(input, dim=None, keepdim=False): + r"""Computes the minimum value of all elements in the input tensor. + + For example: + + .. 
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + >>> input = flow.Tensor([[4, 1, 5], [2, 6, 3]]) + >>> flow.min(input) + tensor([1.], dtype=oneflow.float32) + >>> flow.min(input, dim=0) + tensor([2., 1., 3.], dtype=oneflow.float32) + >>> flow.min(input, dim=1) + tensor([1., 2.], dtype=oneflow.float32) + + """ + + return Min(dim, keepdim)(input) + + +class Max(Module): + def __init__( + self, axis: Optional[Union[int, Sequence[int]]] = None, keepdims: bool = False + ) -> None: + super().__init__() + + self.axis = axis + self.keepdims = keepdims + self._op = _build_reduce_op("reduce_max", keepdims) + + def forward(self, input): + axis_checked = _check_axis(self.axis, input.shape) + if len(axis_checked) == 0: + return input + return self._op(input, axis=axis_checked)[0] + + +@oneflow_export("max") +@register_tensor_op("max") +@experimental_api +def _max(input, dim=None, keepdim=False): + r"""Computes the maximum value of all elements in the input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + >>> input = flow.Tensor([[4, 1, 5], [2, 6, 3]]) + >>> flow.max(input) + tensor([6.], dtype=oneflow.float32) + >>> flow.max(input, dim=0) + tensor([4., 6., 5.], dtype=oneflow.float32) + >>> flow.max(input, dim=1) + tensor([5., 6.], dtype=oneflow.float32) + + """ + + return Max(dim, keepdim)(input) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/repeat.py b/oneflow/compatible_single_client_python/nn/modules/repeat.py new file mode 100644 index 0000000000000000000000000000000000000000..eccee128c8e3899f663bc5f9277de6af331d6211 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/repeat.py @@ -0,0 +1,103 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+"""
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op
+
+
+class Repeat(Module):
+    def __init__(self, sizes) -> None:
+        super().__init__()
+        self.sizes = sizes
+
+    def forward(self, input):
+        repeat = self.sizes
+        for repeat_v in repeat:
+            assert repeat_v > 0
+        input_shape = input.shape
+        assert len(repeat) >= len(input_shape)
+        # Strategy: insert a length-1 axis in front of every repeated axis of
+        # size > 1, expand along the new axes, then merge each pair back into
+        # a single output axis.
+        in_reshape = []
+        out_reshape = []
+        expand_dim = []
+        diff = len(repeat) - len(input_shape)
+        for i in range(len(repeat) - 1, -1, -1):
+            if i >= diff:
+                if repeat[i] > 1:
+                    if input_shape[i - diff] > 1:
+                        in_reshape.insert(0, input_shape[i - diff])
+                        in_reshape.insert(0, 1)
+                        expand_dim.insert(0, input_shape[i - diff])
+                        expand_dim.insert(0, repeat[i])
+                        out_reshape.insert(0, input_shape[i - diff] * repeat[i])
+                    else:
+                        in_reshape.insert(0, input_shape[i - diff])
+                        expand_dim.insert(0, repeat[i])
+                        out_reshape.insert(0, repeat[i])
+                else:  # repeat[i] == 1
+                    in_reshape.insert(0, input_shape[i - diff])
+                    expand_dim.insert(0, input_shape[i - diff])
+                    out_reshape.insert(0, input_shape[i - diff])
+            else:  # i < diff: a new leading dimension is created
+                expand_dim.insert(0, repeat[i])
+                out_reshape.insert(0, repeat[i])
+
+        new_tensor = flow.experimental.reshape(input, in_reshape)
+        tmp_tensor = new_tensor.expand(*expand_dim)
+        out = flow.experimental.reshape(tmp_tensor, out_reshape)
+        return out
+
+
+@oneflow_export("repeat")
+@register_tensor_op("repeat")
+@experimental_api
+def repeat_op(x, sizes):
+    """This operator repeats the input tensor to a larger size along the specified dimensions.
+
+    Args:
+        x (oneflow.compatible.single_client.Tensor): The input Tensor.
+        sizes (Sequence[int]): The number of times to repeat this tensor along each dimension.
+
+    Returns:
+        oneflow.compatible.single_client.Tensor: The result Tensor.
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import numpy as np
+        >>> flow.enable_eager_execution()
+
+        >>> x = np.array([[[[0, 1]],
+        ...               [[2, 3]],
+        ...               [[4, 5]]]]).astype(np.int32)
+
+        >>> input = flow.Tensor(x)
+        >>> out = input.repeat(sizes=(1, 1, 2, 2))
+        >>> out.shape
+        flow.Size([1, 3, 2, 4])
+    """
+    return Repeat(sizes=sizes)(x)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/reshape.py b/oneflow/compatible_single_client_python/nn/modules/reshape.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f4015aa4b3aec66e1bb26ea6fdce316af784906
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/reshape.py
@@ -0,0 +1,128 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from typing import Sequence + + +class Reshape(Module): + def __init__(self, shape: Sequence[int]) -> None: + super().__init__() + self.shape = shape + + def forward(self, x): + return flow.F.reshape(x, shape=self.shape) + + +@oneflow_export("reshape") +@register_tensor_op("reshape") +@experimental_api +def reshape_op(x, shape: Sequence[int] = None): + """This operator reshapes a Tensor. + + We can set one dimension in `shape` as `-1`, the operator will infer the complete shape. + + Args: + x: A Tensor. + shape: Shape of the output tensor. + Returns: + A Tensor has the same type as `x`. + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array( + ... [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ... ).astype(np.float32) + >>> input = flow.Tensor(x) + + >>> y = flow.reshape(input, shape=[2, 2, 2, -1]).shape + >>> y + flow.Size([2, 2, 2, 2]) + + """ + return Reshape(shape=shape)(x) + + +@oneflow_export("view") +@register_tensor_op("view") +@experimental_api +def view_op(x, shape: Sequence[int] = None): + r""" + The interface is consistent with PyTorch. + The documentation is referenced from: https://pytorch.org/docs/stable/generated/torch.Tensor.view.html + + Returns a new tensor with the same data as the :attr:`self` tensor but of a + different :attr:`shape`. + + The returned tensor shares the same data and must have the same number + of elements, but may have a different size. For a tensor to be viewed, the new + view size must be compatible with its original size and stride, i.e., each new + view dimension must either be a subspace of an original dimension, or only span + across original dimensions :math:`d, d+1, \dots, d+k` that satisfy the following + contiguity-like condition that :math:`\forall i = d, \dots, d+k-1`, + + .. math:: + + \text{stride}[i] = \text{stride}[i+1] \times \text{size}[i+1] + + Otherwise, it will not be possible to view :attr:`self` tensor as :attr:`shape` + without copying it (e.g., via :meth:`contiguous`). When it is unclear whether a + :meth:`view` can be performed, it is advisable to use :meth:`reshape`, which + returns a view if the shapes are compatible, and copies (equivalent to calling + :meth:`contiguous`) otherwise. + + Args: + x: A Tensor. + shape: Shape of the output tensor. + Returns: + A Tensor has the same type as `x`. + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.array( + ... [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]] + ... 
).astype(np.float32) + >>> input = flow.Tensor(x) + + >>> y = flow.view(input, shape=[2, 2, 2, -1]).numpy().shape + >>> y + (2, 2, 2, 2) + + """ + return Reshape(shape=shape)(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/round.py b/oneflow/compatible_single_client_python/nn/modules/round.py new file mode 100644 index 0000000000000000000000000000000000000000..9327e391d94324de5dd1be55ef2522fba21f4aa0 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/round.py @@ -0,0 +1,79 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Round(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.round(x) + + +@oneflow_export("round") +@experimental_api +def round_op(x): + """This operator rounds the value of Blob to the nearest integer. + Args: + x (oneflow.compatible.single_client.Tensor): A Tensor + Returns: + oneflow.compatible.single_client.Tensor: The result Tensor + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> x1 = flow.Tensor(np.array([1.49999, 1.500001, 2.7]).astype(np.float32)) + >>> out1 = flow.round(x1) + >>> out1.numpy() + array([1., 2., 3.], dtype=float32) + >>> x2 = flow.Tensor(np.array([2.499999, 7.5000001, 5.3, 6.8]).astype(np.float32)) + >>> out2 = flow.round(x2) + >>> out2.numpy() + array([2., 8., 5., 7.], dtype=float32) + + """ + + return Round()(x) + + +@register_tensor_op("round") +@experimental_api +def round_op_tensor(x): + r""" + round() -> Tensor + + See :func:`oneflow.compatible.single_client.experimental.round` + + """ + + return Round()(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/sign.py b/oneflow/compatible_single_client_python/nn/modules/sign.py new file mode 100644 index 0000000000000000000000000000000000000000..d87cd6efe122e18a083c40599e9356e2d5a1a124 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/sign.py @@ -0,0 +1,83 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.nn.module import Module + + +class Sign(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.sign(x) + + +@oneflow_export("sign") +@experimental_api +def sign_op(x): + r"""Computes the sign of Tensor. + + .. math:: + + \text{out}_{i} = \text{sgn}(\text{input}_{i}) + + Args: + input (Tensor): the input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> x1 = flow.Tensor(np.array([-2, 0, 2]).astype(np.float32)) + >>> out1 = flow.sign(x1) + >>> out1.numpy() + array([-1., 0., 1.], dtype=float32) + >>> x2 = flow.Tensor(np.array([-3.2, -4.5, 5.8]).astype(np.float32),device=flow.device('cuda')) + >>> out2 = flow.sign(x2) + >>> out2.numpy() + array([-1., -1., 1.], dtype=float32) + + """ + return Sign()(x) + + +@register_tensor_op("sign") +@experimental_api +def sign_op_tensor(x): + r""" + + sign() -> Tensor + + See :func:`oneflow.compatible.single_client.experimental.sign` + + """ + + return Sign()(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/sinh.py b/oneflow/compatible_single_client_python/nn/modules/sinh.py new file mode 100644 index 0000000000000000000000000000000000000000..0cbc18422b5e9e07007c4d129e7eb6a8c4155ed6 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/sinh.py @@ -0,0 +1,85 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Sinh(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, x): + return flow.F.sinh(x) + + +@oneflow_export("sinh") +@experimental_api +def sinh_op(x): + r"""Returns a new tensor with the hyperbolic sine of the elements of :attr:`input`. + + .. math:: + \text{out}_{i} = \sinh(\text{input}_{i}) + + Args: + input (Tensor): the input tensor. 
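+
+    Note:
+        Equivalently, :math:`\sinh(x) = \frac{e^{x} - e^{-x}}{2}`.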
+ + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + + >>> x1 = flow.Tensor(np.array([1, 2, 3])) + >>> x2 = flow.Tensor(np.array([1.53123589,0.54242598,0.15117185])) + >>> x3 = flow.Tensor(np.array([1,0,-1])) + + >>> flow.enable_eager_execution() + >>> flow.sinh(x1).numpy() + array([ 1.1752012, 3.6268604, 10.017875 ], dtype=float32) + >>> flow.sinh(x2).numpy() + array([2.20381 , 0.5694193, 0.1517483], dtype=float32) + >>> flow.sinh(x3).numpy() + array([ 1.1752012, 0. , -1.1752012], dtype=float32) + + """ + + return Sinh()(x) + + +@register_tensor_op("sinh") +@experimental_api +def sinh_op_tensor(x): + r""" + + sinh() -> Tensor + + See :func:`oneflow.compatible.single_client.experimental.sinh` + + """ + + return Sinh()(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/slice.py b/oneflow/compatible_single_client_python/nn/modules/slice.py new file mode 100644 index 0000000000000000000000000000000000000000..9322e213e8092af2f35879cf094106df441c0a17 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/slice.py @@ -0,0 +1,167 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import numpy as np +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.ops.array_ops import ( + check_slice_tup_list, + GetSliceAttrs, +) +from typing import Sequence, Tuple + + +class Slice(Module): + def __init__( + self, start: Tuple[int, ...], stop: Tuple[int, ...], step: Tuple[int, ...] + ) -> None: + super().__init__() + self.start = start + self.stop = stop + self.step = step + + def forward(self, x): + return flow.F.slice(x, start=self.start, stop=self.stop, step=self.step) + + +@oneflow_export("slice") +@experimental_api +def slice_op(x, slice_tup_list: Sequence[Tuple[int, int, int]]): + r"""Extracts a slice from a tensor. + The `slice_tup_list` assigns the slice indices in each dimension, the format is (start, stop, step). + The operator will slice the tensor according to the `slice_tup_list`. + + Args: + x: A `Tensor`. + slice_tup_list: A list of slice tuple, indicate each dimension slice (start, stop, step). + + For example: + + .. 
code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor(np.random.randn(3, 6, 9).astype(np.float32)) + >>> tup_list = [[None, None, None], [0, 5, 2], [0, 6, 3]] + >>> y = flow.slice(input, slice_tup_list=tup_list) + >>> y.shape + flow.Size([3, 3, 2]) + """ + start, stop, step = check_slice_tup_list(slice_tup_list, x.shape) + return Slice(start, stop, step)(x) + + +class SliceUpdate(Module): + def __init__( + self, start: Tuple[int, ...], stop: Tuple[int, ...], step: Tuple[int, ...] + ) -> None: + super().__init__() + self.start = start + self.stop = stop + self.step = step + + def forward(self, x, update): + return flow.F.slice_update( + x, update, start=self.start, stop=self.stop, step=self.step + ) + + +@oneflow_export("slice_update") +@experimental_api +def slice_update_op(x, update, slice_tup_list: Sequence[Tuple[int, int, int]]): + r"""Update a slice of tensor `x`. Like `x[start:stop:step] = update`. + + Args: + x: A `Tensor`, whose slice will be updated. + update: A `Tensor`, indicate the update content. + slice_tup_list: A list of slice tuple, indicate each dimension slice (start, stop, step). + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor(np.array([1, 1, 1, 1, 1]).astype(np.float32)) + >>> update = flow.Tensor(np.array([2, 3, 4]).astype(np.float32)) + >>> y = flow.slice_update(input, update, slice_tup_list=[[1, 4, 1]]) + >>> y.numpy() + array([1., 2., 3., 4., 1.], dtype=float32) + """ + start, stop, step = GetSliceAttrs(slice_tup_list, x.shape) + return SliceUpdate(start, stop, step)(x, update) + + +class LogicalSliceAssign(Module): + def __init__( + self, start: Tuple[int, ...], stop: Tuple[int, ...], step: Tuple[int, ...] + ) -> None: + super().__init__() + self.start = start + self.stop = stop + self.step = step + + def forward(self, x, update): + if update.dtype != x.dtype: + update = update.to(dtype=x.dtype) + return flow.F.logical_slice_assign( + x, update, start=self.start, stop=self.stop, step=self.step + ) + + +# NOTE: conflict with existing userop: flow.experimental.logical_slice_assign, so use tmp.logical_slice_assign +@oneflow_export("tmp.logical_slice_assign") +@experimental_api +def logical_slice_assign_op(x, update, slice_tup_list: Sequence[Tuple[int, int, int]]): + r"""Update a slice of tensor `x`(in-place). Like `x[start:stop:step] = update`. + + Args: + x: A `Tensor`, whose slice will be updated. + update: A `Tensor`, indicate the update content. + slice_tup_list: A list of slice tuple, indicate each dimension slice (start, stop, step). + + For example: + + .. 
code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> input = flow.Tensor(np.array([1, 1, 1, 1, 1]).astype(np.float32))
+        >>> update = flow.Tensor(np.array([2, 3, 4]).astype(np.float32))
+        >>> y = flow.tmp.logical_slice_assign(input, update, slice_tup_list=[[1, 4, 1]])
+    """
+    start, stop, step = GetSliceAttrs(slice_tup_list, x.shape)
+    return LogicalSliceAssign(start, stop, step)(x, update)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/softplus.py b/oneflow/compatible_single_client_python/nn/modules/softplus.py
new file mode 100644
index 0000000000000000000000000000000000000000..18979ba8d979ad713fd7f5f1c3aec0413b31e929
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/softplus.py
@@ -0,0 +1,77 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op
+
+
+class Softplus(Module):
+    def __init__(self) -> None:
+        super().__init__()
+
+    def forward(self, x):
+        return flow.F.softplus(x)
+
+
+@oneflow_export("softplus")
+@register_tensor_op("softplus")
+@experimental_api
+def softplus_op(x):
+    r"""Applies the element-wise function:
+
+    .. math::
+        \text{Softplus}(x) = \frac{1}{\beta} \log(1 + \exp(\beta x))
+
+    SoftPlus is a smooth approximation to the ReLU function and can be used to constrain the output of a machine to always be positive.
+
+    For numerical stability the implementation reverts to the linear function when :math:`\text{input} \times \beta > \text{threshold}`.
+
+    Args:
+        beta: the value for the Softplus formulation. Default: 1
+
+        threshold: values above this revert to a linear function. Default: 20
+
+    For example:
+
+    .. 
code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + + >>> x1 = flow.Tensor(np.array([1, 2, 3])) + >>> x2 = flow.Tensor(np.array([1.53123589,0.54242598,0.15117185])) + >>> x3 = flow.Tensor(np.array([1,0,-1])) + + >>> flow.enable_eager_execution() + >>> flow.softplus(x1).numpy() + array([1.3132616, 2.126928 , 3.0485873], dtype=float32) + >>> flow.softplus(x2).numpy() + array([1.7270232, 1.0006962, 0.771587 ], dtype=float32) + >>> flow.softplus(x3).numpy() + array([1.3132616 , 0.6931472 , 0.31326166], dtype=float32) + + """ + return Softplus()(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/sort.py b/oneflow/compatible_single_client_python/nn/modules/sort.py new file mode 100644 index 0000000000000000000000000000000000000000..a092328210707c55a551878a33e479d546bbef60 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/sort.py @@ -0,0 +1,112 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.ops.transpose_util import ( + get_perm_when_transpose_axis_to_last_dim, + get_inversed_perm, +) + + +class Sort(Module): + def __init__(self, dim: int = -1, descending: bool = False) -> None: + super().__init__() + self.dim = dim + direction = "DESCENDING" if descending else "ASCENDING" + self._argsort_op = ( + flow.builtin_op("arg_sort") + .Input("in") + .Output("out") + .Attr("direction", direction) + .Build() + ) + + def forward(self, input): + num_dims = len(input.shape) + dim = self.dim if self.dim >= 0 else self.dim + num_dims + assert 0 <= dim < num_dims, "dim out of range" + if dim == num_dims - 1: + indices = self._argsort_op(input)[0] + return (flow.experimental.gather(input, indices, dim), indices) + else: + perm = get_perm_when_transpose_axis_to_last_dim(num_dims, dim) + x = flow.F.transpose(input, perm=perm) + indices = self._argsort_op(x)[0] + indices = flow.F.transpose(indices, perm=get_inversed_perm(perm)) + return (flow.experimental.gather(input, indices, dim), indices) + + +@oneflow_export("sort") +@register_tensor_op("sort") +@experimental_api +def sort_op(input, dim: int = -1, descending: bool = False): + """Sorts the elements of the input tensor along a given dimension in ascending order by value. + + Args: + input (oneflow.compatible.single_client.Tensor): The input Tensor. + dim (int, optional): dimension to be sorted. Defaults to the last dim (-1). + descending (bool, optional): controls the sorting order (ascending or descending). 
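+            Defaults to ``False`` (ascending).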
+
+    Returns:
+        Tuple(oneflow.compatible.single_client.Tensor, oneflow.compatible.single_client.Tensor(dtype=int32)): A tuple of
+        (values, indices), where the values are the sorted values and the indices are the indices
+        of the elements in the original input tensor.
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> import numpy as np
+        >>> flow.enable_eager_execution()
+
+        >>> x = np.array([[1, 3, 8, 7, 2], [1, 9, 4, 3, 2]], dtype=np.float32)
+        >>> input = flow.Tensor(x)
+        >>> (values, indices) = flow.sort(input)
+        >>> values
+        tensor([[1., 2., 3., 7., 8.],
+                [1., 2., 3., 4., 9.]], dtype=oneflow.float32)
+        >>> indices
+        tensor([[0, 4, 1, 3, 2],
+                [0, 4, 3, 2, 1]], dtype=oneflow.int32)
+        >>> (values, indices) = flow.sort(input, descending=True)
+        >>> values
+        tensor([[8., 7., 3., 2., 1.],
+                [9., 4., 3., 2., 1.]], dtype=oneflow.float32)
+        >>> indices
+        tensor([[2, 3, 1, 4, 0],
+                [1, 2, 3, 4, 0]], dtype=oneflow.int32)
+        >>> (values, indices) = flow.sort(input, dim=0)
+        >>> values
+        tensor([[1., 3., 4., 3., 2.],
+                [1., 9., 8., 7., 2.]], dtype=oneflow.float32)
+        >>> indices
+        tensor([[0, 0, 1, 1, 0],
+                [1, 1, 0, 0, 1]], dtype=oneflow.int32)
+
+    """
+    return Sort(dim=dim, descending=descending)(input)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/sparse.py b/oneflow/compatible_single_client_python/nn/modules/sparse.py
new file mode 100644
index 0000000000000000000000000000000000000000..35d46be55bd72e9315f507df964ccde6c7c75951
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/sparse.py
@@ -0,0 +1,118 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from oneflow.compatible import single_client as flow
+
+from oneflow.compatible_single_client_python.framework.tensor import Tensor
+from oneflow.compatible_single_client_python.oneflow_export import oneflow_export
+from oneflow.compatible_single_client_python.nn.module import Module
+
+from typing import Optional, List, Tuple
+
+
+@oneflow_export("nn.Embedding")
+class Embedding(Module):
+    r"""A simple lookup table that stores embeddings of a fixed dictionary and size.
+
+    This module is often used to store word embeddings and retrieve them using indices.
+    The input to the module is a list of indices, and the output is the corresponding
+    word embeddings.
+
+    Args:
+        num_embeddings (int): size of the dictionary of embeddings
+        embedding_dim (int): the size of each embedding vector
+        padding_idx (int, optional): If specified, the entries at :attr:`padding_idx` do not contribute to the gradient;
+            therefore, the embedding vector at :attr:`padding_idx` is not updated during training,
+            i.e. it remains as a fixed "pad". For a newly constructed Embedding,
+            the embedding vector at :attr:`padding_idx` will default to all zeros,
+            but can be updated to another value to be used as the padding vector.
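+        _weight (Tensor, optional): if provided, it is used as the embedding table directly;
+            its shape must be ``(num_embeddings, embedding_dim)``.
+
+    A minimal sketch of the ``_weight`` path (the identity-style weight below is purely
+    illustrative and not part of the original example):
+
+    .. code-block:: python
+
+        # not a doctest; assumes numpy and the experimental flow namespace are imported
+        w = flow.Tensor(np.eye(10, 3).astype(np.float32))  # one fixed vector per index
+        m = flow.nn.Embedding(10, 3, _weight=w)            # skips random initialization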
+ + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> indices = flow.Tensor([[1, 2, 4, 5], [4, 3, 2, 9]], dtype=flow.int) + >>> m = flow.nn.Embedding(10, 3) + >>> y = m(indices) + + """ + + def __init__( + self, + num_embeddings: int, + embedding_dim: int, + padding_idx: Optional[int] = None, + max_norm: Optional[float] = None, + norm_type: Optional[float] = None, + scale_grad_by_freq: bool = False, + sparse: bool = False, + _weight: Optional[Tensor] = None, + ): + super().__init__() + + self.num_embeddings = num_embeddings + self.embedding_dim = embedding_dim + if padding_idx is not None: + if padding_idx > 0: + assert ( + padding_idx < self.num_embeddings + ), "Padding_idx must be within num_embeddings" + elif padding_idx < 0: + assert ( + padding_idx >= -self.num_embeddings + ), "Padding_idx must be within num_embeddings" + padding_idx = self.num_embeddings + padding_idx + + self.padding_idx = padding_idx + assert max_norm is None, "Not support max_norm yet!" + assert norm_type is None, "Not support norm_type yet!" + assert scale_grad_by_freq is False, "Not support scale_grad_by_freq=True yet!" + assert sparse is False, "Not support sparse=True yet!" + + if _weight is None: + self.weight = flow.nn.Parameter(Tensor(num_embeddings, embedding_dim)) + self.reset_parameters() + else: + assert list(_weight.shape) == [ + num_embeddings, + embedding_dim, + ], "Shape of weight does not match num_embeddings and embedding_dim" + self.weight = flow.nn.Parameter(_weight) + + self.sparse = sparse + + def reset_parameters(self) -> None: + flow.nn.init.normal_(self.weight) + self._fill_padding_idx_with_zero() + + def _fill_padding_idx_with_zero(self) -> None: + # TODO padding_idx rely on tensor slice + if self.padding_idx is not None: + with flow.no_grad(): + self.weight[self.padding_idx].fill_(0) + + def forward(self, indices): + res = flow.F.gather(self.weight, indices, axis=0) + return res + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/squeeze.py b/oneflow/compatible_single_client_python/nn/modules/squeeze.py new file mode 100644 index 0000000000000000000000000000000000000000..b70501ffb7a4c1dd0a41d5cdb68c825fafbd147f --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/squeeze.py @@ -0,0 +1,80 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.framework import id_util as id_util +from typing import Optional, Sequence + + +class Squeeze(Module): + def __init__(self, dim: Optional[Sequence[int]] = None) -> None: + super().__init__() + self.dim = dim + + def forward(self, x): + if self.dim is None: + return x + return flow.F.squeeze(x, dim=self.dim) + + +@oneflow_export("squeeze") +@register_tensor_op("squeeze") +@experimental_api +def squeeze_op(input, dim: Optional[Sequence[int]] = None): + """This operator removes the specified dimention which size is 1 of the input Tensor. + If the `dim` is not specified, this operator will remove all the dimention which size is 1 of the input Tensor. + + The amount of element in return value is the same as Tensor `input`. + + Args: + input (oneflow.compatible.single_client.Tensor): The input Tensor. + dim (Optional[Sequence[int]]): The dim. Defaults to None. + + Returns: + Tensor: The result Tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor(np.array([[[[1, 1, 1]]]]).astype(np.int32)) + >>> out = flow.squeeze(input, dim=[1, 2]).shape + >>> out + flow.Size([1, 3]) + + """ + if isinstance(dim, int): + dim = [dim] + elif dim is None: + dim = range(input.ndim) + + dim = list(filter(lambda i: input.size(i) == 1, dim)) + return Squeeze(dim=dim)(input) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/stack.py b/oneflow/compatible_single_client_python/nn/modules/stack.py new file mode 100644 index 0000000000000000000000000000000000000000..4a3b510872352a62c1e8cf5ea9d9ae2aa1cdefff --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/stack.py @@ -0,0 +1,91 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from oneflow.compatible import single_client as flow +from typing import List, Tuple +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.framework.tensor import Tensor + + +class Stack(Module): + def __init__(self, dim: int = 0) -> None: + super().__init__() + self.dim = dim + + def forward(self, inputs): + assert isinstance(inputs, (List, Tuple)) + input_shape = inputs[0].shape + max_dim = len(input_shape) + + # The axis must be in range [-(_max_dim +1), _max_dim] + if self.dim < 0: + self.dim = self.dim + max_dim + 1 + assert (self.dim >= 0) and (self.dim <= max_dim) + input_list_length = len(inputs) + unsqueezed = list() + for i in range(input_list_length): + current_shape = inputs[i].shape + assert ( + input_shape == current_shape + ), "Each tensor should have the same shape ! Found a tensor instance shape is: {}".format( + current_shape + ) + unsqueezed.append(inputs[i].unsqueeze(dim=self.dim)) + return flow.experimental.cat(unsqueezed, dim=self.dim) + + +@oneflow_export("stack") +@register_tensor_op("stack") +@experimental_api +def stack(inputs: Tensor, dim: int = 0) -> None: + r"""Concatenates a sequence of tensors along a new dimension. + The returned tensor shares the same underlying data with input tensors. + + A :attr:`dim` value within the range `[-input.ndimension() - 1, input.ndimension() + 1]` + can be used. Negative :attr:`dim` will correspond to :meth:`stack` + applied at :attr:`dim` = ``dim + input.ndimension() + 1``. + + Args: + inputs (List[oneflow.compatible.single_client.Tensor]): the list of input tensors. Each tensor should have the same shape. + dim (int): the index at which to insert the concatenated dimension. + + Returns: + A `Tensor` + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> x = flow.Tensor(np.random.rand(1, 3, 5)) + >>> y = flow.Tensor(np.random.rand(1, 3, 5)) + >>> out = flow.stack([x, y], dim = -1) + >>> out.shape + flow.Size([1, 3, 5, 2]) + """ + return Stack(dim)(inputs) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/tan.py b/oneflow/compatible_single_client_python/nn/modules/tan.py new file mode 100644 index 0000000000000000000000000000000000000000..5c74bbb63de098e2a65e290b9304a34f09570d0a --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/tan.py @@ -0,0 +1,78 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Tan(Module): + def __init__(self): + super().__init__() + self._op = flow.builtin_op("tan").Input("x").Output("y").Build() + + def forward(self, x): + return self._op(x)[0] + + +@oneflow_export("tan") +@experimental_api +def tan_op(input): + r"""Returns the tan value of the elements of :attr:`input`. + + .. math:: + \text{out}_{i} = \tan(\text{input}_{i}) + + Args: + input (Tensor): the input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> np_arr = np.array([-1/4*np.pi, 0, 1/4*np.pi]).astype(np.float32) + >>> input = flow.Tensor(np_arr) + >>> output = flow.tan(input) + >>> output + tensor([-1., 0., 1.], dtype=oneflow.float32) + + """ + + return Tan()(input) + + +@register_tensor_op("tan") +@experimental_api +def tan_op_tensor(input): + r""" + tan() -> Tensor + See :func:`oneflow.compatible.single_client.experimental.tan` + + """ + + return Tan()(input) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/tensor_buffer.py b/oneflow/compatible_single_client_python/nn/modules/tensor_buffer.py new file mode 100644 index 0000000000000000000000000000000000000000..d2624864dd5da64f2d4fbee911fd086ecf7bab77 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/tensor_buffer.py @@ -0,0 +1,161 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from typing import Sequence, Optional + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) + + +class TensorBufferToTensor(Module): + def __init__(self, dtype, instance_shape): + super().__init__() + self._op = ( + flow.builtin_op("tensor_buffer_to_tensor") + .Input("in") + .Output("out") + .Attr("dtype", dtype) + .Attr("instance_shape", instance_shape) + .Build() + ) + + def forward(self, input): + return self._op(input)[0] + + +@oneflow_export("tensor_buffer_to_tensor") +@experimental_api +def tensor_buffer_to_tensor_op(x, dtype: flow.dtype, instance_shape: Sequence[int]): + """This operator converts the Tensor's type from TensorBuffer to original type. + Some operator's output data type is `TensorBuffer`, you can use this operator to convert back + to `Tensor`. + + Refer to `Concept Explanation <https://docs.oneflow.org/basics_topics/concept_explanation.html#3tensorbuffer-tensorlist>`_ + for more about TensorBuffer. 
+ + Args: + x (oneflow.compatible.single_client.Tensor): The input Tensor. + dtype (flow.dtype): The data dtype. + instance_shape (Sequence[int]): The shape of each TensorBuffer instance. + + Returns: + oneflow.compatible.single_client.Tensor: The result Tensor. + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.random.randn(4, 16, 64, 64).astype(np.float32) + >>> x = flow.Tensor(x) + >>> x = flow.tensor_to_tensor_buffer(x, instance_dims=2) + >>> output = flow.tensor_buffer_to_tensor(x, instance_shape=(64, 64), dtype=flow.float) + >>> output.shape + flow.Size([4, 16, 64, 64]) + + """ + return TensorBufferToTensor(dtype=dtype, instance_shape=instance_shape)(x) + + +class TensorToTensorBuffer(Module): + def __init__(self, instance_dims): + super().__init__() + self._op = ( + flow.builtin_op("tensor_to_tensor_buffer") + .Input("in") + .Output("out") + .Attr("instance_dims", instance_dims) + .Build() + ) + + def forward(self, input): + return self._op(input)[0] + + +@oneflow_export("tensor_to_tensor_buffer") +@experimental_api +def tensor_to_tensor_buffer(x, instance_dims: int): + """This operator converts the Tensor's type to TensorBuffer. + + Refer to `Concept Explanation <https://docs.oneflow.org/basics_topics/concept_explanation.html#3tensorbuffer-tensorlist>`_ + for more about TensorBuffer. + + Args: + x (oneflow.compatible.single_client.Tensor): The input Tensor. + instance_dims (int): The dimensions of dynamic tensor instance. + + Returns: + oneflow.compatible.single_client.Tensor: The result Tensor. + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = np.random.randn(4, 16, 64, 64).astype(np.float32) + >>> x = flow.Tensor(x) + >>> x = flow.tensor_to_tensor_buffer(x, instance_dims=2) + >>> output = flow.tensor_buffer_to_tensor(x, instance_shape=(64, 64), dtype=flow.float) + >>> output.shape + flow.Size([4, 16, 64, 64]) + + """ + return TensorToTensorBuffer(instance_dims=instance_dims)(x) + + +class GenTensorBuffer(Module): + def __init__(self, shape, shape_list, value_list, data_type, dynamic_out): + super().__init__() + self._op = ( + flow.builtin_op("gen_tensor_buffer") + .Output("out") + .Attr("shape", shape) + .Attr("shape_list", shape_list) + .Attr("value_list", value_list) + .Attr("data_type", data_type) + .Attr("dynamic_out", dynamic_out) + .Build() + ) + + def forward(self): + return self._op()[0] + + +@oneflow_export("gen_tensor_buffer") +@experimental_api +def gen_tensor_buffer( + shape: Sequence[int], + shape_list: Sequence[Sequence[int]], + value_list: Sequence[float], + data_type: Optional[flow.dtype] = flow.float32, + dynamic_out: Optional[bool] = False, +): + return GenTensorBuffer(shape, shape_list, value_list, data_type, dynamic_out)() + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/tensor_ops.py b/oneflow/compatible_single_client_python/nn/modules/tensor_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..8add93391760f10dc70ba46a5cb107d356a35d4b --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/tensor_ops.py @@ -0,0 +1,94 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import experimental_api +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class TypeAs(Module): + def __init__(self): + super().__init__() + + def forward(self, input, target): + return input.to(dtype=target.dtype) + + +@register_tensor_op("type_as") +@experimental_api +def type_as_op(input, target): + r"""Returns this tensor cast to the type of the given tensor. + This is a no-op if the tensor is already of the correct type. + + Args: + input (Tensor): the input tensor. + target (Tensor): the tensor which has the desired type. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor(np.random.randn(1, 2, 3), dtype=flow.float32) + >>> target = flow.Tensor(np.random.randn(4, 5, 6), dtype = flow.int32) + >>> input = input.type_as(target) + >>> input.dtype + oneflow.int32 + + """ + return TypeAs()(input, target) + + +class Long(Module): + def __init__(self): + super().__init__() + + def forward(self, input): + return input.to(dtype=flow.int64) + + +@register_tensor_op("long") +@experimental_api +def long_op(input): + r"""`Tensor.long()` is equivalent to `Tensor.to(flow.int64)`. See to(). + + Args: + input (Tensor): the input tensor. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor(np.random.randn(1, 2, 3), dtype=flow.float32) + >>> input = input.long() + >>> input.dtype + oneflow.int64 + + """ + return Long()(input) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/tile.py b/oneflow/compatible_single_client_python/nn/modules/tile.py new file mode 100644 index 0000000000000000000000000000000000000000..ad32b121d8dcbd9dce502ad47f138e795bcb6c27 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/tile.py @@ -0,0 +1,101 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from typing import Union +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import ( + Tensor, + register_tensor_op, +) + + +class Tile(Module): + def __init__(self, reps: tuple) -> None: + super().__init__() + self.reps = reps + + def forward(self, input: Tensor) -> Tensor: + reps = self.reps + for s in self.reps: + assert s > 0 + input_shape = input.shape + diff = len(input_shape) - len(reps) + if diff > 0: + shape = [1 for _ in range(diff)] + shape.extend([i for i in reps]) + reps = tuple(shape) + return input.repeat(reps) + + +@oneflow_export("tile") +@register_tensor_op("tile") +@experimental_api +def tile_op(x, reps): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: + https://pytorch.org/docs/stable/generated/torch.tile.html + + Constructs a tensor by repeating the elements of ``input``. The ``reps`` argument specifies the number + of repetitions in each dimension. + + If ``reps`` specifies fewer dimensions than ``input`` has, then ones are prepended to ``reps`` until + all dimensions are specified. For example, if ``input`` has shape (8, 6, 4, 2) and ``reps`` is (2, 2), + then ``reps`` is treated as (1, 1, 2, 2). + + Analogously, if ``input`` has fewer dimensions than ``reps`` specifies, then ``input`` is treated as + if it were unsqueezed at dimension zero until it has as many dimensions as ``reps`` specifies. + For example, if ``input`` has shape (4, 2) and ``reps`` is (3, 3, 2, 2), then ``input`` is treated as + if it had the shape (1, 1, 4, 2). + + .. note:: + This function is similar to NumPy’s tile function. + + Args: + input (oneflow.compatible.single_client.Tensor): the tensor whose elements to repeat. + reps (tuple): the number of repetitions per dimension. + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> x = np.array([1, 2]).astype(np.int32) + >>> input = flow.Tensor(x, dtype=flow.int32) + >>> out = input.tile(reps=(2,)) + >>> out + tensor([1, 2, 1, 2], dtype=oneflow.int32) + + >>> x = np.random.randn(5, 2, 1) + >>> input = flow.Tensor(x) + >>> out = input.tile(reps=(3, 4)) + >>> out.size() + flow.Size([5, 6, 4]) + + """ + return Tile(reps=reps)(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/to.py b/oneflow/compatible_single_client_python/nn/modules/to.py new file mode 100644 index 0000000000000000000000000000000000000000..326670df49543fa10f9a857ec6bf73adb6a9aa6d --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/to.py @@ -0,0 +1,102 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from typing import Optional, Union + + +class To(Module): + def __init__(self, copy): + super().__init__() + self.copy = copy + + def forward(self, x, device, dtype): + result = x + if device is not None: + if x.device != device or self.copy: + result = flow.F.copy(x, device_type=device.type, device_id=device.index) + if dtype is not None: + if x.dtype != dtype or self.copy: + result = flow.F.cast(result, dtype=dtype) + return result + + +@oneflow_export("to") +@register_tensor_op("to") +def to_op(input, *args, **kwargs): + r"""Performs Tensor dtype and/or device conversion. + A flow.dtype and flow.device are inferred from the arguments of `input.to(*args, **kwargs)`. + + .. note:: + If the ``input`` Tensor already + has the correct :class:`flow.dtype` and :class:`flow.device`, then ``input`` is returned. + Otherwise, the returned tensor is a copy of ``input`` with the desired. + + Args: + input (oneflow.compatible.single_client.Tensor): An input tensor. + *args (oneflow.compatible.single_client.Tensor or oneflow.compatible.single_client.device or oneflow.compatible.single_client.dtype): Positional arguments + **kwargs (oneflow.compatible.single_client.device or oneflow.compatible.single_client.dtype) : Key-value arguments + + Returns: + oneflow.compatible.single_client.Tensor: A Tensor. + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> arr = np.random.randint(1, 9, size=(1, 2, 3, 4)) + >>> input = flow.Tensor(arr) + >>> output = input.to(dtype=flow.float32) + >>> np.array_equal(arr.astype(np.float32), output.numpy()) + True + + """ + copy = kwargs.get("copy", False) + device = kwargs.get("device", None) + dtype = kwargs.get("dtype", None) + if len(args) > 0: + if isinstance(args[0], flow.Tensor): + if len(args) == 2: + copy = args[1] + return To(copy)(input, args[0].device, args[0].dtype) + elif isinstance(args[0], flow.dtype): + if len(args) == 2: + copy = args[1] + return To(copy)(input, None, args[0]) + else: + device = flow.device(args[0]) if isinstance(args[0], str) else args[0] + if len(args) > 1: + dtype = args[1] + assert isinstance(dtype, flow.dtype) + if len(args) > 2: + copy = args[2] + assert isinstance(device, flow.device) + return To(copy)(input, device, dtype) + if isinstance(device, flow.device) or isinstance(dtype, flow.dtype): + return To(copy)(input, device, dtype) + raise TypeError("to() received an invalid combination of arguments") + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/transpose.py b/oneflow/compatible_single_client_python/nn/modules/transpose.py new file mode 100644 index 0000000000000000000000000000000000000000..e7387a9f96db4ad0b153e4e0290112b0a95967a9 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/transpose.py @@ -0,0 +1,99 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op +from typing import Optional, Sequence + + +class Transpose(Module): + def __init__( + self, dim0, dim1, conjugate: bool = False, batch_axis_non_change: bool = False, + ) -> None: + super().__init__() + + if conjugate: + raise NotImplementedError + + if batch_axis_non_change: + raise NotImplementedError + + self.dim0 = dim0 + self.dim1 = dim1 + + def forward(self, x): + x_shape = x.shape + dim0 = self.dim0 + dim1 = self.dim1 + if dim0 < 0: + dim0 += len(x_shape) + if dim1 < 0: + dim1 += len(x_shape) + assert dim0 >= 0 and dim0 < len( + x_shape + ), "Invalid dim0 {}, len(shape): {}".format(dim0, len(x_shape)) + assert dim1 >= 0 and dim1 < len( + x_shape + ), "Invalid dim1 {}, len(shape): {}".format(dim1, len(x_shape)) + + perm = [] + for i in range(len(x_shape)): + perm.append(i) + perm[dim0], perm[dim1] = perm[dim1], perm[dim0] + + return flow.F.transpose(x, perm=perm) + + +@oneflow_export("transpose") +@register_tensor_op("transpose") +@experimental_api +def transpose_op(tensor, dim0, dim1): + r"""Returns a tensor that is a transposed version of input. The given dimensions dim0 and dim1 are swapped. + + The resulting out tensor shares its underlying storage with the input tensor, so changing the content of one would change the content of the other. + + Args: + tensor (oneflow.compatible.single_client.Tensor): The input tensor. + dim0 (int): the first dimension to be transposed. + dim1 (int): the second dimension to be transposed. + Returns: + Tensor: A transposed tensor. + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32) + >>> out = flow.transpose(input, 0, 1).shape + >>> out + flow.Size([6, 2, 5, 3]) + + """ + return Transpose(dim0=dim0, dim1=dim1)(tensor) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/triu.py b/oneflow/compatible_single_client_python/nn/modules/triu.py new file mode 100644 index 0000000000000000000000000000000000000000..ebff3e931b0b959ce566a3574e348469c4f57f40 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/triu.py @@ -0,0 +1,67 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +""" + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Triu(Module): + def __init__(self, diagonal=0): + super().__init__() + self.diagonal = diagonal + + def forward(self, x): + return flow.F.triu(x, self.diagonal) + + +@oneflow_export("triu") +@register_tensor_op("triu") +@experimental_api +def triu_op(x, diagonal=0): + r"""Returns the upper triangular part of a matrix (2-D tensor) or batch of matrices input, + the other elements of the result tensor out are set to 0. + + Args: + input (Tensor): the input tensor. + diagonal (int, optional): the diagonal to consider + + For example: + + .. code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor(np.ones(shape=(3, 3)).astype(np.float32)) + >>> flow.triu(x) + tensor([[1., 1., 1.], + [0., 1., 1.], + [0., 0., 1.]], dtype=oneflow.float32) + + """ + return Triu(diagonal)(x) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/unsqueeze.py b/oneflow/compatible_single_client_python/nn/modules/unsqueeze.py new file mode 100644 index 0000000000000000000000000000000000000000..0bae797a98ccedfd2cfdd4ae8c7c57185a0b158c --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/unsqueeze.py @@ -0,0 +1,76 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Unsqueeze(Module): + def __init__(self, dim: int = 0) -> None: + super().__init__() + self.dim = dim + + def forward(self, input): + assert ( + -(1 + input.ndimension()) <= self.dim <= input.ndimension() + ), "dim should within the range [-input.ndimension() - 1, input.ndimension() + 1)" + + if self.dim < 0: + self.dim = 1 + input.ndimension() + self.dim + return flow.F.expand_dims(input, axis=self.dim) + + +@oneflow_export("unsqueeze") +@register_tensor_op("unsqueeze") +@experimental_api +def unsqueeze_op(input, dim): + r"""Returns a new tensor with a dimension of size one inserted at the + specified position. + + The returned tensor shares the same underlying data with this tensor. + + A :attr:`dim` value within the range `[-input.ndimension() - 1, input.ndimension() + 1)` + can be used. 
Negative :attr:`dim` will correspond to :meth:`unsqueeze`
+    applied at :attr:`dim` = ``dim + input.ndimension() + 1``.
+
+    Args:
+        input (Tensor): the input tensor.
+        dim (int): the index at which to insert the singleton dimension
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> x = flow.Tensor(np.random.rand(2, 3, 4))
+        >>> y = x.unsqueeze(2)
+        >>> y.shape
+        flow.Size([2, 3, 1, 4])
+    """
+    return Unsqueeze(dim)(input)
+
+
+if __name__ == "__main__":
+    import doctest
+
+    doctest.testmod(raise_on_error=True)
diff --git a/oneflow/compatible_single_client_python/nn/modules/upsampling.py b/oneflow/compatible_single_client_python/nn/modules/upsampling.py
new file mode 100644
index 0000000000000000000000000000000000000000..b13ab8b2dfd2ed86a1f80bf12db84e46a50180ff
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/modules/upsampling.py
@@ -0,0 +1,272 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.nn.module import Module
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op
+from typing import Optional, Union, Tuple
+
+
+@oneflow_export("nn.Upsample")
+@experimental_api
+class Upsample(Module):
+    r"""Upsamples a given multi-channel 2D (spatial) data.
+
+    The input data is assumed to be of the form
+    `minibatch x channels x height x width`.
+    Hence, for spatial inputs, we expect a 4D Tensor.
+
+    The algorithms available for upsampling are nearest neighbor and bilinear,
+    both applied to 4D input Tensors.
+
+    One can either give a :attr:`scale_factor` or the target output :attr:`size` to
+    calculate the output size. (You cannot give both, as it is ambiguous)
+
+    Args:
+        size (int or Tuple[int, int], optional):
+            output spatial sizes
+        scale_factor (float or Tuple[float, float], optional):
+            multiplier for spatial size. Has to match input size if it is a tuple.
+        mode (str, optional): the upsampling algorithm: one of ``'nearest'``,
+            ``'bilinear'``.
+            Default: ``'nearest'``
+        align_corners (bool, optional): if ``True``, the corner pixels of the input
+            and output tensors are aligned, thus preserving the values at
+            those pixels. This only has effect when :attr:`mode` is ``'bilinear'``.
+            Default: ``False``
+
+    Shape:
+        - Input: :math:`(N, C, H_{in}, W_{in})`
+        - Output: :math:`(N, C, H_{out}, W_{out})`, where
+
+    .. math::
+        H_{out} = \left\lfloor H_{in} \times \text{scale_factor} \right\rfloor
+
+    .. math::
+        W_{out} = \left\lfloor W_{in} \times \text{scale_factor} \right\rfloor
+
+    .. 
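note::
+        Only 4D ``(N, C, H, W)`` input is supported by this module: ``forward``
+        reads ``x.shape[2]`` and ``x.shape[3]`` and always passes
+        ``data_format="channels_first"`` to ``flow.F.upsample``.
+
+    .. 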
note::
+        If you want downsampling/general resizing, you should use :func:`~nn.functional.interpolate`.
+
+    For example:
+
+    .. code-block:: python
+
+        >>> import numpy as np
+        >>> import oneflow.compatible.single_client.experimental as flow
+        >>> flow.enable_eager_execution()
+
+        >>> input = flow.Tensor(np.arange(1, 5).reshape((1, 1, 2, 2)), dtype=flow.float32)
+        >>> input = input.to("cuda")
+        >>> m = flow.nn.Upsample(scale_factor=2.0, mode="nearest")
+        >>> output = m(input)
+        >>> output #doctest: +ELLIPSIS
+        tensor([[[[1., 1., 2., 2.],
+                  ...
+                  [3., 3., 4., 4.]]]], device='cuda:0', dtype=oneflow.float32)
+
+    """
+
+    def __init__(
+        self,
+        size: Optional[Union[int, Tuple[int, ...]]] = None,
+        scale_factor: Optional[Union[float, Tuple[float, ...]]] = None,
+        mode: str = "nearest",
+        align_corners: Optional[bool] = None,
+    ):
+        super().__init__()
+        self.size = size
+        if isinstance(scale_factor, tuple):
+            self.scale_factor = tuple(float(factor) for factor in scale_factor)
+        else:
+            self.scale_factor = float(scale_factor) if scale_factor else None
+
+        self.mode = mode
+        if align_corners is None:
+            align_corners = False
+
+        self.align_corners = align_corners
+        self.height_scale = None
+        self.width_scale = None
+
+        if isinstance(self.scale_factor, float):
+            self.height_scale = self.scale_factor
+            self.width_scale = self.scale_factor
+        elif isinstance(self.scale_factor, tuple):
+            self.height_scale = self.scale_factor[0]
+            self.width_scale = self.scale_factor[1]
+        else:
+            pass
+
+        if self.mode != "nearest" and self.mode != "bilinear":
+            raise ValueError('interpolation must be "nearest" or "bilinear".')
+
+        if self.mode == "nearest" and self.align_corners:
+            raise ValueError('interpolation "nearest" does not support align_corners.')
+
+    def forward(self, x):
+        assert (
+            self.size is not None or self.scale_factor is not None
+        ), "size and scale_factor can not be none at the same time!"
+        h, w = x.shape[2], x.shape[3]
+        if self.height_scale is None:
+            if isinstance(self.size, int):
+                self.height_scale = 1.0 * self.size / h
+            else:
+                self.height_scale = 1.0 * self.size[0] / h
+        if self.width_scale is None:
+            if isinstance(self.size, int):
+                self.width_scale = 1.0 * self.size / w
+            else:
+                self.width_scale = 1.0 * self.size[1] / w
+
+        res = flow.F.upsample(
+            x,
+            height_scale=self.height_scale,
+            width_scale=self.width_scale,
+            align_corners=self.align_corners,
+            interpolation=self.mode,
+            data_format="channels_first",
+        )
+        return res
+
+
+@oneflow_export("nn.UpsamplingNearest2d")
+@experimental_api
+class UpsamplingNearest2d(Upsample):
+    r"""Applies a 2D nearest neighbor upsampling to an input signal composed of several input
+    channels.
+
+    To specify the scale, it takes either the :attr:`size` or the :attr:`scale_factor`
+    as its constructor argument.
+
+    When :attr:`size` is given, it is the output size of the image `(h, w)`.
+
+    Args:
+        size (int or Tuple[int, int], optional): output spatial sizes
+        scale_factor (float or Tuple[float, float], optional): multiplier for
+            spatial size.
+
+    .. warning::
+        This class is deprecated in favor of :func:`~nn.functional.interpolate`.
+
+    Shape:
+        - Input: :math:`(N, C, H_{in}, W_{in})`
+        - Output: :math:`(N, C, H_{out}, W_{out})` where
+
+    .. math::
+        H_{out} = \left\lfloor H_{in} \times \text{scale_factor} \right\rfloor
+
+    .. math::
+        W_{out} = \left\lfloor W_{in} \times \text{scale_factor} \right\rfloor
+
+    For example:
+
+    .. 
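code-block:: python

+        # a minimal sketch (not a doctest): the target size can be passed
+        # instead of a scale factor; the base Upsample computes the scale as
+        # size / input_size in forward()
+        m = flow.nn.UpsamplingNearest2d(size=(4, 4))
+
+    A runnable doctest with an explicit ``scale_factor``:
+
+    .. 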
code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor(np.arange(1, 5).reshape((1, 1, 2, 2)), dtype=flow.float32) + >>> input = input.to("cuda") + >>> m = flow.nn.UpsamplingNearest2d(scale_factor=2.0) + >>> output = m(input) + >>> output #doctest: +ELLIPSIS + tensor([[[[1., 1., 2., 2.], + ... + [3., 3., 4., 4.]]]], device='cuda:0', dtype=oneflow.float32) + + """ + + def __init__( + self, + size: Optional[Tuple[int, int]] = None, + scale_factor: Optional[Tuple[float, float]] = None, + ) -> None: + super(UpsamplingNearest2d, self).__init__(size, scale_factor, mode="nearest") + + +@oneflow_export("nn.UpsamplingBilinear2d") +@experimental_api +class UpsamplingBilinear2d(Upsample): + r"""Applies a 2D bilinear upsampling to an input signal composed of several input + channels. + + To specify the scale, it takes either the :attr:`size` or the :attr:`scale_factor` + as it's constructor argument. + + When :attr:`size` is given, it is the output size of the image `(h, w)`. + + Args: + size (int or Tuple[int, int], optional): output spatial sizes + scale_factor (float or Tuple[float, float], optional): multiplier for + spatial size. + + .. warning:: + This class is deprecated in favor of :func:`~nn.functional.interpolate`. It is + equivalent to ``nn.functional.interpolate(..., mode='bilinear', align_corners=True)``. + + Shape: + - Input: :math:`(N, C, H_{in}, W_{in})` + - Output: :math:`(N, C, H_{out}, W_{out})` where + + .. math:: + H_{out} = \left\lfloor H_{in} \times \text{scale_factor} \right\rfloor + + .. math:: + W_{out} = \left\lfloor W_{in} \times \text{scale_factor} \right\rfloor + + For example: + + .. code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> input = flow.Tensor(np.arange(1, 5).reshape((1, 1, 2, 2)), dtype=flow.float32) + >>> input = input.to("cuda") + >>> m = flow.nn.UpsamplingBilinear2d(scale_factor=2.0) + >>> output = m(input) + >>> output #doctest: +ELLIPSIS + tensor([[[[1. , 1.3333, 1.6667, 2. ], + ... + [3. , 3.3333, 3.6667, 4. ]]]], device='cuda:0', + dtype=oneflow.float32) + + """ + + def __init__( + self, + size: Optional[Tuple[int, int]] = None, + scale_factor: Optional[Tuple[float, float]] = None, + ) -> None: + super(UpsamplingBilinear2d, self).__init__( + size, scale_factor, mode="bilinear", align_corners=True + ) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/utils.py b/oneflow/compatible_single_client_python/nn/modules/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..4f09c8dc99edac7d2890c4156570865ed0289d18 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/utils.py @@ -0,0 +1,77 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from typing import List + +from collections import abc as container_abcs +from itertools import repeat + + +def _ntuple(n): + def parse(x): + if isinstance(x, container_abcs.Iterable): + return tuple(x) + return tuple(repeat(x, n)) + + return parse + + +_single = _ntuple(1) +_pair = _ntuple(2) +_triple = _ntuple(3) +_quadruple = _ntuple(4) + + +def _reverse_repeat_tuple(t, n): + r"""Reverse the order of `t` and repeat each element for `n` times. + This can be used to translate padding arg used by Conv and Pooling modules + to the ones used by `F.pad`. + """ + return tuple(x for x in reversed(t) for _ in range(n)) + + +def _list_with_default(out_size, defaults): + # type: (List[int], List[int]) -> List[int] + if isinstance(out_size, int): + return out_size + if len(defaults) <= len(out_size): + raise ValueError( + "Input dimension should be at least {}".format(len(out_size) + 1) + ) + return [ + v if v is not None else d for v, d in zip(out_size, defaults[-len(out_size) :]) + ] + + +def _check_axis(axis, shape): + ndim = len(shape) + # TODO(yaochi): refine this function when all related ops in `python/ops/math_ops.py` migrated + if axis is None: + axis = list(range(len(shape))) + if isinstance(axis, int): + axis = [axis] + + assert isinstance(axis, (list, tuple)), "Invalid axis {}".format(axis) + axis = list(axis) + for i in range(len(axis)): + assert ( + -ndim <= axis[i] <= ndim - 1 + ), "Dimension out of range (expected to be in range of [{}, {}], but got {})".format( + -ndim, ndim - 1, axis[i] + ) + if axis[i] < 0: + axis[i] = axis[i] + ndim + + return axis diff --git a/oneflow/compatible_single_client_python/nn/modules/where.py b/oneflow/compatible_single_client_python/nn/modules/where.py new file mode 100644 index 0000000000000000000000000000000000000000..dcf5ed9e4f91e9ba178251355ac902f6eee4bc02 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/where.py @@ -0,0 +1,148 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.nn.module import Module +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.framework.tensor import register_tensor_op + + +class Where(Module): + def __init__(self) -> None: + super().__init__() + + def forward(self, condition, x, y): + assert condition.dtype == flow.int32 or condition.dtype == flow.int8 + if isinstance(x, int) or isinstance(x, float): + x = flow.Tensor( + [float(x)], + dtype=flow.float32, + device=flow.device(condition.device.type), + ) + if isinstance(y, int) or isinstance(y, float): + y = flow.Tensor( + [float(y)], + dtype=flow.float32, + device=flow.device(condition.device.type), + ) + + assert ( + condition.device.type == x.device.type + and condition.device.type == y.device.type + ) + + assert len(condition.shape) == len(x.shape) and len(condition.shape) == len( + y.shape + ), f"The dim of where module's inputs can not match, please check!" + + broadcast_cond = condition + broadcast_x = x + broadcast_y = y + + broadcast_like_shape = [] + broadcast_condition_axes = [] + broadcast_x_axes = [] + broadcast_y_axes = [] + + for i in range(len(x.shape)): + max_dim = max(x.shape[i], max(y.shape[i], condition.shape[i])) + broadcast_like_shape.append(max_dim) + if max_dim != condition.shape[i]: + broadcast_condition_axes.append(i) + if max_dim != x.shape[i]: + broadcast_x_axes.append(i) + if max_dim != y.shape[i]: + broadcast_y_axes.append(i) + + broadcast_like_tensor = flow.experimental.zeros( + tuple(broadcast_like_shape), dtype=flow.float32 + ) + broadcast_like_tensor = broadcast_like_tensor.to(x.device.type) + broadcast_like_tensor.requires_grad = x.requires_grad or y.requires_grad + + if len(broadcast_condition_axes) != 0: + condition = flow.experimental.cast(condition, flow.float32) + broadcast_cond = flow.experimental.broadcast_like( + condition, broadcast_like_tensor, tuple(broadcast_condition_axes) + ) + broadcast_cond = flow.experimental.cast(broadcast_cond, flow.int32) + + if len(broadcast_x_axes) != 0: + broadcast_x = flow.experimental.broadcast_like( + x, broadcast_like_tensor, broadcast_axes=tuple(broadcast_x_axes) + ) + + if len(broadcast_y_axes) != 0: + broadcast_y = flow.experimental.broadcast_like( + y, broadcast_like_tensor, broadcast_axes=tuple(broadcast_y_axes) + ) + + return flow.F.where(broadcast_cond, broadcast_x, broadcast_y) + + +@oneflow_export("where") +@register_tensor_op("where") +@experimental_api +def where_op(condition, x, y): + """Return a tensor of elements selected from either :attr:`x` or :attr:`y`, depending on :attr:`condition`. + If the element in condition is larger than 0, + + it will take the `x` element, else it will take the `y` element + + .. note:: + + The tensors :attr:`condition`, :attr:`x`, :attr:`y` must be broadcastable. + It will take the `x` element, else it will take the `y` element. + + Args: + condition (IntTensor): When 1 (nonzero), yield x, otherwise yield y + x (Tensor or Scalar): value (if :attr:x is a scalar) or values selected at indices + where :attr:`condition` is True + y (Tensor or Scalar): value (if :attr:x is a scalar) or values selected at indices + where :attr:`condition` is False + Returns: + Tensor: A tensor of shape equal to the broadcasted shape of :attr:`condition`, :attr:`x`, :attr:`y` + + For example: + + .. 
code-block:: python + + >>> import numpy as np + >>> import oneflow.compatible.single_client.experimental as flow + >>> flow.enable_eager_execution() + + >>> x = flow.Tensor( + ... np.array([[-0.4620, 0.3139], [0.3898, -0.7197], [0.0478, -0.1657]]), + ... dtype=flow.float32, + ... ) + >>> y = flow.Tensor(np.ones(shape=(3, 2)), dtype=flow.float32) + >>> condition = flow.Tensor(np.array([[0, 1], [1, 0], [1, 0]]), dtype=flow.int32) + >>> out = condition.where(x, y) + >>> out #doctest: +ELLIPSIS + tensor([[1. , 0.3139], + ... + [0.0478, 1. ]], dtype=oneflow.float32) + + """ + return Where()(condition, x, y) + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/modules/zeropad2d.py b/oneflow/compatible_single_client_python/nn/modules/zeropad2d.py new file mode 100644 index 0000000000000000000000000000000000000000..4e43dfc317daaadb223f63fdc5f97bc32b1e18e2 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/modules/zeropad2d.py @@ -0,0 +1,131 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from typing import Union + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.module import Module + + +@oneflow_export("nn.ZeroPad2d") +@experimental_api +class ZeroPad2d(Module): + r"""The interface is consistent with PyTorch. + The documentation is referenced from: + https://pytorch.org/docs/stable/generated/torch.nn.ZeroPad2d.html + + Pads the input tensor boundaries with zero. User can set the amount of padding by setting the parameter `paddings`. + + Args: + padding (Union[int, tuple]): the size of the padding. If is `int`, uses the same padding in all boundaries. If a 4-`tuple`, uses (:math:`\mathrm{padding_{left}}`, :math:`\mathrm{padding_{right}}`, :math:`\mathrm{padding_{top}}`, :math:`\mathrm{padding_{bottom}}`) + + Shape: + - Input: :math:`(N, C, H_{in}, W_{in})` + - Output: :math:`(N, C, H_{out}, W_{out})` where + + :math:`H_{out} = H_{in} + \mathrm{padding_{top}} + \mathrm{padding_{bottom}}` + + :math:`W_{out} = W_{in} + \mathrm{padding_{left}} + \mathrm{padding_{right}}` + + For example: + + .. 
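code-block:: python

+        # a quick shape check (not a doctest): with padding
+        # (left, right, top, bottom) = (1, 2, 2, 0), W grows by 1 + 2
+        # and H grows by 2 + 0, so (1, 2, 3, 3) -> (1, 2, 5, 6)
+        pad = flow.nn.ZeroPad2d((1, 2, 2, 0))
+
+    A full doctest:
+
+    .. 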
code-block:: python + + >>> import oneflow.compatible.single_client.experimental as flow + >>> import numpy as np + >>> flow.enable_eager_execution() + >>> zeropad_layer_int = flow.nn.ZeroPad2d(2) + >>> zeropad_layer_tuple = flow.nn.ZeroPad2d((1,2,2,0)) + >>> input = flow.Tensor(np.arange(18).reshape((1, 2, 3, 3)).astype(np.float32)) + >>> output_int = zeropad_layer_int(input) + >>> output_int.shape + flow.Size([1, 2, 7, 7]) + >>> output_int + tensor([[[[ 0., 0., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 1., 2., 0., 0.], + [ 0., 0., 3., 4., 5., 0., 0.], + [ 0., 0., 6., 7., 8., 0., 0.], + [ 0., 0., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 0., 0., 0., 0.]], + <BLANKLINE> + [[ 0., 0., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 0., 0., 0., 0.], + [ 0., 0., 9., 10., 11., 0., 0.], + [ 0., 0., 12., 13., 14., 0., 0.], + [ 0., 0., 15., 16., 17., 0., 0.], + [ 0., 0., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 0., 0., 0., 0.]]]], dtype=oneflow.float32) + >>> output_tuple = zeropad_layer_tuple(input) + >>> output_tuple + tensor([[[[ 0., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 0., 0., 0.], + [ 0., 0., 1., 2., 0., 0.], + [ 0., 3., 4., 5., 0., 0.], + [ 0., 6., 7., 8., 0., 0.]], + <BLANKLINE> + [[ 0., 0., 0., 0., 0., 0.], + [ 0., 0., 0., 0., 0., 0.], + [ 0., 9., 10., 11., 0., 0.], + [ 0., 12., 13., 14., 0., 0.], + [ 0., 15., 16., 17., 0., 0.]]]], dtype=oneflow.float32) + """ + + def __init__(self, padding: Union[int, tuple]): + super().__init__() + if isinstance(padding, tuple): + assert len(padding) == 4, ValueError("Length of padding must be 4") + boundary = [padding[0], padding[1], padding[2], padding[3]] + elif isinstance(padding, int): + boundary = [padding, padding, padding, padding] + else: + raise ValueError("padding must be int or tuple!") + + self.padding = boundary + self.value = 0.0000 + + def forward(self, x): + _, _, h, w = x.shape + + if x.dtype in [flow.float32, flow.float16, flow.float64]: + floating_value = float(self.value) + integral_value = int(0) + else: + floating_value = float(0) + integral_value = int(self.value) + self._op = ( + flow.builtin_op("constant_pad2d") + .Input("x") + .Output("y") + .Attr("padding", self.padding) + .Attr("floating_value", floating_value) + .Attr("integral_value", integral_value) + .Build() + ) + + res = self._op(x)[0] + return res + + +if __name__ == "__main__": + import doctest + + doctest.testmod(raise_on_error=True) diff --git a/oneflow/compatible_single_client_python/nn/optimizer/__init__.py b/oneflow/compatible_single_client_python/nn/optimizer/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/nn/optimizer/adam.py b/oneflow/compatible_single_client_python/nn/optimizer/adam.py new file mode 100644 index 0000000000000000000000000000000000000000..8bb6132fd82b511e1813845d8daa619d0b68561f --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/optimizer/adam.py @@ -0,0 +1,161 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from typing import List, Dict, Callable, Union, Iterator, Tuple
+import collections
+
+from oneflow.compatible import single_client as flow
+
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.nn.parameter import Parameter
+from oneflow.compatible_single_client_python.nn.optimizer.optimizer import (
+    Optimizer,
+    ParamGroup,
+)
+
+
+@oneflow_export("optim.Adam")
+@experimental_api
+class Adam(Optimizer):
+    r"""Implements Adam algorithm.
+
+    It has been proposed in `Adam: A Method for Stochastic Optimization`_.
+    The implementation of the L2 penalty follows changes proposed in
+    `Decoupled Weight Decay Regularization`_.
+
+    This algorithm can adjust the learning rate of each parameter dynamically
+    according to the 1st-moment and 2nd-moment estimates of the gradient.
+
+    The equations for updating parameters are:
+
+    .. math::
+
+        & V_t = \beta_1*V_{t-1} + (1-\beta_1)*grad
+
+        & S_t = \beta_2*S_{t-1} + (1-\beta_2)*{grad} \odot {grad}
+
+        & \hat{g} = learning\_rate*\frac{{V_t}}{\sqrt{{S_t}}+\epsilon}
+
+        & param_{new} = param_{old} - \hat{g}
+
+    Args:
+        params (iterable): iterable of parameters to optimize or dicts defining
+            parameter groups
+        lr (float, optional): learning rate (default: 1e-3)
+        betas (Tuple[float, float], optional): coefficients used for computing
+            running averages of gradient and its square (default: (0.9, 0.999))
+        eps (float, optional): term added to the denominator to improve
+            numerical stability (default: 1e-8)
+        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
+        scale (float, optional): the scale factor of loss (default: 1.0)
+
+    .. _Adam\: A Method for Stochastic Optimization:
+        https://arxiv.org/abs/1412.6980
+    .. _Decoupled Weight Decay Regularization:
+        https://arxiv.org/abs/1711.05101
+
+    """
+
+    def __init__(
+        self,
+        parameters: Union[Iterator[Parameter], List[Dict]],
+        lr: float = 1e-3,
+        betas: Tuple[float, float] = (0.9, 0.999),
+        eps: float = 1e-8,
+        weight_decay: float = 0,  # Adam's weight_decay is really an L2 penalty
+        amsgrad: bool = False,
+        scale: float = 1.0,
+    ):
+        super().__init__()
+        assert lr >= 0.0, f"Invalid learning rate: {lr}"
+        assert eps >= 0.0, f"Invalid epsilon value: {eps}"
+        assert (
+            betas[0] >= 0.0 and betas[0] < 1.0
+        ), f"Invalid beta parameter at index 0: {betas[0]}"
+        assert (
+            betas[1] >= 0.0 and betas[1] < 1.0
+        ), f"Invalid beta parameter at index 1: {betas[1]}"
+        assert weight_decay >= 0.0, f"Invalid weight_decay value: {weight_decay}"
+        assert scale > 0.0, f"Invalid scale factor: {scale}"
+        assert amsgrad is False, "AMSGrad is not supported yet!"
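+
+        # The validated hyperparameters become the defaults shared by every
+        # ParamGroup created below; a dict entry in `parameters` may override
+        # any of these keys for its own group.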
+ + self._default_options["lr"] = lr + self._default_options["eps"] = eps + self._default_options["betas"] = betas + self._default_options["weight_decay"] = weight_decay + self._default_options["amsgrad"] = amsgrad + self._default_options["scale"] = scale + + # Add parameters + if isinstance(parameters, collections.abc.Iterator): + self.param_groups.append(ParamGroup(parameters, self._default_options)) + else: # List[Dict] + for param in parameters: + self.param_groups.append(ParamGroup(param, self._default_options)) + + for param_group in self.param_groups: + for param in param_group.parameters: + assert param.is_leaf, "parameters must be leaf tensor" + self._state[param] = dict() + self._state[param]["exp_avg"] = flow.experimental.zeros_like(param) + self._state[param]["exp_avg_sq"] = flow.experimental.zeros_like(param) + + self._op = ( + flow.builtin_op("adam_update") + .Input("model") + .Input("model_diff") + .Input("m") + .Input("v") + .Attr("l1", 0.0) + .Attr("weight_decay", 0.0) + .Build() + ) + + def step(self, closure: Callable = None): + """Performs a single optimization step. + + Args: + closure (callable, optional): A closure that reevaluates the model + and returns the loss. + """ + with flow.no_grad(): + loss = None + if closure is not None: + loss = closure() + + for param_group in self.param_groups: + kwargs = { + "learning_rate_val": param_group["lr"], + "scale": param_group["scale"], + "l2": param_group["weight_decay"], + "beta1": param_group["betas"][0], + "beta2": param_group["betas"][1], + "epsilon": param_group["eps"], + } + for param in param_group.parameters: + if param.grad is None: + continue + m_tensor = self._state[param]["exp_avg"] + v_tensor = self._state[param]["exp_avg_sq"] + self._op( + param, param.grad, m_tensor, v_tensor, **kwargs, + ) + + self._state["step"] = self._state["step"] + 1 + + return loss diff --git a/oneflow/compatible_single_client_python/nn/optimizer/adamw.py b/oneflow/compatible_single_client_python/nn/optimizer/adamw.py new file mode 100644 index 0000000000000000000000000000000000000000..2d95d835d359a8517a1b5b7d23b7efe4c5c1e71a --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/optimizer/adamw.py @@ -0,0 +1,164 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from typing import List, Dict, Callable, Union, Iterator, Tuple +import collections + +from oneflow.compatible import single_client as flow + +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.parameter import Parameter +from oneflow.compatible_single_client_python.nn.optimizer.optimizer import ( + ParamGroup, + Optimizer, +) + + +@oneflow_export("optim.AdamW") +@experimental_api +class AdamW(Optimizer): + r"""Implements AdamW algorithm. + + The original Adam algorithm was proposed in `Adam: A Method for Stochastic Optimization`_. + The AdamW variant was proposed in `Decoupled Weight Decay Regularization`_. 
+
+    The optimizer of the Adam-weight-decay algorithm.
+
+    Unlike Adam's L2 penalty, the weight decay here is decoupled from the
+    gradient-based update (for more details please refer to
+    `Adam-weight-decay <https://www.fast.ai/2018/07/02/adam-weight-decay/>`_).
+
+    The equations for updating parameters are:
+
+    .. math::
+
+        & V_t = \beta_1*V_{t-1} + (1-\beta_1)*grad
+
+        & S_t = \beta_2*S_{t-1} + (1-\beta_2)*{grad} \odot {grad}
+
+        & \hat{g} = learning\_rate*(\frac{{V_t}}{\sqrt{{S_t}}+\epsilon}+\lambda*param_{old})
+
+        & param_{new} = param_{old} - \hat{g}
+
+    Args:
+        params (iterable): iterable of parameters to optimize or dicts defining
+            parameter groups
+        lr (float, optional): learning rate (default: 1e-3)
+        betas (Tuple[float, float], optional): coefficients used for computing
+            running averages of gradient and its square (default: (0.9, 0.999))
+        eps (float, optional): term added to the denominator to improve
+            numerical stability (default: 1e-8)
+        weight_decay (float, optional): weight decay (:math:`\lambda` in the equation above, default: 0)
+        scale (float, optional): the scale factor of loss (default: 1.0)
+
+    .. _Adam\: A Method for Stochastic Optimization:
+        https://arxiv.org/abs/1412.6980
+    .. _Decoupled Weight Decay Regularization:
+        https://arxiv.org/abs/1711.05101
+
+    """
+
+    def __init__(
+        self,
+        parameters: Union[Iterator[Parameter], List[Dict]],
+        lr: float = 1e-3,
+        betas: Tuple[float, float] = (0.9, 0.999),
+        eps: float = 1e-8,
+        weight_decay: float = 0,
+        amsgrad: bool = False,
+        scale: float = 1.0,
+    ):
+        super().__init__()
+        assert lr >= 0.0, f"Invalid learning rate: {lr}"
+        assert eps >= 0.0, f"Invalid epsilon value: {eps}"
+        assert (
+            betas[0] >= 0.0 and betas[0] < 1.0
+        ), f"Invalid beta parameter at index 0: {betas[0]}"
+        assert (
+            betas[1] >= 0.0 and betas[1] < 1.0
+        ), f"Invalid beta parameter at index 1: {betas[1]}"
+        assert weight_decay >= 0.0, f"Invalid weight_decay value: {weight_decay}"
+        assert scale > 0.0, f"Invalid scale factor: {scale}"
+        assert amsgrad is False, "AMSGrad is not supported yet!"
+
+        self._default_options["lr"] = lr
+        self._default_options["eps"] = eps
+        self._default_options["betas"] = betas
+        self._default_options["weight_decay"] = weight_decay
+        self._default_options["amsgrad"] = amsgrad
+        self._default_options["scale"] = scale
+
+        # Add parameters
+        if isinstance(parameters, collections.abc.Iterator):
+            self.param_groups.append(ParamGroup(parameters, self._default_options))
+        else:  # List[Dict]
+            for param in parameters:
+                self.param_groups.append(ParamGroup(param, self._default_options))
+
+        for param_group in self.param_groups:
+            for param in param_group.parameters:
+                assert param.is_leaf, "parameters must be leaf tensor"
+                self._state[param] = dict()
+                self._state[param]["exp_avg"] = flow.experimental.zeros_like(param)
+                self._state[param]["exp_avg_sq"] = flow.experimental.zeros_like(param)
+
+        self._op = (
+            flow.builtin_op("adam_update")
+            .Input("model")
+            .Input("model_diff")
+            .Input("m")
+            .Input("v")
+            .Attr("l1", 0.0)
+            .Attr("l2", 0.0)
+            .Build()
+        )
+
+    def step(self, closure: Callable = None):
+        """Performs a single optimization step.
+
+        Args:
+            closure (callable, optional): A closure that reevaluates the model
+                and returns the loss.
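+
+        For example (a minimal sketch; ``model`` and ``loss`` are assumed to be
+        a built module and the scalar output of its forward pass):
+
+        .. code-block:: python
+
+            optimizer = flow.optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-2)
+            loss.backward()
+            optimizer.step()
+            optimizer.zero_grad()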
+ """ + with flow.no_grad(): + loss = None + if closure is not None: + loss = closure() + + for param_group in self.param_groups: + kwargs = { + "learning_rate_val": param_group["lr"], + "scale": param_group["scale"], + "weight_decay": param_group["weight_decay"], + "beta1": param_group["betas"][0], + "beta2": param_group["betas"][1], + "epsilon": param_group["eps"], + } + for param in param_group.parameters: + if param.grad is None: + continue + m_tensor = self._state[param]["exp_avg"] + v_tensor = self._state[param]["exp_avg_sq"] + self._op( + param, param.grad, m_tensor, v_tensor, **kwargs, + ) + + self._state["step"] = self._state["step"] + 1 + + return loss diff --git a/oneflow/compatible_single_client_python/nn/optimizer/cosine_annealing_lr.py b/oneflow/compatible_single_client_python/nn/optimizer/cosine_annealing_lr.py new file mode 100644 index 0000000000000000000000000000000000000000..4f759146e4c2da3fbbe28b49dfe6e2ed42f96d78 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/optimizer/cosine_annealing_lr.py @@ -0,0 +1,89 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import math + +from oneflow.compatible_single_client_python.oneflow_export import ( + experimental_api, + oneflow_export, +) +from .lr_scheduler import LrScheduler + + +@oneflow_export("optim.lr_scheduler.CosineAnnealingLR") +@experimental_api +class CosineAnnealingLR(LrScheduler): + r"""This operator creates a Cosine decayed learning rate scheduler. + + Before the steps are specified by user, the learning rate will be updated as: + + .. math:: + + & cos\_decay = 0.5*(1+cos(\pi*\frac{current\_step}{steps})) + + & decay\_factor = (1-\alpha)*cos\_decay+\alpha + + & learning\_rate = base\_learning\_rate*decay\_factor + + After the steps specified by user, the learning rate will be : + + .. math:: + + learning\_rate = {base\_learning\_rate}*{\alpha} + + It has been proposed in + `SGDR: Stochastic Gradient Descent with Warm Restarts`_. Note that this only + implements the cosine annealing part of SGDR, and not the restarts. + + Args: + optimizer(Optimizer): Wrapped optimizer. + steps (int): The decay steps in the scheduler. + alpha (float, optional): The learning rate scale factor (:math:`\alpha`). (default: 0.0) + last_step (int, optional): The index of last step. (default: -1) + verbose (bool, optional): If ``True``, prints a message to stdout for each update. (default: ``False``) + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client.experimental as flow + + ... + cosine_annealing_lr = flow.optim.lr_scheduler.CosineAnnealingLR(optimizer, steps=100, alpha=0.0) + for epoch in range(num_epoch): + train(...) + cosine_annealing_lr.step() + + .. 
_SGDR\: Stochastic Gradient Descent with Warm Restarts: + https://arxiv.org/abs/1608.03983 + """ + + def __init__( + self, optimizer, steps: int, alpha: float = 0.0, last_step=-1, verbose=False + ): + assert steps > 0, f"steps must greater than zero, but got {steps}" + + self.steps = steps + self.alpha = alpha + super().__init__(optimizer, last_step, verbose) + + def get_lr(self): + if self.last_step < self.steps: + cos_decay = 0.5 * (1 + math.cos(math.pi * self.last_step / self.steps)) + decay_factor = (1 - self.alpha) * cos_decay + self.alpha + return [base_lr * decay_factor for base_lr in self.base_lrs] + else: + return [base_lr * self.alpha for base_lr in self.base_lrs] diff --git a/oneflow/compatible_single_client_python/nn/optimizer/lambda_lr.py b/oneflow/compatible_single_client_python/nn/optimizer/lambda_lr.py new file mode 100644 index 0000000000000000000000000000000000000000..d9f51505c7457b11e6a2b80d0b1de091d59901be --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/optimizer/lambda_lr.py @@ -0,0 +1,111 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +import types + +from oneflow.compatible_single_client_python.oneflow_export import ( + experimental_api, + oneflow_export, +) +from .lr_scheduler import LrScheduler + + +@oneflow_export("optim.lr_scheduler.LambdaLR") +@experimental_api +class LambdaLR(LrScheduler): + r""" + Sets the learning rate of each parameter group to the initial lr times a given function. + When last_step=-1, sets initial lr as lr. + + .. math:: + + learning\_rate = base\_learning\_rate*lambda(last\_step) + + Args: + optimizer(Optimizer): Wrapped optimizer. + lr_lambda(function or list): A function which computes a multiplicative factor given an integer + parameter epoch, or a list of such functions, one for each group in optimizer.param_groups. + last_step (int, optional): The index of last step. (default: -1) + verbose (bool, optional): If ``True``, prints a message to stdout for each update. (default: ``False``) + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client.experimental as flow + + ... + lambda1 = lambda step: step // 30 + lambda2 = lambda step: 0.95 * step + lambda_lr = flow.optim.lr_scheduler.LambdaLR(optimizer, [lambda1, lambda2]) + for epoch in range(num_epoch): + train(...) + lambda_lr.step() + + """ + + def __init__(self, optimizer, lr_lambda, last_step=-1, verbose=False): + if not isinstance(lr_lambda, (list, tuple)): + self.lr_lambdas = [lr_lambda] * len(optimizer.param_groups) + else: + assert len(lr_lambda) == len( + optimizer.param_groups + ), f"Expected {len(optimizer.param_groups)} lr_lambdas, but got {len(lr_lambda)}" + self.lr_lambdas = list(lr_lambda) + + super().__init__(optimizer, last_step, verbose) + + def state_dict(self): + """Returns the state of the scheduler as a :class:`dict`. + + It contains an entry for every variable in self.__dict__ which + is not the optimizer. 
+ The learning rate lambda functions will only be saved if they are callable objects + and not if they are functions or lambdas. + """ + state_dict = { + key: value + for key, value in self.__dict__.items() + if key not in ("optimizer", "lr_lambdas") + } + state_dict["lr_lambdas"] = [None] * len(self.lr_lambdas) + + for idx, fn in enumerate(self.lr_lambdas): + if not isinstance(fn, types.FunctionType): + state_dict["lr_lambdas"][idx] = fn.__dict__.copy() + + return state_dict + + def load_state_dict(self, state_dict): + """Loads the schedulers state. + + Arguments: + state_dict (dict): scheduler state. Should be an object returned + from a call to :meth:`state_dict`. + """ + lr_lambdas = state_dict.pop("lr_lambdas") + self.__dict__.update(state_dict) + state_dict["lr_lambdas"] = lr_lambdas + + for idx, fn in enumerate(lr_lambdas): + if fn is not None: + self.lr_lambdas[idx].__dict__.update(fn) + + def get_lr(self): + return [ + base_lr * lmbda(self.last_step) + for lmbda, base_lr in zip(self.lr_lambdas, self.base_lrs) + ] diff --git a/oneflow/compatible_single_client_python/nn/optimizer/lr_scheduler.py b/oneflow/compatible_single_client_python/nn/optimizer/lr_scheduler.py new file mode 100644 index 0000000000000000000000000000000000000000..1b69f0ffd1408de172750babc546630d10dfa828 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/optimizer/lr_scheduler.py @@ -0,0 +1,90 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from .optimizer import Optimizer + + +@oneflow_export("optim.lr_scheduler._LRScheduler") +@experimental_api +class LrScheduler(object): + def __init__(self, optimizer, last_step=-1, verbose=False): + if not isinstance(optimizer, Optimizer): + raise TypeError(f"{type(optimizer).__name__} is not an Optimizer object") + self._optimizer = optimizer + + if last_step == -1: + for group in self._optimizer.param_groups: + group["initial_lr"] = group["lr"] + else: + for i, group in enumerate(self._optimizer.param_groups): + assert "initial_lr" in group, ( + "param 'initial_lr' is not specified in " + f"param_groups[{i}] when resuming an optimizer" + ) + + self.base_lrs = [group["initial_lr"] for group in self._optimizer.param_groups] + self.last_lr = list() + self.last_step = last_step + + self.verbose = verbose + self.step() + + def state_dict(self): + """Returns the state of the scheduler as a :class:`dict`. + + It contains an entry for every variable in self.__dict__ which + is not the optimizer. + """ + return { + key: value for key, value in self.__dict__.items() if key != "_optimizer" + } + + def load_state_dict(self, state_dict): + """Loads the schedulers state. + + Arguments: + state_dict (dict): scheduler state. Should be an object returned + from a call to :meth:`state_dict`. 
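+
+        For example (a sketch; ``scheduler`` is assumed to be an already
+        constructed LR scheduler):
+
+        .. code-block:: python
+
+            state = scheduler.state_dict()
+            # ... later, e.g. when resuming training ...
+            scheduler.load_state_dict(state)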
+        """
+        self.__dict__.update(state_dict)
+
+    def get_lr(self):
+        """Compute the learning rate using the chainable form of the scheduler.
+        """
+        raise NotImplementedError
+
+    def get_last_lr(self):
+        """Return the last learning rate computed by the current scheduler.
+        """
+        return self.last_lr
+
+    def print_lr(self, group_idx, lr):
+        """Display the current learning rate.
+        """
+        print(f"Adjusting learning rate of param_groups[{group_idx}] to {lr}")
+
+    def step(self):
+        self.last_step += 1
+        self.last_lr = self.get_lr()
+
+        for i, group in enumerate(self._optimizer.param_groups):
+            group["lr"] = self.last_lr[i]
+            if self.verbose:
+                self.print_lr(i, self.last_lr[i])
diff --git a/oneflow/compatible_single_client_python/nn/optimizer/optimizer.py b/oneflow/compatible_single_client_python/nn/optimizer/optimizer.py
new file mode 100644
index 0000000000000000000000000000000000000000..38a1f91d80e1d4db31eb2e03be986847756f14c4
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/optimizer/optimizer.py
@@ -0,0 +1,124 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import warnings
+from typing import Dict, Callable, Union, Any, Iterator
+import collections
+
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.nn.parameter import Parameter
+from oneflow.compatible_single_client_python.framework.tensor import Tensor
+
+
+class ParamGroup(object):
+    def __init__(
+        self,
+        parameters: Union[Iterator[Parameter], Dict[str, Any]],
+        default_options: Dict,
+    ):
+        if isinstance(parameters, collections.abc.Iterator):
+            self._parameters = list(parameters)
+            # copy the defaults so per-group overrides cannot leak into the
+            # shared default dict of the owning optimizer
+            self._options = dict(default_options)
+        else:  # Dict
+            assert "params" in parameters
+            self._parameters = list(parameters["params"])
+            self._options = dict(default_options)
+            for key in self._options:
+                if key in parameters:
+                    self._options[key] = parameters[key]
+
+    def __getitem__(self, key):
+        return self._options[key]
+
+    def __setitem__(self, key, value):
+        self._options[key] = value
+
+    @property
+    def options(self):
+        return self._options
+
+    @property
+    def parameters(self):
+        return self._parameters
+
+
+@oneflow_export("optim.Optimizer")
+@experimental_api
+class Optimizer(object):
+    def __init__(self):
+        self.param_groups = list()
+        self._default_options = dict()
+        self._state = dict()
+        self._state["step"] = 0
+        self._op = None
+
+    def add_param_group(self, param_group) -> None:
+        # TODO(wyg)
+        raise NotImplementedError()
+
+    def load_state_dict(self, state_dict) -> None:
+        # TODO(wyg)
+        raise NotImplementedError()
+
+    def state_dict(self):
+        # TODO(wyg)
+        raise NotImplementedError()
+
+    def step(self, closure: Union[Callable, None] = None) -> Union[Tensor, None]:
+        raise NotImplementedError()
+
+    def zero_grad(self, set_to_none: bool = False):
+        r"""Sets the gradients of all optimized oneflow.Tensor s to zero.
+
+        Args:
+            set_to_none (bool): instead of setting to zero, set the grads to None.
+                This will in general have lower memory footprint, and can modestly
+                improve performance. However, it changes certain behaviors.
+                For example:
+                1. When the user tries to access a gradient and perform manual ops on
+                it, a None attribute or a Tensor full of 0s will behave differently.
+
+                2. If the user requests zero_grad(set_to_none=True) followed by a
+                backward pass, grads are guaranteed to be None for params that did not
+                receive a gradient.
+
+                3. Optimizers have a different behavior if the gradient is 0 or None
+                (in one case it does the step with a gradient of 0 and in the other
+                it skips the step altogether).
+
+        Returns:
+            None
+
+        """
+        all_grad_is_none = True
+        for param_group in self.param_groups:
+            for param in param_group.parameters:
+                if param.grad is not None:
+                    all_grad_is_none = False
+                    if set_to_none:
+                        param.grad = None
+                    else:
+                        param.grad.zeros_()
+        if all_grad_is_none:
+            # TODO: delete this after implementing Tensor.data
+            warnings.warn(
+                "\nParameters in optimizer do not have gradient.\n"
+                "Please check `loss.backward()` is called or not,\n"
+                "or try to declare optimizer after calling `module.to()`"
+            )
diff --git a/oneflow/compatible_single_client_python/nn/optimizer/rmsprop.py b/oneflow/compatible_single_client_python/nn/optimizer/rmsprop.py
new file mode 100644
index 0000000000000000000000000000000000000000..00303222ce215f9a37c4358d6a481fc857f18fc5
--- /dev/null
+++ b/oneflow/compatible_single_client_python/nn/optimizer/rmsprop.py
@@ -0,0 +1,192 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+from typing import List, Dict, Callable, Union, Iterator
+import collections
+
+from oneflow.compatible import single_client as flow
+
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    experimental_api,
+)
+from oneflow.compatible_single_client_python.nn.parameter import Parameter
+from oneflow.compatible_single_client_python.nn.optimizer.optimizer import (
+    ParamGroup,
+    Optimizer,
+)
+
+
+@oneflow_export("optim.RMSprop")
+@experimental_api
+class RMSprop(Optimizer):
+    r"""Implements RMSprop algorithm.
+
+    Root Mean Squared Propagation (RMSProp) is an unpublished, adaptive learning
+    rate method. RMSProp was first proposed on slide 29 of
+    http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf .
+
+    The original equation is as follows:
+
+    .. math::
+
+        r(w, t) = \alpha r(w, t-1) + (1 - \alpha)(\nabla Q_{i}(w))^2
+
+        w = w - \frac{\eta} {\sqrt{r(w,t) + \epsilon}} \nabla Q_{i}(w)
+
+    The first equation calculates the moving average of the squared gradient for
+    each weight, and the gradient is then divided by :math:`\sqrt{r(w,t)}`.
+    In some cases, adding a momentum term :math:`\beta` is beneficial.
+    In our implementation, Nesterov momentum is used:
+
+    ..
math::
+
+        r(w, t) = \alpha r(w, t-1) + (1 - \alpha)(\nabla Q_{i}(w))^2
+
+        v(w, t) = \beta v(w, t-1) + \frac{\eta} {\sqrt{r(w,t) +
+            \epsilon}} \nabla Q_{i}(w)
+
+        w = w - v(w, t)
+
+    if centered is True:
+
+    .. math::
+
+        r(w, t) = \alpha r(w, t-1) + (1 - \alpha)(\nabla Q_{i}(w))^2
+
+        g(w, t) = \alpha g(w, t-1) + (1 - \alpha)\nabla Q_{i}(w)
+
+        v(w, t) = \beta v(w, t-1) + \frac{\eta} {\sqrt{r(w,t) - (g(w, t))^2 +
+            \epsilon}} \nabla Q_{i}(w)
+
+        w = w - v(w, t)
+
+    where, :math:`\alpha` is a hyperparameter and typical values are 0.99, 0.95
+    and so on. :math:`\beta` is the momentum term. :math:`\epsilon` is a
+    smoothing term to avoid division by zero, usually set somewhere in range
+    from 1e-4 to 1e-8.
+
+    Args:
+        params (iterable): iterable of parameters to optimize or dicts defining
+            parameter groups
+        lr (float, optional): learning rate (default: 1e-2)
+        momentum (float, optional): momentum factor (default: 0; oneflow does not support momentum > 0 yet!)
+        alpha (float, optional): smoothing constant (default: 0.99)
+        eps (float, optional): term added to the denominator to improve
+            numerical stability (default: 1e-8)
+        centered (bool, optional): if ``True``, compute the centered RMSProp,
+            the gradient is normalized by an estimation of its variance
+        weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
+    """
+
+    def __init__(
+        self,
+        parameters: Union[Iterator[Parameter], List[Dict]],
+        lr: float = 1e-3,
+        alpha: float = 0.99,
+        eps: float = 1e-8,
+        weight_decay: float = 0,
+        momentum: float = 0.0,
+        centered: bool = False,
+        scale: float = 1.0,
+    ):
+        super().__init__()
+        assert lr >= 0.0, f"Invalid learning rate: {lr}"
+        assert alpha >= 0.0, f"Invalid alpha value: {alpha}"
+        assert eps >= 0.0, f"Invalid epsilon value: {eps}"
+        assert weight_decay >= 0.0, f"Invalid weight_decay value: {weight_decay}"
+        assert scale > 0.0, f"Invalid scale factor: {scale}"
+        assert momentum == 0.0, "Momentum greater than zero is not supported yet!"
+
+        self._default_options["lr"] = lr
+        self._default_options["alpha"] = alpha
+        self._default_options["eps"] = eps
+        self._default_options["weight_decay"] = weight_decay
+        self._default_options["centered"] = centered
+        self._default_options["scale"] = scale
+
+        # Add parameters
+        if isinstance(parameters, collections.abc.Iterator):
+            self.param_groups.append(ParamGroup(parameters, self._default_options))
+        else:  # List[Dict]
+            for param in parameters:
+                self.param_groups.append(ParamGroup(param, self._default_options))
+
+        for param_group in self.param_groups:
+            for param in param_group.parameters:
+                assert param.is_leaf, "parameters must be leaf tensor"
+                self._state[param] = dict()
+                self._state[param]["square_avg"] = flow.experimental.zeros_like(param)
+                if param_group["centered"]:
+                    self._state[param]["grad_avg"] = flow.experimental.zeros_like(param)
+
+        self._centered_rmsprop = (
+            flow.builtin_op("rmsprop_update")
+            .Input("model")
+            .Input("model_diff")
+            .Input("mean_square")
+            .Input("mean_gradient")
+            .Attr("centered", True)
+            .Attr("l1", 0.0)
+            .Attr("l2", 0.0)
+            .Build()
+        )
+        self._rmsprop = (
+            flow.builtin_op("rmsprop_update")
+            .Input("model")
+            .Input("model_diff")
+            .Input("mean_square")
+            .Attr("centered", False)
+            .Attr("l1", 0.0)
+            .Attr("l2", 0.0)
+            .Build()
+        )
+
+    def step(self, closure: Callable = None):
+        """Performs a single optimization step.
+
+        Args:
+            closure (callable, optional): A closure that reevaluates the model
+                and returns the loss.
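+
+        For example (a minimal sketch; ``model`` and ``loss`` are assumed to be
+        a built module and the scalar output of its forward pass):
+
+        .. code-block:: python
+
+            optimizer = flow.optim.RMSprop(model.parameters(), lr=1e-3, alpha=0.99)
+            loss.backward()
+            optimizer.step()
+            optimizer.zero_grad()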
+ """ + with flow.no_grad(): + loss = None + if closure is not None: + loss = closure() + + for param_group in self.param_groups: + kwargs = { + "learning_rate_val": param_group["lr"], + "scale": param_group["scale"], + "epsilon": param_group["eps"], + "decay_rate": param_group["alpha"], + "weight_decay": param_group["weight_decay"], + } + for param in param_group.parameters: + if param.grad is None: + continue + ms_tensor = self._state[param]["square_avg"] + if param_group["centered"]: + mg_tensor = self._state[param]["grad_avg"] + self._centered_rmsprop( + param, param.grad, ms_tensor, mg_tensor, **kwargs + ) + else: + self._rmsprop(param, param.grad, ms_tensor, **kwargs) + + self._state["step"] = self._state["step"] + 1 + + return loss diff --git a/oneflow/compatible_single_client_python/nn/optimizer/sgd.py b/oneflow/compatible_single_client_python/nn/optimizer/sgd.py new file mode 100644 index 0000000000000000000000000000000000000000..b66db77c227f3e0b59b8bd3a7f107b0eed991eb4 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/optimizer/sgd.py @@ -0,0 +1,140 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" + +from typing import List, Dict, Callable, Union, Iterator +import collections + +from oneflow.compatible import single_client as flow + +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + experimental_api, +) +from oneflow.compatible_single_client_python.nn.parameter import Parameter +from .optimizer import Optimizer, ParamGroup + + +@oneflow_export("optim.SGD") +@experimental_api +class SGD(Optimizer): + r"""Implements SGD algorithm. + + This algorithm takes a random sample’s gradient as an approximate estimate of the overall gradient in small batch gradient descent. + + When the momentum = 0, the equation of parameters updating is: + + .. math:: + + param_{new} = param_{old} - learning\_rate * grad + + With momentum, the equation of parameters updating is: + + .. 
math:: + + & V_t = \beta * V_{t-1} + learning\_rate * g_t + + & param_{new} = param_{old} - V_t + + Args: + params (iterable): iterable of parameters to optimize or dicts defining + parameter groups + lr (float, optional): learning rate (default: 1e-3) + momentum (float, optional): Momentum factor (default: 0.0) + scale (float, optional): the scale factor of loss (default: 1.0) + + """ + + def __init__( + self, + parameters: Union[Iterator[Parameter], List[Dict]], + lr: float = 1e-3, + momentum: float = 0.0, + scale: float = 1.0, + ): + super().__init__() + assert lr >= 0.0, f"Invalid learning rate: {lr}" + assert momentum >= 0.0, f"Invalid momentum: {momentum}" + assert scale >= 0.0, f"Invalid scale factor: {scale}" + + self._default_options["lr"] = lr + self._default_options["scale"] = scale + self._default_options["momentum"] = momentum + + # Add parameters + if isinstance(parameters, collections.abc.Iterator): + self.param_groups.append(ParamGroup(parameters, self._default_options)) + else: # List[Dict] + for param in parameters: + self.param_groups.append(ParamGroup(param, self._default_options)) + + for param_group in self.param_groups: + for param in param_group.parameters: + assert param.is_leaf, "parameters must be leaf tensor" + self._state[param] = dict() + if param_group["momentum"] != 0.0: + self._state[param]["momentum_buf"] = flow.experimental.zeros_like( + param + ) + + self._momentum_sgd = ( + flow.builtin_op("momentum_update") + .Input("model") + .Input("model_diff") + .Input("momentum") + .Attr("l1", 0.0) + .Attr("l2", 0.0) + .Attr("weight_decay", 0.0) + .Build() + ) + self._sgd = ( + flow.builtin_op("sgd_update") + .Input("model") + .Input("model_diff") + .Attr("weight_decay", 0.0) + .Attr("l1", 0.0) + .Attr("l2", 0.0) + .Build() + ) + + def step(self, closure: Callable = None): + with flow.no_grad(): + loss = None + if closure is not None: + loss = closure() + + for param_group in self.param_groups: + lr = param_group["lr"] + for param in param_group.parameters: + if param.grad is None: + continue + if param_group["momentum"] == 0.0: + scale = param_group["scale"] + self._sgd(param, param.grad, learning_rate_val=lr, scale=scale) + else: + momentum_buf = self._state[param]["momentum_buf"] + scale = param_group["scale"] + beta = param_group["momentum"] + self._momentum_sgd( + param, + param.grad, + momentum_buf, + learning_rate_val=lr, + scale=scale, + beta=beta, + ) + + self._state["step"] = self._state["step"] + 1 + return loss diff --git a/oneflow/compatible_single_client_python/nn/optimizer/step_lr.py b/oneflow/compatible_single_client_python/nn/optimizer/step_lr.py new file mode 100644 index 0000000000000000000000000000000000000000..3ebb647a6746223a3f1a4f563383835383828a20 --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/optimizer/step_lr.py @@ -0,0 +1,67 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from oneflow.compatible_single_client_python.oneflow_export import ( + experimental_api, + oneflow_export, +) +from .lr_scheduler import LrScheduler + + +@oneflow_export("optim.lr_scheduler.StepLR") +@experimental_api +class StepLR(LrScheduler): + r""" + Decays the learning rate of each parameter group by gamma every step_size steps. + Notice that such decay can happen simultaneously with other changes to the learning + rate fromoutside this scheduler. When last_step=-1, sets initial lr as lr. + + Args: + optimizer(Optimizer): Wrapped optimizer. + step_size (int): Period of learning rate decay. + gamma (float, optional): Multiplicative factor of learning rate decay. (default: 0.1) + last_step (int, optional): The index of last step. (default: -1) + verbose (bool, optional): If ``True``, prints a message to stdout for each update. (default: ``False``) + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client.experimental as flow + + ... + step_lr = flow.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1) + for epoch in range(num_epoch): + train(...) + step_lr.step() + + """ + + def __init__( + self, optimizer, step_size: int, gamma: float = 0.1, last_step=-1, verbose=False + ): + assert step_size > 0, f"step_size must greater than zero, but got {step_size}" + assert gamma > 0.0, f"gamma must greater than zero, but got {gamma}" + + self.step_size = step_size + self.gamma = gamma + super().__init__(optimizer, last_step, verbose) + + def get_lr(self): + if self.last_step == 0 or self.last_step % self.step_size != 0: + return [group["lr"] for group in self._optimizer.param_groups] + else: + return [group["lr"] * self.gamma for group in self._optimizer.param_groups] diff --git a/oneflow/compatible_single_client_python/nn/parameter.py b/oneflow/compatible_single_client_python/nn/parameter.py new file mode 100644 index 0000000000000000000000000000000000000000..c32fff57beba68648b3cc70e36676f696b5f25ce --- /dev/null +++ b/oneflow/compatible_single_client_python/nn/parameter.py @@ -0,0 +1,33 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.framework.tensor import Tensor + + +@oneflow_export("nn.Parameter") +class Parameter(Tensor): + def __init__(self, data, requires_grad=True): + # TODO: uncomment this line when autograd is ready + # data.requires_grad = True + # TODO: uncomment the following two lines when consistent <-> local conversion is ready + # data.set_is_consistent(True) + # data.set_placement(flow.placement("gpu", ["0:0"], None)) + self._data = data + self._data.requires_grad = requires_grad + + def __getattr__(self, name): + return getattr(self._data, name) diff --git a/oneflow/compatible_single_client_python/oneflow_export.py b/oneflow/compatible_single_client_python/oneflow_export.py new file mode 100644 index 0000000000000000000000000000000000000000..ce5427d0f0d663d65a3df06dea6c3228b76be36f --- /dev/null +++ b/oneflow/compatible_single_client_python/oneflow_export.py @@ -0,0 +1,108 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import inspect +import re +import collections + +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if_util +from oneflow.compatible_single_client_python.lib.core import traceinfo as traceinfo +from oneflow.compatible_single_client_python.lib.core.high_order_bool import always_true +import oneflow._oneflow_internal + + +def oneflow_export(*api_names, **kwargs): + def Decorator(func_or_class): + new_api_names = list(api_names) + if hasattr(func_or_class, "_ONEFLOW_API_TAG"): + if func_or_class._ONEFLOW_API_TAG == "experimental_api": + new_api_names = ["experimental." + n for n in new_api_names] + else: + new_api_names = ["experimental." + n for n in new_api_names] + new_api_names + # func_or_class._ONEFLOW_API = [ + # "compatible.single_client." + x for x in new_api_names + # ] + func_or_class._ONEFLOW_API = new_api_names + func_or_class._IS_VALUE = False + return func_or_class + + return Decorator + + +def oneflow_export_value(*api_names, **kwargs): + def Decorator(func_or_class): + new_api_names = list(api_names) + if hasattr(func_or_class, "_ONEFLOW_API_TAG"): + if func_or_class._ONEFLOW_API_TAG == "experimental_api": + new_api_names = ["experimental." + n for n in new_api_names] + else: + new_api_names = ["experimental." + n for n in new_api_names] + new_api_names + # func_or_class._ONEFLOW_API = [ + # "compatible.single_client." 
+ x for x in new_api_names
+        # ]
+        func_or_class._ONEFLOW_API = new_api_names
+        func_or_class._IS_VALUE = True
+        return func_or_class
+
+    return Decorator
+
+
+def stable_api(func_or_class):
+    func_or_class._ONEFLOW_API_TAG = "stable_api"
+    return func_or_class
+
+
+def experimental_api(func_or_class):
+    func_or_class._ONEFLOW_API_TAG = "experimental_api"
+    return func_or_class
+
+
+_DEPRECATED = set()
+
+
+def oneflow_deprecate(*api_names, **kwargs):
+    def Decorator(func_or_class):
+        _DEPRECATED.add(func_or_class)
+        return func_or_class
+
+    return Decorator
+
+
+@oneflow_export("is_deprecated")
+def is_deprecated(func_or_class):
+    # collections.Hashable was removed in Python 3.10; use collections.abc
+    return (
+        isinstance(func_or_class, collections.abc.Hashable)
+        and func_or_class in _DEPRECATED
+    )
+
+
+def export_oneflow_api_internal_symbols(internal_name, api_name):
+    names = internal_name.split(".")
+    api = oneflow._oneflow_internal
+    for n in names:
+        api = getattr(api, n)
+    globals()[api_name] = api
+    oneflow_export(api_name)(api)
+
+
+internal_names_2_api_names = {
+    "placement": "placement",
+    "Size": "Size",
+    "device": "device",
+    "autograd.no_grad": "no_grad",
+}
+
+
+for internal_name, api_name in internal_names_2_api_names.items():
+    export_oneflow_api_internal_symbols(internal_name, api_name)
diff --git a/oneflow/compatible_single_client_python/ops/__init__.py b/oneflow/compatible_single_client_python/ops/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..71766feff029e632e5ba519af6ffa41985b341f6
--- /dev/null
+++ b/oneflow/compatible_single_client_python/ops/__init__.py
@@ -0,0 +1,135 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+""" +from __future__ import absolute_import + +import re + +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import ( + compile_context as compile_context, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + input_blob_def as input_blob_util, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.framework import scope_util as scope_util +from oneflow.compatible_single_client_python.eager import boxing_util as boxing_util +from oneflow.compatible_single_client_python.eager import ( + blob_register as blob_register_util, +) +from oneflow._oneflow_internal.oneflow.core.job import placement as placement_cfg +import oneflow._oneflow_internal + +blob_register = oneflow._oneflow_internal.GetDefaultBlobRegister() + + +def InputOpByArgBlobDef(blob_def): + assert isinstance(blob_def, input_blob_util.ArgBlobDef) + op_conf = op_conf_util.OperatorConf() + op_conf.name = blob_def.op_name + op_conf.input_conf.out = blob_def.blob_name + op_conf.input_conf.blob_conf.CopyFrom(blob_def.ToInterfaceBlobConf()) + blob_def.AddAndInferOp(op_conf) + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = blob_def.op_name + lbi.blob_name = blob_def.blob_name + return remote_blob_util.RemoteBlob(lbi) + + +def ReturnRemoteBlob(remote_blob, allow_cpu_return_op=True): + return enable_if.unique([LazyReturnRemoteBlob, EagerReturnRemoteBlob])( + remote_blob, allow_cpu_return_op + ) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def LazyReturnRemoteBlob(remote_blob, allow_cpu_return_op=True): + assert isinstance( + remote_blob, + ( + oneflow._oneflow_internal.LazyMirroredBlob, + oneflow._oneflow_internal.LazyConsistentBlob, + ), + ) + op_conf, lbi, scope = _GetReturnOpConfAndOutLbiAndScope( + remote_blob, allow_cpu_return_op + ) + compile_context.CurJobAddOp(op_conf, scope) + return remote_blob_util.RemoteBlob(lbi) + + +@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled) +def EagerReturnRemoteBlob(remote_blob, allow_cpu_return_op=True): + if not hob.is_trainable(None): + return remote_blob + op_conf, lbi, scope = _GetReturnOpConfAndOutLbiAndScope( + remote_blob, allow_cpu_return_op + ) + if remote_blob.blob_object.op_arg_parallel_attr.is_mirrored(): + add_and_infer = compile_context.CurJobAddMirroredOp + else: + add_and_infer = compile_context.CurJobAddConsistentOp + op_attribute = add_and_infer(op_conf, scope) + + def BuildInstruction(builder): + get_blob_scope = blob_register_util.BnInOp2BlobObjectScope + with get_blob_scope(blob_register, op_attribute) as bn_in_op2blob_object: + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + builder.StatelessCall( + cfg_op_attribute, + remote_blob.blob_object.parallel_desc_symbol.parallel_conf, + bn_in_op2blob_object, + boxing_util.BoxingTo, + ) + + 
oneflow._oneflow_internal.deprecated.LogicalRun(BuildInstruction) + return remote_blob_util.RemoteBlob(lbi) + + +def _GetReturnOpConfAndOutLbiAndScope(remote_blob, allow_cpu_return_op=True): + op_conf = op_conf_util.OperatorConf() + op_conf.name = id_util.UniqueStr("Return_") + setattr(op_conf.return_conf, "in", remote_blob.unique_name) + op_conf.return_conf.out = "out" + if allow_cpu_return_op: + op_conf.device_tag = "cpu" + + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = "out" + + parallel_conf = placement_cfg.ParallelConf() + parallel_conf.CopyFrom(remote_blob.parallel_conf) + + def BuildScope(old_scope, builder): + return builder.BuildScopeWithNewParallelConf(old_scope, parallel_conf) + + sess = session_ctx.GetDefaultSession() + scope = scope_util.MakeScope(BuildScope) + + return op_conf, lbi, scope diff --git a/oneflow/compatible_single_client_python/ops/array_ops.py b/oneflow/compatible_single_client_python/ops/array_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..d8e00ab7c47915b969b88ec978ee3cb9a197c878 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/array_ops.py @@ -0,0 +1,2656 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import +from functools import reduce +from typing import Iterable, List, Optional, Sequence, Union, Tuple +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + stable_api, +) + +import numpy as np +import operator +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +import oneflow._oneflow_internal + + +@oneflow_export("gather") +@stable_api +def gather( + params: oneflow._oneflow_internal.BlobDesc, + indices: oneflow._oneflow_internal.BlobDesc, + validate_indices: Optional[oneflow._oneflow_internal.BlobDesc] = None, + axis: Optional[int] = None, + batch_dims: int = 0, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator gathers slices from params `axis` according to indices. + + Args: + params: A `Blob`. The blob from which to gather values. Must be at least rank `axis + 1`. + indices: A `Blob`. Index blob. Must be in range [0, params.shape[axis]). + axis: A `int`. The axis in params to gather indices from. Defaults to the first dimension. + Supports negative indexes. + batch_dims: An optional `int`. Defaults to 0. + name: A name for the operation (optional). + Returns: + A blob. Has the same type as params. + + For example: + + Example 1: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def gather_Job(x: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.float32), + indice: tp.Numpy.Placeholder(shape=(2, ), dtype=flow.int32) + ) -> tp.Numpy: + gather_blob = flow.gather(params=x, + indices=indice, + axis=1) + return gather_blob + + + x = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]).astype(np.float32) + indice = np.array([0, 2]).astype(np.int32) + out = gather_Job(x, indice) + + # out [[1. 3.] + # [4. 6.] + # [7. 9.]] + + + Example 2: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def gather_Job(x: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.float32), + indice: tp.Numpy.Placeholder(shape=(2, ), dtype=flow.int32) + ) -> tp.Numpy: + gather_blob = flow.gather(params=x, + indices=indice, + axis=0) + return gather_blob + + + x = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]).astype(np.float32) + indice = np.array([0, 2]).astype(np.int32) + out = gather_Job(x, indice) + + # out [[1. 2. 3.] + # [7. 8. 9.]] + + """ + params_ndims = len(params.shape) + if axis is None: + axis = batch_dims + elif axis < 0: + origin_axis = axis + axis += params_ndims + assert axis >= 0 and axis < params_ndims, ValueError( + "Expected axis to between [%d, %d). But received: %d " + % (-params_ndims, params_ndims, origin_axis) + ) + + if batch_dims > 0: + if axis == batch_dims: + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("BatchGather_") + ) + .Op("batch_gather") + .Input("in", [params]) + .Input("indices", [indices]) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + elif axis > batch_dims: + raise NotImplementedError + else: + raise AttributeError + else: + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("Gather_") + ) + .Op("gather") + .Input("in", [params]) + .Input("indices", [indices]) + .Output("out") + .Attr("axis", int(axis)) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("flatten") +@stable_api +def flatten( + input: oneflow._oneflow_internal.BlobDesc, + start_dim: int = 0, + end_dim: int = -1, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Flattens a contiguous range of dims in a Blob. + + Args: + input: A `Blob`. + start_dim: The first dim to flatten. + end_dim: The last dim to flatten. + name: A name for the operation (optional). + Returns: + A `Blob`, has the same type as `input`. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + @flow.global_function() + def flatten_Job(input: tp.Numpy.Placeholder(shape=(4, 4, 3, 2), dtype=flow.float32) + ) -> tp.Numpy: + flatten_blob = flow.flatten(input, start_dim=1, end_dim=-1) + return flatten_blob + + + input = np.zeros((4, 4, 3, 2)).astype(np.float32) + out = flatten_Job(input) + + # out.shape (4, 24) + + """ + if name is None: + name = id_util.UniqueStr("Flatten_") + return ( + flow.user_op_builder(name) + .Op("flatten") + .Input("in", [input]) + .Output("out") + .Attr("start_dim", start_dim) + .Attr("end_dim", end_dim) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +def infer_shape(x, shape): + dim_index_need_infer = shape.index(-1) if shape.count(-1) == 1 else None + in_elem_cnt = reduce(operator.mul, x.shape, 1) + out_elem_cnt = reduce(operator.mul, shape, 1) + if dim_index_need_infer is not None: + assert (in_elem_cnt % out_elem_cnt) == 0 + shape[dim_index_need_infer] = int(abs(in_elem_cnt / out_elem_cnt)) + else: + assert in_elem_cnt == out_elem_cnt + return shape + + +@oneflow_export("reshape") +@stable_api +def reshape( + x: oneflow._oneflow_internal.BlobDesc, + shape: Sequence[int], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator reshapes a Blob. + If the Blob is dynamic, it will call `flow.dynamic_reshape` automatically + + We can set one dimension in `shape` as `-1`, the operator will infer the complete shape. + + Args: + x: A `Blob`. + shape: Shape of the output blob. + name: A name for the operation (optional). + Returns: + A `Blob`, has the same type as `x`. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reshape_Job(x: tp.Numpy.Placeholder(shape=(4, 4), dtype=flow.float32) + ) -> tp.Numpy: + reshape_blob = flow.reshape(x, + shape=[2, 2, 2, -1]) + return reshape_blob + + + x = np.array([[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12], + [13, 14, 15, 16]]).astype(np.float32) + out = reshape_Job(x) + + # out.shape (2, 2, 2, 2) + + """ + x = flow.cast_to_current_logical_view(x) + assert isinstance(shape, tuple) or isinstance(shape, list) + shape = list(shape) + assert all(dim == -1 or dim > 0 for dim in shape) + assert shape.count(-1) <= 1 + if not x.is_dynamic: + if name is None: + name = id_util.UniqueStr("Reshape_") + return ( + flow.user_op_builder(name) + .Op("reshape") + .Input("in", [x]) + .Output("out") + .Attr("shape", infer_shape(x, shape)) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + else: + op_conf = op_conf_util.OperatorConf() + setattr( + op_conf, + "name", + name if name is not None else id_util.UniqueStr("DynamicReshape_"), + ) + setattr(op_conf.dynamic_reshape_conf, "in", x.unique_name) + op_conf.dynamic_reshape_conf.shape.dim.extend(list(shape)) + setattr(op_conf.dynamic_reshape_conf, "out", "out") + interpret_util.Forward(op_conf) + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = "out" + return remote_blob_util.RemoteBlob(lbi) + + +@oneflow_export("reshape_like") +def reshape_like( + x: oneflow._oneflow_internal.BlobDesc, + like: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator reshapes the Blob x to be the same as Blob `like` . 
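+    The total number of elements in `x` must be the same as in `like`; only the
+    shape metadata changes.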
+ + Args: + x (oneflow._oneflow_internal.BlobDesc): The input Blob. + like (oneflow._oneflow_internal.BlobDesc): A Blob. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reshape_like_Job(x: tp.Numpy.Placeholder(shape=(4, 4), dtype=flow.float32) + ) -> tp.Numpy: + like_blob = flow.constant(value=1, + dtype=flow.int8, + shape=(2, 2, 4)) + reshape_like_blob = flow.reshape_like(x, + like=like_blob) + return reshape_like_blob + + + x = np.array([[1, 2, 3, 4], + [5, 6, 7, 8], + [9, 10, 11, 12], + [13, 14, 15, 16]]).astype(np.float32) + out = reshape_like_Job(x) + + # out.shape (2, 2, 4) + + """ + if name is None: + name = id_util.UniqueStr("ReshapeLike_") + return ( + flow.user_op_builder(name) + .Op("reshape_like") + .Input("in", [x]) + .Input("like", [like]) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("dynamic_reshape") +def dynamic_reshape( + x: oneflow._oneflow_internal.BlobDesc, + shape: Sequence[int], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator reshapes a dynamic blob. + + Args: + x (oneflow._oneflow_internal.BlobDesc): The input Blob. + shape (Sequence[int]): The output shape. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + @flow.global_function() + def dynamic_reshape_Job(x: tp.Numpy.Placeholder(shape=(1, 3, 64, 64), dtype=flow.float32) + ) -> tp.Numpy: + reshape_out1 = flow.dynamic_reshape(x, (-1, 64)) + variable1 = flow.get_variable( + "var1", + shape=(64, 32), + dtype=flow.float, + initializer=flow.random_uniform_initializer(minval=-10, maxval=10), + trainable=True, + ) + matmul_tensor = flow.matmul(reshape_out1, variable1) + reshape_out2 = flow.dynamic_reshape(matmul_tensor, (-1, 8, 4)) + return reshape_out2 + + x = np.random.rand(1, 3, 64, 64).astype(np.float32) + out = dynamic_reshape_Job(x) + + # out.shape (192, 8, 4) + + """ + assert isinstance(shape, tuple) or isinstance(shape, list) + shape = list(shape) + op_conf = op_conf_util.OperatorConf() + setattr( + op_conf, + "name", + name if name is not None else id_util.UniqueStr("DynamicReshape_"), + ) + setattr(op_conf.dynamic_reshape_conf, "in", x.unique_name) + op_conf.dynamic_reshape_conf.shape.dim.extend(list(shape)) + setattr(op_conf.dynamic_reshape_conf, "out", "out") + interpret_util.Forward(op_conf) + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = "out" + return remote_blob_util.RemoteBlob(lbi) + + +@oneflow_export("transpose") +@stable_api +def transpose( + a: oneflow._oneflow_internal.BlobDesc, + perm: Sequence[int] = None, + conjugate: bool = False, + batch_axis_non_change: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator transposes the specified axis of input Blob. + + Args: + a (oneflow._oneflow_internal.BlobDesc): The input Blob. + perm (Sequence[int], optional): The list of dimension permutation. Defaults to None. 
+        conjugate (bool, optional): Not supported yet. Defaults to False.
+        batch_axis_non_change (bool, optional): deprecated. Defaults to False.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Raises:
+        NotImplementedError: The attribute `conjugate` is not supported yet.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A transposed blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def transpose_Job(x: tp.Numpy.Placeholder(shape=(1, 2, 3), dtype=flow.float32)
+        ) -> tp.Numpy:
+            transpose_blob = flow.transpose(x,
+                                            perm=[2, 0, 1])
+            return transpose_blob
+
+        x = np.random.randn(1, 2, 3).astype(np.float32)
+        out = transpose_Job(x)
+
+        # out.shape (3, 1, 2)
+
+    """
+    assert isinstance(perm, (tuple, list))
+
+    if name is None:
+        name = id_util.UniqueStr("Transpose_")
+
+    if conjugate:
+        raise NotImplementedError
+
+    return (
+        flow.user_op_builder(name)
+        .Op("transpose")
+        .Input("input", [a])
+        .Output("output")
+        .Attr("perm", perm)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("slice")
+@stable_api
+def slice(
+    x: oneflow._oneflow_internal.BlobDesc,
+    begin: Sequence[int],
+    size: Sequence[int],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Extracts a slice from a tensor.
+
+    Args:
+        x: A `Blob`.
+        begin: A list or a tuple indicating where each dimension's slice begins; its length must be
+            equal to x's number of dimensions, and its first element must be set to None.
+            (Because the internal op of OneFlow does not support slicing on dimension 0 at present.)
+        size: A list or a tuple indicating each dimension's slice size; its length must be
+            equal to x's number of dimensions, and its first element must be set to None.
+        name: A name for the operation (optional).
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def slice_Job(x: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.float32)
+        ) -> tp.Numpy:
+            slice_blob = flow.slice(x,
+                                    begin=[None, 0],
+                                    size=[None, 2])
+            return slice_blob
+
+        x = np.array([[1, 2, 3],
+                      [4, 5, 6],
+                      [7, 8, 9]]).astype(np.float32)
+        out = slice_Job(x)
+
+        # out [[1. 2.]
+        #      [4. 5.]
+        #      [7. 8.]]
+
+    """
+    ndim = len(x.shape)
+    if not isinstance(begin, (list, tuple)) or len(begin) != ndim:
+        raise ValueError(
+            "begin must be a list/tuple with the same length as input tensor's number of dimensions"
+        )
+
+    if not all(isinstance(b, int) or b is None for b in begin):
+        raise ValueError("element of begin must be an int or None")
+
+    if not isinstance(size, (list, tuple)) or len(size) != ndim:
+        raise ValueError(
+            "size must be a list/tuple with the same length as input tensor's number of dimensions."
+        )
+
+    if not all(isinstance(s, int) or s is None for s in size):
+        raise ValueError("element of size must be an int or None")
+
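+    # Convert each validated (begin, size) pair into an equivalent
+    # (start, stop, step) tuple, then delegate the real work to slice_v2.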
+ ) + + if not all(isinstance(s, int) or s is None for s in size): + raise ValueError("element of size must be a int or None") + + slice_tup_list = [] + for b, s, dim_size in zip(begin, size, x.shape): + start, stop, step = (None, None, 1) + if b is not None: + if b < -dim_size or b >= dim_size: + raise ValueError("element of begin is out of range") + start = b + + if s is not None: + if s == -1: + stop = dim_size + else: + if s <= 0 or s > dim_size: + raise ValueError("element of size is invalid") + if b + s < dim_size: + stop = b + s + + slice_tup_list.append((start, stop, step)) + + return slice_v2(x, slice_tup_list, name=name) + + +def check_slice_tup_list(slice_tup_list, shape): + ndim = len(shape) + if not isinstance(slice_tup_list, (list, tuple)) or len(slice_tup_list) > ndim: + raise ValueError( + "slice_tup_list must be a list or tuple with length " + "less than or equal to number of dimensions of input tensor" + ) + + # if length of slice_tup_list is less than number of dimensions of x, fill it to length of ndims reduce 1 + if len(slice_tup_list) < ndim: + slice_tup_list += type(slice_tup_list)( + [(None, None, None)] * (ndim - len(slice_tup_list)) + ) + + start_list = [] + stop_list = [] + step_list = [] + + for slice_tup, dim_size in zip(slice_tup_list, shape): + if not isinstance(slice_tup, (tuple, list)) or len(slice_tup) != 3: + raise ValueError( + "element of slice_tup_list must be a list or tuple with form (start, stop, step)" + ) + + if not all(isinstance(idx, int) or idx is None for idx in slice_tup): + raise ValueError("element of slice tuple must int or None") + + (start, stop, step) = slice_tup + if step is None: + step = 1 + + if step == 0: + raise ValueError("slice step can't be 0") + + if start is None: + start = 0 if step > 0 else np.iinfo(np.int64).max + elif start < -dim_size or start >= dim_size: + raise ValueError("slice start must be in range [-size, size)") + + if stop is None: + stop = np.iinfo(np.int64).max if step > 0 else np.iinfo(np.int64).min + elif stop < -dim_size - 1 or stop > dim_size: + raise ValueError("slice start must be in range [-size-1, size]") + + start_list.append(start) + stop_list.append(stop) + step_list.append(step) + + return start_list, stop_list, step_list + + +@oneflow_export("slice_v2") +def slice_v2( + x: oneflow._oneflow_internal.BlobDesc, + slice_tup_list: Sequence[Tuple[int, int, int]], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Extracts a slice from a tensor. + The `slice_tup_list` assigns the slice indices in each dimension, the format is (start, stop, step). + The operator will slice the Blob according to the `slice_top_list`. + + Args: + x: A `Blob`. + slice_tup_list: A list of slice tuple, indicate each dimension slice (start, stop, step). + name: A name for the operation (optional). + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + Note: Because the internal op of OneFlow does not support 0-dimension slice at present, we should + set the zero element in `slice_tup_list` as `None`. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + @flow.global_function() + def slicev2_Job(x: tp.Numpy.Placeholder(shape=(3, 6, 9), dtype=flow.float32) + ) -> tp.Numpy: + slicev2_blob = flow.slice_v2(x, + slice_tup_list=[[None, None, None], + [0, 5, 2], # slice in dimension 1, extract [0, 2, 4] + [0, 6, 3]]) # slice in dimension 2, extract [0, 3] + return slicev2_blob + x = np.random.randn(3, 6, 9).astype(np.float32) + out = slicev2_Job(x) + + # out.shape (3, 3, 2) + + """ + name = name or id_util.UniqueStr("Slice_") + if not isinstance(name, str): + raise ValueError("name must be a string") + + start, stop, step = check_slice_tup_list(slice_tup_list, x.shape) + + op = ( + flow.user_op_builder(name) + .Op("slice") + .Input("x", [x]) + .Output("y") + .Attr("start", start) + .Attr("stop", stop) + .Attr("step", step) + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() + + +@oneflow_export("slice_update") +@stable_api +def api_slice_update( + x: oneflow._oneflow_internal.BlobDesc, + update: oneflow._oneflow_internal.BlobDesc, + slice_tup_list: Sequence[Tuple[int, int, int]], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Update a slice of tensor `x`. Like `x[start:stop:step] = update`. + + Args: + x: A `Blob`, whose slice will be updated. + update: A `Blob`, indicate the update content. + slice_tup_list: A list of slice tuple, indicate each dimension slice (start, stop, step). + name: A name for the operation (optional). + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def slice_update_job(x: tp.Numpy.Placeholder(shape=(5, )), + update: tp.Numpy.Placeholder(shape=(3, )))->tp.Numpy: + out = flow.slice_update(x=x, + update=update, + slice_tup_list=[[1, 4, 1]]) + + return out + + x = np.array([1, 1, 1, 1, 1]).astype(np.float32) + update = np.array([2, 3, 4]).astype(np.float32) + out = slice_update_job(x, update) + + # out [1. 2. 3. 4. 1.] 
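+        # A hypothetical extra case (not in the original docs): with step 2,
+        # slice_tup_list=[[0, 5, 2]] behaves like x[0:5:2] = update, so the
+        # same x with update = [2, 3, 4] would give [2. 1. 3. 1. 4.].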
+
+    """
+    if name is None:
+        name = id_util.UniqueStr("SliceUpdate_")
+
+    if not isinstance(name, str):
+        raise ValueError("name must be a string")
+
+    start, stop, step = check_slice_tup_list(slice_tup_list, x.shape)
+
+    op = (
+        flow.user_op_builder(name)
+        .Op("slice_update")
+        .Input("x", [x])
+        .Input("update", [update])
+        .Output("y")
+        .Attr("start", start)
+        .Attr("stop", stop)
+        .Attr("step", step)
+        .Build()
+    )
+    return op.InferAndTryRun().SoleOutputBlob()
+
+
+# Get slice attrs for slice_assign and logical_slice
+# Note the step in slice_tup_list must be greater than 0
+# as slice_assign and logical_slice only support step > 0
+def GetSliceAttrs(slice_tup_list, input_shape):
+    ndim = len(input_shape)
+    if not (isinstance(slice_tup_list, (list, tuple)) and len(slice_tup_list) <= ndim):
+        raise ValueError(
+            "slice_tup_list must be a list or tuple with length "
+            "less than or equal to number of dimensions of input tensor"
+        )
+
+    # Right extends slice_tup_list with (None, None, None) if len(slice_tup_list) < len(input_shape)
+    if len(slice_tup_list) < ndim:
+        slice_tup_list += type(slice_tup_list)(
+            [(None, None, None)] * (ndim - len(slice_tup_list))
+        )
+
+    start_list = []
+    stop_list = []
+    step_list = []
+
+    for slice_tup, dim_size in zip(slice_tup_list, input_shape):
+        if not (isinstance(slice_tup, (tuple, list)) and len(slice_tup) == 3):
+            raise ValueError(
+                "element of slice_tup_list must be a list or tuple with form (start, stop, step)"
+            )
+
+        if not all(isinstance(idx, int) or idx is None for idx in slice_tup):
+            raise ValueError("element of slice tuple must be an int or None")
+
+        (start, stop, step) = slice_tup
+        if step is None:
+            step = 1
+
+        if step <= 0:
+            raise ValueError("slice_assign/logical_slice step must be greater than 0")
+
+        if start is None:
+            start = 0
+        elif start < -dim_size or start >= dim_size:
+            raise ValueError(
+                "slice_assign/logical_slice start must be in range [-size, size)"
+            )
+        elif start < 0:
+            start += dim_size
+
+        if stop is None:
+            stop = dim_size
+        elif stop < -dim_size or stop > dim_size:
+            raise ValueError(
+                "slice_assign/logical_slice stop must be in range [-size, size]"
+            )
+        elif stop < 0:
+            stop += dim_size
+
+        start_list.append(start)
+        stop_list.append(stop)
+        step_list.append(step)
+
+    return start_list, stop_list, step_list
+
+
+@oneflow_export("experimental.logical_slice")
+def logical_slice(
+    x: oneflow._oneflow_internal.BlobDesc,
+    slice_tup_list: Sequence[Tuple[int, int, int]],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+
+    name = id_util.UniqueStr("LogicalSlice_") if name is None else name
+    if not isinstance(name, str):
+        raise ValueError("name must be a string")
+
+    start_list, stop_list, step_list = GetSliceAttrs(slice_tup_list, x.shape)
+    op = (
+        flow.user_op_builder(name)
+        .Op("logical_slice")
+        .Input("x", [x])
+        .Output("y")
+        .Attr("start", start_list)
+        .Attr("stop", stop_list)
+        .Attr("step", step_list)
+        .Build()
+    )
+    return op.InferAndTryRun().SoleOutputBlob()
+
+
+@oneflow_export("experimental.logical_slice_assign")
+def logical_slice_assign(
+    x: oneflow._oneflow_internal.BlobDesc,
+    value: oneflow._oneflow_internal.BlobDesc,
+    slice_tup_list: Sequence[Tuple[int, int, int]],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+
+    name = id_util.UniqueStr("LogicalSliceAssign_") if name is None else name
+    if not isinstance(name, str):
+        raise ValueError("name must be a string")
+
+    start_list, stop_list, step_list = GetSliceAttrs(slice_tup_list, x.shape)
+
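+    # Build and run the "logical_slice_assign" user op. It writes `value` into
+    # the selected slice of `x`; note that the op has no output blob, so the
+    # call returns the op itself rather than a Blob.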
op = ( + flow.user_op_builder(name) + .Op("logical_slice_assign") + .Input("ref", [x]) + .Input("value", [value]) + .Attr("start", start_list) + .Attr("stop", stop_list) + .Attr("step", step_list) + .Build() + ) + return op.InferAndTryRun() + + +@oneflow_export("reverse") +def reverse( + input: oneflow._oneflow_internal.BlobDesc, + axis: Union[int, Sequence[int]], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator reverses the elements on the assigned axis. + + Args: + input (oneflow._oneflow_internal.BlobDesc): The input Blob. + axis (Union[int, Sequence[int]]): The reverse axis. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Raises: + ValueError: The name must be a string. + ValueError: The axis must be a int or a list/tuple of int. + ValueError: The axis is out of range. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reverse_Job(x: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.float32)) -> tp.Numpy: + reverse_blob = flow.reverse(x, + axis=0) + return reverse_blob + + + x = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]).astype(np.float32) + out = reverse_Job(x) + + # out [[7. 8. 9.] + # [4. 5. 6.] + # [1. 2. 3.]] + + """ + if name is None: + name = id_util.UniqueStr("Reverse_") + + if not isinstance(name, str): + raise ValueError("name must be a string") + + if isinstance(axis, int): + axis = [axis] + + if not isinstance(axis, (tuple, list)) or not all(isinstance(a, int) for a in axis): + raise ValueError("axis must be a int or a list/tuple of int") + + ndim = len(input.shape) + slice_tup_list = [(None, None, None)] * ndim + for i, a in enumerate(axis): + if a < 0: + a += ndim + + if a < 0 or a >= ndim: + raise ValueError("axis is out of range") + + slice_tup_list[a] = (None, None, -1) + + return slice_v2(input, slice_tup_list, name) + + +@oneflow_export("concat") +def concat( + inputs: Optional[Sequence[oneflow._oneflow_internal.BlobDesc]] = None, + axis: int = 0, + max_dim_size: Optional[int] = None, + name: Optional[str] = None, + values: Optional[Sequence[oneflow._oneflow_internal.BlobDesc]] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Concatenate two or more `Blob` s at specified axis. + + Analogous to `numpy.concatenate <https://docs.scipy.org/doc/numpy/reference/generated/numpy.concatenate.html>`_ + + Args: + inputs: a `list` of `Blob` + axis: a `int`. `0` by default + max_dim_size: hint of max dimension size along the given axis + name: name of this operator. `None` by default + values: deprecated param, use inputs instead + + Returns: + A `Blob` + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def concat_Job() -> tp.Numpy: + constant_blob_1 = flow.constant(value=1.5, + shape=(1, 3, 3, 4), + dtype=flow.float, + name="blob1") + constant_blob_2 = flow.constant(value=2.5, + shape=(1, 3, 3, 4), + dtype=flow.float, + name="blob2") + return flow.concat(inputs=[constant_blob_1, constant_blob_2], + axis=3) + + + out = concat_Job() + + # out.shape (1, 3, 3, 8) + + """ + # backward compatible with values param name + if values is not None: + assert inputs is None + inputs = values + + assert isinstance(inputs, (list, tuple)) + if len(inputs) == 1: + return inputs[0] + + assert len(inputs) >= 2 + if axis < 0: + axis += len(inputs[0].shape) + assert axis >= 0 and axis < len( + inputs[0].shape + ), "axis must be in range [0, num_axes of inputs)" + + first_input_shape = inputs[0].shape + static_dim_size = 0 + dynamic_dim_size = 0 + for input in inputs: + assert len(input.shape) == len(first_input_shape) + for i in range(len(input.shape)): + if i == axis: + if input.is_dynamic: + dynamic_dim_size += input.shape[i] + else: + static_dim_size += input.shape[i] + else: + assert input.shape[i] == first_input_shape[i] + + if max_dim_size is None: + max_dim_size = static_dim_size + dynamic_dim_size + else: + assert ( + max_dim_size >= static_dim_size + ), "max diemension size {} is too small to hold concatenated static dimension size {} along the given axis".format( + max_dim_size, static_dim_size + ) + + if name is None: + name = id_util.UniqueStr("Concat_") + + op = ( + flow.user_op_builder(name) + .Op("concat") + .Input("in", inputs) + .Output("out") + .Attr("axis", axis) + .Attr("max_dim_size", max_dim_size) + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() + + +@oneflow_export("gather_nd") +def gather_nd( + params: oneflow._oneflow_internal.BlobDesc, + indices: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator is a high-dimensional extension of `gather`, `indices` is a K-dimensional + tensor, which is regarded as a index of input Blob `params`. + + Each element defines a slice of `params`: + + .. math:: + + output[(i_0,i_1,...,i_{K-2})] = param[indices(i_{0},i_{1},...,i_{K-2})] + + + Args: + params (oneflow._oneflow_internal.BlobDesc): The input Blob. + indices (oneflow._oneflow_internal.BlobDesc): The slice indices. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + Example 1: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def gather_nd_Job(x: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.float32), + indice: tp.Numpy.Placeholder(shape=(2, 1), dtype=flow.int32) + ) -> tp.Numpy: + gather_nd_blob = flow.gather_nd(params=x, + indices=indice) + return gather_nd_blob + + + x = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]).astype(np.float32) + indice = np.array([[0], [2]]).astype(np.int32) + out = gather_nd_Job(x, indice) + + # out [[1. 2. 3.] + # [7. 8. 9.]] + + Example 2: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def gather_nd_Job(x: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.float32), + indice: tp.Numpy.Placeholder(shape=(2, 2), dtype=flow.int32) + ) -> tp.Numpy: + gather_nd_blob = flow.gather_nd(params=x, + indices=indice) + return gather_nd_blob + + + x = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]).astype(np.float32) + indice = np.array([[0, 2], [2, 1]]).astype(np.int32) + out = gather_nd_Job(x, indice) + + # out [3. 8.] + + Example3: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def gather_nd_Job(x: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.float32), + indice: tp.Numpy.Placeholder(shape=(3, 2), dtype=flow.int32) + ) -> tp.Numpy: + gather_nd_blob = flow.gather_nd(params=x, + indices=indice) + return gather_nd_blob + + + x = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]).astype(np.float32) + indice = np.array([[0, 1], [1, 0], [2, 2]]).astype(np.int32) + out = gather_nd_Job(x, indice) + + # out [2. 4. 9.] + + """ + if name is None: + name = id_util.UniqueStr("GatherNd_") + op = ( + flow.user_op_builder(name) + .Op("gather_nd") + .Input("params", [params]) + .Input("indices", [indices]) + .Output("out") + .Build() + ) + return op.InferAndTryRun().RemoteBlobList()[0] + + +@oneflow_export("scatter_nd") +def scatter_nd( + indices: oneflow._oneflow_internal.BlobDesc, + updates: oneflow._oneflow_internal.BlobDesc, + shape: Sequence[int], + name: Optional[str] = None, +): + """This operator inserts the elements in `updates` according to the `indices` and create a new Blob. + + Args: + indices (oneflow._oneflow_internal.BlobDesc): The indice of `updates`. Its type should be `flow.int`. + updates (oneflow._oneflow_internal.BlobDesc): The update Blob. + shape (Sequence[int]): The constant tensor shape, the constant tensor elements are all zero. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + Example 1: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def scatter_nd_Job(indice: tp.Numpy.Placeholder(shape=(3, 1), dtype=flow.int32), + update: tp.Numpy.Placeholder(shape=(3, ), dtype=flow.float32), + ) -> tp.Numpy: + scatter_blob = flow.scatter_nd(indices=indice, + updates=update, + shape=[8]) + return scatter_blob + + + indice_array = np.array([[1], [6], [4]]).astype(np.int32) + update_array = np.array([10.2, 5.1, 12.7]).astype(np.float32) + out = scatter_nd_Job(indice_array, update_array) + + # [ 0. 10.2 0. 0. 12.7 0. 5.1 0. ] + + Example 2: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def scatter_nd_Job(indice: tp.Numpy.Placeholder(shape=(3, 1), dtype=flow.int32), + update: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.float32), + ) -> tp.Numpy: + scatter_blob = flow.scatter_nd(indices=indice, + updates=update, + shape=[5, 3]) + return scatter_blob + + + indice_array = np.array([[0], [4], [2]]).astype(np.int32) + update_array = np.array([[1, 1, 1], + [2, 2, 2], + [3, 3, 3]]).astype(np.float32) + out = scatter_nd_Job(indice_array, update_array) + + # out [[1. 1. 1.] + # [0. 0. 0.] + # [3. 3. 3.] + # [0. 0. 0.] + # [2. 2. 2.]] + + """ + if name is None: + name = id_util.UniqueStr("ScatterNd_") + op = ( + flow.user_op_builder(name) + .Op("scatter_nd") + .Input("indices", [indices]) + .Input("updates", [updates]) + .Attr("shape", shape) + .Output("out") + .Build() + ) + return op.InferAndTryRun().RemoteBlobList()[0] + + +@oneflow_export("tensor_scatter_nd_update") +def tensor_scatter_nd_update( + params: oneflow._oneflow_internal.BlobDesc, + indices: oneflow._oneflow_internal.BlobDesc, + updates: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator inserts the elements in `updates` according to the `indices` into the Blob `params`. + + Args: + params (oneflow._oneflow_internal.BlobDesc): The input Blob. + indices (oneflow._oneflow_internal.BlobDesc): The indice of `updates`. Its type should be `flow.int32`. + updates (oneflow._oneflow_internal.BlobDesc): The update Blob. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def tensor_scatter_nd_Job(x: tp.Numpy.Placeholder(shape=(5, 3), dtype=flow.float32), + indice: tp.Numpy.Placeholder(shape=(3, 1), dtype=flow.int32), + update: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.float32), + ) -> tp.Numpy: + scatter_blob = flow.tensor_scatter_nd_update(params=x, + indices=indice, + updates=update) + return scatter_blob + + x = np.array([[1, 2, 3], + [1, 2, 3], + [1, 2, 3], + [1, 2, 3], + [1, 2, 3]]).astype(np.float32) + indice_array = np.array([[0], [4], [2]]).astype(np.int32) + update_array = np.array([[1, 1, 1], + [2, 2, 2], + [3, 3, 3]]).astype(np.float32) + out = tensor_scatter_nd_Job(x, indice_array, update_array) + + # out [[1. 1. 1.] + # [1. 2. 3.] + # [3. 3. 3.] + # [1. 2. 3.] + # [2. 2. 2.]] + + """ + if name is None: + name = id_util.UniqueStr("TensorScatterNdUpdate_") + op = ( + flow.user_op_builder(name) + .Op("tensor_scatter_nd_update") + .Input("params", [params]) + .Input("updates", [updates]) + .Input("indices", [indices]) + .Output("out") + .Build() + ) + return op.InferAndTryRun().RemoteBlobList()[0] + + +@oneflow_export("tensor_scatter_nd_add") +def tensor_scatter_nd_add( + params: oneflow._oneflow_internal.BlobDesc, + indices: oneflow._oneflow_internal.BlobDesc, + updates: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator adds elements from 'updates' to Blob 'params' based on the `indices`. + + Args: + params (oneflow._oneflow_internal.BlobDesc): The input Blob. 
+ indices (oneflow._oneflow_internal.BlobDesc): The indice of `updates`. Its type should be `flow.int32`. + updates (oneflow._oneflow_internal.BlobDesc): The update Blob. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def tensor_scatter_nd_add_Job(x: tp.Numpy.Placeholder(shape=(5, 3), dtype=flow.float32), + indice: tp.Numpy.Placeholder(shape=(3, 1), dtype=flow.int32), + update: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.float32), + ) -> tp.Numpy: + scatter_blob = flow.tensor_scatter_nd_add(params=x, + indices=indice, + updates=update) + return scatter_blob + + x = np.array([[1, 2, 3], + [1, 2, 3], + [1, 2, 3], + [1, 2, 3], + [1, 2, 3]]).astype(np.float32) + indice_array = np.array([[0], [4], [2]]).astype(np.int32) + update_array = np.array([[1, 1, 1], + [2, 2, 2], + [3, 3, 3]]).astype(np.float32) + out = tensor_scatter_nd_add_Job(x, indice_array, update_array) + + # out [[2. 3. 4.] + # [1. 2. 3.] + # [4. 5. 6.] + # [1. 2. 3.] + # [3. 4. 5.]] + + """ + if name is None: + name = id_util.UniqueStr("TensorScatterNdAdd_") + op = ( + flow.user_op_builder(name) + .Op("tensor_scatter_nd_add") + .Input("params", [params]) + .Input("updates", [updates]) + .Input("indices", [indices]) + .Output("out") + .Build() + ) + return op.InferAndTryRun().RemoteBlobList()[0] + + +@oneflow_export("argwhere") +@stable_api +def argwhere( + condition: oneflow._oneflow_internal.BlobDesc, + dtype: Optional[flow.dtype] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator finds the indices of input Blob `condition` elements that are non-zero. It returns a List. + Each element in the output is a coordinate that points to a non-zero element in the condition. + + Args: + condition (oneflow._oneflow_internal.BlobDesc): The input Blob. + dtype (Optional[flow.dtype], optional): The data type of output. Defaults to None. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. Its type is `ListNumpy`. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def argwhere_Job(x: tp.Numpy.Placeholder(shape=(2, 3), dtype=flow.float32), + ) -> tp.ListNumpy: + return flow.argwhere(x) + + + x = np.array([[0, 1, 0], + [2, 0, 2]]).astype(np.float32) + out = argwhere_Job(x) + + # out [array([[0, 1], + # [1, 0], + # [1, 2]], dtype=int32)] + + """ + if name is None: + name = id_util.UniqueStr("ArgWhere_") + + if dtype is None: + dtype = flow.int32 + + op = ( + flow.user_op_builder(name) + .Op("argwhere") + .Input("input", [condition]) + .Attr("dtype", dtype) + .Output("output") + .Output("output_size") + .Build() + ) + output, output_size = op.InferAndTryRun().RemoteBlobList() + return sync_dynamic_resize(output, output_size) + + +@oneflow_export("nonzero") +@stable_api +def nonzero( + a: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + """This operator finds the indices of input Blob `condition` elements that are non-zero. + + Args: + a (oneflow._oneflow_internal.BlobDesc): The input Blob. 
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob.
+    """
+    if name is None:
+        argwhere_name = id_util.UniqueStr("Nonzero_ArgWhere_")
+        transpose_name = id_util.UniqueStr("Nonzero_Transpose_")
+    else:
+        argwhere_name = name + "_ArgWhere"
+        transpose_name = name + "_Transpose"
+    indices = argwhere(a, name=argwhere_name)
+    return transpose(indices, perm=(1, 0), name=transpose_name)
+
+
+@oneflow_export("where")
+@stable_api
+def where(
+    condition: oneflow._oneflow_internal.BlobDesc,
+    x: Optional[oneflow._oneflow_internal.BlobDesc] = None,
+    y: Optional[oneflow._oneflow_internal.BlobDesc] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator returns the elements where condition is larger than 0.
+
+    If `x` and `y` are both None, this operator is equal to `oneflow.compatible.single_client.argwhere`.
+
+    If `x` and `y` are both given, the output takes its element from `x` where
+    the condition is larger than 0, and from `y` otherwise.
+
+    Args:
+        condition (oneflow._oneflow_internal.BlobDesc): The input Blob.
+        x (Optional[oneflow._oneflow_internal.BlobDesc], optional): A Blob. Defaults to None.
+        y (Optional[oneflow._oneflow_internal.BlobDesc], optional): A Blob. Defaults to None.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Raises:
+        ValueError: It is not supported when exactly one of x or y is non-None
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob. Its type is `ListNumpy`.
+
+    For example:
+
+    Example 1:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def where_Job(condition: tp.Numpy.Placeholder(shape=(5, ), dtype=flow.int32),
+                      x: tp.Numpy.Placeholder(shape=(5, ), dtype=flow.float32),
+                      y: tp.Numpy.Placeholder(shape=(5, ), dtype=flow.float32),
+        ) -> tp.ListNumpy:
+            return flow.where(condition=condition,
+                              x=x,
+                              y=y)
+
+
+        condition = np.array([3, 0, 1, 0, 1]).astype(np.int32)
+        x = np.array([10, 20, 30, 40, 50]).astype(np.float32)
+        y = np.array([100, 200, 300, 400, 500]).astype(np.float32)
+        out = where_Job(condition, x, y)
+
+        # out [array([ 10., 200.,  30., 400.,  50.], dtype=float32)]
+
+    Example 2:
+
+    ..
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def where_Job(condition: tp.Numpy.Placeholder(shape=(5, ), dtype=flow.int32), + ) -> tp.ListNumpy: + return flow.where(condition=condition) + + + condition = np.array([3, 0, 1, 0, 1]).astype(np.int32) + out = where_Job(condition) + + # out [array([[0], + # [2], + # [4]], dtype=int32)] + + """ + if x is None and y is None: + return argwhere(condition, name=name) + elif x is not None and y is not None: + if name is None: + name = id_util.UniqueStr("Where_") + + if x.shape == condition.shape and y.shape == condition.shape: + broadcast_cond = condition + broadcast_x = x + broadcast_y = y + else: + broadcast_cond = flow.broadcast_to_compatible_with(condition, [x, y]) + broadcast_x = flow.broadcast_to_compatible_with(x, [condition, y]) + broadcast_y = flow.broadcast_to_compatible_with(y, [condition, x]) + return ( + flow.user_op_builder(name) + .Op("where") + .Input("condition", [broadcast_cond]) + .Input("x", [broadcast_x]) + .Input("y", [broadcast_y]) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + else: + raise ValueError("it is not supported when exactly one of x or y is non-None") + + +@oneflow_export("elem_cnt") +def elem_cnt( + inputs: oneflow._oneflow_internal.BlobDesc, + dtype: Optional[flow.dtype] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator returns the amount of elements in input Blob. + + Args: + inputs (oneflow._oneflow_internal.BlobDesc): The input Blob. + dtype (Optional[flow.dtype], optional): The data type. Defaults to None. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. Its type is `ListNumpy`. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def elem_cnt_Job(x: tp.Numpy.Placeholder(shape=(5, ), dtype=flow.float32), + ) -> tp.ListNumpy: + return flow.elem_cnt(inputs=x, dtype=flow.int32) + + x = np.array([10, 20, -30, 40, 50]).astype(np.float32) + out = elem_cnt_Job(x) + + # [array([5], dtype=int32)] + + """ + op_conf = op_conf_util.OperatorConf() + setattr( + op_conf, "name", name if name is not None else id_util.UniqueStr("ElemCnt_") + ) + op_conf.shape_elem_cnt_conf.x = inputs.unique_name + + op_conf.shape_elem_cnt_conf.exclude_axis_conf.SetInParent() + if dtype is not None: + op_conf.shape_elem_cnt_conf.data_type = oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype( + dtype + ) + op_conf.shape_elem_cnt_conf.y = "y" + interpret_util.Forward(op_conf) + out_lbi = logical_blob_id_util.LogicalBlobId() + setattr(out_lbi, "op_name", op_conf.name) + setattr(out_lbi, "blob_name", "y") + return remote_blob_util.RemoteBlob(out_lbi) + + +@oneflow_export("sync_dynamic_resize") +def sync_dynamic_resize( + inputs: oneflow._oneflow_internal.BlobDesc, + size: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """ + + Args: + inputs (oneflow._oneflow_internal.BlobDesc): The input Blob. + size (oneflow._oneflow_internal.BlobDesc): The size of new Blob. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. 
Its type is `ListNumpy`. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def sync_dynamic_resize_Job(x: tp.Numpy.Placeholder(shape=(4, 3), dtype=flow.float32), + size: tp.Numpy.Placeholder(shape=(1, ), dtype=flow.int32), + ) -> tp.ListNumpy: + resize_Blob = flow.sync_dynamic_resize(inputs=x, + size=size) + return resize_Blob + + x = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9], + [10, 11, 12]]).astype(np.float32) + size = np.array([2]).astype(np.int32) + out = sync_dynamic_resize_Job(x, size) + + # out [array([[1., 2., 3.], + # [4., 5., 6.]], dtype=float32)] + + """ + op_conf = op_conf_util.OperatorConf() + setattr( + op_conf, + "name", + name if name is not None else id_util.UniqueStr("SyncDynamicResize_"), + ) + setattr(op_conf.sync_dynamic_resize_conf, "in", inputs.unique_name) + setattr(op_conf.sync_dynamic_resize_conf, "size", size.unique_name) + setattr(op_conf.sync_dynamic_resize_conf, "axis", 0) + setattr(op_conf.sync_dynamic_resize_conf, "out", "out") + setattr(op_conf.sync_dynamic_resize_conf, "eager", flow.eager_execution_enabled()) + interpret_util.Forward(op_conf) + out_lbi = logical_blob_id_util.LogicalBlobId() + setattr(out_lbi, "op_name", op_conf.name) + setattr(out_lbi, "blob_name", "out") + return remote_blob_util.RemoteBlob(out_lbi) + + +@oneflow_export("stack") +@stable_api +def stack( + inputs: Sequence[oneflow._oneflow_internal.BlobDesc], + axis: int = 0, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator stacks the multiple Blobs on the specified axis. + + Args: + inputs (Sequence[oneflow._oneflow_internal.BlobDesc]): A list of input Blob. + axis (int): The stack axis. + name (Optional[str], optional): The name for the operation. Defaults to None. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def stack_job(x: tp.Numpy.Placeholder(shape=(2, 4, 6)), + y: tp.Numpy.Placeholder(shape=(2, 4, 6)))->tp.Numpy: + out = flow.stack([x, y], axis=2) + return out + + x = np.ones(shape=(2, 4, 6), dtype=np.float32) + y = np.ones(shape=(2, 4, 6), dtype=np.float32) + + out = stack_job(x, y) + + # output.shape (2, 4, 2, 6) + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + """ + if name is None: + name = id_util.UniqueStr("Stack_") + + inputs = list(inputs) + + _input_shape = inputs[0].shape + _max_dim = len(_input_shape) + + # The axis must be in range [-(_max_dim +1), _max_dim] + if axis < 0: + axis = axis + _max_dim + 1 + assert (axis >= 0) and (axis <= _max_dim) + + # All input tensors must have the same shape + _input_list_length = len(inputs) + for i in range(_input_list_length): + _current_shape = inputs[i].shape + assert ( + _input_shape == _current_shape + ), "Each tensor should have the same shape ! 
Found a tensor with shape: {}".format(
+            _current_shape
+        )
+        # Expand dims for each tensor
+        inputs[i] = flow.expand_dims(
+            inputs[i], axis=axis, name=name + "expand_dims_{}".format(i)
+        )
+
+    return flow.concat(inputs, axis=axis, name=name + "concat")
+
+
+@oneflow_export("random.generate_random_batch_permutation_indices")
+def generate_random_batch_permutation_indices(
+    value: oneflow._oneflow_internal.BlobDesc,
+    seed: Optional[int] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator generates a random permutation of indices along the batch axis.
+
+    Args:
+        value (oneflow._oneflow_internal.BlobDesc): The input Blob.
+        seed (Optional[int], optional): The random seed. Defaults to None.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob. Its type is `ListNumpy`.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def random_indice_Job(x: tp.Numpy.Placeholder(shape=(4, 3), dtype=flow.int32),
+        ) -> tp.ListNumpy:
+            return flow.random.generate_random_batch_permutation_indices(value=x)
+
+        x = np.array([[1, 1, 1],
+                      [2, 2, 2],
+                      [3, 3, 3],
+                      [4, 4, 4]]).astype(np.int32)
+        out = random_indice_Job(x)
+
+        # out [array([3, 0, 2, 1], dtype=int32)]
+
+    """
+    import random
+
+    op = (
+        flow.user_op_builder(
+            name
+            if name is not None
+            else id_util.UniqueStr(value.op_name + "_random_batch_permutation_indices")
+        )
+        .Op("generate_random_batch_permutation_indices")
+        .Input("x", [value])
+        .Output("y")
+    )
+    if seed is not None:
+        op.Attr("seed", seed)
+        assert name is not None
+    else:
+        op.Attr("seed", random.randint(-(2 ** 63) + 1, 2 ** 63 - 1))
+    return op.Build().InferAndTryRun().RemoteBlobList()[0]
+
+
+@oneflow_export("random.shuffle")
+def shuffle(
+    value: oneflow._oneflow_internal.BlobDesc,
+    seed: Optional[int] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator shuffles the input Blob along the batch axis.
+
+    Args:
+        value (oneflow._oneflow_internal.BlobDesc): The input Blob.
+        seed (Optional[int], optional): The random seed. Defaults to None.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def shuffle_Job(x: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.int32),
+        ) -> tp.Numpy:
+            return flow.random.shuffle(x)
+
+        x = np.array([[1, 1, 1],
+                      [2, 2, 2],
+                      [3, 3, 3]]).astype(np.int32)
+        out = shuffle_Job(x)
+
+        # out [[3 3 3]
+        #      [1 1 1]
+        #      [2 2 2]]
+
+    """
+    return flow.gather(value, generate_random_batch_permutation_indices(value, seed))
+
+
+@oneflow_export("identity")
+def identity(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator returns a `Blob` that has identical content and data type to input `Blob`.
+
+    Analogous to `tf.identity <https://www.tensorflow.org/api_docs/python/tf/identity>`_
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): The input Blob.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
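+
+    Note: since `identity` simply forwards its input, it is commonly used to give a Blob an explicit `name`.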
+ + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def identity_Job(x: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.int32), + ) -> tp.Numpy: + return flow.identity(x) + + x = np.array([[1, 1, 1], + [2, 2, 2], + [3, 3, 3]]).astype(np.int32) + out = identity_Job(x) + + # out [[1 1 1] + # [2 2 2] + # [3 3 3]] + + """ + if name is None: + name = id_util.UniqueStr("Identity_") + + op = ( + flow.user_op_builder(name).Op("identity").Input("in", [x]).Output("out").Build() + ) + return op.InferAndTryRun().SoleOutputBlob() + + +@oneflow_export("identity_n") +def identity_n( + inputs: Sequence[oneflow._oneflow_internal.BlobDesc], name: Optional[str] = None +) -> List[oneflow._oneflow_internal.BlobDesc]: + """This operator is similar to `oneflow.compatible.single_client.identity`. The difference is that the input and output + of `identity_n` is `List`. + + Args: + inputs (Iterable[oneflow._oneflow_internal.BlobDesc]): A List of input Blob. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + List[oneflow._oneflow_internal.BlobDesc]: A list of result Blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + from typing import List + + + @flow.global_function() + def identity_Job(x: tp.Numpy.Placeholder(shape=(1, 3), dtype=flow.int32), + y: tp.Numpy.Placeholder(shape=(1, 3), dtype=flow.int32), + z: tp.Numpy.Placeholder(shape=(1, 3), dtype=flow.int32) + ) -> List[tp.Numpy]: + return flow.identity_n([x, y, z]) + + + x = np.array([[1, 1, 1]]).astype(np.int32) + y = np.array([[2, 2, 2]]).astype(np.int32) + z = np.array([[3, 3, 3]]).astype(np.int32) + out = identity_Job(x, y, z) + + # out[0] [[1, 1, 1]] + # out[1] [[2, 2, 2]] + # out[2] [[3, 3, 3]] + + """ + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("IdentityN_") + ) + .Op("tuple_identity") + .Input("in", inputs) + .Output("out", len(inputs)) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + + +@oneflow_export("cast_to_static_shape") +def cast_to_static_shape( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator returns a `Blob` that has identical content and data type to input `Blob`, and whose shape is converted from dynamic to static + + Args: + x (oneflow._oneflow_internal.BlobDesc): The input Blob which has dynamic shape. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob which is identical to input blob but has static shape. + + For example: + + .. 
code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def cast_to_static_shape_func(
+            x: tp.ListNumpy.Placeholder(shape=(3, 3), dtype=flow.float32),
+        ) -> tp.Numpy:
+            return flow.cast_to_static_shape(x)
+
+        x = np.array([[1, 1, 1],
+                      [2, 2, 2],
+                      [3, 3, 3]]).astype(np.float32)
+
+        out = cast_to_static_shape_func(x)
+
+        # out [[1 1 1]
+        #      [2 2 2]
+        #      [3 3 3]]
+
+    """
+    if not x.is_dynamic:
+        return x
+
+    if name is None:
+        name = id_util.UniqueStr("CastToStaticShape_")
+
+    op = (
+        flow.user_op_builder(name)
+        .Op("cast_to_static_shape")
+        .Input("input", [x])
+        .Output("output")
+        .Build()
+    )
+    return op.InferAndTryRun().SoleOutputBlob()
+
+
+@oneflow_export("squeeze")
+@stable_api
+def squeeze(
+    input: oneflow._oneflow_internal.BlobDesc,
+    axis: Optional[Sequence[int]] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator removes the specified dimensions of size 1 from the input Blob.
+    If `axis` is not specified, this operator removes all the dimensions of size 1 from the input Blob.
+
+    The number of elements in the return value is the same as in Blob `input`.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): The input Blob.
+        axis (Optional[Sequence[int]], optional): The axis. Defaults to None.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob.
+
+    For example:
+
+    Example 1:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def squeeze_Job(x: tp.Numpy.Placeholder(shape=(1, 1, 1, 3), dtype=flow.int32),
+        ) -> tp.Numpy:
+            return flow.squeeze(x)
+
+
+        x = np.array([[[[1, 1, 1]]]]).astype(np.int32)
+        out = squeeze_Job(x)
+
+        # out.shape (3,)
+
+    Example 2:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def squeeze_Job(x: tp.Numpy.Placeholder(shape=(1, 1, 1, 3), dtype=flow.int32),
+        ) -> tp.Numpy:
+            return flow.squeeze(x, axis=[1, 2])
+
+
+        x = np.array([[[[1, 1, 1]]]]).astype(np.int32)
+        out = squeeze_Job(x)
+
+        # out.shape (1, 3)
+
+    """
+    if axis is None:
+        axis = [idx for idx, dim in enumerate(input.shape) if dim == 1]
+    else:
+        assert isinstance(axis, list) or isinstance(axis, tuple)
+        in_num_axes = len(input.shape)
+        for x in axis:
+            assert x >= -in_num_axes and x < in_num_axes
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("Squeeze_")
+        )
+        .Op("squeeze")
+        .Input("in", [input])
+        .Output("out")
+        .Attr("axes", list(axis))
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("expand")
+@stable_api
+def expand(
+    x: oneflow._oneflow_internal.BlobDesc,
+    expand_size: Sequence[int],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator expands the input Blob to a larger size.
+
+    Passing -1 as the size for a dimension means not changing the size of that dimension.
+
+    The Blob can also be expanded to a larger number of dimensions, and the new ones will be appended at the front.
+
+    For the new dimensions, the size cannot be set to -1.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): The input Blob.
+        expand_size (Sequence[int]): The desired expanded size.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def expandJob(x: tp.Numpy.Placeholder(shape=(1, 3, 1, 2), dtype=flow.int32),
+        ) -> tp.Numpy:
+            return flow.expand(x,
+                               expand_size=[1, 3, 2, 2])
+
+        x = np.array([[[[0, 1]],
+                       [[2, 3]],
+                       [[4, 5]]]]).astype(np.int32)
+
+        out = expandJob(x)
+        # out shape: [1, 3, 2, 2]
+        # [[[[0, 1],
+        #    [0, 1]],
+        #   [[2, 3],
+        #    [2, 3]],
+        #   [[4, 5],
+        #    [4, 5]]]]
+    """
+    expand_size = list(expand_size)
+    assert len(expand_size) >= len(
+        x.shape
+    ), "The desired expanded dims should not be less than the input dims."
+    # calculate the original stride
+    original_stride = [1]
+    for i in range(len(x.shape) - 2, -1, -1):
+        original_stride.insert(0, original_stride[0] * x.shape[i + 1])
+
+    # calculate the output shape and stride
+    new_size = []
+    new_stride = []
+    diff = len(expand_size) - len(x.shape)
+    for i in range(len(expand_size) - 1, -1, -1):
+        if i >= diff:
+            if expand_size[i] == -1 or expand_size[i] == x.shape[i - diff]:
+                new_size.insert(0, x.shape[i - diff])
+                new_stride.insert(0, original_stride[i - diff])
+            else:
+                assert expand_size[i] >= 1 and x.shape[i - diff] == 1
+                new_size.insert(0, expand_size[i])
+                new_stride.insert(0, 0)
+        else:
+            assert expand_size[i] >= 1
+            new_size.insert(0, expand_size[i])
+            if expand_size[i] == 1:
+                new_stride.insert(0, new_stride[0])
+            else:
+                new_stride.insert(0, 0)
+
+    return (
+        flow.user_op_builder(name if name is not None else id_util.UniqueStr("Expand_"))
+        .Op("expand")
+        .Input("in", [x])
+        .Output("out")
+        .Attr("in_shape", list(x.shape))
+        .Attr("out_shape", new_size)
+        .Attr("stride", new_stride)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("expand_dims")
+def expand_dims(
+    input: oneflow._oneflow_internal.BlobDesc, axis: int, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator inserts a new dimension at the specified axis in the input Blob.
+    The size of the new dimension is always 1, and the number of elements in the
+    return value is the same as in Blob `input`.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): The input Blob.
+        axis (int): The specified dimension index.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob.
+
+    For example:
+
+    ..
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def expand_dim_Job(x: tp.Numpy.Placeholder(shape=(1, 3, 3), dtype=flow.int32), + ) -> tp.Numpy: + return flow.expand_dims(input=x, + axis=2) + + + x = np.array([[[1, 1, 1], + [1, 1, 1], + [1, 1, 1]]]).astype(np.int32) + out = expand_dim_Job(x) + + # out.shape (1, 3, 1, 3) + + """ + in_num_axes = len(input.shape) + assert axis >= -(in_num_axes + 1) and axis <= in_num_axes + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("ExpandDims_") + ) + .Op("expand_dims") + .Input("in", [input]) + .Output("out") + .Attr("axis", axis) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("broadcast_like") +@stable_api +def broadcast_like( + x: oneflow._oneflow_internal.BlobDesc, + like: oneflow._oneflow_internal.BlobDesc, + broadcast_axes: Optional[Sequence[int]] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator broadcast the input Blob `x` on the specified axis with input Blob `like`. + + Args: + x (oneflow._oneflow_internal.BlobDesc): The input Blob. + like (oneflow._oneflow_internal.BlobDesc): A Blob. + broadcast_axes (Optional[Sequence[int]], optional): The broadcast axis. Defaults to None. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Raises: + ValueError: The length of broadcast_axes must be greater than 0 and less than or equal to number of axes of like shape. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + Example 1: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def broadcast_like_Job(x: tp.Numpy.Placeholder(shape=(3, 1), dtype=flow.float32) + ) -> tp.Numpy: + like_tensor = flow.constant(value=1.0, + dtype=flow.float32, + shape=(3, 3)) + return flow.broadcast_like(x=x, + like=like_tensor, + broadcast_axes=(1, )) + + + x = np.array([[1], [1], [1]]).astype(np.float32) + out = broadcast_like_Job(x) + + # out [[[1 1 1] + # [1 1 1] + # [1 1 1]]] + + # out.shape (3, 3) + + Example 2: + + .. 
code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def broadcast_like_Job(x: tp.Numpy.Placeholder(shape=(3, 1, 1), dtype=flow.float32)
+        ) -> tp.Numpy:
+            like_tensor = flow.constant(value=1.0,
+                                        dtype=flow.float32,
+                                        shape=(3, 3, 3))
+            return flow.broadcast_like(x=x,
+                                       like=like_tensor,
+                                       broadcast_axes=(1, 2))
+
+
+        x = np.random.randn(3, 1, 1).astype(np.float32)
+        out = broadcast_like_Job(x)
+
+        # out.shape (3, 3, 3)
+
+    """
+    if name is None:
+        name = id_util.UniqueStr("BroadcastLike_")
+
+    if broadcast_axes is None:
+        broadcast_axes = list(range(len(like.shape)))
+
+    assert isinstance(broadcast_axes, (list, tuple))
+
+    if len(broadcast_axes) <= 0 or len(broadcast_axes) > len(like.shape):
+        raise ValueError(
+            "The length of broadcast_axes must be greater than 0 and less than or equal to number of axes of like shape"
+        )
+
+    op = (
+        flow.user_op_builder(name)
+        .Op("broadcast_like")
+        .Input("x", [x])
+        .Input("like", [like])
+        .Attr("broadcast_axes", broadcast_axes)
+        .Output("y")
+        .Build()
+    )
+    return op.InferAndTryRun().SoleOutputBlob()
+
+
+@oneflow_export("masked_fill")
+@stable_api
+def masked_fill(
+    x: oneflow._oneflow_internal.BlobDesc,
+    mask: oneflow._oneflow_internal.BlobDesc,
+    value: Union[float, int],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Fill a blob with a given value according to the given mask.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): Input Blob.
+        mask (oneflow._oneflow_internal.BlobDesc): Composed of 0s and 1s; the input blob `x` will be
+            filled with the given value where the mask is 1.
+        value (Union[float, int]): The value to use for filling the input blob.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+    Attention:
+        x and mask must be broadcastable to each other.
+        mask must be int type (int8/int32/int64).
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The value-filled Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def masked_fill_Job(x: tp.Numpy.Placeholder((4, )),
+                            mask: tp.Numpy.Placeholder((4, ), dtype=flow.int8)) -> tp.Numpy:
+            return flow.masked_fill(x, mask, value=5)
+
+        x = np.array([1, 2, 3, 4], dtype=np.float32)
+        mask = np.array([1, 0, 0, 1], dtype=np.int8)
+
+        out = masked_fill_Job(x, mask)
+
+        # output [5 2 3 5]
+
+    """
+    if name is None:
+        name = id_util.UniqueStr("MaskedFill_")
+    value_like_x = flow.constant_like(like=x, value=value, name=name + "_ConstantLike")
+    return flow.where(condition=mask, x=value_like_x, y=x, name=name + "_Where")
+
+
+@oneflow_export("dim_gather")
+def dim_gather(
+    input: oneflow._oneflow_internal.BlobDesc,
+    dim: int,
+    index: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator gathers elements from `input` according to `index` along the axis `dim`.
+
+    Take a 3-D blob as example, the output is specified by:
+
+    .. code-block:: python
+
+        output[i][j][k] = input[index[i][j][k]][j][k]  # if dim == 0
+        output[i][j][k] = input[i][index[i][j][k]][k]  # if dim == 1
+        output[i][j][k] = input[i][j][index[i][j][k]]  # if dim == 2
+
+
+    The shape of `input` and `index` should be the same except in the `dim` dimension.
+
+    That is, if `input` is an n-dimensional blob with shape :math:`(x_0, x_1, \dots, x_{i-1}, x_i, x_{i+1}, \dots, x_n)`,
+    and `dim = i`, then `index` must be an n-dimensional blob with shape :math:`(x_0, x_1, \dots, x_{i-1}, k, x_{i+1}, \dots, x_n)`
+    where :math:`k \geq 1`.
+
+    The returned Blob `output` has the same shape as `index`.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): The input blob
+        dim (int): The axis along which to index
+        index (oneflow._oneflow_internal.BlobDesc): The index blob of elements to gather
+        name (Optional[str], optional): The name of the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The elements gathered from `input` will be returned as the output Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def dim_gather_Job(input: tp.Numpy.Placeholder((2, 2), dtype=flow.float64),
+                           index: tp.Numpy.Placeholder((2, 2), dtype=flow.int32)) -> tp.Numpy:
+            return flow.dim_gather(input, 1, index)
+
+        input = np.array([[1, 2], [3, 4]]).astype(np.float64)
+        index = np.array([[1, 0], [0, 1]]).astype(np.int32)
+
+        out = dim_gather_Job(input, index)
+        # output
+        # [[2. 1.]
+        #  [3. 4.]]
+
+    """
+    if len(input.shape) != len(index.shape):
+        raise ValueError("Dimensions of input and index should be equal")
+
+    for i in range(0, len(input.shape)):
+        if dim == i:
+            continue
+        else:
+            if input.shape[i] != index.shape[i]:
+                raise ValueError(
+                    "Dimensions of input and index should be the same except at dim"
+                )
+
+    if dim >= len(index.shape):
+        raise ValueError(
+            "Value of dim is out of range (dim should be less than len(index.shape))"
+        )
+
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("DimGather_")
+        )
+        .Op("dim_gather")
+        .Input("input", [input])
+        .Input("index", [index])
+        .Output("output")
+        .Attr("dim", int(dim))
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("amp_white_identity")
+def amp_white_identity(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator returns `x` unchanged. It acts purely as a marker: the op
+    sits on the "white list" of the auto mixed precision (AMP) pass, so wrapping
+    a blob in it hints that the surrounding computation is safe to run in
+    float16 when AMP is enabled.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): The input Blob.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob identical to `x`.
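+
+    For example (a minimal sketch; `amp_identity_Job` is an illustrative name,
+    and the op only takes effect when auto mixed precision is enabled in the
+    function config):
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def amp_identity_Job(x: tp.Numpy.Placeholder((2, 3), dtype=flow.float32)
+        ) -> tp.Numpy:
+            return flow.amp_white_identity(x)
+
+
+        x = np.ones((2, 3), dtype=np.float32)
+        out = amp_identity_Job(x)
+
+        # out is numerically identical to x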
+    """
+    if name is None:
+        name = id_util.UniqueStr("AmpWhiteIdentity_")
+    op = (
+        flow.user_op_builder(name)
+        .Op("amp_white_identity")
+        .Input("in", [x])
+        .Output("out")
+        .Build()
+    )
+    return op.InferAndTryRun().SoleOutputBlob()
+
+
+@oneflow_export("zeros")
+@stable_api
+def zeros(
+    shape: Sequence[int],
+    dtype: Optional[flow.dtype] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator creates a Tensor filled with the scalar value `0`.
+
+    Args:
+        shape (Sequence[int]): The shape of the Tensor.
+        dtype (Optional[flow.dtype], optional): The data type. Defaults to None.
+        name (Optional[str], optional): The name for the operator. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Tensor filled with value `0`
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def zeros_job() -> tp.Numpy:
+            return flow.zeros(shape=(2, 3), dtype=flow.float32)
+
+
+        out = zeros_job()
+
+        # output: [[0. 0. 0.]
+        #          [0. 0. 0.]]
+
+    """
+    if name is None:
+        name = id_util.UniqueStr("Zeros_")
+
+    if dtype is None:
+        dtype = flow.float32
+
+    return flow.constant(value=0.0, shape=shape, dtype=dtype, name=name + "constant")
+
+
+@oneflow_export("ones")
+@stable_api
+def ones(
+    shape: Sequence[int],
+    dtype: Optional[flow.dtype] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator creates a Tensor filled with the scalar value `1`.
+
+    Args:
+        shape (Sequence[int]): The shape of the Tensor.
+        dtype (Optional[flow.dtype], optional): The data type. Defaults to None.
+        name (Optional[str], optional): The name for the operator. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob filled with value `1`
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def ones_job() -> tp.Numpy:
+            return flow.ones(shape=(2, 3), dtype=flow.float32)
+
+
+        out = ones_job()
+
+        # output: [[1. 1. 1.]
+        #          [1. 1. 1.]]
+    """
+    if name is None:
+        name = id_util.UniqueStr("Ones_")
+
+    if dtype is None:
+        dtype = flow.float32
+
+    return flow.constant(value=1.0, shape=shape, dtype=dtype, name=name + "constant")
+
+
+@oneflow_export("profiler.nvtx_start")
+def nvtx_start(
+    x: oneflow._oneflow_internal.BlobDesc, mark_prefix: str, name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Marks the start of an NVTX profiling range; returns `x` unchanged.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): The Blob the range is threaded through.
+        mark_prefix (str): The prefix of the NVTX mark.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+    """
+    if name is None:
+        name = id_util.UniqueStr("NvtxStart_")
+
+    op = (
+        flow.user_op_builder(name)
+        .Op("nvtx_start")
+        .Input("in", [x])
+        .Output("out")
+        .Attr("mark_prefix", str(mark_prefix))
+        .Build()
+    )
+    return op.InferAndTryRun().SoleOutputBlob()
+
+
+@oneflow_export("profiler.nvtx_end")
+def nvtx_end(
+    x: oneflow._oneflow_internal.BlobDesc, mark_prefix: str, name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Marks the end of the NVTX range with the same `mark_prefix`; returns `x` unchanged.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): The Blob the range is threaded through.
+        mark_prefix (str): The prefix of the NVTX mark.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+    """
+    if name is None:
+        name = id_util.UniqueStr("NvtxEnd_")
+
+    op = (
+        flow.user_op_builder(name)
+        .Op("nvtx_end")
+        .Input("in", [x])
+        .Output("out")
+        .Attr("mark_prefix", str(mark_prefix))
+        .Build()
+    )
+    return op.InferAndTryRun().SoleOutputBlob()
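+
+
+# A minimal profiling sketch (illustrative only; requires a CUDA build and an
+# NVTX-aware profiler such as Nsight Systems to observe the ranges):
+#
+#   x = flow.profiler.nvtx_start(x, mark_prefix="dense_block")
+#   x = flow.layers.dense(x, 100)
+#   x = flow.profiler.nvtx_end(x, mark_prefix="dense_block")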
+""" +from __future__ import absolute_import + +import os + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.eager import boxing_util as boxing_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import ( + placement_context as placement_ctx, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible import single_client as flow + + +@oneflow_export("assign") +def assign(ref, value, dtype=None, name=None): + if name is None: + name = id_util.UniqueStr("Assign_") + + op = ( + flow.consistent_user_op_builder(name) + .Op("assign") + .Input("ref", [ref]) + .Input("value", [value]) + .Build() + ) + op.InferAndTryRun() + + +@oneflow_export("system.assign") +def api_system_assign(ref, value, validate_shape=None, use_locking=None, name=None): + # TODO(lixinqi): check ref.is_lvalue + api = enable_if.unique([lazy_system_assign, eager_system_assign]) + return api( + ref, value, validate_shape=validate_shape, use_locking=use_locking, name=name + ) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def lazy_system_assign(ref, value, validate_shape=None, use_locking=None, name=None): + op_conf = _SystemAssignOpConf(ref, value, name=name) + ( + device_tag, + machine_device_ids, + hierarchy, + ) = oneflow._oneflow_internal.GetDeviceTagAndMachineDeviceIdsAndHierarchy( + ref.parallel_conf + ) + if hierarchy is not None: + hierarchy = tuple(hierarchy.dim()) + with flow.scope.placement(device_tag, machine_device_ids, hierarchy): + interpret_util.Forward(op_conf) + return ref + + +@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled) +def eager_system_assign(ref, value, validate_shape=None, use_locking=None, name=None): + op_conf = _SystemAssignOpConf(ref, value, name=name) + # no backward for assign + oneflow._oneflow_internal.deprecated.LogicalRun( + lambda builder: boxing_util.BuildAssignInstruction( + builder, ref.blob_object, value.blob_object, op_conf + ) + ) + return ref + + +@oneflow_export("experimental.eager_assign_121") +def api_one_to_one_assign(ref, value): + assert hob.eager_execution_enabled(None) + oneflow._oneflow_internal.deprecated.LogicalRun( + lambda builder: builder.Build121AssignInstruction( + ref.blob_object, value.blob_object + ) + ) + return ref + + +def _SystemAssignOpConf(ref, value, name=None): + if name is None: + name = id_util.UniqueStr("Assign_") + op_conf = op_conf_util.OperatorConf() + op_conf.name = name + op_conf.assign_conf.ref = ref.unique_name + op_conf.assign_conf.value = value.unique_name + return op_conf diff --git a/oneflow/compatible_single_client_python/ops/builtin_ops.py b/oneflow/compatible_single_client_python/ops/builtin_ops.py new file mode 100644 index 
diff --git a/oneflow/compatible_single_client_python/ops/builtin_ops.py b/oneflow/compatible_single_client_python/ops/builtin_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..5b0849e47faa054aeb0ff9788b5892d26754ad4f
--- /dev/null
+++ b/oneflow/compatible_single_client_python/ops/builtin_ops.py
@@ -0,0 +1,116 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+import traceback
+
+from oneflow.compatible import single_client as flow
+import oneflow._oneflow_internal
+from oneflow.compatible_single_client_python.framework import id_util as id_util
+from oneflow.compatible_single_client_python.framework.attr_util import (
+    convert_to_user_attr_value,
+)
+from oneflow.compatible_single_client_python.oneflow_export import oneflow_export
+
+
+@oneflow_export("builtin_op")
+class BuiltinOp(object):
+    def __init__(self, op_type_name, op_name=None):
+        if op_name is None:
+            op_name = id_util.UniqueStr(op_type_name)
+        self._builder = oneflow._oneflow_internal.one.OpBuilder(op_type_name, op_name)
+        self._op = None
+        self._op_type_name = op_type_name
+
+    @property
+    def op(self):
+        r"""access the builtin op
+
+        Returns:
+            the builtin op
+        """
+        # TODO: Check for op completeness.
+        if self._op is None:
+            self._op = self._builder.build()
+        return self._op
+
+    def Input(self, input_name, num=1):
+        r"""Set an input blob of the op
+
+        Args:
+            input_name (str): the input name of the blob
+            num (int, optional): Defaults to 1.
+
+        Returns:
+            self
+        """
+        assert isinstance(num, int) and num >= 1
+        self._builder.input(input_name, num)
+        return self
+
+    def Output(self, output_name, num=1):
+        r"""Set an output blob of the op
+
+        Args:
+            output_name (str): the name of the output blob
+            num (int, optional): Defaults to 1.
+
+        Returns:
+            self
+        """
+        assert isinstance(num, int) and num >= 1
+        self._builder.output(output_name, num)
+        return self
+
+    def Attr(self, attr_name, attr_value, attr_type_name=None):
+        r"""Set the value of an attribute of the op.
+
+        Args:
+            attr_name (str): the attribute name of the op
+            attr_value (Any): the attribute value of the op
+
+        Raises:
+            ValueError: raised when the value is not compatible with the op's attribute type.
+
+        Returns:
+            self
+        """
+        if attr_type_name is not None:
+            print(
+                """WARNING: Argument 'attr_type_name' of UserOpConfBuilder.Attr has been deprecated. Please remove it.
+
+                For instance:
+                    - .Attr("out_num", out_num, "AttrTypeInt64")
+                    + .Attr("out_num", out_num)
+                """
+            )
+            print(traceback.format_stack()[-2])
+
+        assert self._op_type_name is not None
+        self._builder.attr(
+            attr_name,
+            convert_to_user_attr_value(self._op_type_name, attr_name, attr_value),
+        )
+        return self
+
+    def Build(self):
+        r"""Explicitly complete the construction of the builtin op
+
+        Returns:
+            the completed builtin op
+        """
+        if self._op is None:
+            self._op = self._builder.build()
+        return self._op
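+
+
+# A minimal usage sketch (illustrative; "relu" and its "in"/"out" names are
+# just an example of an op type from the user-op registry):
+#
+#   relu_op = flow.builtin_op("relu").Input("in").Output("out").Build()
+#
+# The built op is intended for eager execution, where the interpreter applies
+# it to eager tensors.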
diff --git a/oneflow/compatible_single_client_python/ops/categorical_ordinal_encode_op.py b/oneflow/compatible_single_client_python/ops/categorical_ordinal_encode_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..d0040684ebdf7b3d482698b03207e690a105e1ca
--- /dev/null
+++ b/oneflow/compatible_single_client_python/ops/categorical_ordinal_encode_op.py
@@ -0,0 +1,170 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+from typing import Optional
+from oneflow.compatible_single_client_python.oneflow_export import oneflow_export
+
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.framework import id_util as id_util
+from oneflow.compatible_single_client_python.framework import (
+    remote_blob as remote_blob_util,
+)
+import oneflow._oneflow_internal
+
+
+@oneflow_export("categorical_ordinal_encode")
+def categorical_ordinal_encode(
+    table: oneflow._oneflow_internal.BlobDesc,
+    size: oneflow._oneflow_internal.BlobDesc,
+    input_tensor: oneflow._oneflow_internal.BlobDesc,
+    hash_precomputed: bool = True,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator maintains a hash table to encode a categorical ordinal Blob. It converts discrete input values into contiguous integer IDs.
+
+    Args:
+        table (oneflow._oneflow_internal.BlobDesc): The hash table, which can be created as a variable.
+        size (oneflow._oneflow_internal.BlobDesc): The size of the hash table.
+        input_tensor (oneflow._oneflow_internal.BlobDesc): The input Blob.
+        hash_precomputed (bool, optional): Only `True` is currently supported; the op does not compute hash values internally. Defaults to True.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def categorical_ordinal_encode_Job(x: tp.Numpy.Placeholder((3, 3), dtype=flow.int32)
+        ) -> tp.Numpy:
+            dtype = x.dtype
+            with flow.scope.namespace("categorical_ordinal_encode"):
+                table = flow.get_variable(
+                    name="Table",
+                    shape=(16,),
+                    dtype=dtype,
+                    initializer=flow.constant_initializer(0, dtype=dtype),
+                    trainable=False,
+                    reuse=False,
+                )
+                size = flow.get_variable(
+                    name="Size",
+                    shape=(1,),
+                    dtype=dtype,
+                    initializer=flow.constant_initializer(0, dtype=dtype),
+                    trainable=False,
+                    reuse=False,
+                )
+            return flow.categorical_ordinal_encode(
+                table=table, size=size, input_tensor=x, name="Encode",
+            )
+
+        x = np.array([[7, 0, 2],
+                      [1, 7, 2],
+                      [0, 1, 7]]).astype(np.int32)
+
+        out = categorical_ordinal_encode_Job(x)
+
+        # out [[1 0 2]
+        #      [3 1 2]
+        #      [0 3 1]]
+
+    """
+    assert hash_precomputed is True
+    return (
+        flow.user_op_builder(name or id_util.UniqueStr("CategoricalOrdinalEncode_"))
+        .Op("CategoricalOrdinalEncode")
+        .Input("in", [input_tensor])
+        .Input("table", [table])
+        .Input("size", [size])
+        .Output("out")
+        .Attr("hash_precomputed", hash_precomputed)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("layers.categorical_ordinal_encoder")
+def categorical_ordinal_encoder(
+    input_tensor: oneflow._oneflow_internal.BlobDesc,
+    capacity: int,
+    hash_precomputed: bool = True,
+    name: str = "CategoricalOrdinalEncoder",
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator uses `oneflow.compatible.single_client.categorical_ordinal_encode` to encapsulate a categorical ordinal encoder. For more details, please refer to `oneflow.compatible.single_client.categorical_ordinal_encode`.
+
+    Args:
+        input_tensor (oneflow._oneflow_internal.BlobDesc): The input Blob.
+        capacity (int): The capacity of the hash table.
+        hash_precomputed (bool, optional): Only `True` is currently supported; the op does not compute hash values internally. Defaults to True.
+        name (str, optional): The name for the operation. Defaults to "CategoricalOrdinalEncoder".
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob.
+
+    For example:
+
+    .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + @flow.global_function() + def categorical_ordinal_encoder_Job(x: tp.Numpy.Placeholder((3, 3), dtype=flow.int32) + ) -> tp.Numpy: + return flow.layers.categorical_ordinal_encoder(x, 16) + + x = np.array([[7, 0, 2], + [1, 7, 2], + [0, 1, 7]]).astype(np.int32) + + out = categorical_ordinal_encoder_Job(x) + + # out [[1 0 2] + # [3 1 2] + # [0 3 1]] + + """ + assert hash_precomputed is True + dtype = input_tensor.dtype + with flow.scope.namespace(name): + table = flow.get_variable( + name="Table", + shape=(capacity * 2,), + dtype=dtype, + initializer=flow.constant_initializer(0, dtype=dtype), + trainable=False, + reuse=False, + ) + size = flow.get_variable( + name="Size", + shape=(1,), + dtype=dtype, + initializer=flow.constant_initializer(0, dtype=dtype), + trainable=False, + reuse=False, + ) + return categorical_ordinal_encode( + table=table, size=size, input_tensor=input_tensor, name="Encode", + ) diff --git a/oneflow/compatible_single_client_python/ops/combined_margin_loss.py b/oneflow/compatible_single_client_python/ops/combined_margin_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..57aa99a0e5f49fad6dd30cd2df558c1dd936bb9f --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/combined_margin_loss.py @@ -0,0 +1,67 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from __future__ import absolute_import + +import os +from typing import Union, Optional, Sequence + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import module as module_util +from oneflow.compatible_single_client_python.ops import ( + math_unary_elementwise_ops as math_unary_elementwise_ops, +) +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal + + +@oneflow_export("combined_margin_loss") +def combined_margin_loss( + x: oneflow._oneflow_internal.BlobDesc, + label: oneflow._oneflow_internal.BlobDesc, + m1: float = 1, + m2: float = 0, + m3: float = 0, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + depth = x.shape[1] + y, theta = ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("CombinedMarginLoss_") + ) + .Op("combined_margin_loss") + .Input("x", [x]) + .Input("label", [label]) + .Output("y") + .Output("theta") + .Attr("m1", float(m1)) + .Attr("m2", float(m2)) + .Attr("m3", float(m3)) + .Attr("depth", int(depth)) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + return y diff --git a/oneflow/compatible_single_client_python/ops/constant_op.py b/oneflow/compatible_single_client_python/ops/constant_op.py new file mode 100644 index 0000000000000000000000000000000000000000..17a5e003912d750282489490976af17fd960defd --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/constant_op.py @@ -0,0 +1,322 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import os +from typing import Optional, Sequence, Union + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + stable_api, +) +import oneflow._oneflow_internal + + +@oneflow_export("constant") +def constant( + value: Union[int, float], + dtype: Optional[flow.dtype] = None, + shape: Optional[Sequence[int]] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator creates a constant Blob. 
+ + Args: + value (Union[int, float]): The constant value of Blob. + dtype (Optional[flow.dtype], optional): The data type of Blob. Defaults to None. + shape (Optional[Sequence[int]], optional): The shape of Blob. Defaults to None. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Raises: + NotImplementedError: The data type of value should be int or float. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def constant_Job() -> tp.Numpy: + constant_blob = flow.constant(value=1.5, + shape=(1, 3, 3), + dtype=flow.float) + return constant_blob + + + out = constant_Job() + + # out [[[1.5 1.5 1.5] + # [1.5 1.5 1.5] + # [1.5 1.5 1.5]]] + + """ + if name is None: + name = id_util.UniqueStr("Constant_") + assert value is not None + assert dtype is not None + + if not isinstance(value, (int, float)): + raise NotImplementedError + + if isinstance(value, float): + is_floating_value = True + floating_value = float(value) + integer_value = int(0) + else: + is_floating_value = False + floating_value = float(0) + integer_value = int(value) + if shape is not None: + assert isinstance(shape, (list, tuple)) + else: + shape = [] + return ( + flow.user_op_builder(name) + .Op("constant") + .Output("out") + .Attr("floating_value", floating_value) + .Attr("integer_value", integer_value) + .Attr("is_floating_value", is_floating_value) + .Attr("dtype", dtype) + .Attr("shape", shape) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("constant_scalar") +def constant_scalar( + value: Union[int, float], + dtype: Optional[flow.dtype] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator creates a constant scalar Blob. + + Args: + value (Union[int, float]): The constant value of Blob. + dtype (Optional[flow.dtype], optional): The data type of Blob. Defaults to None. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def constant_scalar_Job() -> tp.Numpy: + constant_scalar = flow.constant_scalar(value=2.5, + dtype=flow.float) + return constant_scalar + + + out = constant_scalar_Job() + + # out [2.5] + + """ + return flow.constant(value, dtype=dtype, shape=[1]) + + +@oneflow_export("constant_like") +def constant_like( + like: oneflow._oneflow_internal.BlobDesc, + value: Union[int, float], + dtype: Optional[flow.dtype] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator creates a constant Blob that has the same shape as `like`. + + Args: + like (oneflow._oneflow_internal.BlobDesc): A Blob. + value (Union[int, float]): The constant value of Blob. + dtype (Optional[flow.dtype], optional): The data type of Blob. Defaults to None. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Raises: + NotImplementedError: The data type of value should be int or float. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def constant_like_Job() -> tp.Numpy: + constant_blob = flow.constant(value=1.5, + shape=(1, 3, 3), + dtype=flow.float) + constant_like_blob = flow.constant_like(like=constant_blob, + value=5.5, + dtype=flow.float) + return constant_like_blob + + + out = constant_like_Job() + + # out [[[5.5 5.5 5.5] + # [5.5 5.5 5.5] + # [5.5 5.5 5.5]]] + + """ + op_conf = op_conf_util.OperatorConf() + setattr( + op_conf, + "name", + name if name is not None else id_util.UniqueStr("ConstantLike_"), + ) + setattr(op_conf.constant_like_conf, "like", like.unique_name) + if isinstance(value, int): + op_conf.constant_like_conf.int_operand = value + elif isinstance(value, float): + op_conf.constant_like_conf.float_operand = value + else: + raise NotImplementedError + if dtype is not None: + setattr( + op_conf.constant_like_conf, + "data_type", + oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype(dtype), + ) + setattr(op_conf.constant_like_conf, "out", "out") + interpret_util.Forward(op_conf) + out_lbi = logical_blob_id_util.LogicalBlobId() + setattr(out_lbi, "op_name", op_conf.name) + setattr(out_lbi, "blob_name", "out") + return remote_blob_util.RemoteBlob(out_lbi) + + +@oneflow_export("ones_like") +@stable_api +def ones_like( + like: oneflow._oneflow_internal.BlobDesc, + dtype: Optional[flow.dtype] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator creates a Blob with all elements set to `1` that has the same shape as `like`. + + Args: + like (oneflow._oneflow_internal.BlobDesc): A Blob. + dtype (Optional[flow.dtype], optional): The data type of Blob. Defaults to None. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def ones_like_Job() -> tp.Numpy: + constant_blob = flow.constant(value=1.5, + shape=(1, 3, 3), + dtype=flow.float) + ones_like_blob = flow.ones_like(like=constant_blob, + dtype=flow.float) + return ones_like_blob + + + out = ones_like_Job() + + # out [[[1. 1. 1.] + # [1. 1. 1.] + # [1. 1. 1.]]] + + """ + return constant_like(like, 1, dtype=dtype, name=name) + + +@oneflow_export("zeros_like") +@stable_api +def zeros_like( + like: oneflow._oneflow_internal.BlobDesc, + dtype: Optional[flow.dtype] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator creates a Blob that has the same shape as `like` whose all elements are set to `0`. + + Args: + like (oneflow._oneflow_internal.BlobDesc): A Blob. + dtype (Optional[flow.dtype], optional): The data type of Blob. Defaults to None. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def zeros_like_Job() -> tp.Numpy: + constant_blob = flow.constant(value=1.5, + shape=(1, 3, 3), + dtype=flow.float) + zeros_like_blob = flow.zeros_like(like=constant_blob, + dtype=flow.float) + return zeros_like_blob + + + out = zeros_like_Job() + + # out [[[0. 0. 0.] + # [0. 0. 0.] + # [0. 0. 0.]]] + + """ + return constant_like(like, 0, dtype=dtype, name=name) diff --git a/oneflow/compatible_single_client_python/ops/count_not_finite.py b/oneflow/compatible_single_client_python/ops/count_not_finite.py new file mode 100644 index 0000000000000000000000000000000000000000..042c5ff39db92a5b2b884eb0e5ce66f10ab87fe3 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/count_not_finite.py @@ -0,0 +1,68 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import os + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import ( + distribute as distribute_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal +from typing import Optional, Union, Sequence + + +@oneflow_export("count_not_finite") +def count_not_finite( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("CountNotFinite_") + ) + .Op("count_not_finite") + .Input("x", [x]) + .Output("y") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("multi_count_not_finite") +def multi_count_not_finite( + x: Optional[Sequence[oneflow._oneflow_internal.BlobDesc]] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("MultiCountNotFinite_") + ) + .Op("multi_count_not_finite") + .Input("x", x) + .Output("y") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) diff --git a/oneflow/compatible_single_client_python/ops/data_ops.py b/oneflow/compatible_single_client_python/ops/data_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..527d585676ea3812124dd667997363c4cd0adc87 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/data_ops.py @@ -0,0 +1,501 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from typing import Optional, Sequence, Tuple, Union, List + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.job import initializer_conf_pb2 as initializer_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + oneflow_deprecate, +) +import oneflow._oneflow_internal +import traceback + + +@oneflow_export("data.ImagePreprocessor") +class ImagePreprocessor(object): + def __init__(self, preprocessor: str) -> None: + assert isinstance(preprocessor, str) + if preprocessor.lower() != "bgr2rgb" and preprocessor.lower() != "mirror": + raise ValueError('preprocessor must be "bgr2rgb" or "mirror".') + + self.preprocessor = preprocessor + + def is_rgb(self) -> bool: + return self.preprocessor.lower() == "bgr2rgb" + + def is_mirror(self) -> bool: + return self.preprocessor.lower() == "mirror" + + +@oneflow_export("data.ImageResizePreprocessor") +class ImageResizePreprocessor(object): + def __init__(self, width: int, height: int) -> None: + assert isinstance(width, int) + assert isinstance(height, int) + self.width = width + self.height = height + + +@oneflow_export("data.ImageCodec") +class ImageCodec(object): + def __init__( + self, + image_preprocessors: Optional[ + Sequence[Union[ImagePreprocessor, ImageResizePreprocessor,]] + ] = None, + ) -> None: + if isinstance(image_preprocessors, (list, tuple)): + self.image_preprocessors = list(image_preprocessors) + else: + self.image_preprocessors = [] + + def color_space(self) -> str: + for img_preprocessor in self.image_preprocessors: + if ( + isinstance(img_preprocessor, ImagePreprocessor) + and img_preprocessor.is_rgb() + ): + return "RGB" + return "BGR" + + def do_mirror(self) -> bool: + for img_preprocessor in self.image_preprocessors: + if ( + isinstance(img_preprocessor, ImagePreprocessor) + and img_preprocessor.is_mirror() + ): + return True + return False + + def do_resize(self): + for img_preprocessor in self.image_preprocessors: + if isinstance(img_preprocessor, ImageResizePreprocessor): + return (True, img_preprocessor.width, img_preprocessor.height) + return (False, -1, -1) + + +@oneflow_export("data.RawCodec") +class RawCodec(object): + def __init__(self, truncate: bool = False, auto_zero_padding: bool = False) -> None: + if auto_zero_padding: + print( + """WARNING: auto_zero_padding has been deprecated, Please use truncate instead. 
+ """ + ) + self.truncate = truncate or auto_zero_padding + + +@oneflow_export("data.NormByChannelPreprocessor") +class NormByChannelPreprocessor(object): + def __init__( + self, + mean_values: Union[List[float], Tuple[float]], + std_values: Union[List[float], Tuple[float]] = (1.0, 1.0, 1.0), + data_format: str = "channels_last", + ) -> None: + assert isinstance(mean_values, (list, tuple)) + assert isinstance(std_values, (list, tuple)) + assert isinstance(data_format, str) + self.mean_values = mean_values + self.std_values = std_values + self.data_format = data_format + + def output_layout(self) -> str: + if self.data_format == "channels_last": + return "NHWC" + else: + return "NCHW" + + +@oneflow_export("data.BlobConf") +class BlobConf(object): + def __init__( + self, + name: str, + shape: Sequence[int], + dtype: flow.dtype, + codec: Union[ImageCodec, RawCodec], + preprocessors: Optional[Sequence[Union[NormByChannelPreprocessor,]]] = None, + ) -> None: + assert isinstance(name, str) + assert isinstance(shape, (list, tuple)) + + self.name = name + self.shape = shape + self.dtype = dtype + self.codec = codec + + if isinstance(preprocessors, (list, tuple)): + self.preprocessors = list(preprocessors) + else: + self.preprocessors = [] + + def decode_blob( + self, input_blob: oneflow._oneflow_internal.BlobDesc, batch_size: int + ) -> oneflow._oneflow_internal.BlobDesc: + if isinstance(self.codec, ImageCodec): + color_space = self.codec.color_space() + image = flow.data.ofrecord_image_decoder( + input_blob=input_blob, blob_name=self.name, color_space=color_space + ) + coin_flip = None + if self.codec.do_mirror(): + coin_flip = flow.random.coin_flip(batch_size) + + do_resize, width, height = self.codec.do_resize() + if do_resize: + assert width > 0 and height > 0 + image, _, _ = flow.image.resize( + image=image, target_size=(width, height) + ) + else: + assert len(self.shape) >= 2 + image, _, _ = flow.image.resize( + image=image, target_size=(self.shape[0], self.shape[1]) + ) + for preprocess in self.preprocessors: + image = flow.image.crop_mirror_normalize( + input_blob=image, + mirror_blob=coin_flip, + color_space=color_space, + output_layout=preprocess.output_layout(), + mean=preprocess.mean_values, + std=preprocess.std_values, + output_dtype=self.dtype, + ) + return image + elif isinstance(self.codec, RawCodec): + raw = flow.data.ofrecord_raw_decoder( + input_blob=input_blob, + blob_name=self.name, + shape=self.shape, + dtype=self.dtype, + truncate=self.codec.truncate, + ) + return raw + else: + raise NotImplementedError + + +@oneflow_export("data.decode_ofrecord") +@oneflow_deprecate() +def decode_ofrecord( + ofrecord_dir: str, + blobs: Sequence[BlobConf], + batch_size: int = 1, + data_part_num: int = 1, + part_name_prefix: str = "part-", + part_name_suffix_length: int = -1, + shuffle: bool = False, + buffer_size: int = 1024, + name: str = None, +) -> Tuple[oneflow._oneflow_internal.BlobDesc]: + print( + "WARNING:", + "oneflow.compatible.single_client.data.decode_ofrecord is deprecated, and NOT work in eager mode, please use: \n", + " 1) ofrecord = oneflow.compatible.single_client.data.ofrecord_reader(...) to read ofrecord; \n", + " 2) image = oneflow.compatible.single_client.data.ofrecord_image_decoder(...) to decode image; \n", + " 3) raw = oneflow.compatible.single_client.data.ofrecord_raw_decoder(...) 
to decode raw data like label; \n",
+        traceback.format_stack()[-2],
+    )
+    assert not flow.eager_execution_enabled()
+
+    ofrecord = flow.data.ofrecord_reader(
+        ofrecord_dir=ofrecord_dir,
+        batch_size=batch_size,
+        data_part_num=data_part_num,
+        part_name_prefix=part_name_prefix,
+        part_name_suffix_length=part_name_suffix_length,
+        random_shuffle=shuffle,
+        shuffle_buffer_size=buffer_size,
+        name=name,
+    )
+
+    result_blob_list = []
+    for blob_conf in blobs:
+        result_blob_list.append(
+            blob_conf.decode_blob(input_blob=ofrecord, batch_size=batch_size)
+        )
+
+    return tuple(result_blob_list)
+
+
+@oneflow_export("data.ofrecord_loader")
+def ofrecord_loader(
+    ofrecord_dir: str,
+    batch_size: int = 1,
+    data_part_num: int = 1,
+    part_name_prefix: str = "part-",
+    part_name_suffix_length: int = -1,
+    shuffle: bool = False,
+    shuffle_buffer_size: int = 1024,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    print(
+        "WARNING:",
+        "oneflow.compatible.single_client.data.ofrecord_loader is deprecated, and does NOT work in eager mode, please use: \n",
+        "    ofrecord = oneflow.compatible.single_client.data.ofrecord_reader(...) to read ofrecord; \n",
+        traceback.format_stack()[-2],
+    )
+
+    return flow.data.ofrecord_reader(
+        ofrecord_dir=ofrecord_dir,
+        batch_size=batch_size,
+        data_part_num=data_part_num,
+        part_name_prefix=part_name_prefix,
+        part_name_suffix_length=part_name_suffix_length,
+        random_shuffle=shuffle,
+        shuffle_buffer_size=shuffle_buffer_size,
+        name=name,
+    )
+
+
+@oneflow_export("data.ofrecord_reader")
+def ofrecord_reader(
+    ofrecord_dir: str,
+    batch_size: int = 1,
+    data_part_num: int = 1,
+    part_name_prefix: str = "part-",
+    part_name_suffix_length: int = -1,
+    random_shuffle: bool = False,
+    shuffle_buffer_size: int = 1024,
+    shuffle_after_epoch: bool = False,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Get an ofrecord object from an OFRecord dataset.
+
+    Args:
+        ofrecord_dir (str): Path to the OFRecord dataset.
+        batch_size (int, optional): Batch size. Defaults to 1.
+        data_part_num (int, optional): Number of the dataset's partitions. Defaults to 1.
+        part_name_prefix (str, optional): Prefix of the dataset's partition files. Defaults to "part-".
+        part_name_suffix_length (int, optional): Total length of the zero-padded suffix number; -1 means no padding, e.g. 3 for `part-001`. Defaults to -1.
+        random_shuffle (bool, optional): Whether records are shuffled. Defaults to False.
+        shuffle_buffer_size (int, optional): Shuffle buffer size. Defaults to 1024.
+        shuffle_after_epoch (bool, optional): Whether records are reshuffled after each epoch. Defaults to False.
+        name (Optional[str], optional): Optional name. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + from typing import Tuple + + + @flow.global_function(type="predict") + def ofrecord_reader_job() -> Tuple[tp.Numpy, tp.Numpy]: + batch_size = 16 + with flow.scope.placement("cpu", "0:0"): + # our ofrecord file path is "./dataset/part-0" + ofrecord = flow.data.ofrecord_reader( + "./dataset/", + batch_size=batch_size, + data_part_num=1, + part_name_suffix_length=-1, + part_name_prefix='part-', + random_shuffle=True, + shuffle_after_epoch=True, + ) + # image shape is (28*28, ) + image = flow.data.OFRecordRawDecoder( + ofrecord, "images", shape=(784, ), dtype=flow.int32 + ) + # label shape is (1, ) + label = flow.data.OFRecordRawDecoder( + ofrecord, "labels", shape=(1, ), dtype=flow.int32 + ) + + return image, label + + if __name__ == "__main__": + images, labels = ofrecord_reader_job() + print("In per batch, images shape is", images.shape) + print("In per batch, labels shape is", labels.shape) + + # In per batch, images shape is (16, 784) + # In per batch, labels shape is (16, 1) + + """ + if name is None: + name = id_util.UniqueStr("OFRecord_Reader_") + + return ( + flow.user_op_builder(name) + .Op("OFRecordReader") + .Output("out") + .Attr("data_dir", ofrecord_dir) + .Attr("data_part_num", data_part_num) + .Attr("batch_size", batch_size) + .Attr("part_name_prefix", part_name_prefix) + .Attr("random_shuffle", random_shuffle) + .Attr("shuffle_buffer_size", shuffle_buffer_size) + .Attr("shuffle_after_epoch", shuffle_after_epoch) + .Attr("part_name_suffix_length", part_name_suffix_length) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("data.decode_random") +def decode_random( + shape: Sequence[int], + dtype: flow.dtype, + batch_size: int = 1, + initializer: Optional[initializer_conf_util.InitializerConf] = None, + tick: Optional[oneflow._oneflow_internal.BlobDesc] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + op_conf = op_conf_util.OperatorConf() + + if name is None: + name = id_util.UniqueStr("DecodeRandom_") + assert isinstance(name, str) + op_conf.name = name + + assert isinstance(shape, (list, tuple)) + op_conf.decode_random_conf.shape.dim.extend(shape) + + assert dtype is not None + setattr( + op_conf.decode_random_conf, + "data_type", + oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype(dtype), + ) + + op_conf.decode_random_conf.batch_size = batch_size + + if initializer is not None: + op_conf.decode_random_conf.data_initializer.CopyFrom(initializer) + else: + op_conf.decode_random_conf.data_initializer.CopyFrom( + flow.random_uniform_initializer() + ) + + if tick: + op_conf.decode_random_conf.tick = tick.unique_name + op_conf.decode_random_conf.out = "out" + + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = "out" + + interpret_util.ConsistentForward(op_conf) + return remote_blob_util.RemoteBlob(lbi) + + +@oneflow_export( + "data.image_decoder_random_crop_resize", "data.ImageDecoderRandomCropResize" +) +def image_decoder_random_crop_resize( + input_blob: oneflow._oneflow_internal.BlobDesc, + target_width: int, + target_height: int, + num_attempts: Optional[int] = None, + seed: Optional[int] = None, + random_area: Optional[Sequence[float]] = None, + random_aspect_ratio: Optional[Sequence[float]] = None, + num_workers: Optional[int] = None, + warmup_size: Optional[int] = None, + max_num_pixels: Optional[int] = None, + name: Optional[str] = None, +) 
-> Tuple[oneflow._oneflow_internal.BlobDesc]: + if name is None: + name = id_util.UniqueStr("ImageDecoderRandomCropResize_") + + op_conf = op_conf_util.OperatorConf() + op_conf.name = name + setattr(op_conf.image_decoder_random_crop_resize_conf, "in", input_blob.unique_name) + op_conf.image_decoder_random_crop_resize_conf.out = "out" + op_conf.image_decoder_random_crop_resize_conf.target_width = target_width + op_conf.image_decoder_random_crop_resize_conf.target_height = target_height + if num_attempts is not None: + op_conf.image_decoder_random_crop_resize_conf.num_attempts = num_attempts + if seed is not None: + op_conf.image_decoder_random_crop_resize_conf.seed = seed + if random_area is not None: + assert len(random_area) == 2 + op_conf.image_decoder_random_crop_resize_conf.random_area_min = random_area[0] + op_conf.image_decoder_random_crop_resize_conf.random_area_max = random_area[1] + if random_aspect_ratio is not None: + assert len(random_aspect_ratio) == 2 + op_conf.image_decoder_random_crop_resize_conf.random_aspect_ratio_min = random_aspect_ratio[ + 0 + ] + op_conf.image_decoder_random_crop_resize_conf.random_aspect_ratio_max = random_aspect_ratio[ + 1 + ] + if num_workers is not None: + op_conf.image_decoder_random_crop_resize_conf.num_workers = num_workers + if warmup_size is not None: + op_conf.image_decoder_random_crop_resize_conf.warmup_size = warmup_size + if max_num_pixels is not None: + op_conf.image_decoder_random_crop_resize_conf.max_num_pixels = max_num_pixels + interpret_util.Forward(op_conf) + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = "out" + return remote_blob_util.RemoteBlob(lbi) + + +@oneflow_export("data.onerec_reader") +def onerec_reader( + files, + batch_size=1, + random_shuffle=False, + shuffle_mode="instance", + shuffle_buffer_size=1024, + shuffle_after_epoch=False, + verify_example=True, + name=None, +): + assert isinstance(files, (list, tuple)) + + if name is None: + name = id_util.UniqueStr("OneRecReader_") + + return ( + flow.user_op_builder(name) + .Op("OneRecReader") + .Output("out") + .Attr("files", files) + .Attr("batch_size", batch_size) + .Attr("random_shuffle", random_shuffle) + .Attr("shuffle_mode", shuffle_mode) + .Attr("shuffle_buffer_size", shuffle_buffer_size) + .Attr("shuffle_after_epoch", shuffle_after_epoch) + .Attr("verify_example", verify_example) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) diff --git a/oneflow/compatible_single_client_python/ops/diag_ops.py b/oneflow/compatible_single_client_python/ops/diag_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..a076e0dc58b85dea93b3f4b06997d42b8f52fd14 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/diag_ops.py @@ -0,0 +1,74 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from typing import Optional +import oneflow._oneflow_internal + + +@oneflow_export("diag") +def diag( + input: oneflow._oneflow_internal.BlobDesc, + diagonal: Optional[int] = 0, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator compute diagonal. + + If input is a vector, then returns a square matrix with the elements of input as the diagonal. + If input is a matrix, then returns a vector with the diagonal elements of input. + Args: + input (remote_blob_util.BlobDef): The input Blob. + diagonal (Optional[int], 0): The diagonal to consider. If diagonal = 0, it is the main diagonal. If diagonal > 0, it is above the main diagonal. If diagonal < 0, it is below the main diagonal. Defaults to 0. + + Returns: + remote_blob_util.BlobDef: The result Blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def Diag_Job(input: tp.Numpy.Placeholder((3, 3), dtype=flow.float32),) -> tp.Numpy: + return flow.diag(input) + + + input = np.array([[1.0, 2.0, 3.0], + [4.0, 5.0, 6.0], + [7.0, 8.0, 9.0],], dtype=np.float32) + out = Diag_Job(input) + # out [1. 5. 9.] + + """ + return ( + flow.user_op_builder(name if name is not None else id_util.UniqueStr("Diag_")) + .Op("diag") + .Input("in", [input]) + .Attr("diagonal", int(diagonal)) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) diff --git a/oneflow/compatible_single_client_python/ops/domain_ops.py b/oneflow/compatible_single_client_python/ops/domain_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..91b429f6b4939f7e1c73b72e0b1d9011c26fb642 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/domain_ops.py @@ -0,0 +1,48 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" + +import typing +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from oneflow.compatible_single_client_python.framework import id_util as id_util + +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + + +@oneflow_export("nn.fused_self_attention_query_mul_key_and_value") +def api_fused_self_attention_query_mul_key_and_value( + x: oneflow._oneflow_internal.BlobDesc, + head_size: int, + alpha: float = 1.0, + name: typing.Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + + if name is None: + name = id_util.UniqueStr("FusedSelfAttentionQueryMulKeyAndValue_") + + op = ( + flow.user_op_builder(name) + .Op("fused_self_attention_query_mul_key_and_value") + .Input("hidden_states", [x]) + .Attr("head_size", int(head_size)) + .Attr("alpha", float(alpha)) + .Output("query_mul_key") + .Output("value") + .Build() + ) + + qmk, v = op.InferAndTryRun().RemoteBlobList() + return qmk, v diff --git a/oneflow/compatible_single_client_python/ops/eager_nccl_ops.py b/oneflow/compatible_single_client_python/ops/eager_nccl_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..f9d516aef0e163ee4f67a10f116489ee201a7987 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/eager_nccl_ops.py @@ -0,0 +1,46 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from typing import Optional + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal + + +@oneflow_export("eager_nccl_all_reduce") +def eager_nccl_all_reduce( + x: oneflow._oneflow_internal.BlobDesc, + parallel_conf: str, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("EagerNcclAllReduce_") + ) + .Op("eager_nccl_all_reduce") + .Input("in", [x]) + .Output("out") + .Attr("parallel_conf", parallel_conf) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) diff --git a/oneflow/compatible_single_client_python/ops/get_variable.py b/oneflow/compatible_single_client_python/ops/get_variable.py new file mode 100644 index 0000000000000000000000000000000000000000..099afd31a4f13c3381178b2427c7633a83b4d3e7 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/get_variable.py @@ -0,0 +1,415 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import +from typing import Optional, Sequence, Union +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.framework import ( + compile_context as compile_context, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import runtime_mode as rt_mode +from oneflow.compatible_single_client_python.framework import ( + distribute as distribute_util, +) +from oneflow.compatible_single_client_python.experimental import ( + name_scope as name_scope, +) +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.job import initializer_conf_pb2 as initializer_conf_util +from oneflow.core.job import regularizer_conf_pb2 as regularizer_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.eager import boxing_util as boxing_util +from oneflow.compatible_single_client_python.eager import gradient_util as gradient_util +from oneflow.compatible_single_client_python.eager import op_executor as op_executor +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible import single_client as flow +from oneflow._oneflow_internal.oneflow.core.register import logical_blob_id as lbi_util +import oneflow._oneflow_internal +import os + +blob_register = oneflow._oneflow_internal.GetDefaultBlobRegister() + + +@oneflow_export("get_variable") +def api_get_variable( + name: str, + shape: Optional[Sequence[int]] = None, + dtype: Optional[flow.dtype] = flow.float32, + initializer: Optional[initializer_conf_util.InitializerConf] = None, + regularizer: Optional[regularizer_conf_util.RegularizerConf] = None, + trainable: Optional[bool] = None, + model_name: Optional[str] = None, + random_seed: Optional[int] = None, + parallel_distribution: Optional[ + Union[ + Sequence[oneflow._oneflow_internal.distribute.Distribute], + Sequence[str], + str, + ] + ] = None, + distribute: Optional[oneflow._oneflow_internal.distribute.Distribute] = None, + reuse: bool = True, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Create a variable or retrieve an existing one. + + Args: + name: Name of this variable. One variable could be shared by multiple OneFlow functions. `None` by default + shape: Shape of the variable. `None` by default + dtype: Data type of the variable. `None` by default + initializer: A initializer object. For instance, a :func:`~oneflow.compatible.single_client.ones_initializer`. `None` by default + trainable: A `bool` to indicate if this variable is trainable. `True` by default + model_name: A `string`. `'weight'` or `'bias'`. `None` by default + random_seed: Random seed for random initializers. `None` by default + + For example: + + Example 1: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def watch_handler(y: tp.Numpy): + print("out", y) + + + @flow.global_function() + def variable_Job() -> None: + init = flow.constant_initializer(1.25) + variable = flow.get_variable( + "variable-weight", + shape=(1, 3, 2, 2), + initializer=init, + trainable=True + ) + flow.watch(variable, watch_handler) + + + checkpoint = flow.train.CheckPoint() + checkpoint.init() + variable_Job() + + # out [[[[1.25 1.25] + # [1.25 1.25]] + + # [[1.25 1.25] + # [1.25 1.25]] + + # [[1.25 1.25] + # [1.25 1.25]]]] + + Example 2: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + def conv2d(input, filters, kernel_size, strides, padding, name): + input_shape = input.shape + weight_initializer = flow.truncated_normal(0.1) + weight_regularizer = flow.regularizers.l2(0.0005) + weight_shape = (filters, + input_shape[1], + kernel_size[0], + kernel_size[1]) + + weight = flow.get_variable( + name + "-weight", + shape=weight_shape, + initializer=weight_initializer, + regularizer=weight_regularizer, + ) + return flow.nn.conv2d(input, weight, strides, padding, name=name) + + + @flow.global_function() + def conv2d_Job(x: tp.Numpy.Placeholder((1, 64, 32, 32)) + ) -> tp.Numpy: + conv = conv2d(x, + filters=128, + kernel_size=[3, 3], + strides=2, + padding='SAME', + name="ConvLayer") + return conv + + + x = np.random.randn(1, 64, 32, 32).astype(np.float32) + out = conv2d_Job(x) + + # out.shape (1, 128, 16, 16) + + """ + if distribute is not None: + assert parallel_distribution is None + parallel_distribution = [distribute] + if parallel_distribution is None: + parallel_distribution = [] + if isinstance(parallel_distribution, str): + parallel_distribution = parallel_distribution.split(",") + assert isinstance(parallel_distribution, (list, tuple)) + + def distribute_to_str(dist): + if dist is None: + return "" + elif type(dist) is str: + return dist + elif type(dist) is oneflow._oneflow_internal.distribute.SplitDistribute: + return "S({})".format(dist.axis) + elif type(dist) is oneflow._oneflow_internal.distribute.BroadcastDistribute: + return "B" + else: + raise ValueError("unsupported distribute") + + parallel_distribution = list(map(distribute_to_str, parallel_distribution)) + + api = enable_if.unique([get_lazy_variable, get_eager_variable]) + return api( + name, + shape=shape, + dtype=dtype, + initializer=initializer, + regularizer=regularizer, + trainable=trainable, + model_name=model_name, + random_seed=random_seed, + parallel_distribution=parallel_distribution, + reuse=reuse, + ) + + +@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled) +def get_eager_variable( + name, + shape=None, + dtype=None, + initializer=None, + regularizer=None, + trainable=None, + model_name=None, + random_seed=None, + parallel_distribution=None, + reuse=True, +): + assert isinstance(name, str) + assert isinstance( + shape, (list, tuple) + ), "param shape should be a list or tuple of dimension" + + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + name = name_scope.GetJobNameScopePrefix(job_name) + name + sess = session_ctx.GetDefaultSession() + var_blob, job_var_blob = sess.TryGetVariableBlobOfJobFromStash(job_name, name) + + if reuse is False: + assert job_var_blob is None, ( + "variable '{}' already exists, " + "getting the same variable is not allowed " + 
"when reuse is False".format(name) + ) + + if job_var_blob is None: + op_conf = GenerateVariableOpConf( + name=name, + shape=shape, + dtype=dtype, + initializer=initializer, + regularizer=regularizer, + trainable=trainable, + model_name=model_name, + random_seed=random_seed, + parallel_distribution=parallel_distribution, + ) + op_attribute = compile_context.CurJobAddConsistentOp(op_conf) + if var_blob is None: + var_blob = CreateEagerVariableBlob(op_attribute) + op_executor.EagerInitVariableBlob(sess, op_conf, var_blob) + + assert isinstance(var_blob, oneflow._oneflow_internal.EagerConsistentBlob) + sess.StashVariableBlob4Job(job_name, op_conf.name, var_blob) + else: + assert isinstance(job_var_blob, oneflow._oneflow_internal.EagerConsistentBlob) + assert isinstance(var_blob, oneflow._oneflow_internal.EagerConsistentBlob) + assert var_blob.IdenticalTo(job_var_blob) + + bw_blob_register = gradient_util.GetDefaultBackwardBlobRegister() + bw_blob_register.TrySetObject4BlobName( + var_blob.logical_blob_name, var_blob.blob_object + ) + return var_blob + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def get_lazy_variable( + name, + shape=None, + dtype=None, + initializer=None, + regularizer=None, + trainable=None, + model_name=None, + random_seed=None, + parallel_distribution=None, + reuse=True, +): + assert isinstance(name, str) + assert isinstance( + shape, (list, tuple) + ), "param shape should be a list or tuple of dimension" + + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + name = name_scope.GetJobNameScopePrefix(job_name) + name + sess = session_ctx.GetDefaultSession() + var_blob, job_var_blob = sess.TryGetVariableBlobOfJobFromStash(job_name, name) + + if reuse is False: + assert job_var_blob is None, ( + "variable '{}' already exists, " + "getting the same variable is not allowed " + "when param reuse is False".format(name) + ) + + if job_var_blob is None: + op_conf = GenerateVariableOpConf( + name=name, + shape=shape, + dtype=dtype, + initializer=initializer, + regularizer=regularizer, + trainable=trainable, + model_name=model_name, + random_seed=random_seed, + parallel_distribution=parallel_distribution, + ) + job_var_blob = _CreateVariableBlob(op_conf) + assert isinstance(job_var_blob, oneflow._oneflow_internal.LazyConsistentBlob) + sess.StashVariableBlob4Job(job_name, op_conf.name, job_var_blob) + if var_blob is not None: + assert isinstance(var_blob, oneflow._oneflow_internal.LazyConsistentBlob) + assert var_blob.IdenticalTo(job_var_blob) + else: + assert isinstance(job_var_blob, oneflow._oneflow_internal.LazyConsistentBlob) + assert isinstance(var_blob, oneflow._oneflow_internal.LazyConsistentBlob) + assert var_blob.IdenticalTo(job_var_blob) + + return job_var_blob + + +def GenerateVariableOpConf( + name, + shape, + dtype=None, + initializer=None, + regularizer=None, + trainable=None, + model_name=None, + random_seed=None, + parallel_distribution=None, +): + op_conf = op_conf_util.OperatorConf() + op_conf.name = name + op_conf.variable_conf.shape.dim.extend(shape) + + assert dtype is not None + op_conf.variable_conf.data_type = oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype( + dtype + ) + + if rt_mode.CurrentMode() == rt_mode.NORMAL_MODE: + root_path = None + else: + root_path = ( + compile_context.GetCurJobConfigProto().default_initialize_with_snapshot_path() + ) + dir_path = os.path.join(root_path, name) + file_path = os.path.join(dir_path, "out") + if root_path and os.path.isfile(file_path): + 
op_conf.variable_conf.initialize_with_snapshot.path = dir_path + op_conf.variable_conf.initialize_with_snapshot.key = "out" + else: + if root_path: + print("{} not found, will be initialized".format(file_path)) + if initializer is not None: + op_conf.variable_conf.initializer.CopyFrom(initializer) + + if regularizer is not None: + op_conf.variable_conf.regularizer.CopyFrom(regularizer) + + if trainable is not None: + op_conf.variable_conf.trainable = trainable + + if model_name is not None: + op_conf.variable_conf.model_name = model_name + + if parallel_distribution is None: + parallel_distribution = [] + + op_conf.variable_conf.parallel_distribution.extend(parallel_distribution) + + if random_seed is not None: + op_conf.variable_conf.random_seed = random_seed + + op_conf.variable_conf.out = "out" + return op_conf + + +def _CreateVariableBlob(op_conf): + compile_context.CurJobAddConsistentOp(op_conf) + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = op_conf.name + lbi.blob_name = op_conf.variable_conf.out + return remote_blob_util.RemoteBlob(lbi) + + +def CreateEagerVariableBlob(op_attribute, job_name=""): + bn_in_op2blob_object = oneflow._oneflow_internal.deprecated.BnInOp2BlobObject() + + def BuildInstruction(builder): + parallel_conf = flow.current_scope().device_parallel_desc_symbol.parallel_conf + cfg_op_attribute = oneflow._oneflow_internal.deprecated.MakeOpAttributeByString( + str(op_attribute) + ) + builder.StatelessCall( + cfg_op_attribute, parallel_conf, bn_in_op2blob_object, boxing_util.BoxingTo, + ) + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildInstruction) + lbi = lbi_util.LogicalBlobId() + lbi.set_op_name(op_attribute.op_conf.name) + lbi.set_blob_name(op_attribute.op_conf.variable_conf.out) + if not isinstance(lbi, lbi_util.LogicalBlobId): + cfg_lbi = lbi_util.LogicalBlobId() + cfg_lbi.set_op_name(lbi.op_name) + cfg_lbi.set_blob_name(lbi.blob_name) + lbi = cfg_lbi + return oneflow._oneflow_internal.EagerConsistentBlob( + lbi, + blob_object=bn_in_op2blob_object["out"], + blob_register=blob_register, + job_name=job_name, + ) diff --git a/oneflow/compatible_single_client_python/ops/initializer_util.py b/oneflow/compatible_single_client_python/ops/initializer_util.py new file mode 100644 index 0000000000000000000000000000000000000000..1e8b9c972874fad5ca375d4119f04b4f968e88b5 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/initializer_util.py @@ -0,0 +1,1250 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +import functools +import math + +import numpy as np + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.job import initializer_conf_pb2 as initializer_conf_util +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from typing import Optional, Sequence, Union + + +@oneflow_export("empty_initializer") +def empty_initializer( + dtype: flow.dtype = flow.float, +) -> initializer_conf_util.InitializerConf: + initializer = initializer_conf_util.InitializerConf() + empty_conf = initializer_conf_util.EmptyInitializerConf() + initializer.empty_conf.CopyFrom(empty_conf) + return initializer + + +@oneflow_export("constant_initializer") +def constant_initializer( + value: float = 0, dtype: flow.dtype = flow.float +) -> initializer_conf_util.InitializerConf: + r"""Initializer that generates blob with constant values. + + Args: + value (float, optional): A Python scalar. All elements of the initialized variable . Defaults to 0. + dtype (flow.dtype, optional): Default data type. Defaults to flow.float. + + Raises: + NotImplementedError: Do not support such data type. + + Returns: + initializer_conf_util.InitializerConf: An InitializerConf object. + + For example: + + Example 1: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def watch_handler(y: tp.Numpy): + print("out", y) + + + @flow.global_function() + def constant_Job() -> None: + init = flow.constant_initializer(2.5) + blob = flow.get_variable( + "blob-weight", + shape=(3, ), + initializer=init, + trainable=True + ) + flow.watch(blob, watch_handler) + + + checkpoint = flow.train.CheckPoint() + checkpoint.init() + constant_Job() + + # out [2.5 2.5 2.5] + + Example 2: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def conv2d_constant_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32)) + ) -> tp.Numpy: + initializer = flow.constant_initializer(0.01) + conv2d = flow.layers.conv2d( + x, + filters=128, + kernel_size=3, + strides=1, + padding='SAME', + kernel_initializer=initializer, + name="Conv2d" + ) + return conv2d + + + x = np.random.randn(1, 256, 32, 32).astype(np.float32) + out = conv2d_constant_Job(x) + + # out.shape (1, 128, 32, 32) + + """ + initializer = initializer_conf_util.InitializerConf() + if dtype in [flow.float, flow.double]: + setattr(initializer.constant_conf, "value", float(value)) + elif dtype in [ + flow.int8, + flow.int32, + flow.int64, + ]: + setattr(initializer.constant_int_conf, "value", int(value)) + else: + raise NotImplementedError("Do not support such data type") + + return initializer + + +@oneflow_export("zeros_initializer") +def zeros_initializer( + dtype: flow.dtype = flow.float, +) -> initializer_conf_util.InitializerConf: + r"""Initializer that generates blobs initialized to 0 + + Args: + dtype (flow.dtype, optional): Default data type. Defaults to flow.float. + + Returns: + initializer_conf_util.InitializerConf: constant_initializer + + For example: + + Example 1: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def watch_handler(y: tp.Numpy): + print("out", y) + + + @flow.global_function() + def zeros_Job() -> None: + init = flow.zeros_initializer() + blob = flow.get_variable( + "blob-weight", + shape=(3, ), + initializer=init, + trainable=True + ) + flow.watch(blob, watch_handler) + + + checkpoint = flow.train.CheckPoint() + checkpoint.init() + zeros_Job() + + # out [0. 0. 0.] + + Example 2: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def conv2d_zero_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32)) + ) -> tp.Numpy: + initializer = flow.zeros_initializer() + conv2d = flow.layers.conv2d( + x, + filters=128, + kernel_size=3, + strides=1, + padding='SAME', + kernel_initializer=initializer, + name="Conv2d" + ) + return conv2d + + + x = np.random.randn(1, 256, 32, 32).astype(np.float32) + out = conv2d_zero_Job(x) + + # out.shape (1, 128, 32, 32) + + """ + return constant_initializer(0.0, dtype) + + +@oneflow_export("ones_initializer") +def ones_initializer( + dtype: flow.dtype = flow.float, +) -> initializer_conf_util.InitializerConf: + r"""Initializer that generates blobs initialized to 1. + + Args: + dtype (flow.dtype, optional): Default data type. Defaults to flow.float. + + Returns: + initializer_conf_util.InitializerConf: constant_initializer + + For example: + + Example 1: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def watch_handler(y: tp.Numpy): + print("out", y) + + + @flow.global_function() + def ones_Job() -> None: + init = flow.ones_initializer() + blob = flow.get_variable( + "blob-weight", + shape=(3, ), + initializer=init, + trainable=True + ) + flow.watch(blob, watch_handler) + + + checkpoint = flow.train.CheckPoint() + checkpoint.init() + ones_Job() + + # out [1. 1. 1.] + + Example 2: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def conv2d_one_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32)) + ) -> tp.Numpy: + initializer = flow.ones_initializer() + conv2d = flow.layers.conv2d( + x, + filters=128, + kernel_size=3, + strides=1, + padding='SAME', + kernel_initializer=initializer, + name="Conv2d" + ) + return conv2d + + + x = np.random.randn(1, 256, 32, 32).astype(np.float32) + out = conv2d_one_Job(x) + + # out.shape (1, 128, 32, 32) + + """ + return constant_initializer(1.0, dtype) + + +@oneflow_export("random_uniform_initializer") +def random_uniform_initializer( + minval: float = 0, maxval: float = 1, dtype: flow.dtype = flow.float +) -> initializer_conf_util.InitializerConf: + r"""Initializer that generates blobs with a uniform distribution. + + Args: + minval (float, optional): A python scalar. Lower bound of the range of random values to generate. Defaults to 0. + maxval (float, optional): A python scalar. Upper bound of the range of random values to generate. Defaults to 1. + dtype (flow.dtype, optional): Default data type. Defaults to flow.float. + + Raises: + NotImplementedError: Do not support such data type. + + Returns: + initializer_conf_util.InitializerConf: Initial configuration + + For example: + + Example 1: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def watch_handler(y: tp.Numpy): + print("out", y) + + + @flow.global_function() + def random_uniform_Job() -> None: + init = flow.random_uniform_initializer(minval=0, maxval=0.5) + blob = flow.get_variable( + "blob-weight", + shape=(3, ), + initializer=init, + trainable=True + ) + flow.watch(blob, watch_handler) + + + checkpoint = flow.train.CheckPoint() + checkpoint.init() + random_uniform_Job() + + # out [0.07557311 0.3943565 0.31875622] + + Example 2: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def conv2d_random_uniform_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32)) + ) -> tp.Numpy: + initializer = flow.random_uniform_initializer(minval=0, maxval=0.5) + + conv2d = flow.layers.conv2d( + x, + filters=128, + kernel_size=3, + strides=1, + padding='SAME', + kernel_initializer=initializer, + name="Conv2d" + ) + return conv2d + + + x = np.random.randn(1, 256, 32, 32).astype(np.float32) + out = conv2d_random_uniform_Job(x) + + # out.shape (1, 128, 32, 32) + + """ + assert minval <= maxval + initializer = initializer_conf_util.InitializerConf() + if dtype in [flow.float, flow.double]: + setattr(initializer.random_uniform_conf, "min", float(minval)) + setattr(initializer.random_uniform_conf, "max", float(maxval)) + elif dtype in [ + flow.int8, + flow.int32, + flow.int64, + ]: + setattr(initializer.random_uniform_int_conf, "min", int(minval)) + setattr(initializer.random_uniform_int_conf, "max", int(maxval)) + else: + raise NotImplementedError("Do not support such data type") + + return initializer + + +@oneflow_export("random_normal_initializer") +def random_normal_initializer( + mean: float = 0.0, + stddev: float = 1.0, + seed: Optional[int] = None, + dtype: Optional[flow.dtype] = None, +) -> initializer_conf_util.InitializerConf: + r"""Initializer that generates blob with a normal distribution. + + Args: + mean (float, optional): A python scalar. Mean of the random values to generate.. Defaults to 0.0. + stddev (float, optional): A python scalar. Standard deviation of the random values to generate. Defaults to 1.0. + seed (Optional[int], optional): None. Not support yet. Defaults to None. + dtype (Optional[flow.dtype], optional): . Defaults to None. + + Returns: + initializer_conf_util.InitializerConf: Initial configuration + + For example: + + Example 1: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def watch_handler(y: tp.Numpy): + print("out", y) + + + @flow.global_function() + def random_normal_Job() -> None: + init = flow.random_normal_initializer(mean=1, stddev=1) + blob = flow.get_variable( + "blob-weight", + shape=(3, ), + initializer=init, + trainable=True + ) + flow.watch(blob, watch_handler) + + + checkpoint = flow.train.CheckPoint() + checkpoint.init() + random_normal_Job() + + # out [1.4190257 2.7663114 1.7114428] + + Example 2: + + .. 
code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def conv2d_random_normal_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32))
+        ) -> tp.Numpy:
+            initializer = flow.random_normal_initializer(mean=0, stddev=1)
+
+            conv2d = flow.layers.conv2d(
+                x,
+                filters=128,
+                kernel_size=3,
+                strides=1,
+                padding='SAME',
+                kernel_initializer=initializer,
+                name="Conv2d"
+            )
+            return conv2d
+
+
+        x = np.random.randn(1, 256, 32, 32).astype(np.float32)
+        out = conv2d_random_normal_Job(x)
+
+        # out.shape (1, 128, 32, 32)
+
+    """
+    assert seed is None, "seed is not supported yet"
+    assert dtype is None, "dtype is not supported yet"
+    initializer = initializer_conf_util.InitializerConf()
+    setattr(initializer.random_normal_conf, "mean", float(mean))
+    setattr(initializer.random_normal_conf, "std", float(stddev))
+
+    return initializer
+
+
+@oneflow_export("truncated_normal_initializer")
+def truncated_normal_initializer(
+    mean: float = 0.0, stddev: float = 1.0
+) -> initializer_conf_util.InitializerConf:
+    r"""Initializer that generates a truncated normal distribution.
+
+    Args:
+        mean (float, optional): A scalar (float). Defaults to 0.0.
+        stddev (float, optional): A scalar (float). Defaults to 1.0.
+
+    Returns:
+        initializer_conf_util.InitializerConf: Initial configuration
+
+    For example:
+
+    Example 1:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+
+        def watch_handler(y: tp.Numpy):
+            print("out", y)
+
+
+        @flow.global_function()
+        def truncated_normal_Job() -> None:
+            init = flow.truncated_normal_initializer(mean=1, stddev=1)
+            blob = flow.get_variable(
+                "blob-weight",
+                shape=(3, ),
+                initializer=init,
+                trainable=True
+            )
+            flow.watch(blob, watch_handler)
+
+
+        checkpoint = flow.train.CheckPoint()
+        checkpoint.init()
+        truncated_normal_Job()
+
+        # out [1.8303236  0.09787154 0.83049864]
+
+    Example 2:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def conv2d_truncated_normal_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32))
+        ) -> tp.Numpy:
+            initializer = flow.truncated_normal_initializer(mean=0, stddev=1)
+
+            conv2d = flow.layers.conv2d(
+                x,
+                filters=128,
+                kernel_size=3,
+                strides=1,
+                padding='SAME',
+                kernel_initializer=initializer,
+                name="Conv2d"
+            )
+            return conv2d
+
+
+        x = np.random.randn(1, 256, 32, 32).astype(np.float32)
+        out = conv2d_truncated_normal_Job(x)
+
+        # out.shape (1, 128, 32, 32)
+
+    """
+    initializer = initializer_conf_util.InitializerConf()
+    setattr(initializer.truncated_normal_conf, "mean", float(mean))
+    setattr(initializer.truncated_normal_conf, "std", float(stddev))
+    return initializer
+
+
+@oneflow_export("glorot_uniform_initializer", "xavier_uniform_initializer")
+def glorot_uniform_initializer(
+    data_format: str = "",
+) -> initializer_conf_util.InitializerConf:
+    r"""Initializer that generates a Xavier uniform distribution.
+
+    It can also be called as `oneflow.compatible.single_client.glorot_uniform_initializer`.
+
+    The equation is:
+
+    .. math::
+
+        W \sim U(-\sqrt{\frac{6}{n_j + n_{j+1}}}, \sqrt{\frac{6}{n_j + n_{j+1}}})
+
+    :math:`U` means uniform distribution
+
+    :math:`n_j` means the number of units in the j-th layer
+
+    Args:
+        data_format (str, optional): The data format. Defaults to "".
+ + Returns: + initializer_conf_util.InitializerConf: Initial configuration + + For example: + + Example 1: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def watch_handler(y: tp.Numpy): + print("out", y) + + + @flow.global_function() + def xavier_uniform_Job() -> None: + init = flow.xavier_uniform_initializer() + blob = flow.get_variable( + "blob-weight", + shape=(3, 3), + initializer=init, + trainable=True + ) + flow.watch(blob, watch_handler) + + + checkpoint = flow.train.CheckPoint() + checkpoint.init() + xavier_uniform_Job() + + # out [[-0.14424723 -0.9532095 -0.08723891] + # [-0.8011227 -0.29729813 -0.26769108] + # [ 0.9208976 -0.5971756 -0.15077025]] + + Example 2: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def conv2d_xavier_uniform_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32)) + ) -> tp.Numpy: + initializer = flow.xavier_uniform_initializer() + conv2d = flow.layers.conv2d( + x, + filters=128, + kernel_size=3, + strides=1, + padding='SAME', + kernel_initializer=initializer, + name="Conv2d" + ) + return conv2d + + + x = np.random.randn(1, 256, 32, 32).astype(np.float32) + out = conv2d_xavier_uniform_Job(x) + + # out.shape (1, 128, 32, 32) + + """ + return variance_scaling_initializer(1.0, "fan_avg", "random_uniform", data_format) + + +@oneflow_export("glorot_normal_initializer", "xavier_normal_initializer") +def glorot_normal_initializer( + data_format: str = "", +) -> initializer_conf_util.InitializerConf: + r"""Initializer that generates a Xavier normal distribution. + + It also can be called as `oneflow.compatible.single_client.glorot_normal_initializer`. + + The equation is: + + .. math:: + + W\sim N(0, \sqrt{\frac{{2}}{{n_j+n_{j+1}}}}) + + :math:`N` means normal distribution + + :math:`n_j` means the amount of Nth layer parameters + + Args: + data_format (str, optional): The data format. Defaults to "". + + Returns: + initializer_conf_util.InitializerConf: Initial configuration + + For example: + + Example 1: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def watch_handler(y: tp.Numpy): + print("out", y) + + + @flow.global_function() + def xavier_normal_Job() -> None: + init = flow.xavier_normal_initializer() + blob = flow.get_variable( + "blob-weight", + shape=(3, 3), + initializer=init, + trainable=True + ) + flow.watch(blob, watch_handler) + + + checkpoint = flow.train.CheckPoint() + checkpoint.init() + xavier_normal_Job() + + # out [[ 0.5908121 -0.10804518 -0.6148571 ] + # [ 1.4007381 -0.08172473 0.36579943] + # [-0.6461796 -0.15923311 0.33653972]] + + Example 2: + + .. 
code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def conv2d_xavier_normal_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32))
+        ) -> tp.Numpy:
+            initializer = flow.xavier_normal_initializer()
+            conv2d = flow.layers.conv2d(
+                x,
+                filters=128,
+                kernel_size=3,
+                strides=1,
+                padding='SAME',
+                kernel_initializer=initializer,
+                name="Conv2d"
+            )
+            return conv2d
+
+
+        x = np.random.randn(1, 256, 32, 32).astype(np.float32)
+        out = conv2d_xavier_normal_Job(x)
+
+        # out.shape (1, 128, 32, 32)
+
+    """
+    return variance_scaling_initializer(1.0, "fan_avg", "random_normal", data_format)
+
+
+@oneflow_export("variance_scaling_initializer")
+def variance_scaling_initializer(
+    scale: float = 1.0,
+    mode: str = "fan_in",
+    distribution: str = "truncated_normal",
+    data_format: str = "",
+) -> initializer_conf_util.InitializerConf:
+    r"""Initializer that generates a truncated normal, random normal, or random uniform distribution whose scale adapts to the weight shape.
+
+    When the distribution is "truncated_normal", the equation is:
+
+    .. math::
+
+        W \sim N(0, \sqrt{\frac{scale}{n}})
+
+    If mode is "fan_in", the "n" is the number of input units in the weight Blob.
+
+    If mode is "fan_out", the "n" is the number of output units in the weight Blob.
+
+    If mode is "fan_avg", the "n" is the average of the number of input and output units in the weight Blob.
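+
+    For example, a dense weight of shape (128, 256) has fan_in = 256 and
+    fan_out = 128, so "fan_avg" uses n = (256 + 128) / 2 = 192.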
+
+    Args:
+        scale (float, optional): Scaling factor (positive float). Defaults to 1.0.
+        mode (str, optional): One of "fan_in", "fan_out", "fan_avg". Defaults to "fan_in".
+        distribution (str, optional): Random distribution to use, one of "truncated_normal", "random_normal" or "random_uniform". Defaults to "truncated_normal".
+        data_format (str, optional): A string, one of "N...C" or "NC...". Defaults to "".
+
+    Returns:
+        initializer_conf_util.InitializerConf: Initial configuration
+
+    For example:
+
+    Example 1:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+
+        def watch_handler(y: tp.Numpy):
+            print("out", y)
+
+
+        @flow.global_function()
+        def variance_scale_Job() -> None:
+            init = flow.variance_scaling_initializer(scale=2.0, mode="fan_avg")
+            blob = flow.get_variable(
+                "blob-weight",
+                shape=(3, 3),
+                initializer=init,
+                trainable=True
+            )
+            flow.watch(blob, watch_handler)
+
+
+        checkpoint = flow.train.CheckPoint()
+        checkpoint.init()
+        variance_scale_Job()
+
+        # out [[-0.13931477  0.12266728 -0.9434968 ]
+        #      [-0.49665168  0.10231158 -0.19194333]
+        #      [-0.7902896  -1.7034698  -0.38695997]]
+
+    Example 2:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def conv2d_variance_scaling_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32))
+        ) -> tp.Numpy:
+            initializer = flow.variance_scaling_initializer(mode="fan_out")
+            conv2d = flow.layers.conv2d(
+                x,
+                filters=128,
+                kernel_size=3,
+                strides=1,
+                padding='SAME',
+                kernel_initializer=initializer,
+                name="Conv2d"
+            )
+            return conv2d
+
+
+        x = np.random.randn(1, 256, 32, 32).astype(np.float32)
+        out = conv2d_variance_scaling_Job(x)
+
+        # out.shape (1, 128, 32, 32)
+
+    """
+    initializer = initializer_conf_util.InitializerConf()
+    setattr(initializer.variance_scaling_conf, "scale", float(scale))
+    setattr(
+        initializer.variance_scaling_conf, "variance_norm", _get_variance_norm(mode),
+    )
+    setattr(
+        initializer.variance_scaling_conf,
+        "distribution",
+        _get_random_distribution(distribution),
+    )
+    setattr(
+        initializer.variance_scaling_conf, "data_format", _get_data_format(data_format),
+    )
+    return initializer
+
+
+@oneflow_export("kaiming_initializer")
+def kaiming_initializer(
+    shape: Sequence[int],
+    distribution: str = "random_normal",
+    mode: str = "fan_in",
+    nonlinearity: str = "leaky_relu",
+    negative_slope: float = 0.0,
+    data_format: str = "NCHW",
+) -> initializer_conf_util.InitializerConf:
+    r"""Initialize weight according to the method described in `Delving deep into
+    rectifiers: Surpassing human-level performance on ImageNet classification`
+    - He, K. et al. (2015), using a normal or uniform distribution.
+
+    When distribution is "random_normal", the equation is:
+
+    .. math::
+
+        W \sim N(0, \sqrt{\frac{2}{n}})
+
+    When distribution is "random_uniform", the equation is:
+
+    .. math::
+
+        W \sim U(-\sqrt{\frac{6}{n}}, \sqrt{\frac{6}{n}})
+
+    If mode is "fan_in", the "n" is the number of input units in the weight Blob.
+
+    If mode is "fan_out", the "n" is the number of output units in the weight Blob.
+
+    If mode is "fan_avg", the "n" is the average of the number of input and output units in the weight Blob.
+
+    Args:
+        shape (Sequence[int]): Blob shape.
+        distribution (str, optional): 'random_normal' or 'random_uniform'. Defaults to "random_normal".
+        mode (str, optional): 'fan_in', 'fan_out' or 'fan_avg'. Defaults to "fan_in".
+        nonlinearity (str, optional): None, 'tanh', 'sigmoid', 'relu' or 'leaky_relu'. Defaults to "leaky_relu".
+        negative_slope (float, optional): The negative slope of leaky_relu. Defaults to 0.0.
+        data_format (str, optional): 'NCHW' or 'NHWC'. Defaults to "NCHW".
+
+    Raises:
+        NotImplementedError: If the distribution is not 'random_normal' or 'random_uniform'.
+
+    Returns:
+        initializer_conf_util.InitializerConf: flow.random_normal_initializer or flow.random_uniform_initializer
+
+    For example:
+
+    Example 1:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+
+        def watch_handler(y: tp.Numpy):
+            print("out", y)
+
+
+        @flow.global_function()
+        def kaiming_Job() -> None:
+            init = flow.kaiming_initializer(shape=(3, 3),
+                                            mode="fan_avg",
+                                            nonlinearity="relu")
+            blob = flow.get_variable(
+                "blob-weight",
+                shape=(3, 3),
+                initializer=init,
+                trainable=True
+            )
+            flow.watch(blob, watch_handler)
+
+
+        checkpoint = flow.train.CheckPoint()
+        checkpoint.init()
+        kaiming_Job()
+
+        # out [[ 0.54521346  0.32585594  1.3474437 ]
+        #      [ 0.30729076 -0.19158769  0.2709008 ]
+        #      [-0.95830524 -0.05093324  0.28178614]]
+
+    Example 2:
+
+    ..
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def conv2d_kaiming_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32)) + ) -> tp.Numpy: + initializer = flow.kaiming_initializer(shape=(1, 256, 32, 32)) + conv2d = flow.layers.conv2d( + x, + filters=128, + kernel_size=3, + strides=1, + padding='SAME', + kernel_initializer=initializer, + name="Conv2d" + ) + return conv2d + + + x = np.random.randn(1, 256, 32, 32).astype(np.float32) + out = conv2d_kaiming_Job(x) + + # out.shape (1, 128, 32, 32) + + """ + assert isinstance(shape, (tuple, flow.Size)) + # Kaiming Initialization only deals with FC, Conv and Deconv's weight + assert len(shape) >= 2 + elem_cnt = functools.reduce(lambda a, b: a * b, shape, 1) + assert elem_cnt > 0 + assert distribution in ["random_normal", "random_uniform"] + assert mode in ["fan_in", "fan_out", "fan_avg"] + assert nonlinearity in [None, "tanh", "sigmoid", "relu", "leaky_relu"] + assert data_format in ["NCHW", "NHWC"] + + fan = _CalcFan(shape, mode, _get_data_format(data_format)) + gain = CalcGain(nonlinearity, negative_slope) + std = gain / math.sqrt(fan) + if distribution == "random_normal": + return flow.random_normal_initializer(0.0, std) + elif distribution == "random_uniform": + bound = math.sqrt(3.0) * std + return flow.random_uniform_initializer(-bound, bound) + else: + raise NotImplementedError("Only support normal and uniform distribution") + + +def _get_variance_norm(mode): + if mode.lower() == "fan_in": + return initializer_conf_util.kFanIn + elif mode.lower() == "fan_out": + return initializer_conf_util.kFanOut + elif mode.lower() == "fan_avg": + return initializer_conf_util.kAverage + else: + raise ValueError("Invalid variance_norm") + + +def _get_random_distribution(distribution): + if distribution.lower() == "truncated_normal": + return initializer_conf_util.kTruncatedNormal + elif distribution.lower() == "random_normal": + return initializer_conf_util.kRandomNormal + elif distribution.lower() == "random_uniform": + return initializer_conf_util.kRandomUniform + else: + raise ValueError("Invalid random_distribution") + + +def _get_data_format(data_format): + assert isinstance(data_format, str), "data_format must be a string" + + if data_format.startswith("NC"): + return "channels_first" + elif data_format.startswith("N") and data_format.endswith("C"): + return "channels_last" + else: + assert data_format == "", ValueError( + 'data_format must be "N...C" or "NC..." 
or ""' + ) + return "" + + +def _CalcFan(shape, mode, data_format): + if len(shape) == 2: # Linear + fan_in = shape[1] + fan_out = shape[0] + else: # Conv and Deconv + fan_in = 1.0 + for dim in shape[1:]: + fan_in *= dim + fan_out = shape[0] + if data_format == "channels_first": + for dim in shape[2:]: + fan_out *= dim + elif data_format == "channels_last": + for dim in shape[1:-1]: + fan_out *= dim + else: + raise NotImplementedError( + "Only support 'channels_first' and 'channels_last' data format" + ) + + if mode == "fan_avg": + return (float(fan_in) + float(fan_out)) / 2 + elif mode == "fan_in": + return float(fan_in) + elif mode == "fan_out": + return float(fan_out) + else: + raise NotImplementedError("Only support 'fan_in', 'fan_out' and 'fan_avg' mode") + + +def CalcGain(nonlinearity, param): + linear_fns = [ + "linear", + "conv1d", + "conv2d", + "conv3d", + "conv_transpose1d", + "conv_transpose2d", + "conv_transpose3d", + ] + if nonlinearity in linear_fns or nonlinearity == "sigmoid": + return 1 + elif nonlinearity == "tanh": + return 5.0 / 3 + elif nonlinearity == "relu": + return math.sqrt(2.0) + elif nonlinearity == "leaky_relu": + if param is None: + negative_slope = 0.01 + elif ( + not isinstance(param, bool) + and isinstance(param, int) + or isinstance(param, float) + ): + # True/False are instances of int, hence check above + negative_slope = param + else: + raise ValueError("negative_slope {} not a valid number".format(param)) + return math.sqrt(2.0 / (1 + negative_slope ** 2)) + elif nonlinearity == "selu": + return ( + 3.0 / 4 + ) # Value found empirically (https://github.com/pytorch/pytorch/pull/50664) + else: + raise ValueError("Unsupported nonlinearity {}".format(nonlinearity)) + + +_init_map = {} + + +def register_initializer(flow_initializer): + def deco(func): + _init_map[flow_initializer] = func + return func + + return deco + + +def GetInitializer(initializer_conf, random_seed, var_blob_shape): + f = None + for m in _init_map: + if initializer_conf.HasField(m): + f = _init_map[m] + break + assert f is not None, initializer_conf + return f(getattr(initializer_conf, m), random_seed, var_blob_shape) + + +@register_initializer("constant_conf") +@register_initializer("constant_int_conf") +def ConstantInitializerImpl( + initializer_conf: Union[ + initializer_conf_util.ConstantInitializerConf, + initializer_conf_util.ConstantIntInitializerConf, + ], + random_seed: int, + var_blob_shape: Sequence[int], +): + return lambda length: np.full((length,), initializer_conf.value) + + +@register_initializer("random_normal_conf") +def RandomNormalInitializerImpl( + initializer_conf: initializer_conf_util.RandomNormalInitializerConf, + random_seed: int, + var_blob_shape: Sequence[int], +): + rng = np.random.default_rng(random_seed) + return lambda length: rng.normal( + loc=initializer_conf.mean, scale=initializer_conf.std, size=length + ) + + +@register_initializer("random_uniform_conf") +def RandomUniformInitializerImpl( + initializer_conf: initializer_conf_util.RandomUniformIntInitializerConf, + random_seed: int, + var_blob_shape: Sequence[int], +): + rng = np.random.default_rng(random_seed) + return lambda length: rng.uniform( + low=initializer_conf.min, + high=np.nextafter(initializer_conf.max, float("inf")), + size=length, + ) + + +@register_initializer("random_uniform_int_conf") +def RandomUniformIntInitializerImpl( + initializer_conf: initializer_conf_util.RandomUniformIntInitializerConf, + random_seed: int, + var_blob_shape: Sequence[int], +): + rng = 
np.random.default_rng(random_seed)
+    return lambda length: rng.integers(
+        low=initializer_conf.min, high=initializer_conf.max, size=length
+    )
+
+
+def RngTruncatedNormal(mean, std, length, rng):
+    # Rejection-sample normal values, keeping only those within 2 std of the
+    # mean, until `length` values have been collected.
+    truncated_value = 2 * std
+    data = np.empty(length)
+    generated = 0
+    ratio = 1.2
+    while generated < length:
+        remaining = length - generated
+        norm = rng.normal(mean, std, size=int(remaining * ratio))
+        truncated = norm[np.abs(norm - mean) < truncated_value][:remaining]
+        data[generated : generated + len(truncated)] = truncated
+        generated += len(truncated)
+    return data
+
+
+@register_initializer("truncated_normal_conf")
+def TruncatedNormalInitializerImpl(
+    initializer_conf: initializer_conf_util.TruncatedNormalInitializerConf,
+    random_seed: int,
+    var_blob_shape: Sequence[int],
+):
+    rng = np.random.default_rng(random_seed)
+    return lambda length: RngTruncatedNormal(
+        initializer_conf.mean, initializer_conf.std, length, rng,
+    )
+
+
+def GenInitialFan(initializer_conf, var_blob_shape: Sequence[int]):
+    variance_norm = initializer_conf.variance_norm
+    data_format = initializer_conf.data_format
+    fan_in = np.prod(var_blob_shape[1:]).astype(np.int).item()
+    fan_out = var_blob_shape[0]
+    # _get_data_format produces "channels_first"/"channels_last", so match that
+    # spelling here.
+    if data_format == "channels_first":
+        fan_out *= np.prod(var_blob_shape[2:]).astype(np.int).item()
+    else:
+        fan_out *= np.prod(var_blob_shape[1:-1]).astype(np.int).item()
+
+    if variance_norm == initializer_conf_util.kAverage:
+        fan = (fan_in + fan_out) / 2
+    elif variance_norm == initializer_conf_util.kFanIn:
+        fan = fan_in
+    elif variance_norm == initializer_conf_util.kFanOut:
+        fan = fan_out
+    else:
+        raise NotImplementedError("Invalid variance_norm")
+    return fan
+
+
+@register_initializer("variance_scaling_conf")
+def VarianceScalingInitializerImpl(
+    initializer_conf: initializer_conf_util.VarianceScalingInitializerConf,
+    random_seed: int,
+    var_blob_shape: Sequence[int],
+):
+    scale = initializer_conf.scale / GenInitialFan(initializer_conf, var_blob_shape)
+    distribution = initializer_conf.distribution
+    rng = np.random.default_rng(random_seed)
+    if distribution == initializer_conf_util.kTruncatedNormal:
+        stddev = math.sqrt(scale) / 0.87962566103423978
+        return lambda length: RngTruncatedNormal(0, stddev, length, rng)
+    elif distribution == initializer_conf_util.kRandomNormal:
+        stddev = math.sqrt(scale)
+        return lambda length: rng.normal(0, stddev, size=length,)
+    elif distribution == initializer_conf_util.kRandomUniform:
+        limit = math.sqrt(3.0 * scale)
+        return lambda length: rng.uniform(low=-limit, high=limit, size=length)
+    else:
+        raise NotImplementedError("Invalid random_distribution")
+
+
+@register_initializer("empty_conf")
+def EmptyInitializerImpl(
+    initializer_conf: initializer_conf_util.EmptyInitializerConf,
+    random_seed: int,
+    var_blob_shape: Sequence[int],
+):
+    return None
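+
+
+# How the pieces above fit together (a sketch, not part of the public API):
+# GetInitializer looks up whichever conf field is set on an InitializerConf in
+# _init_map and returns a callable mapping a flat element count to a numpy
+# array, e.g.
+#
+#   conf = initializer_conf_util.InitializerConf()
+#   conf.constant_conf.value = 1.25
+#   f = GetInitializer(conf, random_seed=0, var_blob_shape=(4,))
+#   f(4)  # array([1.25, 1.25, 1.25, 1.25])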
diff --git a/oneflow/compatible_single_client_python/ops/layers.py b/oneflow/compatible_single_client_python/ops/layers.py
new file mode 100644
index 0000000000000000000000000000000000000000..1b42ce272b5db3636d67633b277673154c003033
--- /dev/null
+++ b/oneflow/compatible_single_client_python/ops/layers.py
@@ -0,0 +1,1581 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+from typing import Callable, Optional, Union, Tuple, Sequence
+from oneflow.compatible_single_client_python.oneflow_export import oneflow_export
+
+from oneflow.compatible import single_client as flow
+from oneflow.core.operator import op_conf_pb2 as op_conf_util
+from oneflow.core.job import initializer_conf_pb2 as initializer_conf_util
+from oneflow.core.job import regularizer_conf_pb2 as regularizer_conf_util
+from oneflow.compatible_single_client_python.framework import (
+    distribute as distribute_util,
+)
+from oneflow.compatible_single_client_python.framework import (
+    remote_blob as remote_blob_util,
+)
+import oneflow._oneflow_internal
+
+IntPair = Tuple[int, int]
+
+
+@oneflow_export("layers.dense")
+def dense(
+    inputs: oneflow._oneflow_internal.BlobDesc,
+    units: int,
+    activation: Optional[
+        Callable[
+            [oneflow._oneflow_internal.BlobDesc, str],
+            oneflow._oneflow_internal.BlobDesc,
+        ]
+    ] = None,
+    use_bias: bool = True,
+    kernel_initializer: Optional[initializer_conf_util.InitializerConf] = None,
+    bias_initializer: Optional[initializer_conf_util.InitializerConf] = None,
+    kernel_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None,
+    bias_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None,
+    trainable: bool = True,
+    name: str = "Dense",
+    model_distribute: oneflow._oneflow_internal.distribute.Distribute = oneflow._oneflow_internal.distribute.broadcast(),
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Fully-connected layer.
+
+    The fully-connected layer multiplies the input Blob by a weight matrix and produces an output Blob.
+
+    Args:
+        inputs (oneflow._oneflow_internal.BlobDesc): An input `Blob` with at least 2 axes.
+        units (int): A positive integer for the dimensionality of the output space.
+        activation (Optional[Callable[[oneflow._oneflow_internal.BlobDesc, str], oneflow._oneflow_internal.BlobDesc]], optional): Activation function. Defaults to None.
+        use_bias (bool, optional): A boolean that specifies whether to use a bias vector. Defaults to True.
+        kernel_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for the kernel weights matrix. Defaults to None.
+        bias_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for the bias vector. Defaults to None.
+        kernel_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): Regularizer function applied to the kernel weights matrix. Defaults to None.
+        bias_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): Regularizer for the bias vector. Defaults to None.
+        trainable (bool, optional): A boolean that specifies whether to train the variables. Defaults to True.
+        name (Optional[str], optional): This layer's name. Defaults to "Dense".
+        model_distribute (oneflow._oneflow_internal.distribute.Distribute, optional): Defines the way to distribute the model. Defaults to oneflow._oneflow_internal.distribute.broadcast().
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: An N-D `Blob` with the shape of (batch_size, units).
+
+    Raises:
+        ValueError: If the dimension of the input `Blob` is less than 2.
+        ValueError: If model_distribute is not one of auto, broadcast, split.
+        ValueError: If the input is not a 2D `Blob` when the model distribute is split.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def dense_Job(x: tp.Numpy.Placeholder((1, 256))
+        ) -> tp.Numpy:
+            initializer = flow.truncated_normal(0.1)
+            hidden = flow.layers.dense(
+                x,
+                512,
+                activation=flow.nn.relu,
+                kernel_initializer=initializer,
+                name="dense1",
+            )
+            return hidden
+
+
+        x = np.random.randn(1, 256).astype(np.float32)
+        out = dense_Job(x)
+
+        # out.shape (1, 512)
+
+    """
+    in_shape = inputs.shape
+    in_num_axes = len(in_shape)
+    assert in_num_axes >= 2
+
+    assert (
+        model_distribute is oneflow._oneflow_internal.distribute.auto()
+        or model_distribute is oneflow._oneflow_internal.distribute.broadcast()
+        or model_distribute is oneflow._oneflow_internal.distribute.split(0)
+    )
+
+    if model_distribute is oneflow._oneflow_internal.distribute.split(0):
+        # split distribution cannot be combined with the reshape path below
+        assert in_num_axes == 2
+
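+    # Inputs with more than 2 axes are flattened to (-1, in_shape[-1]) for the
+    # matmul below and reshaped back to in_shape[:-1] + (units,) at the end.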
+    if in_num_axes > 2:
+        inputs = flow.reshape(inputs, (-1, in_shape[-1]))
+
+    with flow.scope.namespace(name):
+        if kernel_initializer is None:
+            kernel_initializer = flow.constant_initializer(0)
+
+        weight = flow.get_variable(
+            name="weight",
+            shape=(units, inputs.shape[1]),
+            dtype=inputs.dtype,
+            initializer=kernel_initializer,
+            regularizer=kernel_regularizer,
+            trainable=trainable,
+            model_name="weight",
+            distribute=model_distribute,
+            reuse=False,
+        )
+        weight = weight.with_distribute(model_distribute)
+
+        out = flow.matmul(a=inputs, b=weight, transpose_b=True, name="matmul")
+
+        if use_bias:
+            if bias_initializer is None:
+                bias_initializer = flow.constant_initializer(0)
+
+            bias = flow.get_variable(
+                name="bias",
+                shape=(units,),
+                dtype=inputs.dtype,
+                initializer=bias_initializer,
+                regularizer=bias_regularizer,
+                trainable=trainable,
+                model_name="bias",
+                distribute=model_distribute,
+                reuse=False,
+            )
+            bias = bias.with_distribute(model_distribute)
+            out = flow.nn.bias_add(out, bias, name="bias_add")
+
+        if callable(activation):
+            out = activation(out, name="activation")
+
+    if in_num_axes > 2:
+        out = flow.reshape(out, in_shape[:-1] + (units,))
+
+    return out
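+
+
+# Weight-shape note (illustrative): for NCW input of shape (1, 64, 32) with
+# filters=128, kernel_size=3 and groups=4, conv1d below builds a weight of
+# shape (128, 64 // 4, 3) == (128, 16, 3).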
+
+
+@oneflow_export("layers.conv1d")
+def conv1d(
+    inputs: oneflow._oneflow_internal.BlobDesc,
+    filters: int,
+    kernel_size: Union[int, Tuple[int]] = 1,
+    strides: Union[int, Tuple[int]] = 1,
+    padding: Union[str, Tuple[IntPair, IntPair, IntPair]] = "VALID",
+    data_format: str = "NCW",
+    dilation_rate: Optional[Union[int, Tuple[int]]] = None,
+    groups: int = 1,
+    activation: Optional[
+        Callable[
+            [oneflow._oneflow_internal.BlobDesc, str],
+            oneflow._oneflow_internal.BlobDesc,
+        ]
+    ] = None,
+    use_bias: bool = True,
+    kernel_initializer: Optional[initializer_conf_util.InitializerConf] = None,
+    bias_initializer: Optional[initializer_conf_util.InitializerConf] = None,
+    kernel_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None,
+    bias_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None,
+    trainable: bool = True,
+    name: str = "Conv1d",
+    weight_name: Optional[str] = None,
+    bias_name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""1D convolution layer.
+
+    This layer computes a 1-D convolution with a 3D input Blob and filters.
+
+    Args:
+        inputs (oneflow._oneflow_internal.BlobDesc): A 3D input `Blob`.
+        filters (int): An integer specifying the dimensionality of the output space.
+        kernel_size (Union[int, List[int], Tuple[int]], optional): An integer or tuple/list specifying the width of the convolution window. Defaults to 1.
+        strides (Union[int, List[int], Tuple[int]], optional): An integer or tuple/list specifying the strides of the convolution window along the width. When it is an integer, the same value is applied to all spatial dimensions. Defaults to 1.
+        padding (str, Tuple[IntPair, IntPair, IntPair], optional): `"SAME"`, `"SAME_LOWER"`, `"SAME_UPPER"` or `"VALID"` indicating the type of padding algorithm to use, or a tuple of 3 integer pairs giving the explicit paddings at the start and end of each dimension. Defaults to "VALID".
+        data_format (str, optional): A string specifying the format of the input `Blob`, one of "NCW" or "NWC". "NCW" corresponds to channels_first, i.e. the input `Blob` with shape (batch_size, channels, width). "NWC" corresponds to channels_last, i.e. the input `Blob` with shape (batch_size, width, channels). Defaults to "NCW".
+        dilation_rate (Optional[Union[int, Tuple[int]]], optional): An integer or tuple/list specifying the dilation rate for the dilated convolution. When it is an integer, the same dilation rate is applied to all dimensions. Defaults to None, which means 1.
+        groups (int, optional): A positive integer specifying the number of groups for a grouped convolution. Defaults to 1.
+        activation (Optional[Callable[[oneflow._oneflow_internal.BlobDesc, str], oneflow._oneflow_internal.BlobDesc]], optional): Activation function. Defaults to None.
+        use_bias (bool, optional): A boolean that specifies whether to use a bias vector. Defaults to True.
+        kernel_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for the kernel weights matrix. Defaults to None.
+        bias_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for the bias vector. Defaults to None.
+        kernel_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): Regularizer for the kernel weights matrix. Defaults to None.
+        bias_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): Regularizer for the bias vector. Defaults to None.
+        trainable (bool, optional): A boolean that specifies whether to train variables. Defaults to True.
+        name (Optional[str], optional): This layer's name. Defaults to "Conv1d".
+
+    Raises:
+        ValueError: If the type of kernel_size is not one of integer, list, tuple.
+        ValueError: The number of groups must be positive and the number of filters must be divisible by it.
+        ValueError: If data_format is not one of 'NCW', 'NWC'.
+        ValueError: If the number of input channels is not divisible by the number of groups or is less than the number of groups.
+        ValueError: The number of groups must be one when data_format is 'NWC'.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A 3D `Blob` with the shape of (batch_size, filters, new_width).
+
+    For example:
+
+    ..
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def conv1d_Job(x: tp.Numpy.Placeholder((1, 64, 32)) + ) -> tp.Numpy: + initializer = flow.truncated_normal(0.1) + conv1d = flow.layers.conv1d( + x, + filters=128, + kernel_size=3, + strides=1, + padding='SAME', + kernel_initializer=initializer, + name="Conv1d" + ) + return conv1d + + + x = np.random.randn(1, 64, 32).astype(np.float32) + out = conv1d_Job(x) + + # out.shape (1, 128, 32) + + """ + + if isinstance(kernel_size, int): + kernel_size = (kernel_size,) + else: + assert isinstance(kernel_size, (list, tuple)) + assert len(kernel_size) == 1 + kernel_size = tuple(kernel_size) + + assert isinstance(groups, int) + assert groups > 0 + assert groups <= filters + assert filters % groups == 0 + + if data_format.upper() == "NCW": + assert groups <= inputs.shape[1] + assert inputs.shape[1] % groups == 0 + weight_shape = (filters, inputs.shape[1] // groups) + kernel_size + elif data_format.upper() == "NWC": + assert groups == 1 + assert groups <= inputs.shape[2] + assert inputs.shape[2] % groups == 0 + weight_shape = ( + filters, + kernel_size[0], + inputs.shape[2] // groups, + ) + else: + raise ValueError("data_format must be in NCW or NWC") + + if kernel_initializer is None: + kernel_initializer = flow.xavier_uniform_initializer(data_format=data_format) + + if weight_name is None: + with flow.scope.namespace(name): + weight = flow.get_variable( + name="weight", + shape=weight_shape, + dtype=inputs.dtype, + initializer=kernel_initializer, + regularizer=kernel_regularizer, + trainable=trainable, + model_name="weight", + reuse=False, + ) + else: + weight = flow.get_variable( + name=weight_name, + shape=weight_shape, + dtype=inputs.dtype, + initializer=kernel_initializer, + regularizer=kernel_regularizer, + trainable=trainable, + model_name="weight", + reuse=False, + ) + + output = flow.nn.conv1d( + inputs, + weight, + strides, + padding, + data_format, + dilation_rate, + groups=groups, + name=name, + ) + + if use_bias: + if bias_initializer is None: + bias_initializer = flow.constant_initializer(0) + + if bias_name is None: + with flow.scope.namespace(name): + bias = flow.get_variable( + name="bias", + shape=(filters,), + dtype=inputs.dtype, + initializer=bias_initializer, + regularizer=bias_regularizer, + trainable=trainable, + model_name="bias", + reuse=False, + ) + else: + bias = flow.get_variable( + name=bias_name, + shape=(filters,), + dtype=inputs.dtype, + initializer=bias_initializer, + regularizer=bias_regularizer, + trainable=trainable, + model_name="bias", + reuse=False, + ) + + with flow.scope.namespace(name): + output = flow.nn.bias_add(output, bias, data_format, name="bias_add") + + if callable(activation): + with flow.scope.namespace(name): + output = activation(output, name="activation") + + return output + + +@oneflow_export("layers.conv2d") +def conv2d( + inputs: oneflow._oneflow_internal.BlobDesc, + filters: int, + kernel_size: Union[int, IntPair] = 1, + strides: Union[int, IntPair] = 1, + padding: Union[str, Tuple[IntPair, IntPair, IntPair, IntPair]] = "VALID", + data_format: str = "NCHW", + dilation_rate: Optional[Union[int, IntPair]] = None, + groups: int = 1, + activation: Optional[ + Callable[ + [oneflow._oneflow_internal.BlobDesc, str], + oneflow._oneflow_internal.BlobDesc, + ] + ] = None, + use_bias: bool = True, + kernel_initializer: Optional[initializer_conf_util.InitializerConf] = None, + 
bias_initializer: Optional[initializer_conf_util.InitializerConf] = None,
+    kernel_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None,
+    bias_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None,
+    trainable: bool = True,
+    name: str = "Conv2d",
+    weight_name: Optional[str] = None,
+    bias_name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""2D convolution layer.
+
+    This layer computes a 2D convolution with a 4D input Blob and filters.
+
+    Args:
+        inputs (oneflow._oneflow_internal.BlobDesc): A 4D input `Blob`.
+        filters (int): An integer specifying the dimensionality of the output space.
+        kernel_size (Union[int, List[int], Tuple[int]], optional): An integer or tuple/list specifying the height and width of the convolution window. When it is an integer, a square window is applied to the input. Defaults to 1.
+        strides (Union[int, List[int], Tuple[int]], optional): An integer or tuple/list specifying the strides of the convolution window along the height and width. When it is an integer, the same value is applied to all spatial dimensions. Defaults to 1.
+        padding (str, Tuple[IntPair, IntPair, IntPair, IntPair], optional): `"SAME"`, `"SAME_LOWER"`, `"SAME_UPPER"` or `"VALID"` indicating the type of padding algorithm to use, or a tuple of 4 integer pairs giving the explicit paddings at the start and end of each dimension. Defaults to "VALID".
+        data_format (str, optional): A string specifying the format of the input `Blob`, one of "NCHW" or "NHWC". "NCHW" corresponds to channels_first, i.e. the input `Blob` with shape (batch_size, channels, height, width). "NHWC" corresponds to channels_last, i.e. the input `Blob` with shape (batch_size, height, width, channels). Defaults to "NCHW".
+        dilation_rate (int, optional): An integer or tuple/list specifying the dilation rate for the dilated convolution. When it is an integer, the same dilation rate is applied to all dimensions. Defaults to None, which means 1.
+        groups (int, optional): A positive integer specifying the number of groups for a grouped convolution. Defaults to 1.
+        activation (Optional[Callable[[oneflow._oneflow_internal.BlobDesc, str], oneflow._oneflow_internal.BlobDesc]], optional): Activation function. Defaults to None.
+        use_bias (bool, optional): A boolean that specifies whether to use a bias vector. Defaults to True.
+        kernel_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for the kernel weights matrix. Defaults to None.
+        bias_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for the bias vector. Defaults to None.
+        kernel_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): Regularizer for the kernel weights matrix. Defaults to None.
+        bias_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): Regularizer for the bias vector. Defaults to None.
+        trainable (bool, optional): A boolean that specifies whether to train variables. Defaults to True.
+        name (Optional[str], optional): This layer's name. Defaults to "Conv2d".
+        weight_name (Optional[str], optional): This weight's name. Defaults to None.
+        bias_name (Optional[str], optional): This bias's name. Defaults to None.
+
+    Raises:
+        ValueError: If the type of kernel_size is not one of integer, list, tuple.
+        ValueError: The number of groups must be positive and the number of filters must be divisible by it.
+        ValueError: If data_format is not one of 'NCHW', 'NHWC'.
+ ValueError: If number of input channels is not divisible by number of groups or less than number of groups. + ValueError: Number of group must be one when data_format is 'NHWC'. + + Returns: + oneflow._oneflow_internal.BlobDesc: A 4D `Blob` with the shape of (batch_size, filters, new_height, new_width). + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def conv2d_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32)) + ) -> tp.Numpy: + initializer = flow.truncated_normal(0.1) + conv2d = flow.layers.conv2d( + x, + filters=128, + kernel_size=3, + strides=1, + padding='SAME', + kernel_initializer=initializer, + name="Conv2d" + ) + return conv2d + + + x = np.random.randn(1, 256, 32, 32).astype(np.float32) + out = conv2d_Job(x) + + # out.shape (1, 128, 32, 32) + + """ + + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size) + else: + assert isinstance(kernel_size, (list, tuple)) + assert len(kernel_size) == 2 + kernel_size = tuple(kernel_size) + + assert isinstance(groups, int) + assert groups > 0 + assert groups <= filters + assert filters % groups == 0 + + if data_format.upper() == "NCHW": + assert groups <= inputs.shape[1] + assert inputs.shape[1] % groups == 0 + weight_shape = (filters, inputs.shape[1] // groups) + kernel_size + elif data_format.upper() == "NHWC": + assert groups == 1 + assert groups <= inputs.shape[3] + assert inputs.shape[3] % groups == 0 + weight_shape = ( + filters, + kernel_size[0], + kernel_size[1], + inputs.shape[3] // groups, + ) + else: + raise ValueError("data_format must be in NCHW or NHWC") + + if kernel_initializer is None: + kernel_initializer = flow.xavier_uniform_initializer(data_format=data_format) + + if weight_name is None: + with flow.scope.namespace(name): + weight = flow.get_variable( + name="weight", + shape=weight_shape, + dtype=inputs.dtype, + initializer=kernel_initializer, + regularizer=kernel_regularizer, + trainable=trainable, + model_name="weight", + reuse=False, + ) + else: + weight = flow.get_variable( + name=weight_name, + shape=weight_shape, + dtype=inputs.dtype, + initializer=kernel_initializer, + regularizer=kernel_regularizer, + trainable=trainable, + model_name="weight", + reuse=False, + ) + output = flow.nn.conv2d( + inputs, + weight, + strides=strides, + padding=padding, + bias=None, + data_format=data_format, + dilations=dilation_rate, + groups=groups, + name=name, + ) + + if use_bias: + if bias_initializer is None: + bias_initializer = flow.constant_initializer(0) + + if bias_name is None: + with flow.scope.namespace(name): + bias = flow.get_variable( + name="bias", + shape=(filters,), + dtype=inputs.dtype, + initializer=bias_initializer, + regularizer=bias_regularizer, + trainable=trainable, + model_name="bias", + reuse=False, + ) + else: + bias = flow.get_variable( + name=bias_name, + shape=(filters,), + dtype=inputs.dtype, + initializer=bias_initializer, + regularizer=bias_regularizer, + trainable=trainable, + model_name="bias", + reuse=False, + ) + + with flow.scope.namespace(name): + output = flow.nn.bias_add(output, bias, data_format, name="bias_add") + + if callable(activation): + with flow.scope.namespace(name): + output = activation(output, name="activation") + + return output + + +@oneflow_export("layers.conv3d") +def conv3d( + inputs: oneflow._oneflow_internal.BlobDesc, + filters: int, + kernel_size: Union[int, Sequence[int]] = 1, + strides: Union[int, 
Sequence[int]] = 1,
+    padding: Union[str, Tuple[IntPair, IntPair, IntPair, IntPair, IntPair]] = "VALID",
+    data_format: str = "NCDHW",
+    dilation_rate: Optional[Union[int, IntPair]] = None,
+    groups: int = 1,
+    activation: Optional[
+        Callable[
+            [oneflow._oneflow_internal.BlobDesc, str],
+            oneflow._oneflow_internal.BlobDesc,
+        ]
+    ] = None,
+    use_bias: bool = True,
+    kernel_initializer: Optional[initializer_conf_util.InitializerConf] = None,
+    bias_initializer: Optional[initializer_conf_util.InitializerConf] = None,
+    kernel_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None,
+    bias_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None,
+    trainable: bool = True,
+    name: str = "Conv3d",
+    weight_name: Optional[str] = None,
+    bias_name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""3D convolution layer.
+
+    This layer computes a 3D convolution with a 5D input Blob and filters.
+
+    Args:
+        inputs (oneflow._oneflow_internal.BlobDesc): A 5D input `Blob`.
+        filters (int): An integer specifying the dimensionality of the output space.
+        kernel_size (Union[int, List[int], Sequence[int]], optional): An integer or tuple/list specifying the depth, height and width of the convolution window. When it is an integer, a cubic window is applied to the input. Defaults to 1.
+        strides (Union[int, List[int], Sequence[int]], optional): An integer or tuple/list specifying the strides of the convolution window along the depth, height and width. When it is an integer, the same value is applied to all spatial dimensions. Defaults to 1.
+        padding (Union[str, Tuple[IntPair, IntPair, IntPair, IntPair, IntPair]], optional): Either `"SAME"`, `"SAME_LOWER"`, `"SAME_UPPER"` or `"VALID"` indicating the type of padding algorithm to use, or a tuple of `IntPair`s indicating the explicit paddings at the start and end of each dimension. Defaults to "VALID".
+        data_format (str, optional): A string specifying the format of the input `Blob`, one of "NCDHW" or "NDHWC". "NCDHW" corresponds to channels_first, i.e. an input `Blob` with shape (batch_size, channels, depth, height, width); "NDHWC" corresponds to channels_last, i.e. an input `Blob` with shape (batch_size, depth, height, width, channels). Defaults to "NCDHW".
+        dilation_rate (int, optional): An integer or tuple/list specifying the dilation rate for the dilated convolution. When it is an integer, the same dilation rate is applied to all dimensions. Defaults to 1.
+        groups (int, optional): A positive integer specifying the number of groups for a grouped convolution. Defaults to 1.
+        activation (Optional[Callable[[oneflow._oneflow_internal.BlobDesc, str], oneflow._oneflow_internal.BlobDesc]], optional): Activation function. Defaults to None.
+        use_bias (bool, optional): A boolean specifying whether to use a bias vector. Defaults to True.
+        kernel_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for the kernel weights matrix. Defaults to None.
+        bias_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for the bias vector. Defaults to None.
+        kernel_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): Regularizer for the kernel weights matrix. Defaults to None.
+        bias_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): Regularizer for the bias vector. Defaults to None.
+        trainable (bool, optional): A boolean specifying whether to train the variables. Defaults to True.
+        name (str, optional): This layer's name. Defaults to "Conv3d".
+        weight_name (Optional[str], optional): This weight's name. Defaults to None.
+        bias_name (Optional[str], optional): This bias's name. Defaults to None.
+
+    Raises:
+        ValueError: If the type of kernel_size is not one of integer, list, tuple.
+        ValueError: The number of groups must be positive and the number of filters must be divisible by it.
+        ValueError: If data_format is not one of 'NCDHW', 'NDHWC'.
+        ValueError: If the number of input channels is not divisible by the number of groups or is less than the number of groups.
+        ValueError: The number of groups must be 1 when data_format is 'NDHWC'.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A 5D `Blob` with the shape of (batch_size, filters, new_depth, new_height, new_width).
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def conv3d_Job(x: tp.Numpy.Placeholder((1, 64, 16, 16, 16))
+        ) -> tp.Numpy:
+            initializer = flow.truncated_normal(0.1)
+            conv3d = flow.layers.conv3d(
+                x,
+                filters=128,
+                kernel_size=3,
+                strides=1,
+                padding='SAME',
+                kernel_initializer=initializer,
+                name="Conv3d"
+            )
+            return conv3d
+
+
+        x = np.random.randn(1, 64, 16, 16, 16).astype(np.float32)
+        out = conv3d_Job(x)
+
+        # out.shape (1, 128, 16, 16, 16)
+
+    """
+    need_transpose = 0
+    if data_format.upper() == "NDHWC":  # NDHWC is not supported before cudnn 8.0
+        need_transpose = 1
+        data_format = "NCDHW"
+
+    if need_transpose:
+        inputs = flow.transpose(inputs, perm=[0, 4, 1, 2, 3])
+        # an explicit `NDHWC` padding, e.g. [0, 1, 1, 1, 0], must move its
+        # channel entry to the `NCDHW` position, e.g. [0, 0, 1, 1, 1]
+        if isinstance(padding, (list, tuple)):
+            padding = list(padding)
+            padding[1], padding[4] = padding[4], padding[1]
+
+    if isinstance(kernel_size, int):
+        kernel_size = (kernel_size, kernel_size, kernel_size)
+    else:
+        assert isinstance(kernel_size, (list, tuple))
+        assert len(kernel_size) == 3
+        kernel_size = tuple(kernel_size)
+
+    assert isinstance(groups, int)
+    assert groups > 0
+    assert groups <= filters
+    assert filters % groups == 0
+
+    if data_format.upper() == "NCDHW":
+        assert groups <= inputs.shape[1]
+        assert inputs.shape[1] % groups == 0
+        weight_shape = (filters, inputs.shape[1] // groups) + kernel_size
+    elif data_format.upper() == "NDHWC":
+        assert groups == 1
+        # channels sit on the last axis in NDHWC
+        assert groups <= inputs.shape[4]
+        assert inputs.shape[4] % groups == 0
+        weight_shape = (
+            filters,
+            kernel_size[0],
+            kernel_size[1],
+            kernel_size[2],
+            inputs.shape[4] // groups,
+        )
+    else:
+        raise ValueError("data_format must be in NCDHW or NDHWC")
+
+    if kernel_initializer is None:
+        kernel_initializer = flow.xavier_uniform_initializer(data_format=data_format)
+
+    if weight_name is None:
+        with flow.scope.namespace(name):
+            weight = flow.get_variable(
+                name="weight",
+                shape=weight_shape,
+                dtype=inputs.dtype,
+                initializer=kernel_initializer,
+                regularizer=kernel_regularizer,
+                trainable=trainable,
+                model_name="weight",
+                reuse=False,
+            )
+    else:
+        weight = flow.get_variable(
+            name=weight_name,
+            shape=weight_shape,
+            dtype=inputs.dtype,
+            initializer=kernel_initializer,
+            regularizer=kernel_regularizer,
+            trainable=trainable,
+            model_name="weight",
+            reuse=False,
+        )
+
+    output = flow.nn.conv3d(
+        inputs,
+        weight,
+        strides,
+        padding,
+        data_format,
+        dilation_rate,
+        groups=groups,
+        name=name,
+    )
+
+    if use_bias:
+        if bias_initializer is None:
+            bias_initializer = flow.constant_initializer(0)
+
+        if bias_name is None:
+            with flow.scope.namespace(name):
+                bias = flow.get_variable(
+                    name="bias",
+                    shape=(filters,),
+                    dtype=inputs.dtype,
+                    initializer=bias_initializer,
+                    regularizer=bias_regularizer,
+                    trainable=trainable,
+                    model_name="bias",
+                    reuse=False,
+                )
+        else:
+            bias = flow.get_variable(
+                name=bias_name,
+                shape=(filters,),
+                dtype=inputs.dtype,
+                initializer=bias_initializer,
+                regularizer=bias_regularizer,
+                trainable=trainable,
+                model_name="bias",
+                reuse=False,
+            )
+
+        with flow.scope.namespace(name):
+            output = flow.nn.bias_add(output, bias, data_format, name="bias_add")
+
+    if callable(activation):
+        with flow.scope.namespace(name):
+            output = activation(output, name="activation")
+
+    if need_transpose:
+        output = flow.transpose(output, perm=[0, 2, 3, 4, 1])
+
+    return output
+
+
+@oneflow_export("layers.layer_norm")
+def layer_norm(
+    inputs: oneflow._oneflow_internal.BlobDesc,
+    center: bool = True,
+    scale: bool = True,
+    trainable: bool = True,
+    begin_norm_axis: int = 1,
+    begin_params_axis: int = -1,
+    epsilon: float = 1e-5,
+    name: str = "LayerNorm",
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Layer Normalization.
+
+    Args:
+        inputs (oneflow._oneflow_internal.BlobDesc): Input `Blob`.
+        center (bool, optional): A boolean specifying whether to shift the input `Blob`. Defaults to True.
+        scale (bool, optional): A boolean specifying whether to scale the input `Blob`. Defaults to True.
+        trainable (bool, optional): A boolean specifying whether to train the variables. Defaults to True.
+        begin_norm_axis (int, optional): An integer specifying the first axis to normalize over. Defaults to 1.
+        begin_params_axis (int, optional): An integer specifying the first axis of the scale and shift parameters. Defaults to -1.
+        epsilon (float, optional): A small float added to the variance to avoid division by zero. Defaults to 1e-5.
+        name (str, optional): This layer's name. Defaults to "LayerNorm".
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A normalized `Blob` with the same shape as the input.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def layer_norm_Job(x: tp.Numpy.Placeholder((1, 64, 128, 128))
+        ) -> tp.Numpy:
+            layer_norm = flow.layers.layer_norm(
+                x,
+                name="LayerNorm1"
+            )
+            return layer_norm
+
+
+        x = np.random.randn(1, 64, 128, 128).astype(np.float32)
+        out = layer_norm_Job(x)
+
+        # out.shape (1, 64, 128, 128)
+
+    """
+    if center is False and scale is False:
+        trainable = False
+
+    beta = None
+    gamma = None
+
+    param_shape = inputs.shape[begin_params_axis:]
+    if center:
+        with flow.scope.namespace(name):
+            beta = flow.get_variable(
+                name="beta",
+                shape=param_shape,
+                dtype=inputs.dtype,
+                initializer=flow.constant_initializer(0.0),
+                trainable=trainable,
+                model_name="beta",
+                distribute=oneflow._oneflow_internal.distribute.broadcast(),
+                reuse=False,
+            )
+
+    if scale:
+        with flow.scope.namespace(name):
+            gamma = flow.get_variable(
+                name="gamma",
+                shape=param_shape,
+                dtype=inputs.dtype,
+                initializer=flow.constant_initializer(1.0),
+                trainable=trainable,
+                model_name="gamma",
+                distribute=oneflow._oneflow_internal.distribute.broadcast(),
+                reuse=False,
+            )
+
+    if flow.current_scope().device_parallel_desc_symbol.device_tag == "cpu":
+        if begin_norm_axis < 0:
+            begin_norm_axis = begin_norm_axis + len(inputs.shape)
+
+        reduce_axis = []
+        for dim in range(len(inputs.shape)):
+            if dim >= begin_norm_axis:
+                reduce_axis.append(dim)
+        mean, variance = flow.nn.moments(inputs, reduce_axis, keepdims=True)
+
+        axis = begin_norm_axis
+        normalized = flow.nn.batch_normalization(
+            x=inputs,
+            mean=mean,
+            variance=variance,
+            variance_epsilon=epsilon,
+            axis=axis,
+            name=name,
+        )
+        nd_params_shape = [1] * (len(inputs.shape) - len(param_shape)) + list(
+            param_shape
+        )
+        affined = normalized
+        if gamma:
+            gamma = flow.reshape(gamma, nd_params_shape)
+            affined *= gamma
+        if beta:
+            beta = flow.reshape(beta, nd_params_shape)
+            affined += beta
+        return affined
+    elif flow.current_scope().device_parallel_desc_symbol.device_tag == "gpu":
+        op_builder = (
+            flow.user_op_builder(name)
+            .Op("layer_norm")
+            .Input("x", [inputs])
+            .Output("y")
+            .Output("mean")
+            .Output("inv_variance")
+        )
+
+        if beta is not None:
+            op_builder.Input("beta", [beta])
+        if gamma is not None:
+            op_builder.Input("gamma", [gamma])
+            op_builder.Output("normalized")
+        op_builder.Attr("center", center)
+        op_builder.Attr("scale", scale)
+        op_builder.Attr("begin_norm_axis", begin_norm_axis)
+        op_builder.Attr("begin_params_axis", begin_params_axis)
+        op_builder.Attr("epsilon", epsilon)
+
+        return op_builder.Build().InferAndTryRun().RemoteBlobList()[0]
+    else:
+        raise NotImplementedError
+
+
+@oneflow_export("layers.layer_norm_grad")
+def layer_norm_grad(
+    dy: oneflow._oneflow_internal.BlobDesc,
+    x: oneflow._oneflow_internal.BlobDesc,
+    mean: oneflow._oneflow_internal.BlobDesc,
+    inv_variance: oneflow._oneflow_internal.BlobDesc,
+    begin_norm_axis: int = 1,
+    name: str = "LayerNormGrad",
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Layer normalization backward pass: computes the gradient with respect to the input.
+
+    Args:
+        dy (oneflow._oneflow_internal.BlobDesc): Upstream derivatives.
+        x (oneflow._oneflow_internal.BlobDesc): Input `Blob`.
+        mean (oneflow._oneflow_internal.BlobDesc): Mean over neurons.
+        inv_variance (oneflow._oneflow_internal.BlobDesc): Inverse variance over neurons.
+        begin_norm_axis (int, optional): An integer specifying the first axis to normalize over. Defaults to 1.
+        name (str, optional): This layer's name. Defaults to "LayerNormGrad".
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: Gradient with respect to the input `Blob`.
+    """
+    op = (
+        flow.user_op_builder(name)
+        .Op("layer_norm_grad")
+        .Input("dy", [dy])
+        .Input("x", [x])
+        .Input("mean", [mean])
+        .Input("inv_variance", [inv_variance])
+        .Output("dx")
+        .Attr("begin_norm_axis", begin_norm_axis)
+        .Attr("epsilon", 1e-5)
+        .Build()
+    )
+    return op.InferAndTryRun().SoleOutputBlob()
+
+
+@oneflow_export("layers.layer_norm_param_grad")
+def layer_norm_param_grad(
+    dy: oneflow._oneflow_internal.BlobDesc,
+    norm: oneflow._oneflow_internal.BlobDesc,
+    gamma: oneflow._oneflow_internal.BlobDesc,
+    begin_params_axis: int = -1,
+    name: str = "LayerNormParamGrad",
+) -> Tuple[
+    oneflow._oneflow_internal.BlobDesc,
+    oneflow._oneflow_internal.BlobDesc,
+    oneflow._oneflow_internal.BlobDesc,
+]:
+    r"""Backward pass of layer normalization for its parameters.
+
+    Args:
+        dy (oneflow._oneflow_internal.BlobDesc): Upstream derivatives.
+        norm (oneflow._oneflow_internal.BlobDesc): Normalized output.
+        gamma (oneflow._oneflow_internal.BlobDesc): Scale parameter.
+        begin_params_axis (int, optional): An integer specifying the first axis of the parameters. Defaults to -1.
+        name (Optional[str], optional): This layer's name. Defaults to 'LayerNormParamGrad'.
+
+    Returns:
+        Tuple[oneflow._oneflow_internal.BlobDesc]:
+            normalized_diff: Gradient with respect to the input `Blob`.
+            beta_diff: Gradient with respect to the shift parameter beta.
+            gamma_diff: Gradient with respect to the scale parameter gamma.
+    """
+    op = (
+        flow.user_op_builder(name)
+        .Op("layer_norm_param_grad")
+        .Input("dy", [dy])
+        .Input("normalized", [norm])
+        .Input("gamma", [gamma])
+        .Output("normalized_diff")
+        .Output("beta_diff")
+        .Output("gamma_diff")
+        .Output("reduce_buf")
+        .Attr("begin_params_axis", begin_params_axis)
+        .Build()
+    )
+
+    (
+        normalized_diff,
+        beta_diff,
+        gamma_diff,
+        reduce_buf,
+    ) = op.InferAndTryRun().RemoteBlobList()
+
+    return normalized_diff, beta_diff, gamma_diff
+
+
+def _get_batch_normalization_variables(
+    name,
+    gamma_name,
+    beta_name,
+    moving_mean_name,
+    moving_variance_name,
+    center,
+    scale,
+    params_shape,
+    params_dtype,
+    trainable,
+    beta_initializer,
+    beta_regularizer,
+    gamma_initializer,
+    gamma_regularizer,
+    moving_mean_initializer,
+    moving_variance_initializer,
+):
+    def get_beta_var(name):
+        if center:
+            beta = flow.get_variable(
+                name=name,
+                shape=params_shape,
+                dtype=params_dtype,
+                initializer=beta_initializer or flow.zeros_initializer(),
+                regularizer=beta_regularizer,
+                trainable=trainable,
+                distribute=oneflow._oneflow_internal.distribute.broadcast(),
+                reuse=False,
+            )
+        else:
+            beta = flow.constant(0, dtype=params_dtype, shape=params_shape, name=name)
+        return beta
+
+    if beta_name is None:
+        with flow.scope.namespace(name):
+            beta = get_beta_var("beta")
+    else:
+        beta = get_beta_var(beta_name)
+
+    def get_gamma_var(name):
+        if scale:
+            gamma = flow.get_variable(
+                name=name,
+                shape=params_shape,
+                dtype=params_dtype,
+                initializer=gamma_initializer or flow.ones_initializer(),
+                regularizer=gamma_regularizer,
+                trainable=trainable,
+                distribute=oneflow._oneflow_internal.distribute.broadcast(),
+                reuse=False,
+            )
+        else:
+            gamma = flow.constant(1, dtype=params_dtype, shape=params_shape, name=name)
+        return gamma
+
+    if gamma_name is None:
+        with flow.scope.namespace(name):
+            gamma = get_gamma_var("gamma")
+    else:
+        gamma = get_gamma_var(gamma_name)
+
+    def get_moving_mean_var(name):
+        moving_mean = flow.get_variable(
+            name=name,
+            shape=params_shape,
+            dtype=params_dtype,
+
initializer=moving_mean_initializer or flow.zeros_initializer(), + trainable=False, + distribute=oneflow._oneflow_internal.distribute.broadcast(), + reuse=False, + ) + return moving_mean + + if moving_mean_name is None: + with flow.scope.namespace(name): + moving_mean = get_moving_mean_var("moving_mean") + else: + moving_mean = get_moving_mean_var(moving_mean_name) + + def get_moving_variance_var(name): + moving_variance = flow.get_variable( + name=name, + shape=params_shape, + dtype=params_dtype, + initializer=moving_variance_initializer or flow.ones_initializer(), + trainable=False, + distribute=oneflow._oneflow_internal.distribute.broadcast(), + reuse=False, + ) + return moving_variance + + if moving_variance_name is None: + with flow.scope.namespace(name): + moving_variance = get_moving_variance_var("moving_variance") + else: + moving_variance = get_moving_variance_var(moving_variance_name) + + return beta, gamma, moving_mean, moving_variance + + +@oneflow_export("layers.batch_normalization") +def batch_normalization( + inputs: oneflow._oneflow_internal.BlobDesc, + axis: int = -1, + momentum: float = 0.99, + epsilon: float = 0.001, + center: bool = True, + scale: bool = True, + beta_initializer: Optional[initializer_conf_util.InitializerConf] = None, + gamma_initializer: Optional[initializer_conf_util.InitializerConf] = None, + beta_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None, + gamma_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None, + moving_mean_initializer: Optional[initializer_conf_util.InitializerConf] = None, + moving_variance_initializer: Optional[initializer_conf_util.InitializerConf] = None, + trainable: bool = True, + training: bool = True, + name: str = "BatchNorm", + gamma_name: Optional[str] = None, + beta_name: Optional[str] = None, + moving_mean_name: Optional[str] = None, + moving_variance_name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""The BatchNormalization Layer. + + This layer can be used in conv or dense layer. + + The input data will be normalized by the mean and variance of the current batch data + + Args: + inputs (oneflow._oneflow_internal.BlobDesc): Input `Blob`. + axis (int, optional): An int specifies the axis that should be normalized . Default is -1, which normalizes the last axis. + momentum (float, optional): A float specifies the momentum for the moving average. Defaults to 0.99. + epsilon (float, optional): A small float added to avoid division by zero. Defaults to 0.001. + center (bool, optional): A boolean specifies whether to add offset to normalized `Blob`. Defaults to True. + scale (bool, optional): A boolean specifies whether to multiply normalized `Blob` by gamma. Defaults to True. + beta_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for beta. Defaults to None. + gamma_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for gamma. Defaults to None. + beta_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): Regularizer for beta. Defaults to None. + gamma_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): Regularizer for gamma. Defaults to None. + moving_mean_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for moving mean. Defaults to None. + moving_variance_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for moving variance. Defaults to None. 
+        trainable (bool, optional): A boolean specifying whether to train the variables. Defaults to True.
+        training (bool, optional): A boolean specifying whether the layer is currently run in training mode. Defaults to True.
+        name (str, optional): This layer's name. Defaults to "BatchNorm".
+        gamma_name (Optional[str], optional): This gamma's name. Defaults to None.
+        beta_name (Optional[str], optional): This beta's name. Defaults to None.
+        moving_mean_name (Optional[str], optional): This moving_mean's name. Defaults to None.
+        moving_variance_name (Optional[str], optional): This moving_var's name. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` with the same shape as the input.
+
+    Raises:
+        ValueError: If axis is out of dimension of input.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def batch_norm_Job(x: tp.Numpy.Placeholder((1, 64, 128, 128))
+        ) -> tp.Numpy:
+            initializer = flow.truncated_normal(0.1)
+            conv2d = flow.layers.conv2d(
+                x,
+                filters=128,
+                kernel_size=3,
+                strides=2,
+                padding='SAME',
+                kernel_initializer=initializer,
+                name="Conv2d"
+            )
+            batch_norm = flow.layers.batch_normalization(
+                conv2d,
+                axis=1
+            )
+            return batch_norm
+
+
+        x = np.random.randn(1, 64, 128, 128).astype(np.float32)
+        out = batch_norm_Job(x)
+
+        # out.shape (1, 128, 64, 64)
+
+    """
+    if axis < 0:
+        axis += len(inputs.shape)
+    assert axis >= 0 and axis < len(inputs.shape)
+
+    params_shape = [inputs.shape[axis]]
+    # Float32 required to avoid precision loss when using fp16 input/output
+    params_dtype = flow.float32 if inputs.dtype == flow.float16 else inputs.dtype
+
+    if not flow.current_global_function_desc().IsTrainable() or not trainable:
+        training = False
+
+    beta, gamma, moving_mean, moving_variance = _get_batch_normalization_variables(
+        name,
+        gamma_name,
+        beta_name,
+        moving_mean_name,
+        moving_variance_name,
+        center,
+        scale,
+        params_shape,
+        params_dtype,
+        trainable,
+        beta_initializer,
+        beta_regularizer,
+        gamma_initializer,
+        gamma_regularizer,
+        moving_mean_initializer,
+        moving_variance_initializer,
+    )
+
+    if flow.current_scope().device_parallel_desc_symbol.device_tag == "cpu":
+        if training:
+            reduce_axis = []
+            for dim in range(len(inputs.shape)):
+                if dim != axis:
+                    reduce_axis.append(dim)
+            mean, variance = flow.nn.moments(inputs, reduce_axis, keepdims=False)
+
+            def update_moving(moving, this_batch):
+                moving_identity = flow.identity(moving)
+                flow.assign(
+                    moving, momentum * moving_identity + (1 - momentum) * this_batch
+                )
+
+            update_moving(moving_mean, mean)
+            update_moving(moving_variance, variance)
+
+            return flow.nn.batch_normalization(
+                x=inputs,
+                mean=mean,
+                variance=variance,
+                offset=beta,
+                scale=gamma,
+                variance_epsilon=epsilon,
+                axis=axis,
+                name=name,
+            )
+        else:
+            mean = moving_mean
+            variance = moving_variance
+            return flow.nn.batch_normalization(
+                x=inputs,
+                mean=mean,
+                variance=variance,
+                offset=beta,
+                scale=gamma,
+                variance_epsilon=epsilon,
+                axis=axis,
+                name=name,
+            )
+    else:
+        builder = (
+            flow.user_op_builder(name)
+            .Op("normalization")
+            .Input("x", [inputs])
+            .Input("moving_mean", [moving_mean])
+            .Input("moving_variance", [moving_variance])
+            .Input("gamma", [gamma])
+            .Input("beta", [beta])
+            .Output("y")
+            .Attr("axis", axis)
+            .Attr("epsilon", epsilon)
+            .Attr("training", training)
+            .Attr("momentum", momentum)
+        )
+        if trainable and training:
+            builder = builder.Output("mean").Output("inv_variance")
builder.Output("mean").Output("inv_variance") + + return builder.Build().InferAndTryRun().RemoteBlobList()[0] + + +@oneflow_export("layers.batch_normalization_add_relu") +def batch_normalization_add_relu( + inputs: oneflow._oneflow_internal.BlobDesc, + addend: Optional[oneflow._oneflow_internal.BlobDesc] = None, + axis: int = -1, + momentum: float = 0.99, + epsilon: float = 0.001, + center: bool = True, + scale: bool = True, + beta_initializer: Optional[initializer_conf_util.InitializerConf] = None, + gamma_initializer: Optional[initializer_conf_util.InitializerConf] = None, + beta_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None, + gamma_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None, + moving_mean_initializer: Optional[initializer_conf_util.InitializerConf] = None, + moving_variance_initializer: Optional[initializer_conf_util.InitializerConf] = None, + trainable: bool = True, + training: bool = True, + name: str = "BatchNorm", + gamma_name: Optional[str] = None, + beta_name: Optional[str] = None, + moving_mean_name: Optional[str] = None, + moving_variance_name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Fused flow.layers.batch_normalization + flow.math.add + flow.math.relu + + Args: + inputs (oneflow._oneflow_internal.BlobDesc): Input `Blob`. + addend (oneflow._oneflow_internal.BlobDesc): `Blob` add to batch_normalization output. + axis (int, optional): An int specifies the axis that should be normalized . Default is -1, which normalizes the last axis. + momentum (float, optional): A float specifies the momentum for the moving average. Defaults to 0.99. + epsilon (float, optional): A small float added to avoid division by zero. Defaults to 0.001. + center (bool, optional): A boolean specifies whether to add offset to normalized `Blob`. Defaults to True. + scale (bool, optional): A boolean specifies whether to multiply normalized `Blob` by gamma. Defaults to True. + beta_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for beta. Defaults to None. + gamma_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for gamma. Defaults to None. + beta_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): Regularizer for beta. Defaults to None. + gamma_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): Regularizer for gamma. Defaults to None. + moving_mean_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for moving mean. Defaults to None. + moving_variance_initializer (Optional[initializer_conf_util.InitializerConf], optional): Initializer for moving variance. Defaults to None. + trainable (bool, optional): A boolean specifies whether to train variables. Defaults to True. + training (bool, optional): A boolean specifies whether now is training the model. Defaults to True. + name (Optional[str], optional): This layer's name. Defaults to None. + gamma_name (Optional[str], optional): This gamma's name. Defaults to None. + beta_name (Optional[str], optional): This beta's name. Defaults to None. + moving_mean_name (Optional[str], optional): This moving_mean's name. Defaults to None. + moving_variance_name (Optional[str], optional): This moving_var's name. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: A `Blob` with same shape of input. + + Raises: + ValueError: If axis is out of dimension of input. 
+ + """ + if not flow.current_global_function_desc().IsTrainable() or not trainable: + training = False + + if ( + not training + or flow.current_scope().device_parallel_desc_symbol.device_tag == "cpu" + ): + out = flow.layers.batch_normalization( + inputs, + axis=axis, + momentum=momentum, + epsilon=epsilon, + center=center, + scale=scale, + beta_initializer=beta_initializer, + gamma_initializer=gamma_initializer, + beta_regularizer=beta_regularizer, + gamma_regularizer=gamma_regularizer, + moving_mean_initializer=moving_mean_initializer, + moving_variance_initializer=moving_variance_initializer, + trainable=trainable, + training=training, + name=name, + ) + with flow.scope.namespace("BatchNormAddRelu"): + if addend is not None: + out = out + addend + return flow.math.relu(out) + + if axis < 0: + axis += len(inputs.shape) + assert 0 <= axis < len(inputs.shape) + + params_shape = [inputs.shape[axis]] + # Float32 required to avoid precision-loss when using fp16 input/output + params_dtype = flow.float32 if inputs.dtype == flow.float16 else inputs.dtype + + beta, gamma, moving_mean, moving_variance = _get_batch_normalization_variables( + name, + gamma_name, + beta_name, + moving_mean_name, + moving_variance_name, + center, + scale, + params_shape, + params_dtype, + trainable, + beta_initializer, + beta_regularizer, + gamma_initializer, + gamma_regularizer, + moving_mean_initializer, + moving_variance_initializer, + ) + + builder = ( + flow.user_op_builder(name) + .Op("normalization_add_relu") + .Input("x", [inputs]) + .Input("moving_mean", [moving_mean]) + .Input("moving_variance", [moving_variance]) + .Input("gamma", [gamma]) + .Input("beta", [beta]) + .Output("y") + .Output("mean") + .Output("inv_variance") + .Output("reserve_space") + .Attr("axis", axis) + .Attr("epsilon", epsilon) + .Attr("momentum", momentum) + ) + if addend is not None: + builder = builder.Input("addend", [addend]) + return builder.Build().InferAndTryRun().RemoteBlobList()[0] + + +@oneflow_export("layers.batch_normalization_relu") +def batch_normalization_relu( + inputs: oneflow._oneflow_internal.BlobDesc, + axis: int = -1, + momentum: float = 0.99, + epsilon: float = 0.001, + center: bool = True, + scale: bool = True, + beta_initializer: Optional[initializer_conf_util.InitializerConf] = None, + gamma_initializer: Optional[initializer_conf_util.InitializerConf] = None, + beta_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None, + gamma_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None, + moving_mean_initializer: Optional[initializer_conf_util.InitializerConf] = None, + moving_variance_initializer: Optional[initializer_conf_util.InitializerConf] = None, + trainable: bool = True, + training: bool = True, + name: str = "BatchNorm", +) -> oneflow._oneflow_internal.BlobDesc: + r"""Fused flow.layers.batch_normalization + flow.math.relu + +Args: + inputs (oneflow._oneflow_internal.BlobDesc): Input `Blob`. + axis (int, optional): An int specifies the axis that should be normalized . Default is -1, which normalizes the last axis. + momentum (float, optional): A float specifies the momentum for the moving average. Defaults to 0.99. + epsilon (float, optional): A small float added to avoid division by zero. Defaults to 0.001. + center (bool, optional): A boolean specifies whether to add offset to normalized `Blob`. Defaults to True. + scale (bool, optional): A boolean specifies whether to multiply normalized `Blob` by gamma. Defaults to True. 
+
+    """
+    return flow.layers.batch_normalization_add_relu(
+        inputs,
+        axis=axis,
+        momentum=momentum,
+        epsilon=epsilon,
+        center=center,
+        scale=scale,
+        beta_initializer=beta_initializer,
+        gamma_initializer=gamma_initializer,
+        beta_regularizer=beta_regularizer,
+        gamma_regularizer=gamma_regularizer,
+        moving_mean_initializer=moving_mean_initializer,
+        moving_variance_initializer=moving_variance_initializer,
+        trainable=trainable,
+        training=training,
+        name=name,
+    )
+
+
+@oneflow_export("layers.upsample_2d")
+def upsample(
+    x: oneflow._oneflow_internal.BlobDesc,
+    size: Sequence[int] = (2, 2),
+    align_corners: bool = False,
+    data_format: str = "NCHW",
+    interpolation: str = "nearest",
+    name: str = "Upsample2D",
+):
+    r"""The Upsample Layer. This layer upsamples the feature map to a specified scale.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): Input `Blob`.
+        size (tuple, optional): (height_scale, width_scale). Defaults to (2, 2).
+        align_corners (bool, optional): Defaults to False.
+        data_format (str, optional): A string specifying the format of the input `Blob`, one of "NCHW" or "NHWC". "NCHW" corresponds to channels_first, i.e. an input `Blob` with shape (batch_size, channels, height, width); "NHWC" corresponds to channels_last, i.e. an input `Blob` with shape (batch_size, height, width, channels). Defaults to "NCHW".
+        interpolation (str, optional): The image interpolation algorithm used to enlarge the image; "nearest" and "bilinear" are available at the moment. Defaults to "nearest".
+        name (str, optional): This layer's name. Defaults to "Upsample2D".
+
+    Raises:
+        ValueError: interpolation must be "nearest" or "bilinear".
+        ValueError: data_format must be "NHWC" or "NCHW".
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` which is the upsampled `x`. If `size` is (2, 2), the shape of the return value is [N, C, 2H, 2W].
+
+    For example:
+
+    .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def upsample_Job(x: tp.Numpy.Placeholder((1, 32, 32, 32)) + ) -> tp.Numpy: + upsample = flow.layers.upsample_2d( + x, + size=(2, 2), + name="Upsample1" + ) + return upsample + + + x = np.random.randn(1, 32, 32, 32).astype(np.float32) + out = upsample_Job(x) + + # out.shape (1, 32, 64, 64) + + """ + if isinstance(size, int): + height_scale = size + width_scale = size + else: + assert isinstance(size, (list, tuple)) + assert len(size) == 2 + height_scale = size[0] + width_scale = size[1] + + if interpolation != "nearest" and interpolation != "bilinear": + raise ValueError('interpolation must be "nearest" or "bilinear".') + + if interpolation == "nearest" and align_corners: + raise ValueError('interpolation "nearest" does not support align_corners.') + + if data_format.upper() != "NCHW" and data_format.upper() != "NHWC": + raise ValueError('data_format must be "NHWC" or "NCHW".') + + need_transpose = 0 + if data_format.upper() == "NHWC": + need_transpose = 1 + + if need_transpose: + x = flow.transpose(x, perm=[0, 3, 1, 2]) + + op = ( + flow.user_op_builder(name) + .Op("upsample") + .Input("x", [x]) + .Output("y") + .Attr("height_scale", float(height_scale)) + .Attr("width_scale", float(width_scale)) + .Attr("align_corners", align_corners) + .Attr("data_format", "channels_first") + .Attr("interpolation", interpolation) + .Build() + ) + output = op.InferAndTryRun().SoleOutputBlob() + + if need_transpose: + output = flow.transpose(output, perm=[0, 2, 3, 1]) + + return output diff --git a/oneflow/compatible_single_client_python/ops/linalg.py b/oneflow/compatible_single_client_python/ops/linalg.py new file mode 100644 index 0000000000000000000000000000000000000000..92ec74c3d61462c7bcd334654fbc7d05cd3a54fb --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/linalg.py @@ -0,0 +1,142 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +import os + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + stable_api, +) +import oneflow._oneflow_internal +from typing import Optional + + +@oneflow_export("matmul", "linalg.matmul") +@stable_api +def matmul( + a: oneflow._oneflow_internal.BlobDesc, + b: oneflow._oneflow_internal.BlobDesc, + transpose_a: bool = False, + transpose_b: bool = False, + alpha: float = 1.0, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator applies matrix multiplication to two Blobs. + + Args: + a (oneflow._oneflow_internal.BlobDesc): A Blob + b (oneflow._oneflow_internal.BlobDesc): A Blob + transpose_a (bool, optional): Whether to transpose A Blob. Defaults to False. + transpose_b (bool, optional): Whether to transpose B Blob. Defaults to False. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def matmul_Job(A: tp.Numpy.Placeholder((3, 3)), + B: tp.Numpy.Placeholder((3, 3)) + ) -> tp.Numpy: + return flow.linalg.matmul(A, B) + + + A = np.array([[1, 0, 0], + [0, 1, 1], + [0, 0, 1]]).astype(np.float32) + B = np.array([[3, 4, 5], + [6, 7, 8], + [9, 10, 11]]).astype(np.float32) + out = matmul_Job(A, B) + + # output [[ 3. 4. 5.] + # [15. 17. 19.] + # [ 9. 10. 
+
+    """
+    if name is None:
+        name = id_util.UniqueStr("Matmul_")
+
+    assert len(a.shape) >= 2
+    assert len(b.shape) >= 2
+
+    if len(a.shape) == len(b.shape):
+        if len(a.shape) == 2:
+            op = (
+                flow.user_op_builder(name)
+                .Op("matmul")
+                .Input("a", [a])
+                .Input("b", [b])
+                .Output("out")
+                .Attr("transpose_a", transpose_a)
+                .Attr("transpose_b", transpose_b)
+                .Attr("alpha", float(alpha))
+                .Build()
+            )
+        else:
+            op = (
+                flow.user_op_builder(name)
+                .Op("batch_matmul")
+                .Input("a", [a])
+                .Input("b", [b])
+                .Output("out")
+                .Attr("transpose_a", transpose_a)
+                .Attr("transpose_b", transpose_b)
+                .Attr("alpha", float(alpha))
+                .Build()
+            )
+    else:
+        # NOTE: only broadcasting b to a is supported for now
+        if len(b.shape) != 2:
+            raise ValueError(
+                "broadcast matmul requires b to have exactly 2 dimensions"
+            )
+
+        if transpose_a:
+            raise ValueError("don't support tensor a to be transposed")
+
+        op = (
+            flow.user_op_builder(name)
+            .Op("broadcast_matmul")
+            .Input("a", [a])
+            .Input("b", [b])
+            .Output("out")
+            .Attr("transpose_a", transpose_a)
+            .Attr("transpose_b", transpose_b)
+            .Attr("alpha", float(alpha))
+            .Build()
+        )
+
+    return op.InferAndTryRun().SoleOutputBlob()
diff --git a/oneflow/compatible_single_client_python/ops/loss_ops.py b/oneflow/compatible_single_client_python/ops/loss_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..d791100bd998c30c899da07ba17abe9ef3f3d7e3
--- /dev/null
+++ b/oneflow/compatible_single_client_python/ops/loss_ops.py
@@ -0,0 +1,284 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.framework import id_util as id_util
+from oneflow.compatible_single_client_python.oneflow_export import oneflow_export
+from oneflow.compatible_single_client_python.framework import (
+    remote_blob as remote_blob_util,
+)
+import oneflow._oneflow_internal
+from typing import Optional, Tuple
+
+
+@oneflow_export("smooth_l1_loss")
+def smooth_l1_loss(
+    prediction: oneflow._oneflow_internal.BlobDesc,
+    label: oneflow._oneflow_internal.BlobDesc,
+    beta: float = 1.0,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator computes the smooth l1 loss.
+
+    The equation is:
+
+    .. math::
+
+        & out = \frac{(\beta*x)^2}{2}, \left|x\right|<\frac{1}{{\beta}^2}
+
+        & out = \left|x\right|-\frac{0.5}{{\beta}^2}, otherwise
+
+
+    Args:
+        prediction (oneflow._oneflow_internal.BlobDesc): The prediction Blob
+        label (oneflow._oneflow_internal.BlobDesc): The label Blob
+        beta (float, optional): The :math:`\beta` in the equation. Defaults to 1.0.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def smooth_l1_loss_Job(prediction: tp.Numpy.Placeholder((5, )),
+                               label: tp.Numpy.Placeholder((5, ))
+        ) -> tp.Numpy:
+            return flow.smooth_l1_loss(prediction=prediction,
+                                       label=label)
+
+
+        prediction = np.array([0.1, 0.4, 0.3, 0.5, 0.9]).astype(np.float32)
+        label = np.array([0.3, 0.9, 2.5, 0.4, 0.3]).astype(np.float32)
+        out = smooth_l1_loss_Job(prediction, label)
+
+        # out [0.02 0.12499999 1.7 0.005 0.17999998]
+
+    """
+    op = (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("SmoothL1Loss_")
+        )
+        .Op("smooth_l1_loss")
+        .Input("prediction", [prediction])
+        .Input("label", [label])
+        .Output("loss")
+    )
+    op.Attr("beta", float(beta))
+    return op.Build().InferAndTryRun().RemoteBlobList()[0]
+
+
+@oneflow_export("ctc_loss")
+def ctc_loss(
+    log_probs: oneflow._oneflow_internal.BlobDesc,
+    targets: oneflow._oneflow_internal.BlobDesc,
+    input_lengths: oneflow._oneflow_internal.BlobDesc,
+    target_lengths: oneflow._oneflow_internal.BlobDesc,
+    blank: int = 0,
+    reduction: str = "mean",
+    zero_infinity: bool = False,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Computes the CTC (Connectionist Temporal Classification) loss.
+    This operator implements the CTC loss as presented in (Graves et al., 2006).
+
+    Args:
+        log_probs (oneflow._oneflow_internal.BlobDesc): A Blob of shape [input_length, batch_size, num_labels]. The logarithmized probabilities of the outputs (e.g. obtained with flow.nn.logsoftmax()).
+        targets (oneflow._oneflow_internal.BlobDesc): A Blob of shape [batch_size, max_target_length]. It represents the target sequences. Each element in the target sequence is a class index, and the target index cannot be the blank label (default 0).
+        input_lengths (oneflow._oneflow_internal.BlobDesc): A Blob of shape [batch_size]. It represents the lengths of the inputs. Lengths are specified for each sequence to achieve masking under the assumption that sequences are padded to equal lengths.
+        target_lengths (oneflow._oneflow_internal.BlobDesc): A Blob of shape [batch_size]. It represents the lengths of the targets. Lengths are specified for each sequence to achieve masking under the assumption that sequences are padded to equal lengths.
+        blank (int, optional): The blank label. Defaults to 0.
+        reduction (str, optional): The reduce type, one of "none", "mean", "sum". "none": no reduction will be applied; "mean": the output losses will be divided by the target lengths and then the mean over the batch is taken; "sum": the output will be summed. Defaults to "mean".
+        zero_infinity (bool, optional): Whether to zero infinite losses and the associated gradients. Infinite losses mainly occur when the inputs are too short to be aligned to the targets. Defaults to False.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob.
+
+    For example:
+
+    .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def ctc_loss_job( + log_probs: tp.Numpy.Placeholder(shape=(5, 2, 3)), + targets: tp.Numpy.Placeholder(shape=(2, 3), dtype=flow.int32), + input_lengths: tp.Numpy.Placeholder(shape=(2,), dtype=flow.int32), + target_lengths: tp.Numpy.Placeholder(shape=(2,), dtype=flow.int32), + ) -> tp.Numpy: + loss = flow.ctc_loss( + log_probs, targets, input_lengths, target_lengths, blank=0, reduction="none" + ) + return loss + + + log_probs = np.array( + [ + [[-1.1031, -0.7998, -1.5200], [-0.9808, -1.1363, -1.1908]], + [[-1.2258, -1.0665, -1.0153], [-1.1135, -1.2331, -0.9671]], + [[-1.3348, -0.6611, -1.5118], [-0.9823, -1.2355, -1.0941]], + [[-1.3850, -1.3273, -0.7247], [-0.8235, -1.4783, -1.0994]], + [[-0.9049, -0.8867, -1.6962], [-1.4938, -1.3630, -0.6547]], + ] + ).astype(np.float32) + targets = np.array([[1, 2, 2], [1, 2, 2]]).astype("int32") + input_lengths = np.array([5, 5]).astype("int32") + target_lengths = np.array([3, 3]).astype("int32") + loss = ctc_loss_job(log_probs, targets, input_lengths, target_lengths) + + # loss [3.918017 2.907672] + + """ + name = name if name is not None else id_util.UniqueStr("CTCLoss_") + loss, _ = ( + flow.user_op_builder(name) + .Op("ctc_loss") + .Input("log_probs", [log_probs]) + .Input("targets", [targets]) + .Input("input_lengths", [input_lengths]) + .Input("target_lengths", [target_lengths]) + .Output("loss") + .Output("alpha") + .Attr("blank", int(blank)) + .Attr("zero_infinity", zero_infinity) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + + if zero_infinity: + cond = flow.math.equal( + loss, + flow.constant( + float("inf"), + dtype=loss.dtype, + shape=loss.shape, + name=name + "_constant", + ), + name=name + "_equal", + ) + loss = flow.where( + cond, + flow.zeros(dtype=loss.dtype, shape=loss.shape, name=name + "_zeros"), + loss, + name=name + "_where", + ) + + if reduction == "mean": + return flow.math.reduce_mean( + flow.math.xdivy( + loss, + flow.cast( + flow.math.clip_by_value( + target_lengths, min_value=1, name=name + "_clip_by_value" + ), + dtype=log_probs.dtype, + name=name + "_cast", + ), + name=name + "_xdivy", + ), + name=name + "_reduce_mean", + ) + elif reduction == "sum": + return flow.math.reduce_sum(loss, name=name + "_reduce_sum") + else: + return loss + + +@oneflow_export("nn.ctc_greedy_decoder") +def ctc_greedy_decoder( + log_probs: oneflow._oneflow_internal.BlobDesc, + input_lengths: oneflow._oneflow_internal.BlobDesc, + merge_repeated: bool = True, + name: Optional[str] = None, +) -> Tuple[oneflow._oneflow_internal.BlobDesc, oneflow._oneflow_internal.BlobDesc]: + r"""Performs greedy decoding on the logits given in input (best path). + + Args: + log_probs (oneflow._oneflow_internal.BlobDesc): A Blob of shape [input_length, batch_size, num_labels]. The logarithmized probabilities of the outputs (e.g. obtained with flow.nn.logsoftmax()). + input_lengths (oneflow._oneflow_internal.BlobDesc): A Blob of shape [batch_size]. It represent the lengths of the inputs. And the lengths are specified for each sequence to achieve masking under the assumption that sequences are padded to equal lengths. + merge_repeated (bool, optional): If merge_repeated is True, merge repeated classes in output. This means that if consecutive logits' maximum indices are the same, only the first of these is emitted. Defaults to True. 
+ name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + decoded(oneflow._oneflow_internal.BlobDesc): A Blob of shape [batch_size, input_length], The decoded outputs. + neg_sum_logits(oneflow._oneflow_internal.BlobDesc): A float matrix (batch_size x 1) containing, for the sequence found, the negative of the sum of the greatest logit at each timeframe. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + from typing import Tuple + + + @flow.global_function() + def ctc_greedy_decoder_job( + log_probs: tp.Numpy.Placeholder(shape=(4, 2, 5)), + input_lengths: tp.Numpy.Placeholder(shape=(2,), dtype=flow.int64), + ) -> Tuple[tp.Numpy, tp.Numpy]: + decoded, neg_sum_logits = flow.nn.ctc_greedy_decoder( + log_probs, input_lengths, merge_repeated=True + ) + return decoded, neg_sum_logits + + + log_probs = np.array( + [ + [[-1.54, -1.20, -1.95, -1.65, -1.81], [-1.84, -1.74, -1.58, -1.55, -1.12]], + [[-1.68, -1.48, -1.89, -1.30, -2.07], [-1.13, -1.45, -1.24, -1.61, -1.66]], + [[-1.56, -1.40, -2.83, -1.67, -1.48], [-1.20, -2.01, -2.05, -1.95, -1.24]], + [[-2.09, -1.76, -1.36, -1.67, -1.45], [-1.85, -1.48, -1.34, -2.16, -1.55]], + ] + ).astype(np.float32) + input_lengths = np.array([4, 4]) + decoded, neg_sum_logits = ctc_greedy_decoder_job(log_probs, input_lengths) + + # decoded [[1 3 1 2] [0 2 0 0]] + # neg_sum_logits [[5.26] [4.79]] + + + """ + name = name if name is not None else id_util.UniqueStr("CTCGreedyDecode_") + decoded, neg_sum_logits = ( + flow.user_op_builder(name) + .Op("ctc_greedy_decoder") + .Input("log_probs", [log_probs]) + .Input("input_lengths", [input_lengths]) + .Output("decoded") + .Output("neg_sum_logits") + .Attr("merge_repeated", merge_repeated) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + return decoded, neg_sum_logits diff --git a/oneflow/compatible_single_client_python/ops/losses/__init__.py b/oneflow/compatible_single_client_python/ops/losses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/ops/losses/add_loss.py b/oneflow/compatible_single_client_python/ops/losses/add_loss.py new file mode 100644 index 0000000000000000000000000000000000000000..d3a4a789a31d12b1c04e2f35fbba65769587bec7 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/losses/add_loss.py @@ -0,0 +1,53 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.eager import gradient_util as gradient_util +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +import oneflow._oneflow_internal + + +@oneflow_export("losses.add_loss") +def api_add_loss(loss: oneflow._oneflow_internal.BlobDesc) -> None: + r"""Mark a `Blob` as a loss. Auto grad starts at every loss blob. It doesn't has to be a product of typical "loss" operator like softmax loss but can also be a `Blob` produced by any operator. + + Args: + loss: A `Blob`. + """ + return enable_if.unique([lazy_add_loss, eager_add_loss])(loss) + + +@enable_if.condition( + hob.in_global_mode & hob.is_trainable & ~hob.eager_execution_enabled +) +def lazy_add_loss(loss): + c_api_util.CurJobBuildAndInferCtx_AddLossLogicalBlobName(loss.unique_name) + + +@enable_if.condition( + hob.in_global_mode & hob.is_trainable & hob.eager_execution_enabled +) +def eager_add_loss(loss): + c_api_util.CurJobBuildAndInferCtx_AddLossLogicalBlobName(loss.unique_name) + gradient_util.GetDefaultBackwardBlobRegister().TrySetObject4BlobName( + loss.logical_blob_name, loss.blob_object + ) diff --git a/oneflow/compatible_single_client_python/ops/math_binary_elementwise_ops.py b/oneflow/compatible_single_client_python/ops/math_binary_elementwise_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..ee6898a0d0c68e2e077992c5c7442107b6425398 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/math_binary_elementwise_ops.py @@ -0,0 +1,299 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import os +from typing import Optional, Union + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal + + +def build_math_binary_elementwise_op(math_op, x, y, name=None): + if name is None: + name = id_util.UniqueStr(math_op + "_") + return ( + flow.user_op_builder(name) + .Op(math_op) + .Input("x", [x]) + .Input("y", [y]) + .Output("z") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("math.atan2") +def atan2( + x: oneflow._oneflow_internal.BlobDesc, + y: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the values of :math:`arctan(\frac{x}{y})`. 
+
+    The equation is:
+
+    .. math::
+
+        out = arctan(\frac{x}{y})
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def atan2Job(x: tp.Numpy.Placeholder((3,),), y: tp.Numpy.Placeholder((3, ))
+        )-> tp.Numpy:
+            return flow.math.atan2(x, y)
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        y = np.array([4, 4, 4]).astype(np.float32)
+        out = atan2Job(x, y)
+
+        # out [0.24497867 0.4636476  0.6435011 ]
+        # Take the first value as an example:
+        # arctan(1/4) = 0.24497867 rad (about 14.04 degrees)
+
+    """
+    return build_math_binary_elementwise_op("atan2", x, y, name)
+
+
+@oneflow_export("math.pow")
+def pow(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: Union[oneflow._oneflow_internal.BlobDesc, float],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the Pow result.
+
+    The equation is:
+
+    .. math::
+
+        out = x^y
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (Union[oneflow._oneflow_internal.BlobDesc, float]): A Blob or a float value, the exponent of Pow
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    Example 1:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def powJob(x: tp.Numpy.Placeholder((3,), ), y: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.pow(x, y)
+
+
+        x = np.array([2, 3, 4]).astype(np.float32)
+        y = np.array([2, 3, 4]).astype(np.float32)
+        out = powJob(x, y)
+
+        # out [  4.  27. 256.]
+
+    Example 2:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+        import numpy as np
+
+
+        @flow.global_function()
+        def scalar_pow_job(x: tp.Numpy.Placeholder(shape=(3, )))->tp.Numpy:
+            with flow.scope.placement("cpu", "0:0"):
+                out = flow.math.pow(x, 2.0)
+            return out
+
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        out = scalar_pow_job(x)
+
+        # out [1. 4. 9.]
+    """
+    if name is None:
+        name = id_util.UniqueStr("Pow_")
+
+    if isinstance(y, (int, float)):
+        return (
+            flow.user_op_builder(name)
+            .Op("scalar_pow")
+            .Input("in", [x])
+            .Attr("exponent", float(y))
+            .Output("out")
+            .Build()
+            .InferAndTryRun()
+            .RemoteBlobList()[0]
+        )
+    else:
+        return build_math_binary_elementwise_op("pow", x, y, name)
+
+
+@oneflow_export("math.floordiv")
+def floordiv(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the result of :math:`x/y`, rounding toward the most negative integer value.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def floor_div_Job(x: tp.Numpy.Placeholder((3,)),
+                          y: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.floordiv(x, y)
+
+
+        x = np.array([4, 3, 5]).astype(np.float32)
+        y = np.array([3, 2, 2]).astype(np.float32)
+        out = floor_div_Job(x, y)
+
+        # out [1. 1. 2.]
+    """
+    return build_math_binary_elementwise_op("floordiv", x, y, name)
+
+
+@oneflow_export("math.xdivy")
+def xdivy(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the result of :math:`x/y` (by the usual xdivy convention, the result is zero where :math:`x = 0`)
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def xdivy_Job(x: tp.Numpy.Placeholder((3,)),
+                      y: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.xdivy(x, y)
+
+
+        x = np.array([4, 3, 5]).astype(np.float32)
+        y = np.array([3, 2, 2]).astype(np.float32)
+        out = xdivy_Job(x, y)
+
+        # out [1.3333334 1.5       2.5      ]
+
+    """
+    return build_math_binary_elementwise_op("xdivy", x, y, name)
+
+
+@oneflow_export("math.xlogy")
+def xlogy(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the result of :math:`x*log(y)` (by the usual xlogy convention, the result is zero where :math:`x = 0`)
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def xlogy_Job(x: tp.Numpy.Placeholder((3,)),
+                      y: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.xlogy(x, y)
+
+
+        x = np.array([2, 2, 2]).astype(np.float32)
+        y = np.array([4, 8, 16]).astype(np.float32)
+        out = xlogy_Job(x, y)
+
+        # out [2.7725887 4.158883  5.5451775]
+    """
+    return build_math_binary_elementwise_op("xlogy", x, y, name)
diff --git a/oneflow/compatible_single_client_python/ops/math_ops.py b/oneflow/compatible_single_client_python/ops/math_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..d6f350064fa9aabf68d96214ff9faccffb4917fe
--- /dev/null
+++ b/oneflow/compatible_single_client_python/ops/math_ops.py
@@ -0,0 +1,2201 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+import os
+from typing import Union, Optional, Sequence, List, Tuple
+
+from oneflow.compatible import single_client as flow
+from oneflow.core.operator import op_conf_pb2 as op_conf_util
+from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util
+from oneflow.compatible_single_client_python.framework import (
+    interpret_util as interpret_util,
+)
+from oneflow.compatible_single_client_python.framework import id_util as id_util
+from oneflow.compatible_single_client_python.framework import (
+    remote_blob as remote_blob_util,
+)
+from oneflow.compatible_single_client_python.framework import module as module_util
+from oneflow.compatible_single_client_python.ops import (
+    math_unary_elementwise_ops as math_unary_elementwise_ops,
+)
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    stable_api,
+)
+from oneflow.compatible_single_client_python.ops.transpose_util import (
+    get_perm_when_transpose_axis_to_last_dim,
+)
+from oneflow.compatible_single_client_python.ops.transpose_util import get_inversed_perm
+import oneflow._oneflow_internal
+
+
+@oneflow_export("math.add")
+def add(
+    x: Union[int, float, oneflow._oneflow_internal.BlobDesc],
+    y: Union[int, float, oneflow._oneflow_internal.BlobDesc],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Computes :math:`X + Y` element-wise; math.add supports broadcasting.
+
+    The equation is:
+
+    .. math::
+        out = X + Y
+
+    Args:
+        x (Union[int, float, oneflow._oneflow_internal.BlobDesc]): A Blob.
+        y (Union[int, float, oneflow._oneflow_internal.BlobDesc]): A Blob with the same type as x.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The sum of x and y, with the same type as x.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def addJob(x: tp.Numpy.Placeholder((3, )),
+                   y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.add(x, y)
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        y = np.array([1, 1, 1]).astype(np.float32)
+        out = addJob(x, y)
+
+        # out [2., 3., 4.]
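+
+    Example 2 (a minimal sketch of the scalar path; ``scalarAddJob`` is an
+    illustrative name, and the Python float dispatches to the scalar_add op):
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def scalarAddJob(x: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            # adding a Python scalar takes the scalar_add path
+            return flow.math.add(x, 1.0)
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        out = scalarAddJob(x)
+
+        # out [2., 3., 4.]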
+ + """ + if isinstance(x, (int, float)): + return scalar_add(y, x, name) + elif isinstance(y, (int, float)): + return scalar_add(x, y, name) + elif x.shape == y.shape and x.is_dynamic == y.is_dynamic: + return element_wise_add(x, y, name) + elif x.shape == (1,): + return scalar_add_by_tensor(y, x, name) + elif y.shape == (1,): + return scalar_add_by_tensor(x, y, name) + else: + return broadcast_add(x, y, name) + + +def _recursive_build_add_n(inputs, name=None): + inputs = list(inputs) + kernel_max_inputs = 8 + if len(inputs) == 1: + return inputs[0] + elif len(inputs) <= kernel_max_inputs: + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("AddN_") + ) + .Op("add_n") + .Input("in", inputs) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + else: + assert len(inputs) > kernel_max_inputs + new_inputs = inputs[kernel_max_inputs:] + new_inputs.append(_recursive_build_add_n(inputs[:kernel_max_inputs])) + return _recursive_build_add_n(new_inputs) + + +@oneflow_export("math.add_n") +def add_n( + inputs: Sequence[oneflow._oneflow_internal.BlobDesc], name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + """Add all the input tensors in element-wise. + + Args: + inputs (Sequence[oneflow._oneflow_internal.BlobDesc]): A list of Blob, each Blob has the same shape and type. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The sum of the inputs, has the same shape and type as the elements of inputs. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + @flow.global_function() + def add_n_Job(x: tp.Numpy.Placeholder((3, )), + y: tp.Numpy.Placeholder((3, )) + )->tp.Numpy: + return flow.math.add_n([x, y]) + + x = np.array([1, 2, 3]).astype(np.float32) + y = np.array([1, 1, 1]).astype(np.float32) + out = add_n_Job(x, y) + print(out) + + # out [2., 3., 4.] + + """ + return _recursive_build_add_n(inputs, name) + + +@oneflow_export("math.subtract") +def subtract( + x: Union[int, float, oneflow._oneflow_internal.BlobDesc], + y: Union[int, float, oneflow._oneflow_internal.BlobDesc], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """Compute :math:`X - Y` element-wise. + + The equation is: + + .. math:: + out = X - Y + + Args: + x (Union[int, float, oneflow._oneflow_internal.BlobDesc]): A Blob. + y (Union[int, float, oneflow._oneflow_internal.BlobDesc]): A Blob has the same type of x. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: A Blob after subtracting, has the same type as x. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + @flow.global_function() + def subtractJob(x: tp.Numpy.Placeholder((3, )), + y: tp.Numpy.Placeholder((3, )) + )->tp.Numpy: + return flow.math.subtract(x, y) + + x = np.array([1, 2, 3]).astype(np.float32) + y = np.array([2, 4, 1]).astype(np.float32) + out = subtractJob(x, y) + + # out [-1., -2., 2.] 
+ + """ + if isinstance(x, (int, float)): + return scalar_add(-1 * y, x, name) + elif isinstance(y, (int, float)): + return scalar_add(x, -1 * y, name) + elif x.shape == y.shape: + # TODO: add element-wise op + return broadcast_sub(x, y, name) + elif y.shape == (1,): + return scalar_sub_by_tensor(x, y, name) + else: + return broadcast_sub(x, y, name) + + +@oneflow_export("math.multiply") +def multiply( + x: Union[int, float, oneflow._oneflow_internal.BlobDesc], + y: Union[int, float, oneflow._oneflow_internal.BlobDesc], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Compute :math:`x \times y` element-wise. + + The equation is: + + .. math:: + out = X \times Y + + Args: + x (Union[int, float, oneflow._oneflow_internal.BlobDesc]): A Blob. + y (Union[int, float, oneflow._oneflow_internal.BlobDesc]): A Blob has the same type of x. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: A Blob after multiplying, has the same type as x. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + @flow.global_function() + def multiplyJob(x: tp.Numpy.Placeholder((3, )), + y: tp.Numpy.Placeholder((3, )) + )->tp.Numpy: + return flow.math.multiply(x, y) + + x = np.array([1, 2, 3]).astype(np.float32) + y = np.array([2, 3, 3]).astype(np.float32) + out = multiplyJob(x, y) + + # out [2., 6., 9.] + + """ + if isinstance(x, (int, float)): + return scalar_mul(y, x, name) + elif isinstance(y, (int, float)): + return scalar_mul(x, y, name) + elif x.shape == y.shape: + return element_wise_mul(x, y, name) + elif x.shape == (1,): + return scalar_mul_by_tensor(y, x, name) + elif y.shape == (1,): + return scalar_mul_by_tensor(x, y, name) + else: + return broadcast_mul(x, y, name) + + +@oneflow_export("math.divide") +def divide( + x: Union[int, float, oneflow._oneflow_internal.BlobDesc], + y: Union[int, float, oneflow._oneflow_internal.BlobDesc], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Computes the division of x by y. + + The equation is: + + .. math:: + out = \frac{X}{Y} + + Args: + x (Union[int, float, oneflow._oneflow_internal.BlobDesc]): A Blob. + y (Union[int, float, oneflow._oneflow_internal.BlobDesc]): A Blob. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: A Blob with same shape as input x. + + For example: + + .. 
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def divideJob(x: tp.Numpy.Placeholder((3, )),
+                      y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.divide(x, y)
+
+        x = np.array([25, 16, 9]).astype(np.float32)
+        y = np.array([10, 4, 2]).astype(np.float32)
+        out = divideJob(x, y)
+
+        # out [2.5, 4., 4.5]
+
+    """
+    if isinstance(x, (int, float)):
+        return scalar_mul(math_unary_elementwise_ops.reciprocal_no_nan(y), x, name)
+    elif isinstance(y, (int, float)):
+        if y == 0 or y == 0.0:
+            y = 0.0
+        else:
+            y = 1.0 / (float(y))
+        return scalar_mul(x, y, name)
+    elif x.shape == y.shape:
+        # TODO: add element-wise op
+        return broadcast_div(x, y, name)
+    elif y.shape == (1,):
+        return scalar_div_by_tensor(x, y, name)
+    else:
+        return broadcast_div(x, y, name)
+
+
+@oneflow_export("math.mod")
+def floor_mod(
+    x: Union[int, float, oneflow._oneflow_internal.BlobDesc],
+    y: Union[int, float, oneflow._oneflow_internal.BlobDesc],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator computes the mod of two Blobs element-wise.
+
+    The equation is:
+
+    .. math::
+        out = X \bmod Y
+
+    Args:
+        x (Union[int, float, oneflow._oneflow_internal.BlobDesc]): A Blob
+        y (Union[int, float, oneflow._oneflow_internal.BlobDesc]): A Blob with the same type as x
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Raises:
+        NotImplementedError: if x is an int or a float (scalar operands are not supported)
+        NotImplementedError: if y is an int or a float (scalar operands are not supported)
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob with the same type as input x.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def modJob(x: tp.Numpy.Placeholder((3, )),
+                   y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.mod(x, y)
+
+        x = np.array([16, 9, 5]).astype(np.float32)
+        y = np.array([6, 4, 3]).astype(np.float32)
+        out = modJob(x, y)
+
+        # out [4., 1., 2.]
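+
+    Example 2 (a sketch of the broadcasting path, which lowers to the
+    ``broadcast_floor_mod`` op; the shapes here are illustrative):
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def modBroadcastJob(x: tp.Numpy.Placeholder((2, 3)),
+                            y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            # y is broadcast along the first dimension of x
+            return flow.math.mod(x, y)
+
+        x = np.array([[5, 7, 9], [4, 8, 10]]).astype(np.float32)
+        y = np.array([2, 3, 4]).astype(np.float32)
+        out = modBroadcastJob(x, y)
+
+        # out [[1. 1. 1.]
+        #      [0. 2. 2.]]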
+ + """ + if isinstance(x, (int, float)): + raise NotImplementedError + elif isinstance(y, (int, float)): + raise NotImplementedError + elif x.shape == y.shape: + # TODO: add element-wise op + return broadcast_floor_mod(x, y, name) + else: + return broadcast_floor_mod(x, y, name) + + +def scalar_add(x, operand, name=None): + if name is None: + name = id_util.UniqueStr("ScalarAdd_") + builder = flow.user_op_builder(name).Op("scalar_add").Input("in", [x]).Output("out") + if isinstance(operand, int): + builder = ( + builder.Attr("has_int_operand", True) + .Attr("has_float_operand", False) + .Attr("int_operand", operand) + .Attr("float_operand", 0.0) + ) + elif isinstance(operand, float): + builder = ( + builder.Attr("has_int_operand", False) + .Attr("has_float_operand", True) + .Attr("int_operand", 0) + .Attr("float_operand", operand) + ) + return builder.Build().InferAndTryRun().RemoteBlobList()[0] + + +def scalar_add_by_tensor(x, scalar, name=None): + return ( + flow.user_op_builder(name or id_util.UniqueStr("ScalarAddByTensor_")) + .Op("scalar_add_by_tensor") + .Input("x", [x]) + .Input("scalar", [scalar]) + .Output("y") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +def element_wise_add(x, y, name=None): + return flow.math.add_n([x, y], name) + + +def build_broadcast_binary_op(math_op, x, y, name=None): + if name is None: + name = id_util.UniqueStr(math_op + "_") + return ( + flow.user_op_builder(name) + .Op(math_op) + .Input("x", [x]) + .Input("y", [y]) + .Output("z") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +def broadcast_add(x, y, name=None): + return build_broadcast_binary_op("broadcast_add", x, y, name) + + +def broadcast_sub(x, y, name=None): + return build_broadcast_binary_op("broadcast_sub", x, y, name) + + +def scalar_sub_by_tensor(x, scalar, name=None): + return ( + flow.user_op_builder(name or id_util.UniqueStr("ScalarSubByTensor_")) + .Op("scalar_sub_by_tensor") + .Input("x", [x]) + .Input("scalar", [scalar]) + .Output("y") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +def element_wise_mul(x, y, name=None): + return ( + flow.user_op_builder(name or id_util.UniqueStr("ElementWiseMul_")) + .Op("multiply") + .Input("x", [x]) + .Input("y", [y]) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +def broadcast_mul(x, y, name=None): + return build_broadcast_binary_op("broadcast_mul", x, y, name) + + +def scalar_mul(x, operand, name=None): + if name is None: + name = id_util.UniqueStr("ScalarMul_") + builder = flow.user_op_builder(name).Op("scalar_mul").Input("in", [x]).Output("out") + if isinstance(operand, int): + builder = ( + builder.Attr("has_int_operand", True) + .Attr("has_float_operand", False) + .Attr("int_operand", operand) + .Attr("float_operand", 0.0) + ) + elif isinstance(operand, float): + builder = ( + builder.Attr("has_int_operand", False) + .Attr("has_float_operand", True) + .Attr("int_operand", 0) + .Attr("float_operand", operand) + ) + return builder.Build().InferAndTryRun().RemoteBlobList()[0] + + +def scalar_mul_by_tensor(x, scalar, name=None): + return ( + flow.user_op_builder(name or id_util.UniqueStr("ScalarMulByTensor_")) + .Op("scalar_mul_by_tensor") + .Input("x", [x]) + .Input("scalar", [scalar]) + .Output("y") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +def broadcast_div(x, y, name=None): + return build_broadcast_binary_op("broadcast_div", x, y, name) + + +def scalar_div_by_tensor(x, scalar, name=None): + return ( + flow.user_op_builder(name or 
id_util.UniqueStr("ScalarDivByTensor_")) + .Op("scalar_div_by_tensor") + .Input("x", [x]) + .Input("scalar", [scalar]) + .Output("y") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +def broadcast_floor_mod(x, y, name=None): + return build_broadcast_binary_op("broadcast_floor_mod", x, y, name) + + +@oneflow_export("math.gelu") +def gelu( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""Gelu activation operator. + + The equation is: + + .. math:: + out = 0.5 * x * (1 + tanh(\sqrt{\frac{2}{\pi}} * (x + 0.044715x^{3}))) + + Args: + x (oneflow._oneflow_internal.BlobDesc): Input Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: A Blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + @flow.global_function() + def geluJob(x: tp.Numpy.Placeholder((3, )) + )->tp.Numpy: + return flow.math.gelu(x) + + x = np.array([-0.5, 0, 0.5]).astype(np.float32) + out = geluJob(x) + + # out [-0.15426877, 0., 0.34573123] + + """ + return ( + flow.user_op_builder(name if name is not None else id_util.UniqueStr("Gelu_")) + .Op("gelu") + .Input("in", [x]) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("math.relu", "nn.relu") +def relu( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""Relu activation + + The equation is: + + .. math:: + out = max(X, 0) + + Args: + x (oneflow._oneflow_internal.BlobDesc): Input Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: An activated Blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + @flow.global_function() + def reluJob(x: tp.Numpy.Placeholder((3, )) + )->tp.Numpy: + return flow.math.relu(x) + + x = np.array([-1, 0, 5]).astype(np.float32) + out = reluJob(x) + + # out [0., 0., 5.] + + """ + + return ( + flow.user_op_builder(name if name is not None else id_util.UniqueStr("Relu_")) + .Op("relu") + .Input("in", [x]) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("math.sigmoid") +def sigmoid( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""Sigmoid activation + + The equation is: + + .. math:: + out = \frac{1}{1 + e^{-x}} + + Args: + x (oneflow._oneflow_internal.BlobDesc): Input Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: An activated Blob. + + For example: + + .. 
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def sigmoidJob(x: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.sigmoid(x)
+
+        x = np.array([-1, 0, 1]).astype(np.float32)
+        out = sigmoidJob(x)
+
+        # out [0.26894143, 0.5, 0.7310586]
+
+    """
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("Sigmoid_")
+        )
+        .Op("sigmoid")
+        .Input("in", [x])
+        .Output("out")
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("math.sigmoid_grad")
+def sigmoid_grad(
+    y: oneflow._oneflow_internal.BlobDesc,
+    dy: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("SigmoidGrad_")
+        )
+        .Op("sigmoid_grad")
+        .Input("y", [y])
+        .Input("dy", [dy])
+        .Output("dx")
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("math.unsorted_segment_sum", "unsorted_segment_sum")
+def unsorted_segment_sum(
+    data: oneflow._oneflow_internal.BlobDesc,
+    segment_ids: oneflow._oneflow_internal.BlobDesc,
+    num_segments: int,
+    axis: int = 0,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Computes the sum along segments of a Blob.
+
+    Args:
+        data (oneflow._oneflow_internal.BlobDesc): Input Blob
+        segment_ids (oneflow._oneflow_internal.BlobDesc): A 1-D Blob whose length equals the size of `data` along `axis`, containing segment IDs in the range 0 to num_segments - 1.
+        num_segments (int): num_segments should equal the number of distinct segment IDs.
+        axis (int, optional): The axis of data. Defaults to 0.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob with the same type as data.
+
+    For example:
+
+    .. code-block:: python
+
+        # Example 1:
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def unsorted_segment_sumJob(data: tp.Numpy.Placeholder((3, 4)),
+                                    segment_ids: tp.Numpy.Placeholder((4, ), dtype=flow.int32)
+        )->tp.Numpy:
+            return flow.math.unsorted_segment_sum(data, segment_ids, num_segments=2, axis=1)
+
+        input_blob = np.array([[1, 2, 3, 4],
+                               [5, 6, 7 ,8],
+                               [9, 10, 11, 12]]).astype(np.float32)
+        segment_ids = np.array([0, 1, 0, 1]).astype(np.int32)
+        out = unsorted_segment_sumJob(input_blob, segment_ids)
+
+        # out [[ 4.  6.]
+        #      [12. 14.]
+        #      [20. 22.]]
+
+        # Example 2
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def unsorted_segment_sumJob(data: tp.Numpy.Placeholder((3, 4)),
+                                    segment_ids: tp.Numpy.Placeholder((3, ), dtype=flow.int32)
+        )->tp.Numpy:
+            return flow.math.unsorted_segment_sum(data, segment_ids, num_segments=2, axis=0)
+
+        input_blob = np.array([[1, 2, 3, 4],
+                               [5, 6, 7 ,8],
+                               [9, 10, 11, 12]]).astype(np.float32)
+        segment_ids = np.array([0, 1, 0]).astype(np.int32)
+        out = unsorted_segment_sumJob(input_blob, segment_ids)
+
+        # out [[10. 12. 14. 16.]
+        #      [ 5.  6.  7.  8.]]
+
+    """
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("UnsortedSegmentSum_")
+        )
+        .Op("unsorted_segment_sum")
+        .Input("data", [data])
+        .Input("segment_ids", [segment_ids])
+        .Output("out")
+        .Attr("axis", int(axis))
+        .Attr("num_segments", int(num_segments))
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("math.unsorted_segment_sum_like", "unsorted_segment_sum_like")
+def unsorted_segment_sum_like(
+    data: oneflow._oneflow_internal.BlobDesc,
+    segment_ids: oneflow._oneflow_internal.BlobDesc,
+    like: oneflow._oneflow_internal.BlobDesc,
+    axis: int = 0,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Computes the sum along segments of a Blob; the output shape is the same as the `like` Blob.
+
+    Args:
+        data (oneflow._oneflow_internal.BlobDesc): Input Blob
+        segment_ids (oneflow._oneflow_internal.BlobDesc): A 1-D Blob whose length equals the size of `data` along `axis`, containing segment IDs in the range 0 to num_segments - 1.
+        like (oneflow._oneflow_internal.BlobDesc): The input Blob which specifies the shape
+        axis (int, optional): The axis of data. Defaults to 0.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def unsorted_segment_sum_like_Job(data: tp.Numpy.Placeholder((3, 4)),
+                                          segment_ids: tp.Numpy.Placeholder((3, ), dtype=flow.int32),
+                                          like: tp.Numpy.Placeholder((2, 4), dtype=flow.float32)
+        )->tp.Numpy:
+            return flow.math.unsorted_segment_sum_like(data, segment_ids, like, axis=0)
+
+        input_blob = np.array([[1, 2, 3, 4],
+                               [5, 6, 7 ,8],
+                               [9, 10, 11, 12]]).astype(np.float32)
+        segment_ids = np.array([0, 1, 0]).astype(np.int32)
+        like = np.zeros(shape=(2, 4), dtype=np.float32)
+
+        out = unsorted_segment_sum_like_Job(input_blob, segment_ids, like)
+
+        # out [[10. 12. 14. 16.]
+        #      [ 5.  6.  7.  8.]]
+
+    """
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("UnsortedSegmentSumLike_")
+        )
+        .Op("unsorted_segment_sum_like")
+        .Input("data", [data])
+        .Input("segment_ids", [segment_ids])
+        .Input("like", [like])
+        .Output("out")
+        .Attr("axis", int(axis))
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("math.unsorted_batch_segment_sum", "unsorted_batch_segment_sum")
+def unsorted_batch_segment_sum(
+    data: oneflow._oneflow_internal.BlobDesc,
+    segment_ids: oneflow._oneflow_internal.BlobDesc,
+    num_segments: int,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""It is similar to `unsorted_segment_sum`; the difference is that `unsorted_batch_segment_sum` adds a `batch axis`, so the segment sum can be computed separately for each batch of data.
+
+    For example, the segment ids can look like:
+
+    .. code-block:: python
+
+        [[0 0 0 1 2 2 3 3],
+         [0 0 1 1 2 3 3 3]]
+
+    Args:
+        data (oneflow._oneflow_internal.BlobDesc): Input Blob
+        segment_ids (oneflow._oneflow_internal.BlobDesc): A Blob with shape (d0, d1). The d0, d1 are the first and second dimensions of data.
+        num_segments (int): num_segments should equal the number of distinct segment IDs.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def unsorted_batch_segment_sum_Job(data: tp.Numpy.Placeholder((3, 4)),
+                                           segment_ids: tp.Numpy.Placeholder((3, 4), dtype=flow.int32)
+        )->tp.Numpy:
+            return flow.math.unsorted_batch_segment_sum(data, segment_ids, 2)
+
+        input_blob = np.array([[1, 2, 3, 4],
+                               [1, 2, 3 ,4],
+                               [1, 2, 3, 4]]).astype(np.float32)
+        segment_ids = np.array([[0, 0, 0, 1],
+                                [0, 0, 1, 0],
+                                [0, 1, 0, 0]]).astype(np.int32)
+        out = unsorted_batch_segment_sum_Job(input_blob, segment_ids)
+
+        # out [[6. 4.]
+        #      [7. 3.]
+        #      [8. 2.]]
+
+    """
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("UnsortedBatchSegmentSum_")
+        )
+        .Op("unsorted_batch_segment_sum")
+        .Input("data", [data])
+        .Input("segment_ids", [segment_ids])
+        .Output("out")
+        .Attr("num_segments", int(num_segments))
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("cast")
+@stable_api
+def cast(
+    x: oneflow._oneflow_internal.BlobDesc,
+    dtype: flow.dtype,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This op casts the input x to the given `dtype`.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): Input Blob
+        dtype (flow.dtype): Data type of the output
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def cast_Job(x: tp.Numpy.Placeholder((3, ), dtype=flow.float32)
+        )->tp.Numpy:
+            return flow.cast(x, dtype=flow.int32)
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        out = cast_Job(x)
+
+        # out.dtype = "int32"
+
+    """
+    if x.dtype == dtype:
+        return x
+    if name is None:
+        name = id_util.UniqueStr("Cast_")
+
+    return (
+        flow.user_op_builder(name)
+        .Op("cast")
+        .Input("in", [x])
+        .Output("out")
+        .Attr("dtype", dtype)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("math.equal")
+def equal(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Returns the truth value of :math:`{x}=={y}` element-wise.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob with int8 type.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def equal_Job(x: tp.Numpy.Placeholder((3, )),
+                      y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.equal(x, y)
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        y = np.array([1, 2, 1]).astype(np.float32)
+        out = equal_Job(x, y)
+
+        # out [1 1 0]
+
+    """
+    return build_broadcast_binary_op("broadcast_equal", x, y, name)
+
+
+@oneflow_export("math.not_equal")
+def not_equal(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Returns the truth value of :math:`{x}!={y}` element-wise.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob with int8 type.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def not_equal_Job(x: tp.Numpy.Placeholder((3, )),
+                          y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.not_equal(x, y)
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        y = np.array([1, 2, 1]).astype(np.float32)
+        out = not_equal_Job(x, y)
+
+        # out [0 0 1]
+
+    """
+    return build_broadcast_binary_op("broadcast_not_equal", x, y, name)
+
+
+@oneflow_export("math.less")
+def less(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Returns the truth value of :math:`x < y` element-wise.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob with int8 type.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def less_Job(x: tp.Numpy.Placeholder((3, )),
+                     y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.less(x, y)
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        y = np.array([1, 2, 4]).astype(np.float32)
+        out = less_Job(x, y)
+
+        # out [0 0 1]
+
+    """
+    return build_broadcast_binary_op("broadcast_less", x, y, name)
+
+
+@oneflow_export("math.less_equal")
+def less_equal(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Returns the truth value of :math:`x <= y` element-wise.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob with int8 type.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def less_equal_Job(x: tp.Numpy.Placeholder((3, )),
+                           y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.less_equal(x, y)
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        y = np.array([1, 1, 4]).astype(np.float32)
+        out = less_equal_Job(x, y)
+
+        # out [1 0 1]
+
+    """
+    return build_broadcast_binary_op("broadcast_less_equal", x, y, name)
+
+
+@oneflow_export("math.greater")
+def greater(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Returns the truth value of :math:`x > y` element-wise.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob with int8 type.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def greater_Job(x: tp.Numpy.Placeholder((3, )),
+                        y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.greater(x, y)
+
+        x = np.array([1, 1, 4]).astype(np.float32)
+        y = np.array([1, 2, 3]).astype(np.float32)
+        out = greater_Job(x, y)
+
+        # out [0 0 1]
+
+    """
+    return build_broadcast_binary_op("broadcast_greater", x, y, name)
+
+
+@oneflow_export("math.greater_equal")
+def greater_equal(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Returns the truth value of :math:`x >= y` element-wise.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob with int8 type.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def greater_equal_Job(x: tp.Numpy.Placeholder((3, )),
+                              y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.greater_equal(x, y)
+
+        x = np.array([1, 1, 4]).astype(np.float32)
+        y = np.array([1, 2, 3]).astype(np.float32)
+        out = greater_equal_Job(x, y)
+
+        # out [1 0 1]
+
+    """
+    return build_broadcast_binary_op("broadcast_greater_equal", x, y, name)
+
+
+@oneflow_export("math.logical_and")
+def logical_and(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Logical AND function.
+
+    Each element is calculated by:
+
+    .. math::
+        out = X \land Y
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob with int8 type.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def logical_and_Job(x: tp.Numpy.Placeholder((3, )),
+                            y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.logical_and(x, y)
+
+        x = np.array([1, 0, 1]).astype(np.float32)
+        y = np.array([0, 0, 1]).astype(np.float32)
+        out = logical_and_Job(x, y)
+
+        # out [0 0 1]
+
+    """
+    return build_broadcast_binary_op("broadcast_logical_and", x, y, name)
+
+
+@oneflow_export("math.minimum")
+def minimum(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Returns the min of x and y element-wise; this op supports broadcasting.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob. Must have the same type as x
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob with the same type as x.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def minimum_Job(x: tp.Numpy.Placeholder((3, )),
+                        y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.minimum(x, y)
+
+        x = np.array([2, 3, 4]).astype(np.float32)
+        y = np.array([4, 2, 1]).astype(np.float32)
+        out = minimum_Job(x, y)
+
+        # out [2. 2. 1.]
+
+    """
+    if x.shape == y.shape:
+        return (
+            flow.user_op_builder(name or id_util.UniqueStr("ElementWiseMinimum_"))
+            .Op("elementwise_minimum")
+            .Input("x", [x])
+            .Input("y", [y])
+            .Output("z")
+            .Build()
+            .InferAndTryRun()
+            .RemoteBlobList()[0]
+        )
+    else:
+        return build_broadcast_binary_op("broadcast_minimum", x, y, name)
+
+
+@oneflow_export("math.maximum")
+def maximum(
+    x: oneflow._oneflow_internal.BlobDesc,
+    y: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Returns the max of x and y element-wise; this op supports broadcasting.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        y (oneflow._oneflow_internal.BlobDesc): A Blob. Must have the same type as x
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob with the same type as x.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def maximum_Job(x: tp.Numpy.Placeholder((3, )),
+                        y: tp.Numpy.Placeholder((3, ))
+        )->tp.Numpy:
+            return flow.math.maximum(x, y)
+
+        x = np.array([2, 3, 4]).astype(np.float32)
+        y = np.array([4, 2, 1]).astype(np.float32)
+        out = maximum_Job(x, y)
+
+        # out [4. 3. 4.]
+
+    """
+    if x.shape == y.shape:
+        return (
+            flow.user_op_builder(name or id_util.UniqueStr("ElementWiseMaximum_"))
+            .Op("elementwise_maximum")
+            .Input("x", [x])
+            .Input("y", [y])
+            .Output("z")
+            .Build()
+            .InferAndTryRun()
+            .RemoteBlobList()[0]
+        )
+    else:
+        return build_broadcast_binary_op("broadcast_maximum", x, y, name)
+
+
+@oneflow_export("math.reduced_shape_elem_cnt")
+def elem_cnt(
+    input_blob: oneflow._oneflow_internal.BlobDesc,
+    axis: Optional[Sequence[int]] = None,
+    dtype: Optional[flow.dtype] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Computes the product of the sizes of input_blob's dimensions given by `axis`. By default, all the dimensions are included.
+
+    Args:
+        input_blob (oneflow._oneflow_internal.BlobDesc): Input Blob
+        axis (Optional[Sequence[int]], optional): The dimensions along which the op is performed. Defaults to None.
+        dtype (Optional[flow.dtype], optional): The data type. Defaults to None.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob
+
+    For example:
+
+    .. code-block:: python
+
+        # Example 1:
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def elem_cnt_Job(x: tp.Numpy.Placeholder((3, 4, 5))
+        )->tp.Numpy:
+            return flow.math.reduced_shape_elem_cnt(x, axis=[0, 1])
+
+        x = np.ones(shape=(3, 4, 5), dtype=np.float32)
+        out = elem_cnt_Job(x) # 3 x 4 = 12
+
+        # out [12]
+
+        # Example 2:
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def elem_cnt_Job(x: tp.Numpy.Placeholder((3, 4, 5))
+        )->tp.Numpy:
+            return flow.math.reduced_shape_elem_cnt(x)
+
+        x = np.ones(shape=(3, 4, 5), dtype=np.float32)
+        out = elem_cnt_Job(x) # 3 x 4 x 5 = 60
+
+        # out [60]
+
+    """
+    op_conf = op_conf_util.OperatorConf()
+    setattr(
+        op_conf,
+        "name",
+        name if name is not None else id_util.UniqueStr("ShapeElemCnt_"),
+    )
+    op_conf.shape_elem_cnt_conf.x = input_blob.unique_name
+    if axis is None:
+        op_conf.shape_elem_cnt_conf.exclude_axis_conf.SetInParent()
+    else:
+        assert isinstance(axis, (tuple, list))
+        op_conf.shape_elem_cnt_conf.include_axis_conf.axis.extend(axis)
+    if dtype is not None:
+        op_conf.shape_elem_cnt_conf.data_type = oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype(
+            dtype
+        )
+    op_conf.shape_elem_cnt_conf.y = "y"
+    interpret_util.Forward(op_conf)
+    out_lbi = logical_blob_id_util.LogicalBlobId()
+    out_lbi.op_name = op_conf.name
+    out_lbi.blob_name = "y"
+    return remote_blob_util.RemoteBlob(out_lbi)
+
+
+def _top_k_at_last_dim(
+    input: oneflow._oneflow_internal.BlobDesc,
+    k: int = 1,
+    sorted: bool = True,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    return (
+        flow.user_op_builder(name if name is not None else id_util.UniqueStr("TopK_"))
+        .Op("top_k")
+        .Input("in", [input])
+        .Output("out")
+        .Attr("k", k)
+        .Attr("sorted", sorted)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("math.top_k")
+def top_k(
+    input: oneflow._oneflow_internal.BlobDesc,
+    axis: int = -1,
+    k: int = 1,
+    sorted: bool = True,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Finds the indices of the k largest entries along the specified axis; unlike other frameworks, OneFlow only returns the indices.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): The input Blob
+        axis (int, optional): The dimension to be calculated. Defaults to the last dim (-1)
+        k (int, optional): Number of top elements to look for along the given dimension. Defaults to 1.
+        sorted (bool, optional): If true, the resulting k elements will be sorted by value in descending order. Defaults to True.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob(dtype=int32) containing the indices of the k largest elements.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def topk_Job(x: tp.Numpy.Placeholder((5, ))
+        )->tp.Numpy:
+            return flow.math.top_k(x, k=2)
+
+        x = np.array([1, 3, 8, 7, 2], dtype=np.float32)
+        out = topk_Job(x)
+
+        # out [2 3]
+
+    """
+    name = name if name is not None else id_util.UniqueStr("TopK_")
+    num_axes = len(input.shape)
+    axis = axis if axis >= 0 else axis + num_axes
+    assert 0 <= axis < num_axes, "axis out of range"
+    if axis == num_axes - 1:
+        return _top_k_at_last_dim(input, k, sorted, name)
+    else:
+        perm = get_perm_when_transpose_axis_to_last_dim(num_axes, axis)
+        x = flow.transpose(input, perm, False, True, name + "_transpose")
+        x = _top_k_at_last_dim(x, k, sorted, name)
+        return flow.transpose(
+            x, get_inversed_perm(perm), False, True, name + "_inverse_transpose"
+        )
+
+
+def _argmax_at_last_dim(
+    input: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    return (
+        flow.user_op_builder(name if name is not None else id_util.UniqueStr("ArgMax_"))
+        .Op("argmax")
+        .Input("in", [input])
+        .Output("out")
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("math.argmax")
+def argmax(
+    input: oneflow._oneflow_internal.BlobDesc,
+    axis: int = -1,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """The op computes the index of the largest value of a Blob along the specified axis.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): Input Blob
+        axis (int, optional): The dimension to be calculated. Defaults to the last dim (-1)
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob(dtype=int32) containing the index of the largest value of `input`
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def argmax_Job(x: tp.Numpy.Placeholder((2, 5))
+        )->tp.Numpy:
+            return flow.math.argmax(x)
+
+        x = np.array([[1, 3, 8, 7, 2],
+                      [1, 9, 4, 3, 2]], dtype=np.float32)
+
+        out = argmax_Job(x)
+
+        # out [2 1]
+
+    """
+    name = name if name is not None else id_util.UniqueStr("ArgMax_")
+    num_axes = len(input.shape)
+    axis = axis if axis >= 0 else axis + num_axes
+    assert 0 <= axis < num_axes, "axis out of range"
+    if axis == num_axes - 1:
+        return _argmax_at_last_dim(input, name)
+    else:
+        perm = get_perm_when_transpose_axis_to_last_dim(num_axes, axis)
+        x = flow.transpose(input, perm, False, True, name + "_transpose")
+        x = _argmax_at_last_dim(x, name)
+        x = flow.expand_dims(x, -1, name + "_expand_dims")
+        x = flow.transpose(
+            x, get_inversed_perm(perm), False, True, name + "_inverse_transpose"
+        )
+        x = flow.squeeze(x, [axis], name + "_squeeze")
+        return x
+
+
+@oneflow_export("math.broadcast_to_compatible_with", "broadcast_to_compatible_with")
+def broadcast_to_compatible_with(
+    x: oneflow._oneflow_internal.BlobDesc,
+    compatible: Sequence[oneflow._oneflow_internal.BlobDesc],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Returns a `Blob` broadcast to a shape compatible with the shapes of the given `compatible` Blobs.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A `Blob`
+        compatible (Sequence[oneflow._oneflow_internal.BlobDesc]): A sequence of Blobs whose shapes are broadcast against x
+        name (Optional[str], optional): This operator's name. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` with the broadcast (largest compatible) shape
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def broadcast_to_compatible_with_Job(x: tp.Numpy.Placeholder((4, 1, 1))
+        )->tp.Numpy:
+            blob_a = flow.constant(value=1, dtype=flow.float32, shape=(1, 2, 1))
+            blob_b = flow.constant(value=1, dtype=flow.float32, shape=(1, 1, 3))
+
+            return flow.math.broadcast_to_compatible_with(x, [blob_a, blob_b])
+
+        x = np.ones(shape=(4, 1, 1), dtype=np.float32)
+
+        out = broadcast_to_compatible_with_Job(x)
+
+        # out.shape (4, 2, 3)
+
+    """
+    assert isinstance(compatible, (list, tuple))
+    if name is None:
+        name = id_util.UniqueStr("BroadcastToCompatibleWith_")
+
+    op_conf = op_conf_util.OperatorConf()
+    setattr(op_conf, "name", name)
+    setattr(op_conf.broadcast_to_compatible_with_conf, "x", x.unique_name)
+    setattr(op_conf.broadcast_to_compatible_with_conf, "y", "y")
+    op_conf.broadcast_to_compatible_with_conf.compatible.extend(
+        [cp.unique_name for cp in compatible]
+    )
+    interpret_util.Forward(op_conf)
+
+    ret_lbi = logical_blob_id_util.LogicalBlobId()
+    ret_lbi.op_name = op_conf.name
+    ret_lbi.blob_name = "y"
+    return remote_blob_util.RemoteBlob(ret_lbi)
+
+
+@oneflow_export(
+    "math.clip_by_value", "clip_by_value", "clip_by_scalar", "clip", "clamp"
+)
+@stable_api
+def clip_by_value(
+    values: oneflow._oneflow_internal.BlobDesc,
+    min_value: Optional[Union[int, float]] = None,
+    max_value: Optional[Union[int, float]] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This op clips Blob values to a specified min value and max value.
+
+    The equation is:
+
+    .. math::
+        out = MIN(MAX(x, min), max)
+
+    Args:
+        values (oneflow._oneflow_internal.BlobDesc): Input Blob
+        min_value (Optional[Union[int, float]], optional): The minimum value to clip by. Defaults to None.
+        max_value (Optional[Union[int, float]], optional): The maximum value to clip by. Defaults to None.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Raises:
+        ValueError: if min_value and max_value are both None
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A clipped Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def clip_by_value_Job(x: tp.Numpy.Placeholder((4, ))
+        )->tp.Numpy:
+            return flow.math.clip_by_value(x, min_value=-1, max_value=5)
+
+        x = np.array([-2, 1, 4, 7], dtype=np.float32)
+
+        out = clip_by_value_Job(x)
+
+        # out [-1. 1. 4. 5.]
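+
+    Example 2 (a minimal sketch of the max-only path, which builds the
+    ``clip_by_scalar_max`` op; ``clip_max_Job`` is an illustrative name):
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def clip_max_Job(x: tp.Numpy.Placeholder((4, ))
+        )->tp.Numpy:
+            # only the upper bound is clipped; values below it pass through
+            return flow.math.clip_by_value(x, max_value=5)
+
+        x = np.array([-2, 1, 4, 7], dtype=np.float32)
+        out = clip_max_Job(x)
+
+        # out [-2. 1. 4. 5.]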
+ + """ + if name is None: + name = id_util.UniqueStr("ClipByValue_") + + is_floating = values.dtype in [ + flow.float32, + flow.float16, + flow.float64, + ] + + if min_value is not None: + floating_min_value = float(min_value) if is_floating else 0.0 + integral_min_value = 0 if is_floating else int(min_value) + if max_value is not None: + floating_max_value = float(max_value) if is_floating else 0.0 + integral_max_value = 0 if is_floating else int(max_value) + + if min_value is not None and max_value is not None: + op_builder = ( + flow.user_op_builder(name) + .Op("clip_by_scalar") + .Attr("floating_min", floating_min_value) + .Attr("integral_min", integral_min_value) + .Attr("floating_max", floating_max_value) + .Attr("integral_max", integral_max_value) + ) + elif min_value is not None: + op_builder = ( + flow.user_op_builder(name) + .Op("clip_by_scalar_min") + .Attr("floating_min", floating_min_value) + .Attr("integral_min", integral_min_value) + ) + elif max_value is not None: + op_builder = ( + flow.user_op_builder(name) + .Op("clip_by_scalar_max") + .Attr("floating_max", floating_max_value) + .Attr("integral_max", integral_max_value) + ) + else: + raise ValueError("min_value and max_value cannot be None at the same time") + + return ( + op_builder.Input("x", [values]) + .Output("y") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("math.l2_normalize") +def l2_normalize( + input: oneflow._oneflow_internal.BlobDesc, + axis: Optional[int] = None, + epsilon: float = 1e-12, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Use L2 norm to normalizes along dimension `axis` + + The equation is: + + .. math:: + out = \frac{x}{\sqrt{\Sigma{x^2}+\epsilon}} + + Args: + input (oneflow._oneflow_internal.BlobDesc): Input Blob + axis (Optional[int], optional): The axis on which to apply L2 normalization. Defaults to None. + epsilon (float, optional): The epsilon value is used to avoid division by zero. Defaults to 1e-12. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The normalized Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + @flow.global_function() + def l2_normalize_Job(x: tp.Numpy.Placeholder((4, )) + )->tp.Numpy: + return flow.math.l2_normalize(x, axis=0) + + x = np.array([1, 2, 3, 4], dtype=np.float32) + + out = l2_normalize_Job(x) + + # out [0.18257418 0.36514837 0.5477226 0.73029673] + + """ + if axis < 0: + axis += len(input.shape) + assert axis >= 0 and axis < len(input.shape) + y, square_x_sum = ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("L2Normalize_") + ) + .Op("l2_normalize") + .Input("x", [input]) + .Output("y") + .Output("square_x_sum") + .Attr("axis", int(axis)) + .Attr("epsilon", float(epsilon)) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + return y + + +@oneflow_export("math.squared_difference") +def squared_difference( + x: Union[int, float, oneflow._oneflow_internal.BlobDesc], + y: Union[int, float, oneflow._oneflow_internal.BlobDesc], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This op computes :math:`(x - y)^2` element-wise. 
+
+    Args:
+        x (Union[int, float, oneflow._oneflow_internal.BlobDesc]): A Blob
+        y (Union[int, float, oneflow._oneflow_internal.BlobDesc]): A Blob with the same type as x
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def squared_difference_Job(x: tp.Numpy.Placeholder((4, )),
+                                   y: tp.Numpy.Placeholder((4, ))
+        )->tp.Numpy:
+            return flow.math.squared_difference(x, y)
+
+        x = np.array([1, 2, 3, 4], dtype=np.float32)
+        y = np.array([2, 4, 6, 8], dtype=np.float32)
+
+        out = squared_difference_Job(x, y)
+
+        # out [ 1.  4.  9. 16.]
+
+    """
+    name_subtract, name_square = None, None
+    if name is not None:
+        name_subtract = name + "_subtract"
+        name_square = name + "_square"
+    return flow.math.square(flow.math.subtract(x, y, name_subtract), name_square)
+
+
+@oneflow_export("math.gelu_grad")
+def gelu_grad(
+    x: oneflow._oneflow_internal.BlobDesc,
+    dy: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("GeluGrad_")
+        )
+        .Op("gelu_grad")
+        .Input("x", [x])
+        .Input("dy", [dy])
+        .Output("dx")
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("math.tril", "nn.tril")
+def tril(
+    x: oneflow._oneflow_internal.BlobDesc,
+    diagonal: int = 0,
+    fill_value: Union[int, float] = 0,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Computes the lower triangle of a matrix.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): Input Blob.
+        diagonal (int): Diagonal offset: when diagonal > 0, the diagonal moves up;
+            otherwise, it moves down.
+        fill_value(Union[int, float]): The value filled into the upper triangle.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Attention:
+        The dimension of x must be greater than or equal to 2.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The lower triangle blob of input.
+
+    For example:
+
+    .. code-block:: python
code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def tril_Job(x: tp.Numpy.Placeholder((4, 4))
+        )->tp.Numpy:
+            return flow.math.tril(x, 0)
+
+        x = np.array([[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]],
+                     dtype=np.float32)
+        out = tril_Job(x)
+
+        # output [[1, 0, 0, 0],
+        #         [1, 2, 0, 0],
+        #         [1, 2, 3, 0],
+        #         [1, 2, 3, 4]]
+
+    """
+    if isinstance(fill_value, float):
+        is_floating_fill_value = True
+        floating_fill_value = float(fill_value)
+        integer_fill_value = int(0)
+    else:
+        is_floating_fill_value = False
+        floating_fill_value = float(0)
+        integer_fill_value = int(fill_value)
+    return (
+        flow.user_op_builder(name if name is not None else id_util.UniqueStr("Tril_"))
+        .Op("tril")
+        .Input("in", [x])
+        .Attr("diagonal", diagonal)
+        .Attr("is_floating_fill_value", is_floating_fill_value)
+        .Attr("floating_fill_value", floating_fill_value)
+        .Attr("integer_fill_value", integer_fill_value)
+        .Output("out")
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("math.fused_scale_tril", "nn.fused_scale_tril")
+def fused_scale_tril(
+    x: oneflow._oneflow_internal.BlobDesc,
+    diagonal: int = 0,
+    fill_value: Union[int, float] = 0,
+    scale: Union[int, float] = 1,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Fused version of `tril` combined with an elementwise multiplication by `scale`."""
+
+    if isinstance(fill_value, float):
+        is_floating_fill_value = True
+        floating_fill_value = float(fill_value)
+        integer_fill_value = int(0)
+    else:
+        is_floating_fill_value = False
+        floating_fill_value = float(0)
+        integer_fill_value = int(fill_value)
+
+    if isinstance(scale, float):
+        is_floating_scale_value = True
+        floating_scale_value = float(scale)
+        integer_scale_value = int(1)
+    else:
+        is_floating_scale_value = False
+        floating_scale_value = float(1)
+        integer_scale_value = int(scale)
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("FusedScaleTril_")
+        )
+        .Op("fused_scale_tril")
+        .Input("in", [x])
+        .Attr("diagonal", diagonal)
+        .Attr("is_floating_fill_value", is_floating_fill_value)
+        .Attr("floating_fill_value", floating_fill_value)
+        .Attr("integer_fill_value", integer_fill_value)
+        .Attr("is_floating_scale_value", is_floating_scale_value)
+        .Attr("floating_scale_value", floating_scale_value)
+        .Attr("integer_scale_value", integer_scale_value)
+        .Output("out")
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export(
+    "math.fused_scale_tril_softmax_dropout", "nn.fused_scale_tril_softmax_dropout"
+)
+def fused_scale_tril_softmax_dropout(
+    x: oneflow._oneflow_internal.BlobDesc,
+    diagonal: int = 0,
+    fill_value: Union[int, float] = 0,
+    scale: Union[int, float] = 1,
+    rate: float = 0.0,
+    noise_shape: Optional[oneflow._oneflow_internal.BlobDesc] = None,
+    seed: Optional[int] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Fused version of `tril` (with `fill_value` and `scale`), followed by softmax and dropout with probability `rate`."""
+    if name is None:
+        name = id_util.UniqueStr("FusedTrilScaleSoftmaxMaskScale_")
+    mask = flow.nn.random_mask_like(
+        x, rate, seed, noise_shape, "%s-dropout_random_mask_like" % name
+    )
+
+    y, softmax_y = (
+        flow.user_op_builder(name)
+        .Op("fused_tril_scale_softmax_mask_scale")
+        .Input("x", [x])
+        .Input("mask", [mask])
+        .Attr("diagonal", diagonal)
+        .Attr("tril_fill_value", float(fill_value))
+        .Attr("tril_scale_value", float(scale))
+        .Attr("mask_scale_value", float(1.0 / (1.0 - rate)))
+        .Output("y")
+        .Output("softmax_y")
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()
+    )
+
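+    # softmax_y is a secondary output of the fused kernel (the softmax result
+    # before masking); only the final output y is returned to the caller.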
    return y
+
+
+@oneflow_export("math.polyval")
+def polyval(
+    coeffs: Union[List, Tuple],
+    x: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Computes the elementwise value of a polynomial.
+
+    Args:
+        coeffs (Union[List, Tuple]): The coefficients of the polynomial, ordered from the highest degree to the constant term.
+        x (oneflow._oneflow_internal.BlobDesc): A Blob.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob, has the same data type as x.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def polyval_Job(
+            x: tp.Numpy.Placeholder((3,), dtype=flow.float32)
+        ) -> tp.Numpy:
+            coeffs = [1.0, 3.0, -2.0]
+            return flow.math.polyval(coeffs, x)
+
+        x = np.array([1.0, 2.0, 3.0]).astype(np.float32)
+        out = polyval_Job(x)
+
+        # output [ 2.  8. 16.]
+
+    """
+    if name is None:
+        name = id_util.UniqueStr("Polyval_")
+    if not isinstance(coeffs, (list, tuple)):
+        raise ValueError(
+            "Argument coeffs must be a list or tuple, found {}".format(type(coeffs))
+        )
+    if len(coeffs) < 1:
+        return flow.zeros_like(x, name=name)
+    p = flow.zeros_like(x, name=name)
+    # Horner's method: p = (...((0 * x + c[0]) * x + c[1]) * x + ...) + c[-1]
+    for c in coeffs:
+        p = flow.math.add(c, flow.math.multiply(p, x))
+    return p
+
+
+@oneflow_export("math.in_top_k", "in_top_k")
+def in_top_k(
+    targets: oneflow._oneflow_internal.BlobDesc,
+    predictions: oneflow._oneflow_internal.BlobDesc,
+    k: Optional[int],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Says whether the targets are in the top K predictions.
+
+    Args:
+        targets (oneflow._oneflow_internal.BlobDesc): A Blob of type int32 or int64.
+        predictions (oneflow._oneflow_internal.BlobDesc): A Blob of type float32.
+        k (Optional[int], optional): Number of top elements to look at for computing precision.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A Blob of type bool, indicating whether each target is among the top `k` predictions.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def intopk_Job(
+            targets: tp.Numpy.Placeholder((2,), dtype=flow.int32),
+            predictions: tp.Numpy.Placeholder((2, 4), dtype=flow.float32),
+        ) -> tp.Numpy:
+            return flow.math.in_top_k(targets, predictions, 1)
+
+        targets = np.array([3, 1], dtype=np.int32)
+        predictions = np.array([[0.0, 1.0, 2.0, 3.0], [3.0, 2.0, 1.0, 0.0],], dtype=np.float32)
+        out = intopk_Job(targets, predictions)
+
+        # out [1 0]
+
+    """
+    return (
+        flow.user_op_builder(name if name is not None else id_util.UniqueStr("InTopK_"))
+        .Op("in_top_k")
+        .Input("targets", [targets])
+        .Input("predictions", [predictions])
+        .Attr("k", k)
+        .Output("out")
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("range")
+@stable_api
+def range(
+    start, limit=None, delta=1, dtype=None, name="range"
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator is similar to Python's `range`; the difference is that `oneflow.compatible.single_client.range` generates
+    a Blob.
+
+    Args:
+        start ([type]): The start of the interval. Its type should be `int`.
+        limit ([type], optional): The limit of the interval. Its type should be `int`.
+        delta (int, optional): The numerical spacing between elements. Defaults to 1.
+        dtype ([type], optional): The output's data type. Currently we only support `oneflow.compatible.single_client.int64`. Defaults to None.
+        name (str, optional): The name for the operation. Defaults to "range".
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    Example 1:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def range_job()->tp.Numpy:
+            with flow.scope.placement("cpu", "0:0"):
+                out = flow.range(10, dtype=flow.int64)
+
+            return out
+
+        out = range_job()
+
+        # out [0 1 2 3 4 5 6 7 8 9]
+
+    Example 2:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def range_job()->tp.Numpy:
+            with flow.scope.placement("cpu", "0:0"):
+                out = flow.range(1, 10, 3, dtype=flow.int64)
+
+            return out
+
+        out = range_job()
+
+        # out [1 4 7]
+
+    """
+    # Ensure the dtype is not None
+    if dtype is None:
+        dtype = flow.int64
+
+    if limit is None:
+        # If limit is None, we start from zero.
+        start, limit = 0, start
+
+    assert limit > start, "Limit should be larger than start"
+    assert delta <= limit - start, "Delta is illegal"
+
+    # Ensure start, limit and delta are ints; a dtype hierarchy will be added in a later version.
+    assert type(start) == int, "Param `start`'s type should be int"
+    assert type(limit) == int, "Param `limit`'s type should be int"
+    assert type(delta) == int, "Param `delta`'s type should be int"
+
+    # Build User OP
+    return (
+        flow.user_op_builder(name if name is not None else id_util.UniqueStr("Range_"))
+        .Op("range")
+        .Attr("start", start)
+        .Attr("delta", delta)
+        .Attr("limit", limit)
+        .Attr("dtype", dtype)
+        .Output("out")
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
diff --git a/oneflow/compatible_single_client_python/ops/math_unary_elementwise_ops.py b/oneflow/compatible_single_client_python/ops/math_unary_elementwise_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..b44d299c12066f663aadb1cf4ad03640e623cdb0
--- /dev/null
+++ b/oneflow/compatible_single_client_python/ops/math_unary_elementwise_ops.py
@@ -0,0 +1,1424 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+""" +from __future__ import absolute_import + +import os +import traceback +from typing import Optional + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.ops import ( + user_op_builder as user_op_builder, +) +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal + + +def build_unary_elemwise_math_op(math_op, x, name=None): + if name is None: + name = id_util.UniqueStr(math_op + "_") + return ( + flow.user_op_builder(name) + .Op(math_op) + .Input("x", [x]) + .Output("y") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("math.abs") +def abs( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator returns the absolute value of Blob. + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def abs_Job(x: tp.Numpy.Placeholder((3,)) + ) -> tp.Numpy: + return flow.math.abs(x) + + + x = np.array([-1, 2, -3]).astype(np.float32) + out = abs_Job(x) + + # out [1. 2. 3.] + + """ + return build_unary_elemwise_math_op("abs", x, name) + + +@oneflow_export("math.acos") +def acos( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the acos value of Blob. + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def acos_Job(x: tp.Numpy.Placeholder((3,)) + ) -> tp.Numpy: + return flow.math.acos(x) + + + x = np.array([0.5, 0.6, 0.7]).astype(np.float32) + out = acos_Job(x) + + # out [1.0471976 0.9272952 0.7953989] + # We take the first value as an example + # (arccos(0.5) * pi) / 180 = 1.0471976 + + """ + return build_unary_elemwise_math_op("acos", x, name) + + +@oneflow_export("math.acosh") +def acosh( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the inverse hyperbolic cosine value of Blob. + + The equation is: + + .. math:: + + out = log(x+(x^2-1)^\frac{1}{2}) + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob, the range is [1, inf] + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. 
code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def acosh_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.acosh(x)
+
+
+        x = np.array([2, 3, 4]).astype(np.float32)
+        out = acosh_Job(x)
+
+        # out [1.316958  1.7627473 2.063437 ]
+
+    """
+    return build_unary_elemwise_math_op("acosh", x, name)
+
+
+@oneflow_export("math.asin")
+def asin(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator computes the arcsin value of Blob.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def asin_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.asin(x)
+
+
+        x = np.array([0.5, 0.6, 0.7]).astype(np.float32)
+        out = asin_Job(x)
+
+        # out [0.5235988  0.64350116 0.7753975 ]
+        # We take the first value as an example
+        # (arcsin(0.5) * pi) / 180 = 0.5235988
+
+    """
+    return build_unary_elemwise_math_op("asin", x, name)
+
+
+@oneflow_export("math.asinh")
+def asinh(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator computes the inverse hyperbolic sine value of Blob.
+
+    The equation is:
+
+    .. math::
+
+        out = log(x+(x^2+1)^\frac{1}{2})
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def asinh_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.asinh(x)
+
+
+        x = np.array([2, 3, 4]).astype(np.float32)
+        out = asinh_Job(x)
+
+        # out [1.4436355 1.8184464 2.0947125]
+
+    """
+    return build_unary_elemwise_math_op("asinh", x, name)
+
+
+@oneflow_export("math.atan")
+def atan(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator computes the arctan value of Blob.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def atan_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.atan(x)
+
+
+        x = np.array([0.5, 0.6, 0.7]).astype(np.float32)
+        out = atan_Job(x)
+
+        # out [0.4636476  0.5404195  0.61072594]
+        # We take the first value as an example
+        # (arctan(0.5) * pi) / 180 = 0.4636476
+
+    """
+    return build_unary_elemwise_math_op("atan", x, name)
+
+
+@oneflow_export("math.atanh")
+def atanh(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator computes the inverse hyperbolic tangent value of Blob.
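+
+    The input should lie in the open interval (-1, 1).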
+
+    The equation is:
+
+    .. math::
+
+        out = \frac{1}{2}*log(\frac{1+x}{1-x})
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def atanh_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.atanh(x)
+
+
+        x = np.array([0.5, 0.6, 0.7]).astype(np.float32)
+        out = atanh_Job(x)
+
+        # out [0.54930615 0.6931472  0.8673005 ]
+
+    """
+    return build_unary_elemwise_math_op("atanh", x, name)
+
+
+@oneflow_export("math.ceil")
+def ceil(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the ceiling value of Blob.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def ceil_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.ceil(x)
+
+
+        x = np.array([1.3, 1.5, 2.7]).astype(np.float32)
+        out = ceil_Job(x)
+
+        # out [2. 2. 3.]
+
+    """
+    return build_unary_elemwise_math_op("ceil", x, name)
+
+
+@oneflow_export("math.cos")
+def cos(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the cosine value of Blob.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def cos_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.cos(x)
+
+
+        x = np.array([1/3*np.pi, 0.25*np.pi, 1.25*np.pi]).astype(np.float32)
+        out = cos_Job(x)
+
+        # out [ 0.49999997  0.70710677 -0.7071068 ]
+
+    """
+    return build_unary_elemwise_math_op("cos", x, name)
+
+
+@oneflow_export("math.cosh")
+def cosh(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator computes the hyperbolic cosine value of Blob.
+
+    The equation is:
+
+    .. math::
+
+        out = \frac{e^x+e^{-x}}{2}
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    ..
code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def cosh_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.cosh(x)
+
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        out = cosh_Job(x)
+
+        # out [ 1.5430806  3.7621958 10.067662 ]
+
+    """
+    return build_unary_elemwise_math_op("cosh", x, name)
+
+
+@oneflow_export("math.erf")
+def erf(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator computes the Gauss error function value of Blob.
+
+    The equation is:
+
+    .. math::
+
+        out = \frac{2}{\sqrt{\pi}}*\int_{0}^{x}e^{-z^2}\mathrm{d}{z}
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def erf_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.erf(x)
+
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        out = erf_Job(x)
+
+        # out [0.8427008 0.9953223 0.9999779]
+
+    """
+    return build_unary_elemwise_math_op("erf", x, name)
+
+
+@oneflow_export("math.erfc")
+def erfc(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes :math:`1-erf(x)`; for more details of the `erf` function,
+    please refer to `math.erf`.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def erfc_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.erfc(x)
+
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        out = erfc_Job(x)
+
+        # out [1.5729921e-01 4.6777353e-03 2.2090495e-05]
+
+    """
+    return build_unary_elemwise_math_op("erfc", x, name)
+
+
+@oneflow_export("math.exp")
+def exp(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the exponential of Blob.
+
+    The equation is:
+
+    .. math::
+
+        out = e^x
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def exp_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.exp(x)
+
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        out = exp_Job(x)
+
+        # out [ 2.7182817  7.389056  20.085537 ]
+
+    """
+    return build_unary_elemwise_math_op("exp", x, name)
+
+
+@oneflow_export("math.expm1")
+def expm1(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes :math:`y=e^x-1`.
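+
+    For small `x` this is typically more accurate than computing :math:`e^x-1` directly.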
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def expm1_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.expm1(x)
+
+
+        x = np.array([1, 2, 3]).astype(np.float32)
+        out = expm1_Job(x)
+
+        # out [ 1.7182819  6.389056  19.085537 ]
+
+    """
+    return build_unary_elemwise_math_op("expm1", x, name)
+
+
+@oneflow_export("math.floor")
+def floor(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the largest integer not greater than input Blob.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def floor_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.floor(x)
+
+
+        x = np.array([1.3, 1.5, 2.7]).astype(np.float32)
+        out = floor_Job(x)
+
+        # out [1. 1. 2.]
+
+    """
+    return build_unary_elemwise_math_op("floor", x, name)
+
+
+@oneflow_export("math.lgamma")
+def lgamma(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator computes the natural logarithm of :math:`Gamma(x)`.
+
+    The equation is:
+
+    .. math::
+
+        out = \log(\int_{0}^{\infty}t^{x-1}*e^{-t}\mathrm{d}{t})
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def lgamma_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.lgamma(x)
+
+
+        x = np.array([1.3, 1.5, 2.7]).astype(np.float32)
+        out = lgamma_Job(x)
+
+        # out [-0.1081748  -0.12078223  0.4348206 ]
+
+    """
+    return build_unary_elemwise_math_op("lgamma", x, name)
+
+
+@oneflow_export("math.log")
+def log(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the natural logarithm of the input Blob.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    ..
code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def log_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.log(x)
+
+
+        x = np.array([1.3, 1.5, 2.7]).astype(np.float32)
+        out = log_Job(x)
+
+        # out [0.26236424 0.40546513 0.9932518 ]
+
+    """
+    return build_unary_elemwise_math_op("log", x, name)
+
+
+@oneflow_export("math.log1p")
+def log1p(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes :math:`log(1+x)` of the input Blob.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def log1p_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.log1p(x)
+
+
+        x = np.array([1.3, 1.5, 2.7]).astype(np.float32)
+        out = log1p_Job(x)
+
+        # out [0.8329091  0.91629076 1.3083328 ]
+
+    """
+    return build_unary_elemwise_math_op("log1p", x, name)
+
+
+@oneflow_export("math.log_sigmoid")
+def log_sigmoid(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator computes the log sigmoid value of input Blob.
+
+    The equation is:
+
+    .. math::
+
+        out = log(\frac{1}{1+e^{-x}})
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def log_sigmoid_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.log_sigmoid(x)
+
+
+        x = np.array([1.3, 1.5, 2.7]).astype(np.float32)
+        out = log_sigmoid_Job(x)
+
+        # out [-0.24100842 -0.20141333 -0.0650436 ]
+
+    """
+    return build_unary_elemwise_math_op("log_sigmoid", x, name)
+
+
+@oneflow_export("math.negative")
+def negative(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the negative value of Blob.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def negative_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.negative(x)
+
+
+        x = np.array([1.3, 1.5, 2.7]).astype(np.float32)
+        out = negative_Job(x)
+
+        # out [-1.3 -1.5 -2.7]
+
+    """
+    return build_unary_elemwise_math_op("negative", x, name)
+
+
+@oneflow_export("math.reciprocal")
+def reciprocal(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator computes the reciprocal of x.
+
+    The equation is:
+
+    ..
math:: + + out = \frac{1}{x} + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reciprocal_Job(x: tp.Numpy.Placeholder((3,)) + ) -> tp.Numpy: + return flow.math.reciprocal(x) + + + x = np.array([1, 2, 4]).astype(np.float32) + out = reciprocal_Job(x) + + # out [1. 0.5 0.25] + + """ + return build_unary_elemwise_math_op("reciprocal", x, name) + + +@oneflow_export("math.reciprocal_no_nan") +def reciprocal_no_nan( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + """This operator computes the safe reciprocal of x. If x is zero, the reciprocal will + be also set to zero. + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reciprocal_no_nan_Job(x: tp.Numpy.Placeholder((3,)) + ) -> tp.Numpy: + return flow.math.reciprocal_no_nan(x) + + + x = np.array([0, 2, 4]).astype(np.float32) + out = reciprocal_no_nan_Job(x) + + # out [0. 0.5 0.25] + + """ + return build_unary_elemwise_math_op("reciprocal_no_nan", x, name) + + +@oneflow_export("math.rint") +def rint( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + """This operator computes the closest integer to Blob. + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def rint_Job(x: tp.Numpy.Placeholder((3,)) + ) -> tp.Numpy: + return flow.math.rint(x) + + + x = np.array([1.49999, 1.500001, 2.7]).astype(np.float32) + out = rint_Job(x) + + # out [1. 2. 3.] + + """ + return build_unary_elemwise_math_op("rint", x, name) + + +@oneflow_export("math.round") +def round( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + """This operator rounds the value of Blob to the nearest integer. + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def round_Job(x: tp.Numpy.Placeholder((3,)) + ) -> tp.Numpy: + return flow.math.round(x) + + + x = np.array([1.49999, 1.500001, 2.7]).astype(np.float32) + out = round_Job(x) + + # out [1. 2. 3.] 
+ + """ + return build_unary_elemwise_math_op("round", x, name) + + +@oneflow_export("math.rsqrt") +def rsqrt( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the reciprocal of square root value of Blob. + + The equation is: + + .. math:: + + out=\frac{1}{\sqrt{x}} + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def rsqrt_Job(x: tp.Numpy.Placeholder((3,)) + ) -> tp.Numpy: + return flow.math.rsqrt(x) + + + x = np.array([4, 16, 25]).astype(np.float32) + out = rsqrt_Job(x) + + # out [0.5 0.25 0.2 ] + + """ + return build_unary_elemwise_math_op("rsqrt", x, name) + + +@oneflow_export("math.sigmoid_v2") +def sigmoid_v2( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the sigmoid value of Blob. + + The equation is: + + .. math:: + + out=\frac{1}{1+e^{-x}} + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def sigmoidv2_Job(x: tp.Numpy.Placeholder((3,)) + ) -> tp.Numpy: + return flow.math.sigmoid_v2(x) + + x = np.array([-0.5, 0, 0.5]).astype(np.float32) + out = sigmoidv2_Job(x) + + # out [0.37754068 0.5 0.62245935] + + """ + return build_unary_elemwise_math_op("sigmoid_v2", x, name) + + +@oneflow_export("math.sign") +def sign( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + """This operator returns the sign of Blob. + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def sign_Job(x: tp.Numpy.Placeholder((3,)) + ) -> tp.Numpy: + return flow.math.sign(x) + + + x = np.array([-2, 0, 2]).astype(np.float32) + out = sign_Job(x) + + # out [-1. 0. 1.] + + """ + return build_unary_elemwise_math_op("sign", x, name) + + +@oneflow_export("math.sin") +def sin( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + """This operator computes the sin value of Blob. + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. 
code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def sin_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.sin(x)
+
+
+        x = np.array([-1/6*np.pi, 0, 1/6*np.pi]).astype(np.float32)
+        out = sin_Job(x)
+
+        # out [-0.5  0.   0.5]
+
+    """
+    return build_unary_elemwise_math_op("sin", x, name)
+
+
+@oneflow_export("math.sinh")
+def sinh(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator computes the hyperbolic sine value of Blob.
+
+    The equation is:
+
+    .. math::
+
+        out = \frac{e^x-e^{-x}}{2}
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def sinh_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.sinh(x)
+
+
+        x = np.array([-1, 0, 1]).astype(np.float32)
+        out = sinh_Job(x)
+
+        # out [-1.1752012  0.         1.1752012]
+
+    """
+    return build_unary_elemwise_math_op("sinh", x, name)
+
+
+@oneflow_export("math.softplus")
+def softplus(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the softplus value of Blob.
+
+    The equation is:
+
+    .. math::
+
+        out = log(e^x+1)
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def softplus_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.softplus(x)
+
+
+        x = np.array([-1, 0, 1]).astype(np.float32)
+        out = softplus_Job(x)
+
+        # out [0.31326166 0.6931472  1.3132616 ]
+
+    """
+    return build_unary_elemwise_math_op("softplus", x, name)
+
+
+@oneflow_export("math.sqrt")
+def sqrt(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the square root of Blob.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def sqrt_Job(x: tp.Numpy.Placeholder((3,))
+        ) -> tp.Numpy:
+            return flow.math.sqrt(x)
+
+
+        x = np.array([4, 16, 25]).astype(np.float32)
+        out = sqrt_Job(x)
+
+        # out [2. 4. 5.]
+
+    """
+    return build_unary_elemwise_math_op("sqrt", x, name)
+
+
+@oneflow_export("math.square")
+def square(
+    x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the square value of Blob.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        name (Optional[str], optional): The name for the operation.
Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def square_Job(x: tp.Numpy.Placeholder((3,)) + ) -> tp.Numpy: + return flow.math.square(x) + + + x = np.array([2, 3, 4]).astype(np.float32) + out = square_Job(x) + + # out [ 4. 9. 16.] + + """ + return build_unary_elemwise_math_op("square", x, name) + + +@oneflow_export("math.tan") +def tan( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + """This operator computes the tan value of Blob. + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def tan_Job(x: tp.Numpy.Placeholder((3,)) + ) -> tp.Numpy: + return flow.math.tan(x) + + + x = np.array([-1/4*np.pi, 0, 1/4*np.pi]).astype(np.float32) + out = tan_Job(x) + + # out [-1. 0. 1.] + + """ + return build_unary_elemwise_math_op("tan", x, name) + + +@oneflow_export("math.tanh") +def tanh( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the hyperbolic tangent value of Blob. + + The equation is: + + .. math:: + + out = \frac{e^x-e^{-x}}{e^x+e^{-x}} + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def tanh_Job(x: tp.Numpy.Placeholder((3,)) + ) -> tp.Numpy: + return flow.math.tanh(x) + + + x = np.array([-1, 0, 1]).astype(np.float32) + out = tanh_Job(x) + + # out [-0.7615942 0. 0.7615942] + + """ + return build_unary_elemwise_math_op("tanh", x, name) + + +@oneflow_export("math.tanh_v2") +def tanh_v2( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the hyperbolic tangent value of Blob. + + The equation is: + + .. math:: + + out = \frac{e^x-e^{-x}}{e^x+e^{-x}} + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + """ + + print( + """WARNING: flow.math.tanh_v2 has been deprecated. Please replace it by flow.math.tanh. + """ + ) + print(traceback.format_stack()[-2]) + return flow.math.tanh(x, name) diff --git a/oneflow/compatible_single_client_python/ops/nn_ops.py b/oneflow/compatible_single_client_python/ops/nn_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..68657ddb48ca341f45032d140fc1a74b1be7283b --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/nn_ops.py @@ -0,0 +1,4594 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import collections +import os +import sys +import random +from typing import Union, Optional, Sequence, Tuple, List +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import module as module_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import ( + distribute as distribute_util, +) +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + stable_api, +) +import oneflow._oneflow_internal + +IntPair = Tuple[int, int] + + +def calc_same_padding(input_size, filter_size, dilation_rate, stride): + effective_filter_size = (filter_size - 1) * dilation_rate + 1 + output_size = (input_size + stride - 1) // stride + padding_needed = max( + 0, int((output_size - 1) * stride + effective_filter_size - input_size) + ) + return padding_needed + + +def get_dhw_offset(channel_pos): + if channel_pos == "channels_first": + return 2 + else: + return 1 + + +def check_conv_cudnn_padding_support( + input_size, pad, filter_size, dilation_rate, stride, is_dynamic +): + assert len(pad) == 2 + if pad[0] == pad[1]: + return True + elif is_dynamic or pad[0] < pad[1] or pad[0] - pad[1] > 1: + return False + else: + effective_filter_size = (filter_size - 1) * dilation_rate + 1 + cudnn_output_size = ( + input_size + 2 * pad[0] - effective_filter_size + stride + ) // stride + output_size = ( + input_size + pad[0] + pad[1] - effective_filter_size + stride + ) // stride + return cudnn_output_size == output_size + + +def check_ndim_conv_cudnn_padding_support( + inputs_shape, + ndim_pads_list, + kernel_sizes, + dilations, + strides, + dhw_offset, + is_dynamic, +): + ndims = len(ndim_pads_list) + for i in range(ndims): + cudnn_support = check_conv_cudnn_padding_support( + inputs_shape[dhw_offset + i], + ndim_pads_list[i], + kernel_sizes[i], + dilations[i], + strides[i], + is_dynamic, + ) + if not cudnn_support: + return False + return True + + +def get_ndim_pads_list(padding, dhw_offset, ndims): + pads_list = [] + for i in range(len(padding)): + pad = padding[i] + if isinstance(pad, int): + pad = [pad, pad] + elif isinstance(pad, (list, tuple)): + assert len(pad) == 2 + pad = [pad[0], pad[1]] + else: + raise ValueError("padding must be list tuple or int") + if i in range(dhw_offset, dhw_offset + ndims): + pads_list.append(pad) + else: + assert pad == [0, 0] + return pads_list + + +def calc_ndim_same_padding( + input_shape, padding, kernel_sizes, dilations, strides, dhw_offset +): + ndim_padding_needed = [] + ndims = 
len(kernel_sizes) + for i in range(ndims): + ndim_padding_needed.append( + calc_same_padding( + input_shape[dhw_offset + i], kernel_sizes[i], dilations[i], strides[i], + ) + ) + pads_small = [padding_needed // 2 for padding_needed in ndim_padding_needed] + pads_large = [ndim_padding_needed[i] - pads_small[i] for i in range(ndims)] + if padding.upper() == "SAME_LOWER": + return [[pads_large[i], pads_small[i]] for i in range(ndims)] + elif padding.upper() == "SAME_UPPER": + return [[pads_small[i], pads_large[i]] for i in range(ndims)] + else: + raise NotImplementedError + + +def calc_conv_padding(inputs, padding, data_format, kernel_sizes, dilations, strides): + ndims = len(inputs.shape) - 2 + assert len(kernel_sizes) == ndims + assert len(dilations) == ndims + assert len(strides) == ndims + is_dynamic = inputs.is_dynamic + channel_pos = "channels_first" if data_format.startswith("NC") else "channels_last" + dhw_offset = get_dhw_offset(channel_pos) + ndim_pads_list = [] + if isinstance(padding, str): + padding = "SAME_LOWER" if padding.upper() == "SAME" else padding + assert padding.upper() in ["VALID", "SAME_LOWER", "SAME_UPPER"] + + if padding.upper() == "VALID": + return_pads_list = [[0, 0]] * ndims + return inputs, return_pads_list + else: + if is_dynamic: + return_pads_list = [[0, 0]] * ndims + inputs = flow.same_padding( + inputs, + padding.lower(), + data_format=data_format, + kernel_size=kernel_sizes, + strides=strides, + dilation_rate=dilations, + ) + return inputs, return_pads_list + else: + ndim_pads_list = calc_ndim_same_padding( + inputs.shape, padding, kernel_sizes, dilations, strides, dhw_offset + ) + assert len(ndim_pads_list) == ndims + elif isinstance(padding, (list, tuple)): + assert len(padding) == ndims + 2 + ndim_pads_list = get_ndim_pads_list(padding, dhw_offset, ndims) + assert len(ndim_pads_list) == ndims + else: + raise ValueError("padding must be str or a list.") + + cudnn_padding_support = check_ndim_conv_cudnn_padding_support( + inputs.shape, + ndim_pads_list, + kernel_sizes, + dilations, + strides, + dhw_offset, + is_dynamic, + ) + + if cudnn_padding_support: + return inputs, ndim_pads_list + else: + pad_op_list = [[0, 0]] * (ndims + 2) + for i in range(ndims): + pad_op_list[dhw_offset + i] = ndim_pads_list[i] + inputs = flow.pad(inputs, paddings=pad_op_list) + return_pads_list = [[0, 0]] * ndims + return inputs, return_pads_list + + +class ConvUtil(object): + @classmethod + def split(cls, x, axis, split_num): + split_len = x.shape[axis] // split_num + result_list = [] + slice_begin = [0] * len(x.shape) + slice_size = [-1] * len(x.shape) + slice_size[axis] = split_len + for i in range(split_num): + slice_begin[axis] = i * split_len + result = flow.slice(x, slice_begin, slice_size) + result_list.append(result) + return result_list + + +def conv_op( + conv_type, + inputs, + filters, + bias, + padding_before, + channel_pos, + kernel_size_list, + strides, + dilations, + groups, + name, +): + op_builder = ( + flow.user_op_builder(name if name is not None else id_util.UniqueStr("Conv_")) + .Op(conv_type) + .Input("in", [inputs]) + .Input("weight", [filters]) + .Output("out") + .Attr("filters", filters.shape[0]) + .Attr("padding_before", padding_before) + .Attr("data_format", channel_pos) + .Attr("kernel_size", kernel_size_list) + .Attr("strides", strides) + .Attr("dilation_rate", dilations) + .Attr("groups", groups) + ) + if bias is not None: + op_builder = op_builder.Input("bias", [bias]) + return op_builder.Build().InferAndTryRun().RemoteBlobList()[0] + + 
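+# Worked example of the "SAME" padding arithmetic implemented above, e.g. for
+# one spatial dimension with input_size=32, filter_size=3, dilation_rate=1,
+# stride=1:
+#   effective_filter_size = (3 - 1) * 1 + 1 = 3
+#   output_size = (32 + 1 - 1) // 1 = 32
+#   padding_needed = max(0, (32 - 1) * 1 + 3 - 32) = 2
+# so calc_same_padding(32, 3, 1, 1) == 2, which calc_ndim_same_padding then
+# splits into [1, 1] (one element of padding on each side of that dimension).
+
+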
+@oneflow_export("nn.conv1d") +def conv1d( + input: oneflow._oneflow_internal.BlobDesc, + filters: oneflow._oneflow_internal.BlobDesc, + strides: Union[int, Tuple[int]], + padding: Union[str, Tuple[IntPair, IntPair, IntPair]], + data_format: str = "NCW", + dilations: Optional[Union[int, Tuple[int]]] = None, + groups: int = 1, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""1D convolution layer. + + Args: + input (oneflow._oneflow_internal.BlobDesc): A 3D input `Blob`. [batch_num, channel, width] + filters (oneflow._oneflow_internal.BlobDesc): A `Blob` with the same type as `input` and has the shape [out_channels, in_channels//groups, filter_width] for `NCW`, or [out_channels, filter_width, in_channels//groups] for `NWC` + strides (Union[int, Tuple[int]]): An int or list of `ints` that has length `1`. The stride of the sliding window for each dimension of `input`. + padding (Union[str, Tuple[IntPair, IntPair, IntPair]]): padding: `string` `"SAME"` or `"SAME_LOWER"` or `"SAME_UPPER"` or `"VALID" or Tuple[IntPair, IntPair, IntPair]` indicating the type of padding algorithm to use, or a list indicating the explicit paddings at the start and end of each dimension. + data_format (str, optional): `"NWC" or "NCW"`. Defaults to `"NCW"`. + dilations (Optional[Union[int, Tuple[int]]], optional): An int or list of `ints` that has length `1`. The dilation factor for each dimension of `input`. Defaults to None. + groups (int, optional): int value greater than 0. Defaults to 1. + name (Optional[str], optional): This operator's name. Defaults to None. + + Raises: + ValueError: strides must be an int or a list. + ValueError: padding must be "SAME" or "SAME_LOWER" or "SAME_UPPER" or "VALID" or Tuple[IntPair, IntPair, IntPair, IntPair]. + ValueError: data_format must be "NWC" or "NCW". + ValueError: dilations must be an int or a list. + ValueError: invalid data_format. + ValueError: data_format NWC not support groups > 1 + ValueError: invalid data_format. + + Returns: + oneflow._oneflow_internal.BlobDesc: A `Blob` with the same type as `input` and the same outer batch shape. + + Note: + + This api is more flexible, if you're new to OneFlow, it's more recommend to use `oneflow.compatible.single_client.layers.conv1d` + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + def conv1d(input, filters, kernel_size, strides, padding, name): + input_shape = input.shape + weight_initializer = flow.truncated_normal(0.1) + weight_regularizer = flow.regularizers.l2(0.0005) + weight_shape = (filters, + input_shape[1], + kernel_size) + + weight = flow.get_variable( + name + "-weight", + shape=weight_shape, + initializer=weight_initializer, + regularizer=weight_regularizer, + ) + return flow.nn.conv1d(input, weight, strides, padding, name=name) + + + @flow.global_function() + def conv1d_Job(x: tp.Numpy.Placeholder((1, 64, 32)) + ) -> tp.Numpy: + conv = conv1d(x, + filters=32, + kernel_size=3, + strides=1, + padding='SAME', + name="Convlayer") + return conv + + + x = np.random.randn(1, 64, 32).astype(np.float32) + out = conv1d_Job(x) + + # out.shape (1, 32, 32) + + """ + assert len(input.shape) == 3 + assert len(filters.shape) == 3 + + if isinstance(strides, (list, tuple)): + assert len(strides) == 1, ValueError( + "strides length must be 1 when passed as a list." 
+ ) + elif isinstance(strides, int): + strides = [strides] + else: + raise ValueError("strides must be an int or a list.") + + if data_format.upper() != "NCW" and data_format.upper() != "NWC": + raise ValueError('data_format must be "NCW" or "NWC".') + + channel_pos = "channels_first" if data_format == "NCW" else "channels_last" + + if dilations is None: + dilations = [1] + else: + if isinstance(dilations, (list, tuple)): + assert len(dilations) == 1, ValueError( + "dilations length must be 1 when passed as a list." + ) + elif isinstance(dilations, int): + dilations = [dilations] + else: + raise ValueError("dilations must be an int or a list.") + + if channel_pos == "channels_first": + kernel_size_list = filters.shape[2:3] + in_channel_axis = 1 + filter_out_axis = 0 + filter_in_axis = 1 + elif channel_pos == "channels_last": + kernel_size_list = filters.shape[-2:-1] + in_channel_axis = 2 + filter_out_axis = 0 + filter_in_axis = 2 + if groups > 1: + raise ValueError("data_format NWC not support groups > 1") + else: + raise ValueError("invalid data_format") + assert isinstance(kernel_size_list, tuple) + assert isinstance(groups, int) + assert groups > 0 + + assert groups <= filters.shape[filter_out_axis] + assert filters.shape[filter_out_axis] % groups == 0 + assert groups <= input.shape[in_channel_axis] + assert input.shape[in_channel_axis] % groups == 0 + assert filters.shape[filter_in_axis] == input.shape[in_channel_axis] // groups + + inputs, pads_list = calc_conv_padding( + input, padding, data_format.upper(), kernel_size_list, dilations, strides, + ) + assert len(pads_list) == len(inputs.shape) - 2 + padding_before = [pad[0] for pad in pads_list] + + if ( + groups > 1 + and flow.current_scope().device_parallel_desc_symbol.device_tag == "cpu" + ): + in_split_list = ConvUtil.split(inputs, axis=in_channel_axis, split_num=groups) + filter_split_list = ConvUtil.split( + filters, axis=filter_out_axis, split_num=groups + ) + out_list = [] + name = name if name is not None else id_util.UniqueStr("Conv1d_") + for i in range(len(in_split_list)): + out_list.append( + conv_op( + "conv1d", + in_split_list[i], + filter_split_list[i], + None, + padding_before, + channel_pos, + kernel_size_list, + strides, + dilations, + groups=1, + name=name + str(i), + ) + ) + return flow.concat(out_list, axis=in_channel_axis) + else: + return conv_op( + "conv1d", + inputs, + filters, + None, + padding_before, + channel_pos, + kernel_size_list, + strides, + dilations, + groups, + name, + ) + + +@oneflow_export("nn.conv2d") +def conv2d( + input: oneflow._oneflow_internal.BlobDesc, + filters: oneflow._oneflow_internal.BlobDesc, + strides: Union[int, IntPair], + padding: Union[str, Tuple[IntPair, IntPair, IntPair, IntPair]], + bias: Optional[oneflow._oneflow_internal.BlobDesc] = None, + data_format: str = "NCHW", + dilations: Optional[Union[int, IntPair]] = None, + groups: int = 1, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""2D convolution layer. + + Args: + input (oneflow._oneflow_internal.BlobDesc): A 4D input `Blob`. [batch_num, channel, height, width] + filters (oneflow._oneflow_internal.BlobDesc): A `Blob` with the same type as `input` and has the shape `[out_channels, in_channels//groups, filter_height, filter_width] for NCHW, or [out_channels, filter_height, filter_width, in_channels//groups] for NHWC` + strides (Union[int, IntPair]): An int or list of `ints` that has length `2`. The stride of the sliding window for each dimension of `input`. 
+@oneflow_export("nn.conv2d")
+def conv2d(
+    input: oneflow._oneflow_internal.BlobDesc,
+    filters: oneflow._oneflow_internal.BlobDesc,
+    strides: Union[int, IntPair],
+    padding: Union[str, Tuple[IntPair, IntPair, IntPair, IntPair]],
+    bias: Optional[oneflow._oneflow_internal.BlobDesc] = None,
+    data_format: str = "NCHW",
+    dilations: Optional[Union[int, IntPair]] = None,
+    groups: int = 1,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""2D convolution layer.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): A 4D input `Blob` of shape [batch_num, channel, height, width].
+        filters (oneflow._oneflow_internal.BlobDesc): A `Blob` with the same type as `input`, of shape [out_channels, in_channels//groups, filter_height, filter_width] for `NCHW`, or [out_channels, filter_height, filter_width, in_channels//groups] for `NHWC`.
+        strides (Union[int, IntPair]): An int or list of `ints` that has length `2`. The stride of the sliding window for each dimension of `input`.
+        padding (Union[str, Tuple[IntPair, IntPair, IntPair, IntPair]]): `"SAME"`, `"SAME_LOWER"`, `"SAME_UPPER"` or `"VALID"` indicating the type of padding algorithm to use, or a `Tuple[IntPair, IntPair, IntPair, IntPair]` indicating the explicit paddings at the start and end of each dimension.
+        bias (Optional[oneflow._oneflow_internal.BlobDesc], optional): A 1D `Blob` with size matching out_channels, added to the convolution output, or None. Defaults to None.
+        data_format (str, optional): `"NHWC"` or `"NCHW"`. Defaults to `"NCHW"`.
+        dilations (Optional[Union[int, IntPair]], optional): An int or list of `ints` that has length `2`. The dilation factor for each dimension of `input`. Defaults to None.
+        groups (int, optional): An int value greater than 0. Defaults to 1.
+        name (Optional[str], optional): This operator's name. Defaults to None.
+
+    Raises:
+        ValueError: strides must be an int or a list.
+        ValueError: padding must be "SAME", "SAME_LOWER", "SAME_UPPER", "VALID" or Tuple[IntPair, IntPair, IntPair, IntPair].
+        ValueError: data_format must be "NHWC" or "NCHW".
+        ValueError: dilations must be an int or a list.
+        ValueError: gpu data_format NHWC does not support groups > 1.
+        ValueError: invalid data_format.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` with the same type as `input` and the same outer batch shape.
+
+    Note:
+
+        This API is more flexible; if you're new to OneFlow, it is recommended to use `oneflow.compatible.single_client.layers.conv2d` instead.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        def conv2d(input, filters, kernel_size, strides, padding, name):
+            input_shape = input.shape
+            weight_initializer = flow.truncated_normal(0.1)
+            weight_regularizer = flow.regularizers.l2(0.0005)
+            weight_shape = (filters,
+                            input_shape[1],
+                            kernel_size[0],
+                            kernel_size[1])
+
+            weight = flow.get_variable(
+                name + "-weight",
+                shape=weight_shape,
+                initializer=weight_initializer,
+                regularizer=weight_regularizer,
+            )
+            return flow.nn.conv2d(input, weight, strides, padding, name=name)
+
+
+        @flow.global_function()
+        def conv2d_Job(x: tp.Numpy.Placeholder((1, 64, 32, 32))
+        ) -> tp.Numpy:
+            conv = conv2d(x,
+                          filters=128,
+                          kernel_size=[3, 3],
+                          strides=2,
+                          padding='SAME',
+                          name="Convlayer")
+            return conv
+
+
+        x = np.random.randn(1, 64, 32, 32).astype(np.float32)
+        out = conv2d_Job(x)
+
+        # out.shape (1, 128, 16, 16)
+
+    """
+    assert len(input.shape) == 4
+    assert len(filters.shape) == 4
+
+    if bias is not None:
+        assert len(bias.shape) == 1
+
+    if isinstance(strides, (list, tuple)):
+        assert len(strides) == 2, ValueError(
+            "strides length must be 2 when passed as a list."
+        )
+    elif isinstance(strides, int):
+        strides = [strides, strides]
+    else:
+        raise ValueError("strides must be an int or a list.")
+
+    if data_format.upper() != "NCHW" and data_format.upper() != "NHWC":
+        raise ValueError('data_format must be "NHWC" or "NCHW".')
+
+    channel_pos = "channels_first" if data_format == "NCHW" else "channels_last"
+
+    if dilations is None:
+        dilations = [1, 1]
+    else:
+        if isinstance(dilations, (list, tuple)):
+            assert len(dilations) == 2, ValueError(
+                "dilations length must be 2 when passed as a list."
+            )
+        elif isinstance(dilations, int):
+            dilations = [dilations, dilations]
+        else:
+            raise ValueError("dilations must be an int or a list.")
+
+    assert isinstance(groups, int)
+    assert groups > 0
+
+    if data_format.upper() == "NCHW":
+        kernel_size_list = filters.shape[2:4]
+        in_channel_axis = 1
+        filter_out_axis = 0
+        filter_in_axis = 1
+    elif data_format.upper() == "NHWC":
+        kernel_size_list = filters.shape[-3:-1]
+        in_channel_axis = 3
+        filter_out_axis = 0
+        filter_in_axis = 3
+        if (
+            groups > 1
+            and flow.current_scope().device_parallel_desc_symbol.device_tag == "gpu"
+        ):
+            raise ValueError("gpu data_format NHWC not support groups > 1")
+    else:
+        raise ValueError('data_format must be "NHWC" or "NCHW".')
+
+    assert isinstance(kernel_size_list, tuple)
+    inputs, pads_list = calc_conv_padding(
+        input, padding, data_format.upper(), kernel_size_list, dilations, strides,
+    )
+    assert len(pads_list) == len(inputs.shape) - 2
+    padding_before = [pad[0] for pad in pads_list]
+
+    assert groups <= filters.shape[filter_out_axis]
+    assert filters.shape[filter_out_axis] % groups == 0
+    assert groups <= inputs.shape[in_channel_axis]
+    assert inputs.shape[in_channel_axis] % groups == 0
+    assert filters.shape[filter_in_axis] == inputs.shape[in_channel_axis] // groups
+    if bias is not None:
+        assert bias.shape[filter_out_axis] == filters.shape[filter_out_axis]
+    if (
+        groups > 1
+        and flow.current_scope().device_parallel_desc_symbol.device_tag == "cpu"
+    ):
+        in_split_list = ConvUtil.split(inputs, axis=in_channel_axis, split_num=groups)
+        filter_split_list = ConvUtil.split(
+            filters, axis=filter_out_axis, split_num=groups
+        )
+        bias_split_list = (
+            ConvUtil.split(bias, axis=filter_out_axis, split_num=groups)
+            if bias is not None
+            else [None for _ in range(groups)]
+        )
+        out_list = []
+        name = name if name is not None else id_util.UniqueStr("Conv2d_")
+        for i in range(len(in_split_list)):
+            out_list.append(
+                conv_op(
+                    "conv2d",
+                    in_split_list[i],
+                    filter_split_list[i],
+                    bias_split_list[i],
+                    padding_before,
+                    channel_pos,
+                    kernel_size_list,
+                    strides,
+                    dilations,
+                    groups=1,
+                    name=name + str(i),
+                )
+            )
+        return flow.concat(out_list, axis=in_channel_axis)
+    else:
+        return conv_op(
+            "conv2d",
+            inputs,
+            filters,
+            bias,
+            padding_before,
+            channel_pos,
+            kernel_size_list,
+            strides,
+            dilations,
+            groups,
+            name,
+        )
+
+
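+# Editor's note (illustration added during editing): with the tuple form of
+# `padding`, each pair gives the (begin, end) pads of one dimension in the
+# layout of `input`; for NCHW the batch and channel pairs must stay (0, 0).
+# A hypothetical call padding height and width by one pixel on each side:
+#
+#     flow.nn.conv2d(x, w, strides=1,
+#                    padding=((0, 0), (0, 0), (1, 1), (1, 1)))
+#     # pads H and W by 1 on each side, leaving N and C unpadded
+
+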
+@oneflow_export("nn.conv3d")
+def conv3d(
+    input: oneflow._oneflow_internal.BlobDesc,
+    filters: oneflow._oneflow_internal.BlobDesc,
+    strides: Union[int, Sequence[int]],
+    padding: Union[str, Tuple[IntPair, IntPair, IntPair, IntPair, IntPair]],
+    data_format: str = "NCDHW",
+    dilations: Optional[Union[int, Sequence[int]]] = None,
+    groups: int = 1,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""3D convolution layer.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): A 5D input `Blob` of shape [batch_num, channel, depth, height, width].
+        filters (oneflow._oneflow_internal.BlobDesc): A `Blob` with the same type as `input`, of shape [out_channels, in_channels//groups, filter_depth, filter_height, filter_width] for `NCDHW`, or [out_channels, filter_depth, filter_height, filter_width, in_channels//groups] for `NDHWC`.
+        strides (Union[int, Sequence[int]]): An `int` or `list of ints` that has length `3`. The stride of the sliding window for each dimension of `input`.
+        padding (Union[str, Tuple[IntPair, IntPair, IntPair, IntPair, IntPair]]): `"SAME"`, `"SAME_LOWER"`, `"SAME_UPPER"` or `"VALID"` indicating the type of padding algorithm to use, or a `Tuple[IntPair, IntPair, IntPair, IntPair, IntPair]` indicating the explicit paddings at the start and end of each dimension.
+        data_format (str, optional): `"NDHWC"` or `"NCDHW"`. Defaults to `"NCDHW"`.
+        dilations (Optional[Union[int, Sequence[int]]], optional): An int or list of `ints` that has length `3`. The dilation factor for each dimension of `input`. Defaults to None.
+        groups (int, optional): An int value greater than 0. Defaults to 1.
+        name (Optional[str], optional): This operator's name. Defaults to None.
+
+    Raises:
+        ValueError: strides must be an int or a list.
+        ValueError: padding must be "SAME", "SAME_LOWER", "SAME_UPPER", "VALID" or Tuple[IntPair, IntPair, IntPair, IntPair, IntPair].
+        ValueError: data_format must be "NDHWC" or "NCDHW".
+        ValueError: dilations must be an int or a list.
+        ValueError: data_format NDHWC does not support groups > 1.
+        ValueError: invalid data_format.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` with the same type as `input` and the same outer batch shape.
+
+    Note:
+
+        This API is more flexible; if you're new to OneFlow, it is recommended to use `oneflow.compatible.single_client.layers.conv3d` instead.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        def conv3d(input, filters, kernel_size, strides, padding, name):
+            input_shape = input.shape
+            weight_initializer = flow.truncated_normal(0.1)
+            weight_regularizer = flow.regularizers.l2(0.0005)
+            weight_shape = (filters,
+                            input_shape[1],
+                            kernel_size[0],
+                            kernel_size[1],
+                            kernel_size[2])
+
+            weight = flow.get_variable(
+                name + "-weight",
+                shape=weight_shape,
+                initializer=weight_initializer,
+                regularizer=weight_regularizer,
+            )
+            return flow.nn.conv3d(input, weight, strides, padding, name=name)
+
+
+        @flow.global_function()
+        def conv3d_Job(x: tp.Numpy.Placeholder((1, 64, 10, 16, 16))
+        ) -> tp.Numpy:
+            conv = conv3d(x,
+                          filters=128,
+                          kernel_size=[3, 3, 3],
+                          strides=1,
+                          padding='SAME',
+                          name="Convlayer")
+            return conv
+
+
+        x = np.random.randn(1, 64, 10, 16, 16).astype(np.float32)
+        out = conv3d_Job(x)
+
+        # out.shape (1, 128, 10, 16, 16)
+
+    """
+
+    need_transpose = 0
+    if data_format.upper() == "NDHWC":  # NDHWC is not supported before cudnn 8.0
+        need_transpose = 1
+        data_format = "NCDHW"
+    if need_transpose:
+        input = flow.transpose(input, perm=[0, 4, 1, 2, 3])
+        filters = flow.transpose(filters, perm=[0, 4, 1, 2, 3])
+        # e.g. padding [0, 1, 1, 1, 0] in NDHWC order becomes [0, 0, 1, 1, 1] in NCDHW order
+        if isinstance(padding, (list, tuple)):
+            padding = list(padding)
+            padding[1], padding[4] = padding[4], padding[1]
+
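+    # Editor's note (commentary added during editing, not from the original
+    # source): the block above is an internal workaround, not a user-visible
+    # format conversion. Because NDHWC convolution is unavailable here (cuDNN
+    # < 8.0), input and filters are transposed into NCDHW, the convolution runs
+    # in NCDHW, and the result is transposed back via `need_transpose` at the
+    # end of this function. The padding swap only exchanges entries 1 and 4, so
+    # it is exact for the common symmetric case, e.g.
+    # [0, p, p, p, 0] -> [0, 0, p, p, p].
+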
+    assert len(input.shape) == 5
+    assert len(filters.shape) == 5
+
+    if isinstance(strides, (list, tuple)):
+        assert len(strides) == 3, ValueError(
+            "strides length must be 3 when passed as a list."
+        )
+    elif isinstance(strides, int):
+        strides = [strides, strides, strides]
+    else:
+        raise ValueError("strides must be an int or a list.")
+
+    if data_format.upper() != "NCDHW" and data_format.upper() != "NDHWC":
+        raise ValueError('data_format must be "NDHWC" or "NCDHW".')
+
+    channel_pos = "channels_first" if data_format == "NCDHW" else "channels_last"
+
+    if dilations is None:
+        dilations = [1, 1, 1]
+    else:
+        if isinstance(dilations, (list, tuple)):
+            assert len(dilations) == 3, ValueError(
+                "dilations length must be 3 when passed as a list."
+            )
+        elif isinstance(dilations, int):
+            dilations = [dilations, dilations, dilations]
+        else:
+            raise ValueError("dilations must be an int or a list.")
+
+    if channel_pos == "channels_first":
+        kernel_size_list = filters.shape[2:5]
+        in_channel_axis = 1
+        filter_out_axis = 0
+        filter_in_axis = 1
+    elif channel_pos == "channels_last":
+        kernel_size_list = filters.shape[-4:-1]
+        in_channel_axis = 4
+        filter_out_axis = 0
+        filter_in_axis = 4
+        if groups > 1:
+            raise ValueError("data_format NDHWC not support groups > 1")
+    else:
+        raise ValueError("invalid data_format")
+    assert isinstance(kernel_size_list, tuple)
+    assert isinstance(groups, int)
+    assert groups > 0
+
+    assert groups <= filters.shape[filter_out_axis]
+    assert filters.shape[filter_out_axis] % groups == 0
+    assert groups <= input.shape[in_channel_axis]
+    assert input.shape[in_channel_axis] % groups == 0
+    assert filters.shape[filter_in_axis] == input.shape[in_channel_axis] // groups
+
+    inputs, pads_list = calc_conv_padding(
+        input, padding, data_format.upper(), kernel_size_list, dilations, strides,
+    )
+    assert len(pads_list) == len(inputs.shape) - 2
+    padding_before = [pad[0] for pad in pads_list]
+
+    if (
+        groups > 1
+        and flow.current_scope().device_parallel_desc_symbol.device_tag == "cpu"
+    ):
+        in_split_list = ConvUtil.split(inputs, axis=in_channel_axis, split_num=groups)
+        filter_split_list = ConvUtil.split(
+            filters, axis=filter_out_axis, split_num=groups
+        )
+        out_list = []
+        name = name if name is not None else id_util.UniqueStr("Conv3d_")
+        for i in range(len(in_split_list)):
+            out_list.append(
+                conv_op(
+                    "conv3d",
+                    in_split_list[i],
+                    filter_split_list[i],
+                    None,
+                    padding_before,
+                    channel_pos,
+                    kernel_size_list,
+                    strides,
+                    dilations,
+                    groups=1,
+                    name=name + str(i),
+                )
+            )
+        output = flow.concat(out_list, axis=in_channel_axis)
+    else:
+        output = conv_op(
+            "conv3d",
+            inputs,
+            filters,
+            None,
+            padding_before,
+            channel_pos,
+            kernel_size_list,
+            strides,
+            dilations,
+            groups,
+            name,
+        )
+
+    if need_transpose:
+        output = flow.transpose(output, perm=[0, 2, 3, 4, 1])
+    return output
+
+
+@oneflow_export("nn.moments")
+def moments(
+    x: oneflow._oneflow_internal.BlobDesc,
+    axes: List[int],
+    keepdims: Optional[bool] = False,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the mean and variance value of the input Blob.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob.
+        axes (List[int]): A list of ints. Axes along which to compute the mean and variance.
+        keepdims (bool, optional): Whether to keep the same dimensionality as the input x. Defaults to False.
+        name (str, optional): The operator's name. Defaults to None.
+
+    Returns:
+        remote_blob: Two `Blob`s, mean and variance.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+        from typing import Tuple
+
+
+        @flow.global_function()
+        def moments_Job(x: tp.Numpy.Placeholder((5,))
+        ) -> Tuple[tp.Numpy, tp.Numpy]:
+            return flow.nn.moments(x, axes=[0])
+
+
+        x = np.array([1, 2, 3, 4, 5]).astype(np.float32)
+        mean, variance = moments_Job(x)
+
+        # mean: [3.]
+        # variance: [2.]
+
+    """
+    assert isinstance(axes, list)
+    if name is None:
+        name = id_util.UniqueStr("Moments_")
+    with flow.scope.namespace(name):
+        return (
+            flow.math.reduce_mean(x, axis=axes, keepdims=keepdims),
+            flow.math.reduce_variance(x, axis=axes, keepdims=keepdims),
+        )
+
+
+@oneflow_export("nn.GroupNorm")
+@stable_api
+def group_normalization(
+    x: oneflow._oneflow_internal.BlobDesc,
+    num_groups: int = 32,
+    eps: float = 1e-05,
+    affine: bool = True,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Applies Group Normalization over an N-D (N >= 3) input.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): Input tensor with shape (N, C, *), where C means the number of channels.
+        num_groups (int): The number of groups to separate the channels into. Defaults to 32.
+        eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
+        affine (bool): A boolean value that when set to True, this module has learnable affine parameters,
+            initialized the same way as done for batch normalization. Default: True.
+        name (Optional[str], optional): Name of this op.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The normalized input tensor.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def group_norm_Job(x: tp.Numpy.Placeholder((4, 4, 32, 32))
+        ) -> tp.Numpy:
+            group_norm = flow.nn.GroupNorm(
+                x,
+                num_groups=2,
+                eps=1e-5,
+                affine=True,
+            )
+            return group_norm
+
+        x = np.random.random(size=(4, 4, 32, 32)).astype(np.float32)
+        out = group_norm_Job(x)
+
+    """
+    assert len(x.shape) >= 3
+    assert (
+        x.shape[1] % num_groups == 0
+    ), "The channel should be divisible by num_groups."
+
+    if name is None:
+        name = id_util.UniqueStr("GroupNorm_")
+
+    channel = x.shape[1]
+    assert channel % num_groups == 0
+    group_size = channel // num_groups
+    orig_shape = x.shape
+    reshape_to_1d = flow.reshape(x, shape=[orig_shape[0], num_groups, -1])
+    (mean, variance) = flow.nn.moments(reshape_to_1d, [2], keepdims=True)
+    normalized = (reshape_to_1d - mean) / flow.math.sqrt(variance + eps)
+    normalized = flow.reshape(normalized, shape=[orig_shape[0], channel, -1])
+    if affine == True:
+        gamma = flow.get_variable(
+            name + "_gamma",
+            shape=(1, channel, 1),
+            dtype=x.dtype,
+            initializer=flow.ones_initializer(),
+            trainable=True,
+        )
+        beta = flow.get_variable(
+            name + "_beta",
+            shape=(1, channel, 1),
+            dtype=x.dtype,
+            initializer=flow.zeros_initializer(),
+            trainable=True,
+        )
+        normalized = gamma * normalized + beta
+    reshape_back = flow.reshape_like(normalized, like=x)
+
+    return reshape_back
+
+
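+# Editor's note (illustration added during editing): a shape walk-through of the
+# reshape trick in group_normalization above, assuming a hypothetical input of
+# shape (2, 8, 4, 4) with num_groups=4:
+#
+#     x: (2, 8, 4, 4)  --reshape-->  (2, 4, 32)   # (N, num_groups, C//num_groups * H * W)
+#     moments over axis 2 (keepdims) -> mean/variance of shape (2, 4, 1)
+#     normalize, reshape to (2, 8, 16), apply per-channel gamma/beta of shape (1, 8, 1)
+#     reshape_like back to (2, 8, 4, 4)
+
+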
+@oneflow_export("nn.InstanceNorm1d")
+@stable_api
+def instance_normalization1d(
+    x: oneflow._oneflow_internal.BlobDesc,
+    eps: float = 1e-05,
+    affine: bool = True,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Applies Instance Normalization over a 3D input.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): 3D input tensor with NCL data layout.
+        eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
+        affine (bool): A boolean value that when set to True, this module has learnable affine parameters,
+            initialized the same way as done for batch normalization. Default: True.
+        name (Optional[str], optional): Name of this op.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The normalized input tensor.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def instance_norm_Job(x: tp.Numpy.Placeholder((4, 2, 32))
+        ) -> tp.Numpy:
+            instance_norm = flow.nn.InstanceNorm1d(
+                x,
+                eps=1e-5,
+                affine=True,
+            )
+            return instance_norm
+
+        x = np.random.random(size=(4, 2, 32)).astype(np.float32)
+        out = instance_norm_Job(x)
+
+    """
+    assert len(x.shape) == 3
+
+    if name is None:
+        name = id_util.UniqueStr("InstanceNorm1D_")
+
+    channel = x.shape[1]
+    (mean, variance) = flow.nn.moments(x, [2], keepdims=True)
+    normalized = (x - mean) / flow.math.sqrt(variance + eps)
+    if affine == True:
+        gamma = flow.get_variable(
+            name + "_gamma",
+            shape=(1, channel, 1),
+            dtype=x.dtype,
+            initializer=flow.ones_initializer(),
+            trainable=True,
+        )
+        beta = flow.get_variable(
+            name + "_beta",
+            shape=(1, channel, 1),
+            dtype=x.dtype,
+            initializer=flow.zeros_initializer(),
+            trainable=True,
+        )
+        return gamma * normalized + beta
+    else:
+        return normalized
+
+
+@oneflow_export("nn.InstanceNorm2d")
+@stable_api
+def instance_normalization2d(
+    x: oneflow._oneflow_internal.BlobDesc,
+    eps: float = 1e-05,
+    affine: bool = True,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Applies Instance Normalization over a 4D input.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): 4D input tensor with NCHW data layout.
+        eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
+        affine (bool): A boolean value that when set to True, this module has learnable affine parameters,
+            initialized the same way as done for batch normalization. Default: True.
+        name (Optional[str], optional): Name of this op.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The normalized input tensor.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def instance_norm_Job(x: tp.Numpy.Placeholder((4, 2, 32, 32))
+        ) -> tp.Numpy:
+            instance_norm = flow.nn.InstanceNorm2d(
+                x,
+                eps=1e-5,
+                affine=True,
+            )
+            return instance_norm
+
+        x = np.random.random(size=(4, 2, 32, 32)).astype(np.float32)
+        out = instance_norm_Job(x)
+
+    """
+    assert len(x.shape) == 4
+
+    if name is None:
+        name = id_util.UniqueStr("InstanceNorm2D_")
+
+    reshape_to_1d = flow.reshape(x, shape=[x.shape[0], x.shape[1], -1])
+    normalized_1d_out = flow.nn.InstanceNorm1d(
+        reshape_to_1d, eps=eps, affine=affine, name=name
+    )
+    reshape_back_to_2d = flow.reshape(normalized_1d_out, shape=list(x.shape))
+
+    return reshape_back_to_2d
+
+
+@oneflow_export("nn.InstanceNorm3d")
+@stable_api
+def instance_normalization3d(
+    x: oneflow._oneflow_internal.BlobDesc,
+    eps: float = 1e-05,
+    affine: bool = True,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Applies Instance Normalization over a 5D input.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): 5D input tensor with NCDHW data layout.
+        eps (float): A value added to the denominator for numerical stability. Default: 1e-5.
+        affine (bool): A boolean value that when set to True, this module has learnable affine parameters,
+            initialized the same way as done for batch normalization. Default: True.
+        name (Optional[str], optional): Name of this op.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The normalized input tensor.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function()
+        def instance_norm_Job(x: tp.Numpy.Placeholder((4, 2, 32, 32, 32))
+        ) -> tp.Numpy:
+            instance_norm = flow.nn.InstanceNorm3d(
+                x,
+                eps=1e-5,
+                affine=True,
+            )
+            return instance_norm
+
+        x = np.random.random(size=(4, 2, 32, 32, 32)).astype(np.float32)
+        out = instance_norm_Job(x)
+
+    """
+    assert len(x.shape) == 5
+
+    if name is None:
+        name = id_util.UniqueStr("InstanceNorm3D_")
+
+    reshape_to_1d = flow.reshape(x, shape=[x.shape[0], x.shape[1], -1])
+    normalized_1d_out = flow.nn.InstanceNorm1d(
+        reshape_to_1d, eps=eps, affine=affine, name=name
+    )
+    reshape_back_to_3d = flow.reshape(normalized_1d_out, shape=list(x.shape))
+
+    return reshape_back_to_3d
+
+
+@oneflow_export("nn.batch_normalization")
+def batch_normalization(
+    x: oneflow._oneflow_internal.BlobDesc,
+    mean: oneflow._oneflow_internal.BlobDesc,
+    variance: oneflow._oneflow_internal.BlobDesc,
+    offset: Optional[oneflow._oneflow_internal.BlobDesc] = None,
+    scale: Optional[oneflow._oneflow_internal.BlobDesc] = None,
+    variance_epsilon: Optional[float] = 1e-5,
+    axis: int = 1,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This op does not fully align with tf.nn.batch_normalization.
+
+    The `mean`, `variance`, `offset` and `scale` should all be 1D `Blob`s. Users need to set `axis` to 1 for the NCHW data format.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): Input `Blob` of arbitrary dimensionality.
+        mean (oneflow._oneflow_internal.BlobDesc): A 1D mean `Blob`.
+        variance (oneflow._oneflow_internal.BlobDesc): A 1D variance `Blob`.
+        offset (Optional[oneflow._oneflow_internal.BlobDesc]): A 1D offset `Blob`, often denoted beta in equations, or None. If present, it will be added to the normalized `Blob`.
+        scale (Optional[oneflow._oneflow_internal.BlobDesc]): A 1D scale `Blob`, often denoted gamma in equations, or None. If present, the scale is applied to the normalized `Blob`.
+        variance_epsilon (float): A small float number to avoid dividing by 0.
+        axis (int, optional): 1 for the `NCHW` data format. Defaults to 1.
+        name (Optional[str], optional): This operator's name.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The normalized, scaled, offset `Blob`.
+
+    Note:
+
+        This API is more flexible; if you're new to OneFlow, it is recommended to use `oneflow.compatible.single_client.layers.batch_normalization` instead.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def batch_norm_Job(x: tp.Numpy.Placeholder((1, 5))
+        ) -> tp.Numpy:
+            bn_mean, bn_variance = flow.nn.moments(x, axes=[1])
+            batch_norm = flow.nn.batch_normalization(
+                x,
+                mean=bn_mean,
+                variance=bn_variance,
+                axis=0
+            )
+            return batch_norm
+
+
+        x = np.array([[1, 2, 3, 4, 5]]).astype(np.float32)
+        out = batch_norm_Job(x)
+
+        # out [[-1.41421 -0.707105 0. 0.707105 1.41421 ]]
+
+    """
+
+    assert axis >= -len(x.shape) and axis < len(x.shape)
+    if axis < 0:
+        axis += len(x.shape)
+
+    if name is None:
+        name = id_util.UniqueStr("BatchNorm_")
+
+    params_shape = [x.shape[axis]]
+
+    if flow.current_scope().device_parallel_desc_symbol.device_tag == "cpu":
+        if len(mean.shape) == 1:
+            nd_params_shape = [1] * len(x.shape)
+            nd_params_shape[axis] = params_shape[0]
+            mean = flow.reshape(mean, nd_params_shape)
+            variance = flow.reshape(variance, nd_params_shape)
+            if scale:
+                scale = flow.reshape(scale, nd_params_shape)
+            if offset:
+                offset = flow.reshape(offset, nd_params_shape)
+        elif len(mean.shape) == len(x.shape):
+            pass
+        else:
+            raise ValueError(
+                "shape of mean and variance should be 1D or have the same number of axes as x"
+            )
+        variance += variance_epsilon
+        std_inv = flow.math.rsqrt(variance)
+        normalized = (x - mean) * std_inv
+        affined = normalized
+        if scale:
+            affined *= scale
+        if offset:
+            affined += offset
+        return affined
+    elif flow.current_scope().device_parallel_desc_symbol.device_tag == "gpu":
+        params_dtype = flow.float32 if x.dtype == flow.float16 else x.dtype
+        if scale is None:
+            scale = flow.constant(
+                1, dtype=params_dtype, shape=params_shape, name="gamma"
+            )
+        if offset is None:
+            offset = flow.constant(
+                0, dtype=params_dtype, shape=params_shape, name="beta"
+            )
+        builder = (
+            flow.user_op_builder(name)
+            .Op("normalization")
+            .Input("x", [x])
+            .Input("moving_mean", [mean])
+            .Input("moving_variance", [variance])
+            .Input("gamma", [scale])
+            .Input("beta", [offset])
+            .Output("y")
+            .Attr("axis", axis)
+            .Attr("epsilon", variance_epsilon)
+            .Attr("training", False)
+            # momentum is not used
+            .Attr("momentum", 0.0)
+        )
+        return builder.Build().InferAndTryRun().RemoteBlobList()[0]
+    else:
+        raise NotImplementedError
+
+
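+# Editor's note (illustration added during editing): nn.batch_normalization
+# above picks one of two equivalent inference-time implementations. A rough
+# sketch of the CPU branch, assuming 1D statistics against an NCHW input:
+#
+#     mean, variance: shape (C,)  ->  reshaped to (1, C, 1, 1)
+#     y = (x - mean) * rsqrt(variance + eps)    # composed elementwise ops
+#     y = y * scale + offset                    # only if scale/offset are given
+#
+# The GPU branch instead calls the fused "normalization" user op with
+# training=False, so moving statistics are consumed rather than updated.
+
+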
+@oneflow_export("nn.layer_norm")
+def layer_norm(
+    inputs: oneflow._oneflow_internal.BlobDesc,
+    gamma: Optional[oneflow._oneflow_internal.BlobDesc] = None,
+    beta: Optional[oneflow._oneflow_internal.BlobDesc] = None,
+    begin_norm_axis: int = 1,
+    begin_params_axis: int = -1,
+    epsilon: float = 1e-5,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Layer Normalization.
+
+    Args:
+        inputs (oneflow._oneflow_internal.BlobDesc): Input `Blob`.
+        gamma (Optional[oneflow._oneflow_internal.BlobDesc], optional): A scale `Blob` of shape inputs.shape[begin_params_axis:], applied after normalization. Defaults to None.
+        beta (Optional[oneflow._oneflow_internal.BlobDesc], optional): An offset `Blob` of shape inputs.shape[begin_params_axis:], added after normalization. Defaults to None.
+        begin_norm_axis (int, optional): An integer specifying the first axis along which to normalize. Defaults to 1.
+        begin_params_axis (int, optional): An integer specifying the first axis to which gamma and beta apply. Defaults to -1.
+        epsilon (float, optional): A small float added to the variance to avoid division by zero. Defaults to 1e-5.
+        name (Optional[str], optional): This operator's name. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A normalized `Blob` with the same shape as input.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def layer_norm_Job(x: tp.Numpy.Placeholder((1, 64, 128, 128))
+        ) -> tp.Numpy:
+            layer_norm = flow.nn.layer_norm(
+                x,
+                name="LayerNorm1"
+            )
+            return layer_norm
+
+
+        x = np.random.randn(1, 64, 128, 128).astype(np.float32)
+        out = layer_norm_Job(x)
+
+        # out.shape (1, 64, 128, 128)
+
+    """
+    param_shape = inputs.shape[begin_params_axis:]
+
+    if name is None:
+        name = id_util.UniqueStr("LayerNorm_")
+
+    if flow.current_scope().device_parallel_desc_symbol.device_tag == "cpu":
+        if begin_norm_axis < 0:
+            begin_norm_axis = begin_norm_axis + len(inputs.shape)
+
+        reduce_axis = []
+        for dim in range(len(inputs.shape)):
+            if dim >= begin_norm_axis:
+                reduce_axis.append(dim)
+        mean, variance = flow.nn.moments(inputs, reduce_axis, keepdims=True)
+
+        axis = begin_norm_axis
+        normalized = flow.nn.batch_normalization(
+            x=inputs,
+            mean=mean,
+            variance=variance,
+            variance_epsilon=epsilon,
+            axis=axis,
+            name=name,
+        )
+        nd_params_shape = [1] * (len(inputs.shape) - len(param_shape)) + list(
+            param_shape
+        )
+        affined = normalized
+        if gamma:
+            gamma = flow.reshape(gamma, nd_params_shape)
+            affined *= gamma
+        if beta:
+            beta = flow.reshape(beta, nd_params_shape)
+            affined += beta
+        return affined
+    elif flow.current_scope().device_parallel_desc_symbol.device_tag == "gpu":
+        op_builder = (
+            flow.user_op_builder(name)
+            .Op("layer_norm")
+            .Input("x", [inputs])
+            .Output("y")
+            .Output("mean")
+            .Output("inv_variance")
+        )
+        scale = False
+        center = False
+        if beta is not None:
+            center = True
+            op_builder.Input("beta", [beta])
+        if gamma is not None:
+            scale = True
+            op_builder.Input("gamma", [gamma])
+            op_builder.Output("normalized")
+        op_builder.Attr("center", center)
+        op_builder.Attr("scale", scale)
+        op_builder.Attr("begin_norm_axis", begin_norm_axis)
+        op_builder.Attr("begin_params_axis", begin_params_axis)
+        op_builder.Attr("epsilon", epsilon)
+
+        y = op_builder.Build().InferAndTryRun().RemoteBlobList()[0]
+        return y
+    else:
+        raise NotImplementedError
+
+
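+# Editor's note (illustration added during editing): a shape sketch of the two
+# layer_norm axis parameters, assuming a hypothetical input of shape
+# (N, S, H) = (8, 16, 512):
+#
+#     begin_norm_axis=-1   -> mean/variance reduce over the trailing H axis
+#     begin_params_axis=-1 -> gamma/beta have shape (512,), broadcast as (1, 1, 512)
+#
+# With begin_norm_axis=1, the reduction would instead cover both S and H.
+
+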
+@oneflow_export("nn.compat_conv2d")
+def tf_conv2d(
+    input: oneflow._oneflow_internal.BlobDesc,
+    filters: oneflow._oneflow_internal.BlobDesc,
+    strides: Union[int, Sequence[int]],
+    padding: str,
+    data_format: str = "NCHW",
+    dilations: Optional[Union[int, Sequence[int]]] = None,
+    groups: int = 1,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Computes a 2-D convolution given `input` and 4-D `filters` `Blob`.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): A `Blob` of rank at least 4.
+        filters (oneflow._oneflow_internal.BlobDesc): A `Blob` with the same type as `input`, of shape [out_channels, in_channels//groups, filter_height, filter_width] for `NCHW`, or [out_channels, filter_height, filter_width, in_channels//groups] for `NHWC`.
+        strides (Union[int, Sequence[int]]): An int or list of `ints` that has length `1` or `2`. The stride of the sliding window for each dimension of `input`.
+        padding (str): `"SAME"` or `"VALID"` indicating the type of padding algorithm to use.
+        data_format (str, optional): `"NHWC"` or `"NCHW"`. Defaults to `"NCHW"`.
+        dilations (Optional[Union[int, Sequence[int]]], optional): The dilation factor for each dimension of `input`. Defaults to None.
+        groups (int, optional): An int value greater than 0. Defaults to 1.
+        name (Optional[str], optional): This operator's name. Defaults to None.
+
+    Raises:
+        ValueError: strides must be an int or a list.
+        ValueError: data_format must be "NHWC" or "NCHW".
+        ValueError: dilations length must be 2 when passed as a list.
+        ValueError: dilations must be an int or a list.
+        ValueError: data_format NHWC does not support groups > 1.
+        ValueError: invalid data_format.
+        ValueError: padding must be "SAME" or "VALID".
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` with the same type as `input` and the same outer batch shape.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        def conv2d(input, filters, kernel_size, strides, padding, name):
+            input_shape = input.shape
+            weight_initializer = flow.truncated_normal(0.1)
+            weight_regularizer = flow.regularizers.l2(0.0005)
+            weight_shape = (filters,
+                            input_shape[1],
+                            kernel_size[0],
+                            kernel_size[1])
+
+            weight = flow.get_variable(
+                name + "-weight",
+                shape=weight_shape,
+                initializer=weight_initializer,
+                regularizer=weight_regularizer,
+            )
+            return flow.nn.compat_conv2d(input, weight, strides, padding, name=name)
+
+
+        @flow.global_function()
+        def conv2d_Job(x: tp.Numpy.Placeholder((1, 64, 32, 32))
+        ) -> tp.Numpy:
+            conv = conv2d(x,
+                          filters=128,
+                          kernel_size=[3, 3],
+                          strides=2,
+                          padding='SAME',
+                          name="Convlayer")
+            return conv
+
+
+        x = np.random.randn(1, 64, 32, 32).astype(np.float32)
+        out = conv2d_Job(x)
+
+        # out.shape (1, 128, 16, 16)
+
+    """
+    if padding.upper() == "SAME":
+        padding = "SAME_UPPER"
+    return flow.nn.conv2d(
+        input, filters, strides, padding, None, data_format, dilations, groups, name
+    )
+
+
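+# Editor's note (illustration added during editing): nn.compat_conv2d exists for
+# TensorFlow-style padding semantics; the only translation it performs is
+# mapping "SAME" to oneflow's "SAME_UPPER" before delegating to nn.conv2d.
+# A minimal sketch of the equivalence:
+#
+#     flow.nn.compat_conv2d(x, w, strides=1, padding="SAME")
+#     # behaves like:
+#     flow.nn.conv2d(x, w, strides=1, padding="SAME_UPPER")
+
+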
+@oneflow_export("nn.bias_add")
+def bias_add(
+    value: oneflow._oneflow_internal.BlobDesc,
+    bias: oneflow._oneflow_internal.BlobDesc,
+    data_format: Optional[str] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator adds a bias to a `Blob`.
+
+    Args:
+        value (oneflow._oneflow_internal.BlobDesc): A `Blob`.
+        bias (oneflow._oneflow_internal.BlobDesc): A 1-D `Blob` with size matching the channel dimension of value. It must have the same type as value unless value is a quantized type.
+        data_format (Optional[str], optional): A string, `N...C` or `NC...`. Defaults to None.
+        name (Optional[str], optional): This operator's name. Defaults to None.
+
+    Raises:
+        ValueError: If data format is unrecognized, if value has fewer than two dimensions with `N...C`/None data_format or fewer than three dimensions with `NC...` data_format, if bias is not a vector, or if the size of bias does not match the size of the channel dimension of value.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` with the same type as value.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def bias_add_Job(x: tp.Numpy.Placeholder((1, 64, 128, 128))
+        ) -> tp.Numpy:
+            bias_initializer = flow.truncated_normal(0.1)
+            bias_regularizer = flow.regularizers.l2(0.0005)
+            bias = flow.get_variable(
+                "Add_bias",
+                shape=(64,),
+                initializer=bias_initializer,
+                regularizer=bias_regularizer,
+            )
+            bias_out = flow.nn.bias_add(x, bias)
+            return bias_out
+
+
+        x = np.random.randn(1, 64, 128, 128).astype(np.float32)
+        out = bias_add_Job(x)
+
+        # out.shape (1, 64, 128, 128)
+
+    """
+    if name is None:
+        name = id_util.UniqueStr("BiasAdd_")
+
+    if data_format is None:
+        bias_add_axis = 1
+    else:
+        if data_format.startswith("NC"):
+            bias_add_axis = 1
+        elif data_format.startswith("N") and data_format.endswith("C"):
+            bias_add_axis = len(value.shape) - 1
+        else:
+            raise ValueError("data_format must be of the form `N...C` or `NC...`")
+
+    return (
+        flow.user_op_builder(name)
+        .Op("bias_add")
+        .Input("a", [value])
+        .Input("b", [bias])
+        .Output("out")
+        .Attr("axis", bias_add_axis)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("nn.fused_bias_add_gelu")
+def fused_bias_add_gelu(
+    value: oneflow._oneflow_internal.BlobDesc,
+    bias: oneflow._oneflow_internal.BlobDesc,
+    data_format: Optional[str] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator fuses the flow.nn.bias_add and flow.math.gelu operators.
+
+    Args:
+        value (oneflow._oneflow_internal.BlobDesc): A `Blob`.
+        bias (oneflow._oneflow_internal.BlobDesc): A 1-D `Blob` with size matching the channel dimension of value. It must have the same type as value unless value is a quantized type.
+        data_format (Optional[str], optional): A string, `N...C` or `NC...`. Defaults to None.
+        name (Optional[str], optional): This operator's name. Defaults to None.
+
+    Raises:
+        ValueError: If data format is unrecognized, if value has fewer than two dimensions with `N...C`/None data_format or fewer than three dimensions with `NC...` data_format, if bias is not a vector, or if the size of bias does not match the size of the channel dimension of value.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` with the same type as value.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def fused_bias_add_gelu_Job(x: tp.Numpy.Placeholder((1, 64, 128, 128))
+        ) -> tp.Numpy:
+            bias_initializer = flow.truncated_normal(0.1)
+            bias_regularizer = flow.regularizers.l2(0.0005)
+            bias = flow.get_variable(
+                "Add_bias",
+                shape=(64,),
+                initializer=bias_initializer,
+                regularizer=bias_regularizer,
+            )
+            out = flow.nn.fused_bias_add_gelu(x, bias)
+            return out
+
+
+        x = np.random.randn(1, 64, 128, 128).astype(np.float32)
+        out = fused_bias_add_gelu_Job(x)
+
+        # out.shape (1, 64, 128, 128)
+
+    """
+    if name is None:
+        name = id_util.UniqueStr("FusedBiasAddGelu_")
+
+    if data_format is None:
+        bias_add_axis = 1
+    else:
+        if data_format.startswith("NC"):
+            bias_add_axis = 1
+        elif data_format.startswith("N") and data_format.endswith("C"):
+            bias_add_axis = len(value.shape) - 1
+        else:
+            raise ValueError("data_format must be of the form `N...C` or `NC...`")
+
+    return (
+        flow.user_op_builder(name)
+        .Op("fused_bias_add_gelu")
+        .Input("a", [value])
+        .Input("b", [bias])
+        .Output("out")
+        .Attr("axis", bias_add_axis)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("nn.fused_bias_add_dropout")
+def fused_bias_add_dropout(
+    value: oneflow._oneflow_internal.BlobDesc,
+    bias: oneflow._oneflow_internal.BlobDesc,
+    data_format: Optional[str] = None,
+    rate: float = 0.0,
+    noise_shape: Optional[oneflow._oneflow_internal.BlobDesc] = None,
+    seed: Optional[int] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator fuses the flow.nn.bias_add and flow.nn.dropout operators.
+
+    Args:
+        value (oneflow._oneflow_internal.BlobDesc): A `Blob`.
+        bias (oneflow._oneflow_internal.BlobDesc): A 1-D `Blob` with size matching the channel dimension of value. It must have the same type as value unless value is a quantized type.
+        data_format (Optional[str], optional): A string, `N...C` or `NC...`. Defaults to None.
+        rate (float): The probability that each element is dropped. Defaults to 0.0.
+        noise_shape (Optional[oneflow._oneflow_internal.BlobDesc], optional): A 1-D `Blob` representing the shape for randomly generated keep/drop flags. Defaults to None.
+        seed (Optional[int], optional): Optional int value. Defaults to None.
+        name (Optional[str], optional): This operator's name. Defaults to None.
+
+    Raises:
+        ValueError: If data format is unrecognized, if value has fewer than two dimensions with `N...C`/None data_format or fewer than three dimensions with `NC...` data_format, if bias is not a vector, or if the size of bias does not match the size of the channel dimension of value.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` with the same type as value.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def fused_bias_add_dropout_Job(x: tp.Numpy.Placeholder((1, 64, 128, 128))
+        ) -> tp.Numpy:
+            bias_initializer = flow.truncated_normal(0.1)
+            bias_regularizer = flow.regularizers.l2(0.0005)
+            bias = flow.get_variable(
+                "Add_bias",
+                shape=(64,),
+                initializer=bias_initializer,
+                regularizer=bias_regularizer,
+            )
+            out = flow.nn.fused_bias_add_dropout(x, bias)
+            return out
+
+
+        x = np.random.randn(1, 64, 128, 128).astype(np.float32)
+        out = fused_bias_add_dropout_Job(x)
+
+        # out.shape (1, 64, 128, 128)
+
+    """
+
+    assert rate is not None and rate >= 0.0 and rate < 1.0
+    if not flow.current_global_function_desc().IsTrainable() or rate == 0.0:
+        return flow.nn.bias_add(value, bias, data_format, name)
+
+    if name is None:
+        name = id_util.UniqueStr("BiasAddDropout_")
+    mask = flow.nn.random_mask_like(
+        value, rate, seed, noise_shape, "%s-dropout_random_mask_like" % name
+    )
+
+    if data_format is None:
+        bias_add_axis = 1
+    else:
+        if data_format.startswith("NC"):
+            bias_add_axis = 1
+        elif data_format.startswith("N") and data_format.endswith("C"):
+            bias_add_axis = len(value.shape) - 1
+        else:
+            raise ValueError("data_format must be of the form `N...C` or `NC...`")
+
+    return (
+        flow.user_op_builder(name)
+        .Op("fused_bias_add_mask_scale")
+        .Input("a", [value])
+        .Input("b", [bias])
+        .Input("mask", [mask])
+        .Output("out")
+        .Attr("axis", bias_add_axis)
+        .Attr("scale", float(1.0 / (1.0 - rate)))
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
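+# Editor's note (illustration added during editing): a short sketch of the fused
+# bias-add + dropout math above. Dropout here is the "inverted" variant:
+# surviving elements are rescaled at training time so no rescaling is needed at
+# inference. Assuming a hypothetical rate=0.2:
+#
+#     mask ~ Bernoulli(keep_prob=0.8)                 # random_mask_like
+#     y = (x + bias) * mask * (1.0 / (1.0 - 0.2))     # scale = 1 / (1 - rate)
+#
+# When the function is not trainable or rate == 0.0, it degenerates to a plain
+# nn.bias_add, which is exactly the early return above.
+
+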
+@oneflow_export("nn.max_pool1d")
+def max_pool1d(
+    input: oneflow._oneflow_internal.BlobDesc,
+    ksize: Union[int, Sequence[int]],
+    strides: Union[int, Sequence[int]],
+    padding: Union[str, Sequence[Sequence[int]]],
+    data_format: str = "NWC",
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Performs the 1d-max pooling on the input.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): A 3-D `Blob` of the format specified by data_format.
+        ksize (Union[int, Sequence[int]]): An int or list of ints that has length 1 or 3. The size of the window for each dimension of the input `Blob`.
+        strides (Union[int, Sequence[int]]): An int or list of ints that has length 1 or 3. The stride of the sliding window for each dimension of the input `Blob`.
+        padding (str): `VALID` or `SAME`. The padding algorithm.
+        data_format (str, optional): An optional string from: `NWC`, `NCW`. Defaults to `NWC`.
+        name (Optional[str], optional): This operator's name (optional). Defaults to None.
+
+    Raises:
+        NotImplementedError: TODO: fix cuDNN bugs in pooling_1d
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` of the format specified by data_format. The max pooled output `Blob`.
+    """
+    # TODO: fix cuDNN bugs in pooling_1d
+    raise NotImplementedError
+
+
+@oneflow_export("nn.avg_pool1d")
+def avg_pool1d(
+    input: oneflow._oneflow_internal.BlobDesc,
+    ksize: Union[int, Sequence[int]],
+    strides: Union[int, Sequence[int]],
+    padding: Union[str, Sequence[Sequence[int]]],
+    data_format: str = "NCW",
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Performs the average pooling on the input `Blob`.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): A 3-D `Blob` of the format specified by data_format.
+        ksize (Union[int, Sequence[int]]): An int or list of ints that has length 1 or 3. The size of the window for each dimension of the input `Blob`.
+        strides (Union[int, Sequence[int]]): An int or list of ints that has length 1 or 3. The stride of the sliding window for each dimension of the input `Blob`.
+        padding (str): `VALID` or `SAME`.
+        data_format (str, optional): `NWC` or `NCW`. Defaults to `NCW`.
+        name (Optional[str], optional): This operator's name (optional). Defaults to None.
+
+    Raises:
+        NotImplementedError: TODO: fix cuDNN bugs in pooling_1d
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` of the format specified by data_format. The average pooled output `Blob`.
+    """
+    # TODO: fix cuDNN bugs in pooling_1d
+    raise NotImplementedError
+
+
+def calc_pool_padding(padding, dhw_offset, ndims):
+    if isinstance(padding, str):
+        padding = "SAME_LOWER" if padding.upper() == "SAME" else padding
+        assert padding.upper() in ["VALID", "SAME_LOWER", "SAME_UPPER"]
+        padding_type = padding.lower()
+        ndim_pads_list = [[0, 0]] * ndims
+    elif isinstance(padding, (list, tuple)):
+        padding_type = "customized"
+        ndim_pads_list = get_ndim_pads_list(padding, dhw_offset, ndims)
+    else:
+        raise ValueError("padding must be str or a list.")
+    return padding_type, ndim_pads_list
+
+
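+# Editor's note (illustration added during editing): a sketch of
+# calc_pool_padding's two modes for a 2D pooling call (ndims=2). String
+# paddings select a padding *type* and leave the concrete pads at zero for the
+# op to infer; "SAME" is normalized to "SAME_LOWER" for backward compatibility:
+#
+#     calc_pool_padding("SAME", dhw_offset, 2)   # -> ("same_lower", [[0, 0], [0, 0]])
+#     calc_pool_padding("VALID", dhw_offset, 2)  # -> ("valid", [[0, 0], [0, 0]])
+#
+# A list padding is treated as explicit per-dimension (begin, end) pads and
+# returns ("customized", ...) via get_ndim_pads_list.
+
+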
+@oneflow_export("nn.max_pool2d")
+def max_pool2d(
+    input: oneflow._oneflow_internal.BlobDesc,
+    ksize: Union[int, IntPair],
+    strides: Union[int, IntPair],
+    padding: Union[str, Tuple[IntPair, IntPair, IntPair, IntPair]],
+    data_format: str = "NCHW",
+    ceil_mode: bool = False,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Performs the 2d-max pooling on the input `Blob`.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): A 4-D `Blob` of the format specified by data_format.
+        ksize (Union[int, IntPair]): An int or list of ints that has length 1 or 2. The size of the window for each dimension of the input `Blob`.
+        strides (Union[int, IntPair]): An int or list of ints that has length 1 or 2. The stride of the sliding window for each dimension of the input `Blob`.
+        padding (str): `VALID`, `SAME`, `SAME_LOWER`, `SAME_UPPER` or Tuple[IntPair, IntPair, IntPair, IntPair]. The padding algorithm.
+        data_format (str, optional): `NHWC`, `NCHW` or `NCHW_VECT_C`. Defaults to "NCHW".
+        ceil_mode (bool, optional): Whether to use ceil instead of floor when computing the output shape. Defaults to False.
+        name (Optional[str], optional): This operator's name (optional). Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` of the format specified by data_format. The max pooled output `Blob`.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def maxpool2d_Job(x: tp.Numpy.Placeholder((1, 32, 128, 128))
+        ) -> tp.Numpy:
+            pool_out = flow.nn.max_pool2d(
+                input=x,
+                ksize=3,
+                strides=2,
+                padding='SAME',
+                data_format='NCHW'
+            )
+
+            return pool_out
+
+
+        x = np.random.randn(1, 32, 128, 128).astype(np.float32)
+        out = maxpool2d_Job(x)
+
+        # out.shape (1, 32, 64, 64)
+
+    """
+    op = (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("MaxPool2D_")
+        )
+        .Op("max_pool_2d")
+        .Input("x", [input])
+        .Output("y")
+    )
+    assert data_format in ["NHWC", "NCHW", "NCHW_VECT_C"]
+    channel_pos = "channels_last" if data_format == "NHWC" else "channels_first"
+    op.Attr("data_format", channel_pos)
+    pool_size = _GetSequence(ksize, 2, "ksize")
+    op.Attr("pool_size", pool_size)
+    strides = _GetSequence(strides, 2, "strides")
+    op.Attr("strides", strides)
+    padding_type, pads_list = calc_pool_padding(padding, get_dhw_offset(channel_pos), 2)
+    assert len(pads_list) == len(input.shape) - 2
+    padding_before = [pad[0] for pad in pads_list]
+    padding_after = [pad[1] for pad in pads_list]
+    op.Attr("padding", padding_type)
+    op.Attr("padding_before", padding_before)
+    op.Attr("padding_after", padding_after)
+    op.Attr("ceil_mode", ceil_mode)
+    return op.Build().InferAndTryRun().RemoteBlobList()[0]
+
+
+@oneflow_export("nn.avg_pool2d")
+def avg_pool2d(
+    input: oneflow._oneflow_internal.BlobDesc,
+    ksize: Union[int, IntPair],
+    strides: Union[int, IntPair],
+    padding: Union[str, Tuple[IntPair, IntPair, IntPair, IntPair]],
+    data_format: str = "NCHW",
+    ceil_mode: bool = False,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Performs the 2d-average pooling on the input.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): A 4-D `Blob` of the format specified by data_format.
+        ksize (Union[int, IntPair]): An int or list of ints that has length 1 or 2. The size of the window for each dimension of the input `Blob`.
+        strides (Union[int, IntPair]): An int or list of ints that has length 1 or 2. The stride of the sliding window for each dimension of the input `Blob`.
+        padding (str): `VALID`, `SAME`, `SAME_LOWER`, `SAME_UPPER` or Tuple[IntPair, IntPair, IntPair, IntPair]. The padding algorithm.
+        data_format (str, optional): `NHWC` or `NCHW`. Defaults to "NCHW".
+        ceil_mode (bool, optional): Whether to use ceil instead of floor when computing the output shape. Defaults to False.
+        name (Optional[str], optional): This operator's name (optional). Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` with the same type as `input`. The average pooled output `Blob`.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def avgpool2d_Job(x: tp.Numpy.Placeholder((1, 32, 128, 128))
+        ) -> tp.Numpy:
+            pool_out = flow.nn.avg_pool2d(
+                input=x,
+                ksize=3,
+                strides=2,
+                padding='SAME',
+                data_format='NCHW'
+            )
+
+            return pool_out
+
+
+        x = np.random.randn(1, 32, 128, 128).astype(np.float32)
+        out = avgpool2d_Job(x)
+
+        # out.shape (1, 32, 64, 64)
+
+    """
+    op = (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("AvgPool2D_")
+        )
+        .Op("avg_pool_2d")
+        .Input("x", [input])
+        .Output("y")
+    )
+    assert data_format in ["NHWC", "NCHW", "NCHW_VECT_C"]
+    channel_pos = "channels_last" if data_format == "NHWC" else "channels_first"
+    op.Attr("data_format", channel_pos)
+    pool_size = _GetSequence(ksize, 2, "ksize")
+    op.Attr("pool_size", pool_size)
+    strides = _GetSequence(strides, 2, "strides")
+    op.Attr("strides", strides)
+    padding_type, pads_list = calc_pool_padding(padding, get_dhw_offset(channel_pos), 2)
+    assert len(pads_list) == len(input.shape) - 2
+    padding_before = [pad[0] for pad in pads_list]
+    padding_after = [pad[1] for pad in pads_list]
+    op.Attr("padding", padding_type)
+    op.Attr("padding_before", padding_before)
+    op.Attr("padding_after", padding_after)
+    op.Attr("ceil_mode", ceil_mode)
+    return op.Build().InferAndTryRun().RemoteBlobList()[0]
+
+
+@oneflow_export("nn.max_pool3d")
+def max_pool3d(
+    input: oneflow._oneflow_internal.BlobDesc,
+    ksize: Union[int, Sequence[int]],
+    strides: Union[int, Sequence[int]],
+    padding: Union[str, Sequence[Sequence[int]]],
+    data_format: str = "NCDHW",
+    ceil_mode: bool = False,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Performs the 3d-max pooling on the input.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): A 5-D `Blob` of the format specified by data_format.
+        ksize (Union[int, Sequence[int]]): An int or list of ints that has length 1, 3 or 5. The size of the window for each dimension of the input `Blob`.
+        strides (Union[int, Sequence[int]]): An int or list of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input `Blob`.
+        padding (str): `VALID`, `SAME`, `SAME_LOWER`, `SAME_UPPER` or Sequence[Sequence[int]].
+        data_format (str, optional): "NDHWC" or "NCDHW". Defaults to "NCDHW".
+        ceil_mode (bool, optional): Whether to use ceil instead of floor when computing the output shape. Defaults to False.
+        name (Optional[str], optional): This operator's name (optional).
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` of the format specified by data_format. The max pooled output `Blob`.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def maxpool3d_Job(x: tp.Numpy.Placeholder((1, 32, 10, 128, 128))
+        ) -> tp.Numpy:
+            pool_out = flow.nn.max_pool3d(
+                input=x,
+                ksize=3,
+                strides=2,
+                padding='SAME',
+                data_format='NCDHW'
+            )
+
+            return pool_out
+
+
+        x = np.random.randn(1, 32, 10, 128, 128).astype(np.float32)
+        out = maxpool3d_Job(x)
+
+        # out.shape (1, 32, 5, 64, 64)
+
+    """
+    op = (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("MaxPool3D_")
+        )
+        .Op("max_pool_3d")
+        .Input("x", [input])
+        .Output("y")
+    )
+    assert data_format in ["NDHWC", "NCDHW"]
+    channel_pos = "channels_last" if data_format == "NDHWC" else "channels_first"
+    op.Attr("data_format", channel_pos)
+    pool_size = _GetSequence(ksize, 3, "ksize")
+    op.Attr("pool_size", pool_size)
+    strides = _GetSequence(strides, 3, "strides")
+    op.Attr("strides", strides)
+    padding_type, pads_list = calc_pool_padding(padding, get_dhw_offset(channel_pos), 3)
+    assert len(pads_list) == len(input.shape) - 2
+    padding_before = [pad[0] for pad in pads_list]
+    padding_after = [pad[1] for pad in pads_list]
+    op.Attr("padding", padding_type)
+    op.Attr("padding_before", padding_before)
+    op.Attr("padding_after", padding_after)
+    op.Attr("ceil_mode", ceil_mode)
+    return op.Build().InferAndTryRun().RemoteBlobList()[0]
+
+
+@oneflow_export("nn.avg_pool3d")
+def avg_pool3d(
+    input: oneflow._oneflow_internal.BlobDesc,
+    ksize: Union[int, Sequence[int]],
+    strides: Union[int, Sequence[int]],
+    padding: Union[str, Sequence[Sequence[int]]],
+    data_format: str = "NCDHW",
+    ceil_mode: bool = False,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Performs the 3d-average pooling on the input.
+
+    Args:
+        input (oneflow._oneflow_internal.BlobDesc): A 5-D `Blob` of the format specified by data_format.
+        ksize (Union[int, Sequence[int]]): An int or list of ints that has length 1, 3 or 5. The size of the window for each dimension of the input `Blob`.
+        strides (Union[int, Sequence[int]]): An int or list of ints that has length 1, 3 or 5. The stride of the sliding window for each dimension of the input `Blob`.
+        padding (str): `VALID`, `SAME`, `SAME_LOWER`, `SAME_UPPER` or Sequence[Sequence[int]].
+        data_format (str, optional): `NDHWC` or `NCDHW`. Defaults to "NCDHW".
+        ceil_mode (bool, optional): Whether to use ceil instead of floor when computing the output shape. Defaults to False.
+        name (Optional[str], optional): This operator's name (optional). Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` with the same type as `input`. The average pooled output `Blob`.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def avgpool3d_Job(x: tp.Numpy.Placeholder((1, 32, 10, 128, 128))
+        ) -> tp.Numpy:
+            pool_out = flow.nn.avg_pool3d(
+                input=x,
+                ksize=3,
+                strides=2,
+                padding='SAME',
+                data_format='NCDHW'
+            )
+
+            return pool_out
+
+
+        x = np.random.randn(1, 32, 10, 128, 128).astype(np.float32)
+        out = avgpool3d_Job(x)
+
+        # out.shape (1, 32, 5, 64, 64)
+
+    """
+    op = (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("AvgPool3D_")
+        )
+        .Op("avg_pool_3d")
+        .Input("x", [input])
+        .Output("y")
+    )
+    assert data_format in ["NDHWC", "NCDHW"]
+    channel_pos = "channels_last" if data_format == "NDHWC" else "channels_first"
+    op.Attr("data_format", channel_pos)
+    pool_size = _GetSequence(ksize, 3, "ksize")
+    op.Attr("pool_size", pool_size)
+    strides = _GetSequence(strides, 3, "strides")
+    op.Attr("strides", strides)
+    padding_type, pads_list = calc_pool_padding(padding, get_dhw_offset(channel_pos), 3)
+    assert len(pads_list) == len(input.shape) - 2
+    padding_before = [pad[0] for pad in pads_list]
+    padding_after = [pad[1] for pad in pads_list]
+    op.Attr("padding", padding_type)
+    op.Attr("padding_before", padding_before)
+    op.Attr("padding_after", padding_after)
+    op.Attr("ceil_mode", ceil_mode)
+    return op.Build().InferAndTryRun().RemoteBlobList()[0]
+
+
+def _softmax_need_transpose(x, axis):
+    assert type(axis) is int
+    dim_num = len(x.shape)
+    assert dim_num >= 2
+    if axis < 0:
+        axis += dim_num
+    assert axis >= 0
+    assert axis < dim_num
+
+    need_transpose = False
+    permute = list(range(dim_num))
+    if axis != dim_num - 1:
+        need_transpose = True
+        permute[axis] = permute[-1]
+        permute[-1] = axis
+    return need_transpose, permute
+
+
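+# Editor's note (illustration added during editing): a small walk-through of
+# _softmax_need_transpose, assuming a hypothetical 4D input and axis=1. The
+# softmax op below works on the last axis, so any other axis is swapped into
+# the last position first:
+#
+#     dim_num=4, axis=1  ->  need_transpose=True, permute=[0, 3, 2, 1]
+#
+# The same permutation is applied again on the way out; swapping two entries
+# is its own inverse, so the output layout matches the input.
+
+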
+@oneflow_export("nn.softmax")
+def softmax(
+    logits: oneflow._oneflow_internal.BlobDesc,
+    axis: Optional[int] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Computes softmax activations.
+
+    For each element, we apply:
+
+    .. math::
+        S_i = \frac{e^{x_i}}{\sum_j e^{x_j}}
+
+    Args:
+        logits (oneflow._oneflow_internal.BlobDesc): A non-empty `Blob`.
+        axis (Optional[int], optional): The dimension softmax would be performed on. Defaults to None.
+        name (Optional[str], optional): This operator's name (optional). Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` with the same type and shape as logits.
+
+    Raises:
+        InvalidArgumentError: if logits is empty or axis is beyond the last dimension of logits.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def softmax_Job(x: tp.Numpy.Placeholder((1, 5))
+        ) -> tp.Numpy:
+            softmax_out = flow.nn.softmax(x, axis=1)
+
+            return softmax_out
+
+
+        x = np.array([[1, 2, 1, 5, 4]]).astype(np.float32)
+        out = softmax_Job(x)
+
+        # out [[0.01259415 0.03423444 0.01259415 0.68761706 0.2529602 ]]
+
+    """
+    if axis is None:
+        axis = -1
+
+    need_transpose, permute = _softmax_need_transpose(logits, axis)
+    if need_transpose:
+        logits = flow.transpose(logits, perm=permute)
+
+    out = (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("Softmax_")
+        )
+        .Op("softmax")
+        .Input("in", [logits])
+        .Output("out")
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+    if need_transpose:
+        out = flow.transpose(out, perm=permute)
+    return out
+
+
+@oneflow_export("nn.logsoftmax")
+def logsoftmax(
+    logits: oneflow._oneflow_internal.BlobDesc,
+    axis: Optional[int] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Computes logsoftmax activations.
+
+    For each element, we apply:
+
+    .. math::
+
+        \text{LogSoftmax}(x_i) = \log\left(\frac{e^{x_i}}{\sum_j e^{x_j}}\right)
+
+    Args:
+        logits (oneflow._oneflow_internal.BlobDesc): A non-empty `Blob`.
+        axis (Optional[int], optional): The dimension logsoftmax would be performed on. Defaults to None.
+        name (Optional[str], optional): This operator's name (optional). Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` with the same type and shape as logits.
+
+    Raises:
+        InvalidArgumentError: if logits is empty or axis is beyond the last dimension of logits.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def logsoftmax_Job(x: tp.Numpy.Placeholder((1, 5))
+        ) -> tp.Numpy:
+            logsoftmax_out = flow.nn.logsoftmax(x, axis=1)
+            return logsoftmax_out
+
+
+        x = np.array([[1, 2, 1, 5, 4]]).astype(np.float32)
+        out = logsoftmax_Job(x)
+
+        # out [[-4.374523 -3.3745232 -4.374523 -0.3745232 -1.374523 ]]
+    """
+    if axis is None:
+        axis = -1
+    if name is None:
+        name = id_util.UniqueStr("logsoftmax")
+    return flow.math.log(
+        flow.nn.softmax(logits, axis, name=name + "_softmax"), name=name + "_log"
+    )
+
+
+@oneflow_export("nn.softmax_grad")
+def softmax_grad(
+    y: oneflow._oneflow_internal.BlobDesc,
+    dy: oneflow._oneflow_internal.BlobDesc,
+    axis: Optional[int] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""Computes the gradient of softmax activations.
+
+    Args:
+        y (oneflow._oneflow_internal.BlobDesc): A `Blob` representing the softmax of x.
+        dy (oneflow._oneflow_internal.BlobDesc): The gradient of y.
+        axis (Optional[int], optional): The dimension softmax would be performed on. Defaults to None.
+        name (Optional[str], optional): This operator's name (optional).
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: A `Blob` representing the gradient of x.
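+
+    For example:
+
+    .. code-block:: python
+
+        # An illustrative sketch added during editing; names and shapes are
+        # hypothetical, showing how softmax_grad pairs with softmax.
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def softmax_grad_Job(y: tp.Numpy.Placeholder((1, 5)),
+                             dy: tp.Numpy.Placeholder((1, 5))
+        ) -> tp.Numpy:
+            # y is assumed to be softmax(x); dy is the upstream gradient.
+            return flow.nn.softmax_grad(y, dy, axis=1)
+
+
+        y = np.array([[0.2, 0.2, 0.2, 0.2, 0.2]]).astype(np.float32)
+        dy = np.ones((1, 5)).astype(np.float32)
+        dx = softmax_grad_Job(y, dy)  # same shape as y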
+ """ + if axis is None: + axis = -1 + + need_transpose, permute = _softmax_need_transpose(y, axis) + if need_transpose: + y = flow.transpose(y, perm=permute) + dy = flow.transpose(dy, perm=permute) + + dx = ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("Softmax_") + ) + .Op("softmax_grad") + .Input("y", [y]) + .Input("dy", [dy]) + .Output("dx") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + if need_transpose: + dx = flow.transpose(dx, perm=permute) + return dx + + +@oneflow_export("nn.sparse_cross_entropy") +def sparse_cross_entropy( + labels: oneflow._oneflow_internal.BlobDesc, + prediction: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Computes sparse cross entropy + + Args: + labels (oneflow._oneflow_internal.BlobDesc): A `Blob` of shape [d_0, d_1, ..., d_{r-1}] (where r is rank of labels and result). Each entry in labels must be an index in [0, num_classes). + prediction (oneflow._oneflow_internal.BlobDesc): A `Blob` with the rank that is equal to the rank of the labels plus one. + name (Optional[str], optional): This operator's name(optional). Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: A `Blob` of the same shape as labels. + + Note: + + The labels data type should be `oneflow.compatible.single_client.int32`. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def sparse_cross_entropy_Job(input: tp.Numpy.Placeholder((5, 2), dtype=flow.float32), + labels: tp.Numpy.Placeholder((5,), dtype=flow.int32) + ) -> tp.Numpy: + loss = flow.nn.sparse_cross_entropy(labels=labels, + prediction=input) + return loss + + + x = np.array([[0.3, 0.7], + [0.4, 0.6], + [0.5, 0.5], + [0.1, 0.9], + [0.2, 0.8]]).astype(np.float32) + labels = np.array([0, 1, 1, 0, 1]).astype(np.int32) + loss = sparse_cross_entropy_Job(x, labels) + + # out [1.2039728 0.5108256 0.6931472 2.3025851 0.22314353] + + """ + assert labels is not None + assert prediction is not None + + if len(labels.shape) == len(prediction.shape): + assert labels.shape[-1] == 1 + labels = flow.squeeze(labels, axis=[-1]) + else: + assert len(labels.shape) == len(prediction.shape) - 1 + + if prediction.distribute is oneflow._oneflow_internal.distribute.split( + len(prediction.shape) - 1 + ): + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("SparseCrossEntropyMs_") + ) + .Op("sparse_cross_entropy_ms") + .Input("prediction", [prediction]) + .Input("label", [labels]) + .Output("out") + .Attr("depth", int(prediction.shape[-1])) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + else: + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("SparseCrossEntropy_") + ) + .Op("sparse_cross_entropy") + .Input("prediction", [prediction]) + .Input("label", [labels]) + .Output("out") + .Attr("depth", int(prediction.shape[-1])) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("nn.softmax_cross_entropy_with_logits") +def softmax_cross_entropy_with_logits( + labels: oneflow._oneflow_internal.BlobDesc, + logits: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Computes softmax cross entropy between logits and labels. 
+ + Args: + labels (oneflow._oneflow_internal.BlobDesc): Each vector along the class dimension should hold a valid probability distribution. + logits (oneflow._oneflow_internal.BlobDesc): Per-label activations, typically a linear output. logits has same shape and dtype as labels. + name (Optional[str], optional): This operator's name(optional). Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: A `Blob` that contains the softmax cross entropy loss. Its type is the same as logits and its shape is the same as labels except that it does not have the last dimension of labels. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def softmax_cross_entropy_Job(input: tp.Numpy.Placeholder((3, 3), dtype=flow.float32), + labels: tp.Numpy.Placeholder((3, 3), dtype=flow.float32) + ) -> tp.Numpy: + loss = flow.nn.softmax_cross_entropy_with_logits(labels=labels, + logits=input) + return loss + + + x = np.array([[4, 1, 2], + [3, 2, 3], + [1, 5, 10]]).astype(np.float32) + labels = np.array([[0.9, 0.05, 0.05], + [0.3, 0.4, 0.3], + [0.8, 0.1, 0.1]]).astype(np.float32) + loss = softmax_cross_entropy_Job(x, labels) + + # out [0.73441553 1.1240788 1.4488925 ] + + """ + + assert labels is not None + assert logits is not None + + assert labels.shape == logits.shape + assert labels.dtype == logits.dtype + + prob, out = ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("SoftmaxCrossEntropy_") + ) + .Op("softmax_cross_entropy") + .Input("prediction", [logits]) + .Input("label", [labels]) + .Output("prob") + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + return out + + +@oneflow_export("nn.sparse_softmax_cross_entropy_with_logits") +def sparse_softmax_cross_entropy_with_logits( + labels: oneflow._oneflow_internal.BlobDesc, + logits: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Computes sparse softmax cross entropy between logits and labels. + + Args: + labels (oneflow._oneflow_internal.BlobDesc): `Blob` of shape [d_0, d_1, ..., d_{r-1}] (where r is rank of labels and result). Each entry in labels must be an index in [0, num_classes). + logits (oneflow._oneflow_internal.BlobDesc): Unscaled log probabilities of shape [d_0, d_1, ..., d_{r-1},num_classes]. + name (Optional[str], optional): This operator's name(optional). Defaults to None. + + Raises: + ValueError: If logits are scalars (need to have rank >= 1) or if the rank of the labels is not equal to the rank of the logits minus one. + + Returns: + oneflow._oneflow_internal.BlobDesc: A `Blob` of the same shape as labels and of the same type as logits with the softmax cross entropy loss. + + Note: + + The labels data type should be `oneflow.compatible.single_client.int32`. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def sparse_softmax_cross_entropy_Job(input: tp.Numpy.Placeholder((3, 3), dtype=flow.float32), + labels: tp.Numpy.Placeholder((3, ), dtype=flow.int32) + ) -> tp.Numpy: + loss = flow.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, + logits=input) + return loss + + + x = np.array([[4, 1, 2], + [3, 2, 3], + [1, 5, 10]]).astype(np.float32) + labels = np.array([0, 1, 2]).astype(np.int32) + loss = sparse_softmax_cross_entropy_Job(x, labels) + + # out [0.65784633 1.2842525 0.5557927 ] + + """ + assert labels is not None + assert logits is not None + + if len(labels.shape) == len(logits.shape): + assert labels.shape[-1] == 1 + labels = flow.squeeze(labels, axis=[-1]) + else: + assert len(labels.shape) == len(logits.shape) - 1 + + if logits.distribute is oneflow._oneflow_internal.distribute.split( + len(logits.shape) - 1 + ): + prob, out = ( + flow.user_op_builder( + name + if name is not None + else id_util.UniqueStr("SparseSoftmaxCrossEntropyMs_") + ) + .Op("sparse_softmax_cross_entropy_ms") + .Input("prediction", [logits]) + .Input("label", [labels]) + .Output("prob") + .Output("out") + .Attr("depth", int(logits.shape[-1])) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + else: + prob, out = ( + flow.user_op_builder( + name + if name is not None + else id_util.UniqueStr("SparseSoftmaxCrossEntropy_") + ) + .Op("sparse_softmax_cross_entropy") + .Input("prediction", [logits]) + .Input("label", [labels]) + .Output("prob") + .Output("out") + .Attr("depth", int(logits.shape[-1])) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + return out + + +@oneflow_export("nn.distributed_sparse_softmax_cross_entropy_with_logits") +def distributed_sparse_softmax_cross_entropy_with_logits( + labels: oneflow._oneflow_internal.BlobDesc, + logits: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + assert labels is not None + assert logits is not None + if len(labels.shape) == len(logits.shape): + assert labels.shape[-1] == 1 + labels = flow.squeeze(labels, axis=[-1]) + else: + assert len(labels.shape) == len(logits.shape) - 1 + + prob, out = ( + flow.user_op_builder( + name + if name is not None + else id_util.UniqueStr("DistributedSparseSoftmaxCrossEntropy_") + ) + .Op("sparse_softmax_cross_entropy_ms") + .Input("prediction", [logits]) + .Input("label", [labels]) + .Output("prob") + .Output("out") + .Attr("depth", int(logits.shape[-1])) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + return out + + +@oneflow_export("nn.sigmoid_cross_entropy_with_logits") +def sigmoid_cross_entropy_with_logits( + labels: oneflow._oneflow_internal.BlobDesc, + logits: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Computes sigmoid cross entropy given logits. + + Args: + labels (oneflow._oneflow_internal.BlobDesc): A `Blob` of the same type and shape as logits. + logits (oneflow._oneflow_internal.BlobDesc): A `Blob` of type float. + name (Optional[str], optional): This operator's name(optional). Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: A `Blob` of the same shape as logits with the componentwise logistic losses. + + Raises: + ValueError: If logits and labels do not have the same shape. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def sigmoid_cross_entropy_Job(input: tp.Numpy.Placeholder((3, 2), dtype=flow.float32), + labels: tp.Numpy.Placeholder((3, 2), dtype=flow.float32) + ) -> tp.Numpy: + loss = flow.nn.sigmoid_cross_entropy_with_logits(labels=labels, + logits=input) + return loss + + + x = np.array([[4, 1], + [3, 2], + [1, 5]]).astype(np.float32) + labels = np.array([[0.7, 0.3], + [0.4, 0.6], + [0.2, 0.8]]).astype(np.float32) + loss = sigmoid_cross_entropy_Job(x, labels) + + # out [[0.612735 0.90472794] + # [0.89778364 0.6990613 ] + # [0.97783387 0.51372755]] + + + """ + assert labels is not None + assert logits is not None + + op = ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("SigmoidCrossEntropy_") + ) + .Op("sigmoid_cross_entropy") + .Input("prediction", [logits]) + .Input("label", [labels]) + .Output("loss") + .Build() + ) + return op.InferAndTryRun().RemoteBlobList()[0] + + +def _GetSequence(value, n, name): + """Formats value from input""" + if value is None: + value = [1] + elif not isinstance(value, collections.Sized): + value = [value] + + current_n = len(value) + if current_n == 1: + return list(value * n) + elif current_n == n: + return list(value) + else: + raise ValueError( + "{} should be of length 1 or {} but was {}".format(name, n, current_n) + ) + + +@oneflow_export("nn.random_mask_like") +def random_mask_like( + like: oneflow._oneflow_internal.BlobDesc, + rate: float, + seed: Optional[int] = None, + noise_shape: Optional[Sequence] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Random mask `Blob` with same shape as '`like'`. + + Args: + like (oneflow._oneflow_internal.BlobDesc): A `Blob`. + rate (float): A float value for the probability that each element is dropped. + seed (Optional[int], optional): Optional, int value. Defaults to None. + noise_shape (Optional[Sequence], optional): Optional, A 1-D `Blob`, representing the shape for randomly generated keep/drop flags. Defaults to None. + name (Optional[str], optional): This operator's name(optional). Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: A random mask `Blob` of the same shape of `like`. + + Raises: + ValueError: If rate is not in [0, 1). Rate=1 is not allowed. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def random_mask_like_Job(like: tp.Numpy.Placeholder((5, 5), dtype=flow.float32) + ) -> tp.Numpy: + + return flow.nn.random_mask_like(like=like, + rate=0.5) + + + like = np.ones(shape=(5, 5)).astype(np.float32) + random_mask = random_mask_like_Job(like) + + # out [[0 0 0 0 0] + # [1 1 1 0 0] + # [1 0 1 1 0] + # [0 0 0 0 1] + # [1 0 1 1 1]] + + """ + assert rate is not None and rate >= 0.0 and rate < 1.0 + if noise_shape is not None: + assert 0, "noise_shape will be supported later." 
+ assert isinstance(noise_shape, (list, tuple)) + if seed is not None: + assert name is not None + if name is None: + mask_op = ( + flow.user_op_builder(id_util.UniqueStr("RandomMaskLike_")) + .Op("random_mask_like") + .Input("like", [like]) + .Output("out") + .Attr("rate", float(rate)) + ) + if seed is not None: + mask_op.Attr("seed", seed) + else: + mask_op.Attr("seed", random.randint(-sys.maxsize, sys.maxsize)) + return mask_op.Build().InferAndTryRun().RemoteBlobList()[0] + else: + module = flow.find_or_create_module( + name, lambda: RandomMaskLike(rate=rate, seed=seed, name=name,), + ) + return module(like) + + +class RandomMaskLike(module_util.Module): + def __init__( + self, rate: float, seed: Optional[int] = None, name: str = None, + ): + module_util.Module.__init__(self, name) + if seed is None: + seed = random.randint(-sys.maxsize, sys.maxsize) + + self.op_module_builder = ( + flow.user_op_module_builder("random_mask_like") + .InputSize("like", 1) + .Output("out") + .Attr("rate", float(rate)) + .Attr("seed", seed) + .CheckAndComplete() + ) + self.op_module_builder.user_op_module.InitOpKernel() + + def forward(self, like: oneflow._oneflow_internal.BlobDesc): + if self.call_seq_no == 0: + name = self.module_name + else: + name = id_util.UniqueStr("RandomMaskLike_") + return ( + self.op_module_builder.OpName(name) + .Input("like", [like]) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("nn.dropout") +def dropout( + x: oneflow._oneflow_internal.BlobDesc, + rate: float, + noise_shape: Optional[oneflow._oneflow_internal.BlobDesc] = None, + seed: Optional[int] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""For preventing overfitting, randomly set elements to zero. + + Args: + x (oneflow._oneflow_internal.BlobDesc): A floating point `Blob`. + rate (float): A scalar `Blob` with the same type as x. The probability that each element is dropped. + noise_shape (Optional[oneflow._oneflow_internal.BlobDesc], optional): optional: A 1-D `Blob`, representing the shape for randomly generated keep/drop flags. Defaults to None.Defaults to None. + seed (Optional[int], optional): Optional int value. Defaults to None. + name (Optional[str], optional): This operator's name(optional). Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: A `Blob` of the same shape of x. + + Raises: + ValueError: If rate is not in [0, 1) or if x is not a floating point `Blob`. Rate=1 is not allowed. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + + + def lenet(data, train=False): + initializer = flow.truncated_normal(0.1) + conv1 = flow.layers.conv2d( + data, + 32, + 5, + padding="SAME", + activation=flow.nn.relu, + name="conv1", + kernel_initializer=initializer, + ) + pool1 = flow.nn.max_pool2d( + conv1, ksize=2, strides=2, padding="SAME", name="pool1", data_format="NCHW" + ) + conv2 = flow.layers.conv2d( + pool1, + 64, + 5, + padding="SAME", + activation=flow.nn.relu, + name="conv2", + kernel_initializer=initializer, + ) + pool2 = flow.nn.max_pool2d( + conv2, ksize=2, strides=2, padding="SAME", name="pool2", data_format="NCHW" + ) + reshape = flow.reshape(pool2, [pool2.shape[0], -1]) + hidden = flow.layers.dense( + reshape, + 512, + activation=flow.nn.relu, + kernel_initializer=initializer, + name="dense1", + ) + if train: + hidden = flow.nn.dropout(hidden, rate=0.5, name="dropout") + + return flow.layers.dense(hidden, 10, kernel_initializer=initializer, name="dense2") + + """ + assert rate is not None and rate >= 0.0 and rate < 1.0 + if not flow.current_global_function_desc().IsTrainable() or rate == 0.0: + return x + if seed is not None: + assert name is not None + if name is None: + name = id_util.UniqueStr("Dropout_") + mask = random_mask_like( + x, rate, seed, noise_shape, "%s-dropout_random_mask_like" % name + ) + return ( + flow.user_op_builder(name) + .Op("dropout") + .Input("in", [x]) + .Input("mask", [mask]) + .Output("out") + .Attr("scale", float(1.0 / (1.0 - rate))) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("nn.conv2d_transpose") +def deconv2d( + value: Optional[oneflow._oneflow_internal.BlobDesc] = None, + filter: Optional[oneflow._oneflow_internal.BlobDesc] = None, + output_shape: Tuple[int, int, int, int] = None, + strides: Optional[Union[int, Sequence[int]]] = None, + padding: str = "VALID", + data_format: str = "NCHW", + name: Optional[str] = None, + input: Optional[oneflow._oneflow_internal.BlobDesc] = None, + filters: Optional[oneflow._oneflow_internal.BlobDesc] = None, + dilations: Optional[Union[int, Sequence[int]]] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""2d transposed convolution. + + Args: + value (Optional[oneflow._oneflow_internal.BlobDesc], optional): 4-d `Blob`. Defaults to None. + filter (Optional[oneflow._oneflow_internal.BlobDesc], optional): Filter of transposed convolution, usually a variable. Defaults to None. + output_shape (Tuple[int, int, int, int]): A 1-D `Blob` representing the output shape of the deconvolution op. Defaults to None. + strides (Optional[Union[int, Sequence[int]]], optional): `int` or `int list`. Defaults to None. + padding (str, optional): `'VALID'` or `'SAME'`. Defaults to "VALID". + data_format (str, optional): `'NHWC'` or `'NCHW'`. Defaults to "NCHW". + name (Optional[str], optional): This operator's name(optional). Defaults to None. + input (Optional[oneflow._oneflow_internal.BlobDesc], optional): Alias for value. Defaults to None. + filters (Optional[oneflow._oneflow_internal.BlobDesc], optional): Alias for filter. Defaults to None. + dilations (Optional[Union[int, Sequence[int]]], optional): The dilation factor for each dimension of input. Defaults to None. + + Raises: + ValueError: shapes of `filter` and `input` must match. + ValueError: dilations must be an int or a list. + ValueError: data_format must be "NHWC" or "NCHW". + ValueError: padding must be "SAME" or "VALID". 
+ + Returns: + oneflow._oneflow_internal.BlobDesc: A `Blob` with the same type as `value`. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + def deconv2d(input, filters, kernel_size, strides, padding, name): + input_shape = input.shape + weight_initializer = flow.truncated_normal(0.1) + weight_regularizer = flow.regularizers.l2(0.0005) + weight_shape = (filters, + input_shape[1], + kernel_size[0], + kernel_size[1]) + + weight = flow.get_variable( + name + "-weight", + shape=weight_shape, + initializer=weight_initializer, + regularizer=weight_regularizer, + ) + return flow.nn.conv2d_transpose(value=input, + output_shape=(1, 32, 64, 64), + filter=weight, + strides=strides, + padding=padding, + name=name) + + + @flow.global_function() + def deconv2d_Job(x: tp.Numpy.Placeholder((1, 32, 32, 32),) + ) -> tp.Numpy: + deconv = deconv2d(x, + filters=32, + kernel_size=[3, 3], + strides=2, + padding='SAME', + name="Convlayer") + return deconv + + + x = np.random.randn(1, 32, 32, 32).astype(np.float32) + out = deconv2d_Job(x) + + # out.shape (1, 32, 64, 64) + + """ + assert (value is not None) ^ ( + input is not None + ), "only one of `input` and `value` could be not None" + assert (filter is not None) ^ ( + filters is not None + ), "only one of `filter` and `filters` could be not None" + filters = filters or filter + input = input or value + + NDims = 2 + assert len(input.shape) == 2 + NDims + assert len(filters.shape) == 2 + NDims + assert len(output_shape) == 2 + NDims + assert output_shape[0] == input.shape[0] + + # dilations + if dilations is None: + dilations = [1, 1] + else: + if isinstance(dilations, (list, tuple)): + assert len(dilations) == 2, ValueError( + "dilations length must be 2 when passed as a list." + ) + elif isinstance(dilations, int): + dilations = [dilations, dilations] + else: + raise ValueError("dilations must be an int or a list.") + + # data format + if data_format.upper() == "NCHW": + input_shape = input.shape[2:] + kernel_size = filters.shape[2:4] + channels = filters.shape[1] + assert output_shape[1] == channels + output_shape = output_shape[2:4] + elif data_format.upper() == "NHWC": + input_shape = input.shape[1:3] + kernel_size = filters.shape[-3:-1] + channels = filters.shape[3] + assert output_shape[3] == channels + output_shape = output_shape[1:3] + assert dilations == [1, 1], ValueError( + "dialtions must be 1 when data format is NHWC " + ) + else: + raise ValueError('data_format must be "NHWC" or "NCHW".') + + channel_pos = "channels_first" if data_format.startswith("NC") else "channels_last" + + # strides + if isinstance(strides, (list, tuple)): + assert len(strides) == NDims, ValueError( + "strides length must be 2 when passed as a list." 
+ ) + elif isinstance(strides, int): + strides = [strides, strides] + else: + raise ValueError("strides must be an int or a list.") + + # output_padding and padding_needed + output_padding = [0] * NDims + padding_needed = [0] * NDims + if padding.upper() == "VALID": + for i in range(NDims): + effective_filter_size = (kernel_size[i] - 1) * dilations[i] + 1 + assert (output_shape[i] + strides[i] - effective_filter_size) // strides[ + i + ] == input_shape[i] + tmp_output_shape = (input_shape[i] - 1) * strides[i] + effective_filter_size + output_padding[i] = output_shape[i] - tmp_output_shape + elif padding.upper() == "SAME": + padding_left = [0] * NDims + padding_right = [0] * NDims + for i in range(NDims): + assert (output_shape[i] + strides[i] - 1) // strides[i] == input_shape[i] + effective_filter_size = (kernel_size[i] - 1) * dilations[i] + 1 + padding_needed[i] = max( + 0, + (input_shape[i] - 1) * strides[i] + + effective_filter_size + - output_shape[i], + ) + tmp_output_shape = ( + (input_shape[i] - 1) * strides[i] + + effective_filter_size + - padding_needed[i] + ) + output_padding[i] = output_shape[i] - tmp_output_shape + padding_left[i] = padding_needed[i] // 2 + padding_right[i] = padding_needed[i] - padding_needed[i] // 2 + else: + raise ValueError('padding must be "SAME" or "VALID".') + # add pad op if needs odd padding + if padding.upper() == "SAME" and padding_left != padding_right: + assert data_format.upper() == "NCHW" + padding_before = [0] * NDims + input = ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("Deconv2d_") + ) + .Op("deconv2d") + .Input("in", [input]) + .Input("weight", [filters]) + .Output("out") + .Attr("filters", channels) + .Attr("padding_before", padding_before) + .Attr("data_format", channel_pos) + .Attr("kernel_size", kernel_size) + .Attr("strides", strides) + .Attr("dilation_rate", dilations) + .Attr("output_padding", output_padding) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + return flow.pad_grad( + input, + [ + (0, 0), + (0, 0), + (padding_left[0], padding_right[0]), + (padding_left[1], padding_right[1]), + ], + name=name + "_pad_grad" if name is not None else None, + ) + assert len(padding_needed) == len(input.shape) - 2 + padding_before = [] + for pad in padding_needed: + assert pad % 2 == 0 + padding_before.append(pad // 2) + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("Deconv2d_") + ) + .Op("deconv2d") + .Input("in", [input]) + .Input("weight", [filters]) + .Output("out") + .Attr("filters", channels) + .Attr("padding_before", padding_before) + .Attr("data_format", channel_pos) + .Attr("kernel_size", kernel_size) + .Attr("strides", strides) + .Attr("dilation_rate", dilations) + .Attr("output_padding", output_padding) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("nn.torch_conv2d_transpose") +def deconv2d_torch( + value=None, + filter=None, + output_padding=None, + strides=None, + padding_needed=None, + data_format="NCHW", + name=None, + input=None, + filters=None, + dilations=None, +): + + assert (value is not None) ^ ( + input is not None + ), "only one of `input` and `value` could be not None" + + assert (filter is not None) ^ ( + filters is not None + ), "only one of `filter` and `filters` could be not None" + filters = filters or filter + input = input or value + + NDims = 2 + assert len(input.shape) == 2 + NDims + assert len(filters.shape) == 2 + NDims + + # dilations + if dilations is None: + dilations = [1, 1] + else: + if 
isinstance(dilations, (list, tuple)): + assert len(dilations) == 2, ValueError( + "dilations length must be 2 when passed as a list." + ) + elif isinstance(dilations, int): + dilations = [dilations, dilations] + else: + raise ValueError("dilations must be an int or a list.") + + # data format + if data_format.upper() == "NCHW": + input_shape = input.shape[2:] + kernel_size = filters.shape[2:4] + channels = filters.shape[1] + elif data_format.upper() == "NHWC": + input_shape = input.shape[1:3] + kernel_size = filters.shape[-3:-1] + channels = filters.shape[3] + assert dilations == [1, 1], ValueError( + "dialtions must be 1 when data format is NHWC " + ) + else: + raise ValueError('data_format must be "NHWC" or "NCHW".') + + channel_pos = "channels_first" if data_format.startswith("NC") else "channels_last" + + # strides + if isinstance(strides, (list, tuple)): + assert len(strides) == NDims, ValueError( + "strides length must be 2 when passed as a list." + ) + elif isinstance(strides, int): + strides = [strides, strides] + else: + raise ValueError("strides must be an int or a list.") + + # output_padding and padding_needed + assert len(padding_needed) == len(input.shape) - 2 + padding_before = [] + for pad in padding_needed: + assert pad % 2 == 0 + padding_before.append(pad // 2) + + if output_padding is None: + output_padding = (0, 0) + + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("Deconv2d_") + ) + .Op("deconv2d") + .Input("in", [input]) + .Input("weight", [filters]) + .Output("out") + .Attr("filters", channels) + .Attr("padding_before", padding_before) + .Attr("data_format", channel_pos) + .Attr("kernel_size", kernel_size) + .Attr("strides", strides) + .Attr("dilation_rate", dilations) + .Attr("output_padding", output_padding) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("nn.leaky_relu") +def leaky_relu( + x: oneflow._oneflow_internal.BlobDesc, + alpha: float = 0.2, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Leaky ReLU activation. + + .. math:: + out = max(x, alpha*x) + + Args: + x (oneflow._oneflow_internal.BlobDesc): A `Blob` representing preactivation values. + alpha (float, optional): Slope of the activation function at x < 0 with float type. Default value is 0.2. + name (Optional[str], optional): This operator's name(optional). Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The activation `Blob`. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def leaky_relu_Job(x: tp.Numpy.Placeholder((5, ),) + ) -> tp.Numpy: + leaky_relu = flow.nn.leaky_relu(x, alpha=0.2) + + return leaky_relu + + + x = np.array([-10, -5, 0, 5, 10]).astype(np.float32) + out = leaky_relu_Job(x) + + # out [-2. -1. 0. 5. 10.] + + """ + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("LeakyRelu_") + ) + .Op("leaky_relu") + .Input("x", [x]) + .Output("y") + .Attr("alpha", float(alpha)) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("nn.elu") +def elu( + x: oneflow._oneflow_internal.BlobDesc, + alpha: float = 1.0, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""The ELU activation. + + The formula is: + + .. 
math:: + + \text{ELU}(x) = \begin{cases} + x & \text{ if } x \gt 0 \\ + \alpha*(exp(x)-1) & \text{ if } x \le 0 \\ + \end{cases} + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def elu_job(x: tp.Numpy.Placeholder(shape=(3, )))->tp.Numpy: + return flow.nn.elu(x, alpha=1.0) + + + x = np.array([-3.5, 1, 3.5]).astype(np.float32) + out = elu_job(x) + + # output [-0.9698026 1. 3.5 ] + + Args: + x (oneflow._oneflow_internal.BlobDesc): The input Tensor. + alpha (float, optional): The `alpha` value for the ELU formula. Defaults to 1.0. + name (Optional[str], optional): The name for the operator. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The activated Tensor. + """ + alpha = float(alpha) + if name is None: + name = id_util.UniqueStr("Elu_") + return ( + flow.user_op_builder(name) + .Op("elu") + .Input("in", [x]) + .Output("out") + .Attr("alpha", alpha) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("nn.hardsigmoid") +def hard_sigmoid( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""The Hardsigmoid activation. + + The formula is: + + .. math:: + + \text{Hardsigmoid}(x) = \begin{cases} + 0 & \text{ if } x \le -3 \\ + 1 & \text{ if } x \ge +3 \\ + \frac{x}{6} + \frac{1}{2} & \text{ otherwise } \\ + \end{cases} + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def hardsigmoid_job(x: tp.Numpy.Placeholder(shape=(3, )))->tp.Numpy: + out = flow.nn.hardsigmoid(x) + + return out + + + x = np.array([-3.1, 0, 3.3]).astype(np.float32) + out = hardsigmoid_job(x) + + # output [0. 0.5 1. ] + + Args: + x (oneflow._oneflow_internal.BlobDesc): The input Tensor. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The activated Tensor. + """ + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("HardSigmoid_") + ) + .Op("hardsigmoid") + .Input("in", [x]) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("nn.mish") +def mish( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """The Mish activation function. + + The equation is: + + .. math:: + + out = x*tanh(ln(1+e^x)) + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def mish_job(x: tp.Numpy.Placeholder(shape=(5, )))->tp.Numpy: + return flow.nn.mish(x) + + + x = np.array([-0.5, 0, 0.5, 1.0, 1.5]).astype(np.float32) + out = mish_job(x) + + Args: + x (oneflow._oneflow_internal.BlobDesc): The input Blob. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. 
+ """ + if name is None: + name = id_util.UniqueStr("Mish_") + + return x * flow.math.tanh( + flow.math.softplus(x, name=name + "softplus"), name=name + "tanh" + ) + + +@oneflow_export("nn.swish") +def swish( + x: oneflow._oneflow_internal.BlobDesc, + beta: float = 1.0, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""The Swish activation function. + + The equation is: + + .. math:: + + out = x * sigmoid(\beta*x) + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def swish_job(x: tp.Numpy.Placeholder(shape=(5, )))->tp.Numpy: + return flow.nn.swish(x) + + + x = np.array([-0.5, 0, 0.5, 1, 1.5]).astype(np.float32) + + + out = swish_job(x) + # output [-0.18877034 0. 0.31122968 0.7310586 1.2263618 ] + + Args: + x (oneflow._oneflow_internal.BlobDesc): The input Blob. + beta (float, optional): The smooth factor. Defaults to 1.0. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + """ + if name is None: + name = id_util.UniqueStr("Swish_") + + return x * flow.math.sigmoid(beta * x, name=name + "_sigmoid") + + +@oneflow_export("nn.hardswish") +def hardswish( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""The Hardswish activation. + + The formula is: + + .. math:: + + \text{Hardswish}(x) = \begin{cases} + 0 & \text{ if } x \le -3 \\ + x & \text{ if } x \ge +3 \\ + x*(x+3)/6 & \text{ otherwise } \\ + \end{cases} + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def hardswish_job(x: tp.Numpy.Placeholder(shape=(3, )))->tp.Numpy: + return flow.nn.hardswish(x) + + + x = np.array([-3.5, 1, 3.5]).astype(np.float32) + out = hardswish_job(x) + + # output [0. 0.6666667 3.5 ] + + Args: + x (oneflow._oneflow_internal.BlobDesc): The input Tensor. + name (Optional[str], optional): The name for the operation. Defaults to None. + Returns: + oneflow._oneflow_internal.BlobDesc: The activated Tensor. + """ + if name is None: + name = id_util.UniqueStr("HardSwish_") + return ( + flow.user_op_builder(name) + .Op("hardswish") + .Input("in", [x]) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("nn.hardtanh") +def hardtanh( + x: oneflow._oneflow_internal.BlobDesc, + min_val: float = -1.0, + max_val: float = 1.0, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""The Hardtanh activation. + + The equation is: + + .. math:: + + \text{HardTanh}(x) = \begin{cases} + max\_val & \text{ if } x > max\_val \\ + -min\_val & \text{ if } x < min\_val \\ + x & \text{ otherwise } \\ + \end{cases} + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + @flow.global_function() + def hardtanh_job(x: tp.Numpy.Placeholder(shape=(2, 3)))->tp.Numpy: + return flow.nn.hardtanh(x, min_val=-1.25, max_val=1.2) + + + x = np.array([[-1.5, -1.1, 0.6], + [1.2, 1.3, 1.5]]).astype(np.float32) + out = hardtanh_job(x) + + # output [[-1.25 -1.1 0.6 ] + # [ 1.2 1.2 1.2 ]] + + Args: + x (oneflow._oneflow_internal.BlobDesc): The input Tensor. 
+ min_val (float, optional): The minimum value of the linear region range. Defaults to -1. + max_val (float, optional): The maximum value of the linear region range. Defaults to 1. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The activated tensor. + """ + if name is None: + name = id_util.UniqueStr("Hardtanh_") + + min_val = float(min_val) + max_val = float(max_val) + + assert min_val < max_val, "max_val should be larger than min_val" + + return ( + flow.user_op_builder(name) + .Op("hardtanh") + .Input("in", [x]) + .Attr("min_val", min_val) + .Attr("max_val", max_val) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("nn.relu6") +def relu6( + x: oneflow._oneflow_internal.BlobDesc, name: Optional[str] = None +) -> oneflow._oneflow_internal.BlobDesc: + r"""Relu6 activation, it clips the value around (0, 6). + + The equation is: + + .. math:: + + \text{Relu6}(x) = \begin{cases} + 6 & \text{ if } x > 6 \\ + 0 & \text{ if } x < 0 \\ + x & \text{ otherwise } \\ + \end{cases} + + For example: + + .. code-block:: + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def relu6_job(x: tp.Numpy.Placeholder(shape=(2, 3)))->tp.Numpy: + return flow.nn.relu6(x) + + x = np.array([[-1, -0.5, 0.0], + [0.5, 6.0, 7]]).astype(np.float32) + + out = relu6_job(x) + + # output [[0. 0. 0. ] + # [0.5 6. 6. ]] + + Args: + x (oneflow._oneflow_internal.BlobDesc): The input Tensor. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The activated Tensor. + """ + if name is None: + name = id_util.UniqueStr("Relu6_") + return flow.nn.hardtanh(x, min_val=0.0, max_val=6.0, name=name) + + +@oneflow_export("nn.L1Loss") +@stable_api +def l1_loss( + input: oneflow._oneflow_internal.BlobDesc, + target: oneflow._oneflow_internal.BlobDesc, + reduction: str = "mean", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the L1 Loss between each element in `input` and `target`. + + The equation is: + + if reduction = "none": + + .. math:: + + output = |Target - Input| + + if reduction = "mean": + + .. math:: + + output = \frac{1}{n}\sum_{i=1}^n|Target_i - Input_i| + + if reduction = "sum": + + .. math:: + + output = \sum_{i=1}^n|Target_i - Input_i| + + Args: + input (oneflow._oneflow_internal.BlobDesc): The input Blob. + target (oneflow._oneflow_internal.BlobDesc): The target value. + reduction (str): The reduce type, it can be one of "none", "mean", "sum". Defaults to "mean". + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + Example 1: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def l1_job(x: tp.Numpy.Placeholder(shape=(3, 3)), + y: tp.Numpy.Placeholder(shape=(3, 3))) -> tp.Numpy: + out = flow.nn.L1Loss(x, y, reduction="mean", name="l1") + + return out + + + input = np.array([[1, 1, 1], [2, 2, 2], [7, 7, 7]]).astype(np.float32) + target = np.array([[4, 4, 4], [4, 4, 4], [4, 4, 4]]).astype(np.float32) + + out = l1_job(input, target) + + # output [2.6666667] + + Example 2: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def l1_job(x: tp.Numpy.Placeholder(shape=(3, 3)), + y: tp.Numpy.Placeholder(shape=(3, 3))) -> tp.Numpy: + out = flow.nn.L1Loss(x, y, reduction="sum", name="l1") + + return out + + + input = np.array([[1, 1, 1], [2, 2, 2], [7, 7, 7]]).astype(np.float32) + target = np.array([[4, 4, 4], [4, 4, 4], [4, 4, 4]]).astype(np.float32) + + out = l1_job(input, target) + + # output [24.] + + """ + assert ( + input.shape == target.shape + ), "The Input shape must be the same as Target shape" + + assert reduction in [ + "none", + "mean", + "sum", + ], "{} is not a valid value for reduction, The reduction must be the one of `none`, `mean`, `sum`. ".format( + reduction + ) + + if name is None: + name = id_util.UniqueStr("L1Loss") + + l1_value = flow.math.abs( + flow.math.subtract(target, input, name=name + "_sub"), name=name + "_abs" + ) + + if reduction == "mean": + return flow.math.reduce_mean(l1_value, name=name + "_reduce_mean") + elif reduction == "sum": + return flow.math.reduce_sum(l1_value, name=name + "_reduce_sum") + else: + # Do no reduction + return l1_value + + +@oneflow_export("nn.BCELoss") +@stable_api +def bce_loss( + input: oneflow._oneflow_internal.BlobDesc, + target: oneflow._oneflow_internal.BlobDesc, + weight: remote_blob_util = None, + reduction: str = "mean", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the binary cross entropy loss. + + The equation is: + + if reduction = "none": + + .. math:: + + out = -(Target_i*log(Input_i) + (1-Target_i)*log(1-Input_i)) + + if reduction = "mean": + + .. math:: + + out = -\frac{1}{n}\sum_{i=1}^n(Target_i*log(Input_i) + (1-Target_i)*log(1-Input_i)) + + if reduction = "sum": + + .. math:: + + out = -\sum_{i=1}^n(Target_i*log(Input_i) + (1-Target_i)*log(1-Input_i)) + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def bce_loss_job(input: tp.Numpy.Placeholder(shape=(2, 3)), + target: tp.Numpy.Placeholder(shape=(2, 3)), + weight: tp.Numpy.Placeholder(shape=(2, 3)))->tp.Numpy: + sigmoid_input = flow.math.sigmoid(input) + return flow.nn.BCELoss(sigmoid_input, target, weight, reduction='mean') + + + np_input = np.array([[1.2, 0.2, -0.3], + [0.7, 0.6, -2]]).astype(np.float32) + + np_target = np.array([[0, 1, 0], + [1, 0, 1]]).astype(np.float32) + + np_weight = np.array([[2, 2, 2], + [2, 2, 2]]).astype(np.float32) + + # output [2.0611262] + + Args: + input (oneflow._oneflow_internal.BlobDesc): The input Blob. + target (oneflow._oneflow_internal.BlobDesc): The target value. + weight (remote_blob_util, optional): The manual rescaling weight to the loss. Default to None, whose corresponding weight value is 1. + reduction (str, optional): The reduce type, it can be one of "none", "mean", "sum". Defaults to "mean". + name (Optional[str], optional): The name for the operation. Defaults to None. + + Attention: + The input value must be in the range of (0, 1). Or the loss function may return `nan` value. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. 
+ """ + # TODO: Check the input and target value range is in (0, 1) + assert ( + input.shape == target.shape + ), "The Input shape must be the same as Target shape" + assert reduction in [ + "none", + "mean", + "sum", + ], "{} is not a valid value for reduction, The reduction must be the one of `none`, `mean`, `sum`. ".format( + reduction + ) + + if name is None: + name = id_util.UniqueStr("BCELoss") + + _cross_entropy_loss = flow.math.negative( + target * flow.math.log(input) + (1 - target) * flow.math.log(1 - input) + ) + + if weight is not None: + assert ( + weight.shape == input.shape + ), "The weight shape must be the same as Input shape" + _weighted_loss = weight * _cross_entropy_loss + else: + _weighted_loss = _cross_entropy_loss + + if reduction == "mean": + return flow.math.reduce_mean(_weighted_loss, name=name + "_reduce_mean") + elif reduction == "sum": + return flow.math.reduce_sum(_weighted_loss, name=name + "_reduce_sum") + else: + # Do no reduction + return _weighted_loss + + +@oneflow_export("nn.BCEWithLogitsLoss") +@stable_api +def bce_with_logits_loss( + input: oneflow._oneflow_internal.BlobDesc, + target: oneflow._oneflow_internal.BlobDesc, + weight: remote_blob_util = None, + pos_weight: remote_blob_util = None, + reduction: str = "mean", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator combines the `Sigmoid` and `BCELoss` together. For numerical stability, + we apply some math tricks instead of using `Sigmoid` layer with `BCELoss`. + + The equation is: + + if reduction = "none": + + .. math:: + + out = -weight*[Pos\_weight*y*log\sigma({x}) + (1-y)*log(1-\sigma(x))] + + if reduction = "mean": + + .. math:: + + out = -\frac{weight}{n}\sum_{i=1}^n[Pos\_weight*y*log\sigma({x}) + (1-y)*log(1-\sigma(x))] + + if reduction = "sum": + + .. math:: + + out = -weight*\sum_{i=1}^n[Pos\_weight*y*log\sigma({x}) + (1-y)*log(1-\sigma(x))] + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def bce_with_logits_loss_job(input: tp.Numpy.Placeholder(shape=(2, 3)), + target: tp.Numpy.Placeholder(shape=(2, 3)), + weight: tp.Numpy.Placeholder(shape=(2, 3)), + pos_weight: tp.Numpy.Placeholder(shape=(3, )))->tp.Numpy: + return flow.nn.BCEWithLogitsLoss(input, target, weight, pos_weight, reduction='mean') + + + np_input = np.array([[1.2, 0.2, -0.3], + [0.7, 0.6, -2]]).astype(np.float32) + + np_target = np.array([[0, 1, 0], + [1, 0, 1]]).astype(np.float32) + + np_weight = np.array([[2, 2, 2], + [2, 2, 2]]).astype(np.float32) + + np_pos_weight = np.array([1.2, 1.3, 1.4]).astype(np.float32) + + out = bce_with_logits_loss_job(np_input, np_target, np_weight, np_pos_weight) + + # output [2.4314096] + + Args: + input (oneflow._oneflow_internal.BlobDesc): The input Tensor. + target (oneflow._oneflow_internal.BlobDesc): The target Tensor. + weight (remote_blob_util, optional): The manual rescaling weight to the loss. Defaults to None. + pos_weight (remote_blob_util, optional): The manual rescaling weight to the positive examples. Defaults to None. + reduction (str, optional): The reduce type, it can be one of "none", "mean", "sum". Defaults to "mean". + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. 
+ """ + assert ( + input.shape == target.shape + ), "The Input shape must be the same as Target shape" + + assert reduction in [ + "none", + "mean", + "sum", + ], "{} is not a valid value for reduction, The reduction must be the one of `none`, `mean`, `sum`. ".format( + reduction + ) + + if name is None: + name = id_util.UniqueStr("BCEWithLogitsLoss") + + _neg_input = flow.math.negative(input) + _max_val = flow.clip(_neg_input, min_value=0) + _neg_max_val = flow.math.negative(_max_val) + + if pos_weight: + assert pos_weight.shape[0] == input.shape[-1], ( + "The length of `pos_weight` must be equal to the number of classes. " + "Found the length of pos_weight {} vs classes {}".format( + pos_weight.shape[0], input.shape[-1] + ) + ) + _log_weight = ((pos_weight - 1) * target) + 1 + _loss = (1 - target) * input + _log_weight * ( + flow.math.log( + flow.math.exp(_neg_max_val) + flow.math.exp(_neg_input - _max_val) + ) + + _max_val + ) + else: + _loss = (1 - target) * input + _max_val + _loss += flow.math.log( + flow.math.exp(_neg_max_val) + flow.math.exp(_neg_input - _max_val) + ) + + if weight is not None: + assert ( + weight.shape == input.shape + ), "The weight shape must be the same as Input shape" + _weighted_loss = weight * _loss + else: + _weighted_loss = _loss + + if reduction == "mean": + return flow.math.reduce_mean(_weighted_loss, name=name + "_reduce_mean") + elif reduction == "sum": + return flow.math.reduce_sum(_weighted_loss, name=name + "_reduce_sum") + else: + # Do no reduction + return _weighted_loss + + +@oneflow_export("nn.MSELoss") +@stable_api +def mse_loss( + input: oneflow._oneflow_internal.BlobDesc, + target: oneflow._oneflow_internal.BlobDesc, + reduction: str = "mean", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the mean squared error between each element in `input` and `target`. + + The equation is: + + if reduction = "none": + + .. math:: + + out = (Target_i - Input_i)^2 + + if reduction = "mean": + + .. math:: + + out = \frac{1}{n}\sum_{i=1}^n(Target_i - Input_i)^2 + + if reduction = "sum": + + .. math:: + + out = \sum_{i=1}^n(Target_i - Input_i)^2 + + Args: + input (oneflow._oneflow_internal.BlobDesc): The input Blob. + target (oneflow._oneflow_internal.BlobDesc): The target value. + reduction (str) = The reduce type, it can be the one of "none", "mean", "sum". Defaults to "mean". + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + Example 1: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def mseloss_job(input: tp.Numpy.Placeholder(shape=(3, 3)), + target: tp.Numpy.Placeholder(shape=(3, 3)))->tp.Numpy: + out = flow.nn.MSELoss(input, target, reduction="mean") + return out + + input = np.array([[1, 1, 1], [2, 2, 2], [7, 7, 7]]).astype(np.float32) + target = np.array([[4, 4, 4], [4, 4, 4], [4, 4, 4]]).astype(np.float32) + + out = mseloss_job(input, target) + + # output [7.3333335] + + Example 2: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def mseloss_job(input: tp.Numpy.Placeholder(shape=(3, 3)), + target: tp.Numpy.Placeholder(shape=(3, 3)))->tp.Numpy: + out = flow.nn.MSELoss(input, target, reduction="sum") + return out + + input = np.array([[1, 1, 1], [2, 2, 2], [7, 7, 7]]).astype(np.float32) + target = np.array([[4, 4, 4], [4, 4, 4], [4, 4, 4]]).astype(np.float32) + + out = mseloss_job(input, target) + + # output [66.] + """ + assert ( + input.shape == target.shape + ), "The Input shape must be the same as Target shape" + + assert reduction in [ + "none", + "mean", + "sum", + ], "{} is not a valid value for reduction, The reduction must be the one of `none`, `mean`, `sum`. ".format( + reduction + ) + + if name is None: + name = id_util.UniqueStr("MSELoss") + + mean_squared_difference = flow.math.squared_difference( + target, input, name=name + "_mean_squared" + ) + + if reduction == "mean": + return flow.math.reduce_mean( + mean_squared_difference, name=name + "_reduce_mean" + ) + elif reduction == "sum": + return flow.math.reduce_sum(mean_squared_difference, name=name + "_reduce_sum") + else: + # Do no reduction + return mean_squared_difference + + +@oneflow_export("nn.MarginRankingLoss") +@stable_api +def margin_ranking_loss( + input1: oneflow._oneflow_internal.BlobDesc, + input2: oneflow._oneflow_internal.BlobDesc, + target: oneflow._oneflow_internal.BlobDesc, + margin: float = 0.0, + reduction: str = "mean", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the Margin Ranking loss. + + The equation is: + + if reduction = "none": + + .. math:: + + out = \max\ (0, -y*(x_1-x_2)+margin) + + if reduction = "mean": + + .. math:: + + out = \frac{1}{n}\sum_{i=1}^n\max\ (0, -y*(x_1-x_2)+margin) + + if reduction = "sum": + + .. math:: + + out = \sum_{i=1}^n\max\ (0, -y*(x_1-x_2)+margin) + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def margin_ranking_loss_job(input1: tp.Numpy.Placeholder(shape=(3, 3)), + input2: tp.Numpy.Placeholder(shape=(3, 3)), + target: tp.Numpy.Placeholder(shape=(3, 3)))->tp.Numpy: + out = flow.nn.MarginRankingLoss(input1, input2, target, margin=1.0) + return out + + np_input1 = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]).astype(np.float32) + np_input2 = np.array([[2, 2, 2], + [2, 2, 2], + [2, 2, 2]]).astype(np.float32) + np_target = np.array([[3, 3, 3], + [3, 3, 3], + [3, 3, 3]]).astype(np.float32) + + out = margin_ranking_loss_job(np_input1, np_input2, np_target) + + # output [0.5555556] + + Args: + input1 (oneflow._oneflow_internal.BlobDesc): The ranking score of input1 Blob. + input2 (oneflow._oneflow_internal.BlobDesc): The ranking score of input2 Blob. + target (oneflow._oneflow_internal.BlobDesc): The target Blob. + margin (float): The margin value. Defaults to 0.0. + reduction (str, optional): The reduce type, it can be one of "none", "mean", "sum". Defaults to "mean". + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + """ + assert ( + input1.shape == input2.shape + ), "The shape of `input1`, `input2` must be the same. 
" + + assert reduction in [ + "none", + "mean", + "sum", + ], "{} is not a valid value for reduction, The reduction must be the one of `none`, `mean`, `sum`. ".format( + reduction + ) + + if name is None: + name = id_util.UniqueStr("MarginRankingLoss") + + _margin_loss = flow.math.negative(flow.math.subtract(input1, input2)) + _margin_loss = flow.math.multiply(target, _margin_loss) + _margin_loss = flow.math.add(margin, _margin_loss) + + _clipped_margin_loss = flow.clip(_margin_loss, min_value=0.0) + + if reduction == "none": + return _clipped_margin_loss + elif reduction == "mean": + return flow.math.reduce_mean(_clipped_margin_loss, name=name + "_reduce_mean") + else: + return flow.math.reduce_sum(_clipped_margin_loss, name=name + "_reduce_sum") + + +@oneflow_export("nn.TripletMarginLoss") +@stable_api +def triplet_margin_loss( + anchor: oneflow._oneflow_internal.BlobDesc, + positive: oneflow._oneflow_internal.BlobDesc, + negative: oneflow._oneflow_internal.BlobDesc, + margin: float = 1.0, + p: float = 2.0, + eps: float = 1e-6, + swap: bool = False, + reduction: str = "mean", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the Triplet Margin Loss. + + The equation is: + + if reduction = "none": + + .. math:: + + output = \max\{\left\lVert a_i - p_i \right\rVert_p - \left\lVert a_i - n_i \right\rVert_p + {\rm margin}, 0\} + + if reduction = "mean": + + .. math:: + + output = \frac{1}{n}\sum_{i=1}^n\max\{\left\lVert a_i - p_i \right\rVert_p - \left\lVert a_i - n_i \right\rVert_p + {\rm margin}, 0\} + + if reduction = "sum": + + .. math:: + + output = \sum_{i=1}^n\max\{\left\lVert a_i - p_i \right\rVert_p - \left\lVert a_i - n_i \right\rVert_p + {\rm margin}, 0\} + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def triplet_loss_job(anchor: tp.Numpy.Placeholder(shape=(3, 3)), + pos: tp.Numpy.Placeholder(shape=(3, 3)), + neg: tp.Numpy.Placeholder(shape=(3, 3)))->tp.Numpy: + out = flow.nn.TripletMarginLoss(anchor, pos, neg, margin=1.0, p=2.0) + return out + + np_anchor = np.array([[1, 2, 3], + [4, 5, 6], + [7, 8, 9]]).astype(np.float32) + np_pos = np.array([[2, 2, 2], + [2, 2, 2], + [2, 2, 2]]).astype(np.float32) + np_neg = np.array([[3, 3, 3], + [3, 3, 3], + [3, 3, 3]]).astype(np.float32) + + out = triplet_loss_job(np_anchor, np_pos, np_neg) + + # output [1.8449262] + + Args: + anchor (oneflow._oneflow_internal.BlobDesc): The anchor Blob. + positive (oneflow._oneflow_internal.BlobDesc): The positive sample Blob. + negative (oneflow._oneflow_internal.BlobDesc): The negative sample Blob. + margin (float, optional): The margin value. Defaults to 1.0. + p (float, optional): The norm degree for computing distance. Defaults to 2.0. + eps (float, optional): A small value use in norm computation. Defaults to 1e-6. + swap (bool, optional): Whether to swap the distance. + For more details you can check the Paper `Learning shallow convolutional feature descriptors with triplet losses`. Defaults to False. + reduction (str, optional): The reduce type, it can be one of "none", "mean", "sum". Defaults to "mean". + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. 
+ """ + assert reduction in [ + "none", + "mean", + "sum", + ], "{} is not a valid value for reduction, The reduction must be the one of `none`, `mean`, `sum`. ".format( + reduction + ) + + assert ( + swap == False + ), "For now we only support `swap=True`, OneFlow still have backward error in minimum" + + if name is None: + name = id_util.UniqueStr("TripletMarginLoss") + + def _p_norm(x, p=2.0, name="p_norm"): + r"""Compute the p-norm + + The equation is: + + .. math:: + + out = \sqrt[P]{\sum_{i=0}^{n}(abs(x)^P)} + + Args: + x ([type]): The input Blob. + p ([type], optional): The norm degree. Defaults to 2.. + + """ + # In order to avoid the `nan` case. + _abs_val = flow.math.abs(x, name=name + "_abs") + + if p == 2.0: + # Use Square to compute the l2-norm + _norm = flow.math.square(_abs_val, name=name + "_square") + _norm = flow.math.reduce_sum(_norm, axis=1, name=name + "_sum") + _norm_val = flow.math.sqrt(_norm, name=name + "_sqrt") + else: + _p_constant = flow.constant_like( + like=_abs_val, value=p, dtype=flow.float32, name=name + "_p_constant" + ) + _norm = flow.math.pow(_abs_val, _p_constant, name=name + "_pow1") + _norm = flow.math.reduce_sum(_norm, axis=1, name=name + "_sum") + _p_reciprocal_constant = flow.constant_like( + like=_norm, + value=1.0 / p, + dtype=flow.float32, + name=name + "_p_reciprocal_constant", + ) + _norm_val = flow.math.pow( + _norm, _p_reciprocal_constant, name=name + "_norm_val" + ) + + return _norm_val + + # Compute the distance + + _distance_1 = _p_norm(anchor - positive + eps, p=p, name=name + "_distance_1") + _distance_2 = _p_norm(anchor - negative + eps, p=p, name=name + "_distance_2") + + if swap: + _distance_swap = _p_norm(positive - negative + eps, p=p) + _distance_swap = flow.math.reduce_sum(_distance_swap, axis=1) + # TODO(zhengzekang): minimum still not support backward + _distance_2 = flow.math.minimum(_distance_2, _distance_swap) + + _triplet_loss = flow.clip(margin + _distance_1 - _distance_2, min_value=0.0) + + if reduction == "mean": + return flow.math.reduce_mean(_triplet_loss, name=name + "_reduce_mean") + elif reduction == "sum": + return flow.math.reduce_sum(_triplet_loss, name=name + "_reduce_sum") + else: + return _triplet_loss + + +@oneflow_export("nn.PixelShuffle") +@stable_api +def pixel_shuffle( + input: oneflow._oneflow_internal.BlobDesc, + upscale_factor: int, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator do the pixel shuffle, the shape of input(B, C*r*r, H, W) is arranged to + (B, C, H*r, W*r). It can be used to do the sub-pixel convolution. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def PixelShuffleJob(input: tp.Numpy.Placeholder(shape=(3, 4, 2, 2), dtype=flow.float32))->tp.Numpy: + out = flow.nn.PixelShuffle(input, upscale_factor=2) + + return out + + input = np.random.uniform(size=(3, 4, 2, 2)).astype(np.float32) + out = PixelShuffleJob(input) + + # out.shape (3, 1, 4, 4) + + Args: + input (oneflow._oneflow_internal.BlobDesc): The input Blob. + upscale_factor (int): The upscale factor. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. 
+ """ + return flow.nn.PixelShufflev2(input, upscale_factor, upscale_factor, name=name) + + +@oneflow_export("nn.PixelShufflev2") +def pixel_shufflev2( + input: oneflow._oneflow_internal.BlobDesc, + h_upscale_factor: int, + w_upscale_factor: int, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator is similar to `oneflow.compatible.single_client.nn.PixelShuffle`. The difference is that in + `oneflow.compatible.single_client.nn.PixelShuffle`, the upscale factor of height and width is the same. But in + `oneflow.compatible.single_client.nn.PixelShufflev2`, you can set different upscale factor for height and width. + + Args: + input (oneflow._oneflow_internal.BlobDesc): The input Blob. + h_upscale_factor (int): The upscale factor of height. + w_upscale_factor (int): The upscale factor of width. + name (Optional[str], optional): The name for the operation. Defaults to None. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def PixelShufflev2Job(input: tp.Numpy.Placeholder(shape=(3, 16, 2, 4), dtype=flow.float32))->tp.Numpy: + out = flow.nn.PixelShufflev2(input, h_upscale_factor=2, w_upscale_factor=4) + + return out + + input = np.random.uniform(size=(3, 16, 2, 4)).astype(np.float32) + out = PixelShuffleJob(input) + + # out.shape (3, 2, 4, 16) + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + """ + assert ( + h_upscale_factor > 0 and w_upscale_factor > 0 + ), "The scale factor of height and width must larger than zero" + assert len(input.shape) == 4, "Only Accept 4D Blob" + + _batch, _channel, _height, _width = input.shape + assert ( + _channel % (h_upscale_factor * w_upscale_factor) == 0 + ), "The channels of input tensor must be divisible by (h_upscale_factor * w_upscale_factor)" + + if name is None: + name = id_util.UniqueStr("PixelShufflev2") + + _new_c = int(_channel / (h_upscale_factor * w_upscale_factor)) + + out = flow.reshape( + input, + [_batch, _new_c, h_upscale_factor * w_upscale_factor, _height, _width], + name=name + "_reshape1", + ) + out = flow.reshape( + out, + [_batch, _new_c, h_upscale_factor, w_upscale_factor, _height, _width], + name=name + "_reshape2", + ) + out = flow.transpose(out, [0, 1, 4, 2, 5, 3], name=name + "_transpose") + out = flow.reshape( + out, + [_batch, _new_c, _height * h_upscale_factor, _width * w_upscale_factor], + name=name + "_reshape3", + ) + + return out + + +@oneflow_export("nn.KLDivLoss") +@stable_api +def kldivloss( + input: oneflow._oneflow_internal.BlobDesc, + target: oneflow._oneflow_internal.BlobDesc, + log_target: bool = False, + reduction: str = "mean", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator computes the Kullback-Leiber divergence loss. + + The equation is: + + If :math:`log\_target = True`: + + .. math:: + + loss = e^{target}*(target-input) + + If :math:`log\_target = False`: + + .. math:: + + loss = target*(log(target)-input) + + Attention: + In `log_target = False` case, the element in loss will set to be `0` when the element in target is less than `0` + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def of_kldivloss(input: tp.Numpy.Placeholder(shape=(3, 3)), + target: tp.Numpy.Placeholder(shape=(3, 3))) -> tp.Numpy: + return flow.nn.KLDivLoss(input, target, log_target=False, reduction='none') + + + input = np.array([[0.1, 0.2, 0.7], + [0.8, 0.9, 0.5], + [0.5, 0.15, 0.35]]).astype(np.float32) + target = np.array([[0.3, 0.1, 0.6], + [-0.3, 0.4, 0.4], + [0.35, 0.25, 0.4]]).astype(np.float32) + + out = of_kldivloss(input, target) + + # output [[-0.39119187 -0.25025854 -0.7264954 ] + # [ 0. -0.72651625 -0.56651634] + # [-0.54243773 -0.3840736 -0.5065163 ]] + + Args: + input (oneflow._oneflow_internal.BlobDesc): The input tensor. + target (oneflow._oneflow_internal.BlobDesc): The target tensor. + log_target (bool, optional): Whether the `target` is passed in the log space. Defaults to False. + reduction (str, optional): The reduce type, it can be one of "none", "mean", "sum". Defaults to "mean". + name (Optional[str], optional): The name for the operation. Defaults to None. + Returns: + oneflow._oneflow_internal.BlobDesc: The result tensor. + """ + assert reduction in [ + "none", + "mean", + "sum", + ], "{} is not a valid value for reduction, The reduction must be the one of `none`, `mean`, `sum`. ".format( + reduction + ) + + if name is None: + name = id_util.UniqueStr("KLDivLoss_") + + if log_target: + _kl_div_loss = flow.math.exp(target, name=name + "exp") * (target - input) + else: + _kl_div_out_loss = target * (flow.math.log(target, name=name + "log") - input) + _zeros = flow.zeros_like( + _kl_div_out_loss, dtype=_kl_div_out_loss.dtype, name=name + "zeros" + ) + # when target < 0, we set to `0`, when target > 0, we set to `1`. + _condition = flow.cast( + flow.math.rint(target + 0.5, name=name + "rint"), + dtype=flow.int8, + name=name + "cast2int", + ) + # To avoid the `nan` value in log operation + # We set those positions which `target` is less than zero as `0` + _kl_div_loss = flow.where( + _condition, _kl_div_out_loss, _zeros, name=name + "where" + ) + + if reduction == "mean": + return flow.math.reduce_mean(_kl_div_loss, name=name + "_reduce_mean") + elif reduction == "sum": + return flow.math.reduce_sum(_kl_div_loss, name=name + "_reduce_sum") + else: + return _kl_div_loss diff --git a/oneflow/compatible_single_client_python/ops/one_hot.py b/oneflow/compatible_single_client_python/ops/one_hot.py new file mode 100644 index 0000000000000000000000000000000000000000..0a8ecc919b01003172046ad3922c064759b73eea --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/one_hot.py @@ -0,0 +1,150 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +import os + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import ( + distribute as distribute_util, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal +from typing import Optional, Union + + +@oneflow_export("one_hot") +def one_hot( + indices: oneflow._oneflow_internal.BlobDesc, + depth: int, + on_value: Union[int, float] = 1, + off_value: Union[int, float] = 0, + axis: int = -1, + dtype: Optional[flow.dtype] = None, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator generates a onehot Blob from input Blob. + + If input Blob's rank is `N`, the corresponding onehot Blob's rank is `N+1`. The new axis is generated on the specified dimension according to the parameter `axis`. + + The locations represented by `indices` take value `on_value`, while other locations take `off_value` + + Args: + indices (oneflow._oneflow_internal.BlobDesc): The input Blob. + depth (int): The length of onehot Blob. + on_value (Union[int, float], optional): The fill value when `indices[i] == i`. Defaults to 1. + off_value (Union[int, float], optional): The fill value when `indice[i] != i`. Defaults to 0. + axis (int, optional): The specified dimension that the new axis is generated on. Defaults to -1. + dtype (Optional[flow.dtype], optional): The output data type, it can be "oneflow.compatible.single_client.int32", "oneflow.compatible.single_client.int64", "oneflow.compatible.single_client.float", "oneflow.compatible.single_client.double". Defaults to None. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Note: + + The data type of input blob should be `int32` or `int64` + + For example: + + Example 1: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def onehot_Job(x: tp.Numpy.Placeholder((4, ), dtype=flow.int32) + ) -> tp.Numpy: + return flow.one_hot(indices=x, + depth=5, + axis=-1, + dtype=flow.int32) + + + x = np.array([0, 3, 1, 2]).astype(np.int32) + out = onehot_Job(x) + + # out [[1 0 0 0 0] + # [0 0 0 1 0] + # [0 1 0 0 0] + # [0 0 1 0 0]] + + Example 2: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + @flow.global_function() + def onehot_Job(x: tp.Numpy.Placeholder((4, ), dtype=flow.int32) + ) -> tp.Numpy: + return flow.one_hot(indices=x, + depth=5, + axis=0, + dtype=flow.int32) + + + x = np.array([0, 3, 1, 2]).astype(np.int32) + out = onehot_Job(x) + + # out [[1 0 0 0] + # [0 0 1 0] + # [0 0 0 1] + # [0 1 0 0] + # [0 0 0 0]] + + Returns: + oneflow._oneflow_internal.BlobDesc: [description] + """ + out_ndims = len(indices.shape) + 1 + if axis < 0: + axis += out_ndims + assert axis >= 0 and axis < out_ndims, ValueError( + "Expected axis to between [%d, %d). 
But received: %d " + % (-out_ndims, out_ndims, axis) + ) + out = ( + flow.user_op_builder(name if name is not None else id_util.UniqueStr("OneHot_")) + .Op("one_hot") + .Input("indices", [indices]) + .Attr("depth", int(depth)) + .Attr("floating_on_value", float(on_value)) + .Attr("integer_on_value", int(on_value)) + .Attr("floating_off_value", float(off_value)) + .Attr("integer_off_value", int(off_value)) + .Attr("dtype", dtype) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + if axis != (out_ndims - 1): + dim_list = list(range(0, out_ndims)) + dim_list.insert(axis, out_ndims - 1) + dim_list.pop() + return flow.transpose(out, dim_list) + else: + return out diff --git a/oneflow/compatible_single_client_python/ops/optimizer.py b/oneflow/compatible_single_client_python/ops/optimizer.py new file mode 100644 index 0000000000000000000000000000000000000000..38b729e3a2dbe60e273da28fb23ffc5cb3d3bc5e --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/optimizer.py @@ -0,0 +1,2045 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import collections.abc +import traceback +from typing import Optional, Union, Sequence, List, Text, Callable + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.framework import runtime_mode as rt_mode +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + oneflow_deprecate, +) +from oneflow._oneflow_internal.oneflow.core.job import job_conf as job_conf_cfg +from oneflow._oneflow_internal.oneflow.core.job import ( + learning_rate_schedule_conf as learning_rate_schedule_conf_cfg, +) +import oneflow._oneflow_internal + + +def GetVariablesForCurrentJob() -> List[Text]: + sess = session_ctx.GetDefaultSession() + assert ( + rt_mode.CurrentMode() == rt_mode.GLOBAL_MODE + ), "Optimizer's Variables() or minimize() method should be called inside a Job Function to implicitly get variables from a job." + # TODO(): Use new api when new GetCurrentJobName api is ready. + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + return list(sess.job_name2var_name2var_blob_[job_name].keys()) + + +class ClipGradientConf: + @property + def clip_conf(self) -> job_conf_cfg.ClipConf: + raise NotImplementedError() + + +@oneflow_export("optimizer.grad_clipping.by_global_norm") +class by_global_norm(ClipGradientConf): + r"""This operator limits the norm of `Input` with `clip_norm`. + + If the norm of `Input` is less than the `clip_norm`, + + the `Output` will be the same as `Input`. + + If the norm of `Input` is greater than the `clip_norm`, the `Output` will be scaled. + + The equation is: + + .. 
math:: + + Output = \frac{clip\_norm*Input}{norm(Input)} + + Args: + clip_norm (float): The maximum norm value. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + with flow.scope.placement("gpu", "0:0"): + logits = lenet(images, train=True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + # Set learning rate as 0.001 + lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.001]) + # Set gradient_clip + gradient_clip = flow.optimizer.grad_clipping.by_global_norm(1.0) + # Set AdamW optimizer with gradient clip + flow.optimizer.AdamW(lr_scheduler, + do_bias_correction=False, weight_decay=0.00005, + grad_clipping=gradient_clip).minimize(loss) + + return loss + + """ + + def __init__(self, clip_norm): + self.clip_norm = clip_norm + + @property + def clip_conf(self): + clip_conf = job_conf_cfg.ClipConf() + clip_conf.mutable_clip_by_global_norm().set_clip_norm(self.clip_norm) + return clip_conf + + +class WarmupConf: + @property + def warmup_conf(self) -> learning_rate_schedule_conf_cfg.WarmupConf: + raise NotImplementedError() + + +@oneflow_export("optimizer.warmup.constant") +class constant(WarmupConf): + r"""This operator use the constant warmup strategy to adjust the learning rate. + + Before the steps are specified by user, the learning rate is: + + .. math:: + + learning\_rate = base\_learning\_rate*multiplier + + After the steps are specified by user, the learning rate is: + + .. math:: + + learning\_rate = base\_learning\_rate + + Args: + steps (int): [description] + multiplier (float): The scale factor :math:`multiplier`, it should be greater than 0. and less than 1. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + with flow.scope.placement("gpu", "0:0"): + logits = lenet(images, train=True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + + # Before 10 epochs, the learning rate is 0.001 + # After 10 epochs, the learning rate is 0.01 + warmup_scheduler = flow.optimizer.warmup.constant(10, 0.1) + lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.01], warmup=warmup_scheduler) + flow.optimizer.Adam(lr_scheduler).minimize(loss) + + return loss + + """ + + def __init__(self, steps, multiplier): + self.steps = steps + self.multiplier = multiplier + + @property + def warmup_conf(self) -> learning_rate_schedule_conf_cfg.WarmupConf: + warmup_conf = learning_rate_schedule_conf_cfg.WarmupConf() + warmup_conf.mutable_constant_conf().set_warmup_batches(self.steps) + warmup_conf.mutable_constant_conf().set_multiplier(self.multiplier) + return warmup_conf + + +@oneflow_export("optimizer.warmup.linear") +class linear(WarmupConf): + r"""This operator uses the linear warmup strategy to adjust the learning rate. + + When current train step is less than warmup steps, the learning rate will be updated as: + + .. 
math:: + + & current\_multiplier = start\_multiplier + (1-start\_multiplier)*\frac{train\_step}{warmup\_step} + + & current\_learning\_rate = learning\_rate*current\_multiplier + + Args: + steps (int): The warmup steps. + start_multiplier (float): The start multiplier(:math:`start\_multiplier`). It should be greater than 0. and less than 1. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + with flow.scope.placement("gpu", "0:0"): + logits = lenet(images, train=True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + + # Before 10 epochs, the learning rate will increase from 0.001 to 0.01 in linear. + warmup_scheduler = flow.optimizer.warmup.linear(10, 0.1) + lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.01], warmup=warmup_scheduler) + flow.optimizer.Adam(lr_scheduler).minimize(loss) + + return loss + + """ + + def __init__(self, steps, start_multiplier): + self.steps = steps + self.start_multiplier = start_multiplier + + @property + def warmup_conf(self) -> learning_rate_schedule_conf_cfg.WarmupConf: + warmup_conf = learning_rate_schedule_conf_cfg.WarmupConf() + warmup_conf.mutable_linear_conf().set_warmup_batches(self.steps) + warmup_conf.mutable_linear_conf().set_start_multiplier(self.start_multiplier) + return warmup_conf + + +class LrScheduler: + def __init__( + self, + base_lr: Optional[float] = None, + lr_lbn: Optional[Text] = None, + warmup: Optional[WarmupConf] = None, + ): + self.base_lr = base_lr + self.lr_lbn = lr_lbn + self.warmup = warmup + + @property + def warmup_conf(self) -> learning_rate_schedule_conf_cfg.WarmupConf: + if self.warmup is None: + return None + return self.warmup.warmup_conf + + @property + def learning_rate_decay_conf( + self, + ) -> Optional[learning_rate_schedule_conf_cfg.LearningRateDecayConf]: + raise NotImplementedError() + + def SetLrFieldsInOptimizerConf(self, optimizer_conf) -> None: + if self.lr_lbn is not None: + assert self.base_lr is None + assert self.warmup is None + assert self.learning_rate_decay_conf is None + optimizer_conf.set_learning_rate_lbn(self.lr_lbn) + else: + assert self.base_lr is not None + optimizer_conf.set_base_learning_rate(self.base_lr) + if self.warmup_conf is not None: + optimizer_conf.mutable_warmup_conf().CopyFrom(self.warmup_conf) + if self.learning_rate_decay_conf is not None: + optimizer_conf.mutable_learning_rate_decay().CopyFrom( + self.learning_rate_decay_conf + ) + + +@oneflow_export("optimizer.CosineScheduler") +class CosineScheduler(LrScheduler): + r"""This operator creates a Cosine decayed learning rate scheduler. + + Before the steps are specified by user, the learning rate will be updated as: + + .. math:: + + & cos\_decay = 0.5*(1+cos(\pi*\frac{current\_batch}{decayed\_batch})) + + & decay\_factor = (1-\alpha)*cos\_decay+\alpha + + & learning\_rate = base\_learning\_rate*decay\_factor + + After the steps specified by user, the learning rate will be : + + .. 
math:: + + learning\_rate = {base\_learning\_rate}*{\alpha} + + Args: + base_lr (float): The base learning rate (:math:`base\_learning\_rate`) + steps (int): The decay steps in the scheduler (:math:`decayed\_batch`) + alpha (float, optional): The learning rate scale factor (:math:`\alpha`). Defaults to 0.0. + warmup (Optional[WarmupConf], optional): The warmup strategy. Defaults to None. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + with flow.scope.placement("gpu", "0:0"): + logits = lenet(images, train=True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + + lr_scheduler = flow.optimizer.CosineScheduler(base_lr=0.01, + steps=10, + alpha=0.1) + flow.optimizer.Adam(lr_scheduler).minimize(loss) + + return loss + + """ + + def __init__( + self, + base_lr: float, + steps: int, + alpha: float = 0.0, + warmup: Optional[WarmupConf] = None, + ): + super().__init__(base_lr=base_lr, warmup=warmup) + self.steps = steps + self.alpha = alpha + + @property + def learning_rate_decay_conf( + self, + ) -> Optional[learning_rate_schedule_conf_cfg.LearningRateDecayConf]: + learning_rate_decay_conf = ( + learning_rate_schedule_conf_cfg.LearningRateDecayConf() + ) + learning_rate_decay_conf.mutable_cosine_conf().set_decay_batches(self.steps) + learning_rate_decay_conf.mutable_cosine_conf().set_alpha(self.alpha) + return learning_rate_decay_conf + + +@oneflow_export("optimizer.CustomScheduler") +class CustomScheduler(LrScheduler): + def __init__(self, lbn: Text): + super().__init__(lr_lbn=lbn) + + @property + def learning_rate_decay_conf( + self, + ) -> learning_rate_schedule_conf_cfg.LearningRateDecayConf: + return None + + +@oneflow_export("optimizer.PiecewiseConstantScheduler") +class PiecewiseConstantScheduler(LrScheduler): + r"""This operator creates a piecewise constant learning rate scheduler. + + The change in learning rate can be described as follows: + + .. code-block:: python + + boundaries = [1000, 2000] + values = [0.1, 0.01, 0.001] + + if current_step < 1000: + learning_rate = 0.1 + elif 1000 < current_step < 2000: + learning_rate = 0.01 + else: + learning_rate = 0.001 + + Args: + boundaries (Sequence[int]): A list of train steps. + values (Sequence[float]): A list of learning rate values during the different train step boundary. + warmup (Optional[WarmupConf], optional): The warmup strategy. Defaults to None. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + with flow.scope.placement("gpu", "0:0"): + logits = lenet(images, train=True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + + lr_scheduler = flow.optimizer.PiecewiseConstantScheduler(boundaries=[10, 20], + values=[0.1, 0.01, 0.001]) + flow.optimizer.Adam(lr_scheduler).minimize(loss) + + return loss + + """ + + def __init__( + self, + boundaries: Sequence[int], + values: Sequence[float], + warmup: Optional[WarmupConf] = None, + ): + assert len(boundaries) + 1 == len(values) + super().__init__(base_lr=values[0], warmup=warmup) + self.boundaries = boundaries + self.values = values + + @property + def learning_rate_decay_conf( + self, + ) -> Optional[learning_rate_schedule_conf_cfg.LearningRateDecayConf]: + learning_rate_decay_conf = ( + learning_rate_schedule_conf_cfg.LearningRateDecayConf() + ) + for boundary in self.boundaries: + learning_rate_decay_conf.mutable_piecewise_constant_conf().add_boundaries( + boundary + ) + for value in self.values: + learning_rate_decay_conf.mutable_piecewise_constant_conf().add_values(value) + return learning_rate_decay_conf + + +@oneflow_export("optimizer.PiecewiseScalingScheduler") +class PiecewiseScalingScheduler(LrScheduler): + """This operator creates a piecewise scaled decayed learning rate scheduler. + + The change in learning rate can be described as follows: + + .. code-block:: python + + boundaries = [1000, 2000] + scale = [0.1, 0.01] + base_lr = 0.1 + + if current_step < 1000: + learning_rate = base_lr + elif 1000 < current_step < 2000: + learning_rate = 0.1*base_lr + else: + learning_rate = 0.01*base_lr + + Args: + base_lr (float): The base learning rate + boundaries (Sequence[int]): A list of train steps. + scale (Union[float, Sequence[float]]): A list of learning rate scaled factors during the different train step boundary. + warmup (Optional[WarmupConf], optional): The warmup strategy. Defaults to None. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + with flow.scope.placement("gpu", "0:0"): + logits = lenet(images, train=True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + + lr_scheduler = flow.optimizer.PiecewiseScalingScheduler(base_lr=0.1, + boundaries=[5, 10], + scale=[0.5, 0.1]) + flow.optimizer.SGD(lr_scheduler, momentum=0).minimize(loss) + + return loss + + """ + + def __init__( + self, + base_lr: float, + boundaries: Sequence[int], + scale: Union[float, Sequence[float]], + warmup: Optional[WarmupConf] = None, + ): + super().__init__(base_lr=base_lr, warmup=warmup) + self.boundaries = boundaries + if not isinstance(scale, collections.abc.Sequence): + scale = [scale] * len(boundaries) + assert len(boundaries) == len(scale) + self.scales = [1] + list(scale) + + @property + def learning_rate_decay_conf( + self, + ) -> Optional[learning_rate_schedule_conf_cfg.LearningRateDecayConf]: + learning_rate_decay_conf = ( + learning_rate_schedule_conf_cfg.LearningRateDecayConf() + ) + for boundary in self.boundaries: + learning_rate_decay_conf.mutable_piecewise_scaling_conf().add_boundaries( + boundary + ) + for scale in self.scales: + learning_rate_decay_conf.mutable_piecewise_scaling_conf().add_scales(scale) + return learning_rate_decay_conf + + +@oneflow_export("optimizer.PolynomialScheduler") +class PolynomialScheduler(LrScheduler): + r"""This operator creates a polynomial decayed learning rate scheduler. + + The learning rate will be updated as follows: + + If cycle is `True`, the equation is: + + .. math:: + + & decay\_batch = decay\_batch*ceil(\frac{current\_batch}{decay\_batch}) + + & learning\_rate = (base\_lr-end\_lr)*(1-\frac{current\_batch}{decay\_batch})^{pow}+end\_lr + + If cycle is `False`, the equation is: + + .. math:: + + & decay\_batch = min(decay\_batch, current\_batch) + + & learning\_rate = (base\_lr-end\_lr)*(1-\frac{current\_batch}{decay\_batch})^{pow}+end\_lr + + Args: + base_lr (float): The base learning rate + steps (int): The decayed steps + end_learning_rate (float, optional): The final learning rate. Defaults to 0.0001. + power (float, optional): The power of polynomial. Defaults to 1.0. + cycle (bool, optional): If cycle is true, the scheduler will decay the learning rate every decay steps. Defaults to False. + warmup (Optional[WarmupConf], optional): The warmup strategy. Defaults to None. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + with flow.scope.placement("gpu", "0:0"): + logits = lenet(images, train=True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + + lr_scheduler = flow.optimizer.PolynomialScheduler(base_lr=0.001, + steps=5, + end_learning_rate=0.00001, + power=2) + flow.optimizer.Adam(lr_scheduler).minimize(loss) + + return loss + + """ + + def __init__( + self, + base_lr: float, + steps: int, + end_learning_rate: float = 0.0001, + power: float = 1.0, + cycle: bool = False, + warmup: Optional[WarmupConf] = None, + ): + super().__init__(base_lr=base_lr, warmup=warmup) + self.steps = steps + self.end_learning_rate = end_learning_rate + self.power = power + self.cycle = cycle + + @property + def learning_rate_decay_conf( + self, + ) -> Optional[learning_rate_schedule_conf_cfg.LearningRateDecayConf]: + learning_rate_decay_conf = ( + learning_rate_schedule_conf_cfg.LearningRateDecayConf() + ) + learning_rate_decay_conf.mutable_polynomial_conf().set_decay_batches(self.steps) + learning_rate_decay_conf.mutable_polynomial_conf().set_end_learning_rate( + (self.end_learning_rate) + ) + learning_rate_decay_conf.mutable_polynomial_conf().set_power(self.power) + learning_rate_decay_conf.mutable_polynomial_conf().set_cycle(self.cycle) + return learning_rate_decay_conf + + +@oneflow_export("optimizer.PolynomialSchduler") +@oneflow_deprecate() +class PolynomialSchduler(PolynomialScheduler): + def __init__( + self, + base_lr: float, + steps: int, + end_learning_rate: float = 0.0001, + power: float = 1.0, + cycle: bool = False, + warmup: Optional[WarmupConf] = None, + ): + print( + "WARNING:", + "oneflow.compatible.single_client.optimizer.PolynomialSchduler", + "will be removed in the future, use {} instead.".format( + "oneflow.compatible.single_client.optimizer.PolynomialScheduler" + ), + ) + print(traceback.format_stack()[-2]) + super().__init__( + base_lr=base_lr, + steps=steps, + end_learning_rate=end_learning_rate, + power=power, + cycle=cycle, + warmup=warmup, + ) + + +@oneflow_export("optimizer.LinearCosineScheduler") +class LinearCosineScheduler(LrScheduler): + r"""This operator creates a linear cosine decayed learning rate scheduler. + + The learning rate will be updated as follows: + + .. math:: + + & current\_batch = min(current\_batch, decay\_batch) + + & linear\_decay = \frac{(decay\_batch - current\_batch)}{decay\_batch} + + & cosine\_decay = 0.5*(1.0+cos(2*\pi*num\_periods*\frac{current\_batch}{decay\_batch})) + + & decay\_factor = (\alpha+linear\_decay)*cosine\_decay + \beta + + & learning\_rate = base\_learning\_rate*decay\_factor + + Args: + base_lr (float): The base learning rate + steps (int): The decay steps + num_periods (float, optional): The number of decay periods. Defaults to 0.5. + alpha (float, optional): The :math:`\alpha` in equation. Defaults to 0.0. + beta (float, optional): The :math:`\beta` in equation. Defaults to 0.001. + warmup (Optional[WarmupConf], optional): The warmup strategy. Defaults to None. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + with flow.scope.placement("gpu", "0:0"): + logits = lenet(images, train=True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + + lr_scheduler = flow.optimizer.LinearCosineScheduler(base_lr=0.1, + steps=10) + flow.optimizer.SGD(lr_scheduler, momentum=0.9).minimize(loss) + + return loss + + """ + + def __init__( + self, + base_lr: float, + steps: int, + num_periods: float = 0.5, + alpha: float = 0.0, + beta: float = 0.001, + warmup: Optional[WarmupConf] = None, + ): + super().__init__(base_lr=base_lr, warmup=warmup) + self.steps = steps + self.num_periods = num_periods + self.alpha = alpha + self.beta = beta + + @property + def learning_rate_decay_conf( + self, + ) -> Optional[learning_rate_schedule_conf_cfg.LearningRateDecayConf]: + learning_rate_decay_conf = ( + learning_rate_schedule_conf_cfg.LearningRateDecayConf() + ) + learning_rate_decay_conf.mutable_linear_cosine_conf().set_decay_batches( + self.steps + ) + learning_rate_decay_conf.mutable_linear_cosine_conf().set_num_periods( + self.num_periods + ) + learning_rate_decay_conf.mutable_linear_cosine_conf().set_alpha(self.alpha) + learning_rate_decay_conf.mutable_linear_cosine_conf().set_beta(self.beta) + return learning_rate_decay_conf + + +@oneflow_export("optimizer.ExponentialScheduler") +class ExponentialScheduler(LrScheduler): + r"""This operator creates a exponential decayed learning rate scheduler. + + The learning rate will be updated as follows: + + If staircase is set to False, the equation is: + + .. math:: + + & pow = \frac{current\_batch}{decay\_batch} + + & learning\_rate = base\_learning\_rate*decay\_rate^{pow} + + If staircase is set to True, the equation is: + + .. math:: + + & pow = floor(\frac{current\_batch}{decay\_batch}) + + & learning\_rate = base\_learning\_rate*decay\_rate^{pow} + + Args: + base_lr (float): The base learning rate + steps (int): The decay steps + decay_rate (float): The decay rate + staircase (bool, optional): If staircase is True, the scheduler decay the learning rate at discrete intervals. Defaults to False. + warmup (Optional[WarmupConf], optional): The warmup strategy. Defaults to None. + + For example: + + .. 
code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function(type="train")
+        def train_job(
+            images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float),
+            labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32),
+        ) -> tp.Numpy:
+            with flow.scope.placement("gpu", "0:0"):
+                logits = lenet(images, train=True)
+                loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
+                    labels, logits, name="softmax_loss"
+                )
+
+            lr_scheduler = flow.optimizer.ExponentialScheduler(base_lr=0.01,
+                                                               steps=10,
+                                                               decay_rate=0.9)
+            flow.optimizer.Adam(lr_scheduler).minimize(loss)
+
+            return loss
+
+    """
+
+    def __init__(
+        self,
+        base_lr: float,
+        steps: int,
+        decay_rate: float,
+        staircase=False,
+        warmup: Optional[WarmupConf] = None,
+    ):
+        super().__init__(base_lr=base_lr, warmup=warmup)
+        self.steps = steps
+        self.decay_rate = decay_rate
+        self.staircase = staircase
+
+    @property
+    def learning_rate_decay_conf(
+        self,
+    ) -> Optional[learning_rate_schedule_conf_cfg.LearningRateDecayConf]:
+        learning_rate_decay_conf = (
+            learning_rate_schedule_conf_cfg.LearningRateDecayConf()
+        )
+        learning_rate_decay_conf.mutable_exponential_conf().set_decay_batches(
+            self.steps
+        )
+        learning_rate_decay_conf.mutable_exponential_conf().set_decay_rate(
+            self.decay_rate
+        )
+        learning_rate_decay_conf.mutable_exponential_conf().set_staircase(
+            self.staircase
+        )
+        return learning_rate_decay_conf
+
+
+@oneflow_export("optimizer.InverseTimeScheduler")
+class InverseTimeScheduler(LrScheduler):
+    r"""This operator creates an inverse time decayed learning rate scheduler.
+
+    The learning rate will be updated as follows:
+
+    If staircase is set to False, the equation is:
+
+    .. math::
+
+        & step\_ratio = \frac{current\_batch}{decay\_batch}
+
+        & learning\_rate = \frac{base\_learning\_rate}{1+decay\_rate*step\_ratio}
+
+    If staircase is set to True, the equation is:
+
+    .. math::
+
+        & step\_ratio = \frac{current\_batch}{decay\_batch}
+
+        & learning\_rate = \frac{base\_learning\_rate}{1+floor(decay\_rate*step\_ratio)}
+
+    Args:
+        base_lr (float): The base learning rate
+        steps (int): The decay steps
+        decay_rate (float): The decay rate
+        staircase (bool, optional): If staircase is True, the scheduler decays the learning rate at discrete intervals. Defaults to False.
+        warmup (Optional[WarmupConf], optional): The warmup strategy. Defaults to None.
+
+    For example:
+
+    ..
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + with flow.scope.placement("gpu", "0:0"): + logits = lenet(images, train=True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + + lr_scheduler = flow.optimizer.InverseTimeScheduler(base_lr=0.1, + steps=5, + decay_rate=0.9) + flow.optimizer.SGD(lr_scheduler, momentum=0.9).minimize(loss) + + return loss + + """ + + def __init__( + self, + base_lr: float, + steps: int, + decay_rate: float, + staircase: bool = False, + warmup: Optional[WarmupConf] = None, + ): + super().__init__(base_lr=base_lr, warmup=warmup) + self.steps = steps + self.decay_rate = decay_rate + self.staircase = staircase + + @property + def learning_rate_decay_conf( + self, + ) -> Optional[learning_rate_schedule_conf_cfg.LearningRateDecayConf]: + learning_rate_decay_conf = ( + learning_rate_schedule_conf_cfg.LearningRateDecayConf() + ) + learning_rate_decay_conf.mutable_inverse_time_conf().set_decay_batches( + self.steps + ) + learning_rate_decay_conf.mutable_inverse_time_conf().set_decay_rate( + self.decay_rate + ) + learning_rate_decay_conf.mutable_inverse_time_conf().set_staircase( + self.staircase + ) + return learning_rate_decay_conf + + +@oneflow_export("optimizer.NaturalExpScheduler") +class NaturalExpScheduler(LrScheduler): + r"""This operator creates a natural exponential decayed learning rate scheduler. + + The learning rate will be updated as follows: + + If staircase is set to False, the equation is: + + .. math:: + + & step\_ratio = \frac{current\_batch}{decay\_batch} + + & learning\_rate = {base\_learning\_rate}*e^{-decay\_rate*step\_ratio} + + If staircase is set to True, the equation is: + + .. math:: + + & step\_ratio = \frac{current\_batch}{decay\_batch} + + & learning\_rate = {base\_learning\_rate}*e^{-decay\_rate*floor(step\_ratio)} + + Args: + base_lr (float): The base learning rate + steps (int): The decay steps + decay_rate (float): The decay rate + staircase (bool, optional): If staircase is True, the scheduler decay the learning rate at discrete intervals. Defaults to False. + warmup (Optional[WarmupConf], optional): The warmup strategy. Defaults to None. + + For example: + + .. 
code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function(type="train")
+        def train_job(
+            images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float),
+            labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32),
+        ) -> tp.Numpy:
+            with flow.scope.placement("gpu", "0:0"):
+                logits = lenet(images, train=True)
+                loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
+                    labels, logits, name="softmax_loss"
+                )
+
+            lr_scheduler = flow.optimizer.NaturalExpScheduler(base_lr=0.1,
+                                                              steps=10,
+                                                              decay_rate=0.5)
+            flow.optimizer.SGD(lr_scheduler, momentum=0.9).minimize(loss)
+
+            return loss
+
+    """
+
+    def __init__(
+        self,
+        base_lr: float,
+        steps: int,
+        decay_rate: float,
+        staircase: bool = False,
+        warmup: Optional[WarmupConf] = None,
+    ):
+        super().__init__(base_lr=base_lr, warmup=warmup)
+        self.steps = steps
+        self.decay_rate = decay_rate
+        self.staircase = staircase
+
+    @property
+    def learning_rate_decay_conf(
+        self,
+    ) -> Optional[learning_rate_schedule_conf_cfg.LearningRateDecayConf]:
+        learning_rate_decay_conf = (
+            learning_rate_schedule_conf_cfg.LearningRateDecayConf()
+        )
+        learning_rate_decay_conf.mutable_natural_exp_conf().set_decay_batches(
+            self.steps
+        )
+        learning_rate_decay_conf.mutable_natural_exp_conf().set_decay_rate(
+            self.decay_rate
+        )
+        learning_rate_decay_conf.mutable_natural_exp_conf().set_staircase(
+            self.staircase
+        )
+        return learning_rate_decay_conf
+
+
+class LossScalePolicy:
+    def SetLossScaleFieldsInTrainConf(self, train_conf):
+        raise NotImplementedError()
+
+
+@oneflow_export("optimizer.loss_scale.static_loss_scale")
+class StaticLossScalePolicy(LossScalePolicy):
+    def __init__(self, loss_scale_factor: float):
+        super().__init__()
+        self.loss_scale_factor = loss_scale_factor
+
+    def SetLossScaleFieldsInTrainConf(self, train_conf):
+        train_conf.set_loss_scale_factor(self.loss_scale_factor)
+
+
+@oneflow_export("optimizer.loss_scale.dynamic_loss_scale")
+class DynamicLossScalePolicy(LossScalePolicy):
+    def __init__(
+        self, initial_loss_scale=(2 ** 30), increment_period=2000, multiplier=2.0
+    ):
+        super().__init__()
+        self.initial_loss_scale = initial_loss_scale
+        self.increment_period = increment_period
+        self.multiplier = multiplier
+
+    def SetLossScaleFieldsInTrainConf(self, train_conf):
+        train_conf.mutable_dynamic_loss_scale_policy().set_initial_loss_scale(
+            self.initial_loss_scale
+        )
+        train_conf.mutable_dynamic_loss_scale_policy().set_increment_period(
+            self.increment_period
+        )
+        train_conf.mutable_dynamic_loss_scale_policy().set_multiplier(self.multiplier)
+
+
+class Optimizer:
+    def __init__(
+        self,
+        loss_scale_factor: Optional[int] = None,
+        train_step_lbn: Optional[Text] = None,
+        loss_scale_policy: Optional[LossScalePolicy] = None,
+    ):
+        self.train_step_lbn = train_step_lbn
+        if loss_scale_factor is not None:
+            assert loss_scale_policy is None
+            self.loss_scale_policy = StaticLossScalePolicy(loss_scale_factor)
+        else:
+            self.loss_scale_policy = loss_scale_policy
+
+        self._variables_list_init = False
+
+    def Variables(self) -> List[Text]:
+        if not self._variables_list_init:
+            if self.variables is None:
+                self.variables = list(GetVariablesForCurrentJob())
+            elif callable(self.variables):
+                self.variables = list(self.variables())
+            else:
+                self.variables = list(self.variables)
+            self._variables_list_init = True
+
+        return self.variables
+
+    def _AddOptimizerConfInTrainConf(self, train_conf: job_conf_cfg.TrainConf) -> None:
+        raise NotImplementedError()
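+
+    # Template-method hook: each concrete optimizer below (SGD, Adam, RMSProp, ...)
+    # overrides _AddOptimizerConfInTrainConf() to append its own optimizer_conf entry,
+    # while the shared fields (train step lbn, loss scale policy) are filled in by the
+    # `train_conf` property that follows.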
+ + @property + def train_conf(self) -> job_conf_cfg.TrainConf: + train_conf = job_conf_cfg.TrainConf() + if self.train_step_lbn is not None: + train_conf.set_train_step_lbn(self.train_step_lbn) + if self.loss_scale_policy is not None: + self.loss_scale_policy.SetLossScaleFieldsInTrainConf(train_conf) + self._AddOptimizerConfInTrainConf(train_conf) + return train_conf + + def minimize( + self, + loss: Union[ + Sequence[oneflow._oneflow_internal.BlobDesc], + oneflow._oneflow_internal.BlobDesc, + ], + ) -> None: + if not isinstance(loss, collections.abc.Sequence): + loss = [loss] + c_api_util.CurJobBuildAndInferCtx_SetTrainConf(self.train_conf) + for x in loss: + flow.losses.add_loss(x) + + +@oneflow_export("optimizer.SGD") +class SGD(Optimizer): + r"""The optimizer of the stochastic gradient descent algorithm. + + This algorithm takes a random sample's gradient as an approximate estimate of the overall gradient in small batch gradient descent. + + When the momentum = 0, the equation of parameters updating is: + + .. math:: + + param_{new} = param_{old} - learning\_rate*grad + + With momentum, the equation of parameters updating is: + + .. math:: + + & V_{t} = \beta*V_{t-1} + learning\_rate*g_t + + & param_{new} = param_{old} - V_{t} + + Args: + lr_scheduler (LrScheduler): The scheduler of learning rate. + loss_scale_factor (Optional[float], optional): The scale factor of loss. Defaults to None. + momentum (float, optional): Momentum factor (:math:`\beta`). Defaults to 0.9. + grad_clipping (Optional[ClipGradientConf], optional): The gradient clipping strategy. Defaults to None. + train_step_lbn (Optional[Text], optional): [description]. Defaults to None. + loss_scale_policy (Optional[LossScalePolicy]): The policy of loss scale. + variables(Optional[ + Union[Sequence[Text], Callable[[], Sequence[Text]]] + ]): maintained variables. + + For example: + + .. 
code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function(type="train")
+        def train_job(
+            images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float),
+            labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32),
+        ) -> tp.Numpy:
+            with flow.scope.placement("gpu", "0:0"):
+                logits = lenet(images, train=True)
+                loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
+                    labels, logits, name="softmax_loss"
+                )
+
+            # Set learning rate as 0.1
+            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.1])
+            # Set momentum=0.9 SGD optimizer
+            flow.optimizer.SGD(lr_scheduler, momentum=0.9).minimize(loss)
+
+            return loss
+    """
+
+    def __init__(
+        self,
+        lr_scheduler: LrScheduler,
+        loss_scale_factor: Optional[float] = None,
+        momentum: float = 0.9,
+        grad_clipping: Optional[ClipGradientConf] = None,
+        train_step_lbn: Optional[Text] = None,
+        loss_scale_policy: Optional[LossScalePolicy] = None,
+        variables: Optional[
+            Union[Sequence[Text], Callable[[], Sequence[Text]]]
+        ] = GetVariablesForCurrentJob,
+    ):
+        super().__init__(
+            loss_scale_factor, train_step_lbn, loss_scale_policy,
+        )
+        self.lr_scheduler = lr_scheduler
+        self.grad_clipping = grad_clipping
+        self.momentum = momentum
+        self.variables = variables
+
+    def _AddOptimizerConfInTrainConf(self, train_conf) -> None:
+        optimizer_conf = train_conf.mutable_optimizer_conf().Add()
+        self.lr_scheduler.SetLrFieldsInOptimizerConf(optimizer_conf)
+        if self.grad_clipping is not None:
+            optimizer_conf.mutable_clip_conf().CopyFrom(self.grad_clipping.clip_conf)
+        if self.momentum == 0:
+            optimizer_conf.mutable_naive_conf()
+        else:
+            optimizer_conf.mutable_momentum_conf().set_beta(self.momentum)
+        for variable in self.Variables():
+            optimizer_conf.add_variable_op_names(variable)
+
+
+@oneflow_export("optimizer.SGDW")
+class SGDW(Optimizer):
+    r"""The optimizer of the stochastic-gradient-descent-weight-decay algorithm.
+
+    (More details please refer to `Decoupled Weight Decay Regularization <https://arxiv.org/abs/1711.05101>`_).
+
+    When the momentum = 0, the equation of parameters updating is:
+
+    .. math::
+
+        param_{new} = param_{old} - learning\_rate*(grad + \lambda*param_{old})
+
+    With momentum, the equation of parameters updating is:
+
+    .. math::
+
+        & V_{t} = \beta*V_{t-1} - learning\_rate*g_t
+
+        & param_{new} = param_{old} + V_{t} - learning\_rate * \lambda*param_{old}
+
+    Args:
+        lr_scheduler (LrScheduler): The scheduler of learning rate.
+        loss_scale_factor (Optional[float], optional): The scale factor of loss. Defaults to None.
+        momentum (float, optional): Momentum factor (:math:`\beta`). Defaults to 0.9.
+        weight_decay (Optional[float], optional): The weight decay factor (In the equation is :math:`\lambda`). Defaults to None.
+        weight_decay_includes (Optional[Union[Sequence[Text], Text]], optional): The name of the model parameters that use weight decay. Defaults to None.
+        weight_decay_excludes (Optional[Union[Sequence[Text], Text]], optional): The name of the model parameters that do not use weight decay. Defaults to None.
+        grad_clipping (Optional[ClipGradientConf], optional): The gradient clipping strategy. Defaults to None.
+        train_step_lbn (Optional[Text], optional): The logical blob name of the train step counter. Defaults to None.
+        loss_scale_policy (Optional[LossScalePolicy]): The policy of loss scale.
+        variables (Optional[Union[Sequence[Text], Callable[[], Sequence[Text]]]], optional): The maintained variables. Defaults to GetVariablesForCurrentJob.
+
+    Note:
+
+        Only one of `weight_decay_includes` and `weight_decay_excludes` can be set. If both are None,
+        all the model parameters will use weight decay.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function(type="train")
+        def train_job(
+            images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float),
+            labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32),
+        ) -> tp.Numpy:
+            with flow.scope.placement("gpu", "0:0"):
+                logits = lenet(images, train=True)
+                loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
+                    labels, logits, name="softmax_loss"
+                )
+
+            # Set learning rate as 0.1
+            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.1])
+            # Set momentum=0.9 SGDW optimizer, weight_decay factor is 0.00005
+            flow.optimizer.SGDW(lr_scheduler, momentum=0.9, weight_decay=0.00005).minimize(loss)
+
+            return loss
+    """
+
+    def __init__(
+        self,
+        lr_scheduler: LrScheduler,
+        loss_scale_factor: Optional[float] = None,
+        momentum: float = 0.9,
+        weight_decay: Optional[float] = None,
+        weight_decay_includes: Optional[Union[Sequence[Text], Text]] = None,
+        weight_decay_excludes: Optional[Union[Sequence[Text], Text]] = None,
+        grad_clipping: Optional[ClipGradientConf] = None,
+        train_step_lbn: Optional[Text] = None,
+        loss_scale_policy: Optional[LossScalePolicy] = None,
+        variables: Optional[
+            Union[Sequence[Text], Callable[[], Sequence[Text]]]
+        ] = GetVariablesForCurrentJob,
+    ):
+        super().__init__(
+            loss_scale_factor, train_step_lbn, loss_scale_policy,
+        )
+        self.lr_scheduler = lr_scheduler
+        self.grad_clipping = grad_clipping
+        self.momentum = momentum
+        self.weight_decay = weight_decay
+        if isinstance(weight_decay_includes, str):
+            weight_decay_includes = [weight_decay_includes]
+        if isinstance(weight_decay_excludes, str):
+            weight_decay_excludes = [weight_decay_excludes]
+        self.weight_decay_includes = weight_decay_includes
+        self.weight_decay_excludes = weight_decay_excludes
+        self.variables = variables
+
+    def _AddOptimizerConfInTrainConf(self, train_conf) -> None:
+        optimizer_conf = train_conf.mutable_optimizer_conf().Add()
+        self.lr_scheduler.SetLrFieldsInOptimizerConf(optimizer_conf)
+        if self.grad_clipping is not None:
+            optimizer_conf.mutable_clip_conf().CopyFrom(self.grad_clipping.clip_conf)
+        if self.momentum == 0:
+            optimizer_conf.mutable_naive_conf()
+        else:
+            optimizer_conf.mutable_momentum_conf().set_beta(self.momentum)
+        if self.weight_decay is not None:
+            optimizer_conf.mutable_weight_decay_conf().set_weight_decay_rate(
+                self.weight_decay
+            )
+            assert not (
+                self.weight_decay_excludes is not None
+                and self.weight_decay_includes is not None
+            )
+            if self.weight_decay_includes is not None:
+                for weight_decay_include in self.weight_decay_includes:
+                    optimizer_conf.mutable_weight_decay_conf().mutable_includes().add_pattern(
+                        weight_decay_include
+                    )
+            elif self.weight_decay_excludes is not None:
+                for weight_decay_exclude in self.weight_decay_excludes:
+                    optimizer_conf.mutable_weight_decay_conf().mutable_excludes().add_pattern(
+                        weight_decay_exclude
+                    )
+        for variable in self.Variables():
+            optimizer_conf.add_variable_op_names(variable)
+
+
+@oneflow_export("optimizer.Adam")
+class Adam(Optimizer):
+    r"""The optimizer of the Adam algorithm.
+
+    This algorithm can adjust the learning rate of each parameter dynamically according to the 1st-moment
+    and the 2nd-moment estimates of the gradient.
+
+    With bias correction, the equation of parameters updating is:
+
+    .. math::
+
+        & V_t = \beta_1*V_{t-1} + (1-\beta_1)*grad
+
+        & S_t = \beta_2*S_{t-1} + (1-\beta_2)*{grad} \odot {grad}
+
+        & \hat{V_t} = \frac{V_t}{1-\beta_1^t}
+
+        & \hat{S_t} = \frac{S_t}{1-\beta_2^t}
+
+        & \hat{g} = learning\_rate*\frac{\hat{V_t}}{\sqrt{\hat{S_t}}+\epsilon}
+
+        & param_{new} = param_{old} - \hat{g}
+
+    Without bias correction, the equation of parameters updating is:
+
+    .. math::
+
+        & V_t = \beta_1*V_{t-1} + (1-\beta_1)*grad
+
+        & S_t = \beta_2*S_{t-1} + (1-\beta_2)*{grad} \odot {grad}
+
+        & \hat{g} = learning\_rate*\frac{{V_t}}{\sqrt{{S_t}}+\epsilon}
+
+        & param_{new} = param_{old} - \hat{g}
+
+    More details please refer to `Adam <https://arxiv.org/abs/1412.6980>`_
+
+    Args:
+        lr_scheduler (LrScheduler): The scheduler of learning rate.
+        beta1 (float, optional): The exponential weighted average decay rate for the 1st-moment estimates (:math:`\beta_1`). Defaults to 0.9.
+        beta2 (float, optional): The exponential weighted average decay rate for the 2nd-moment estimates (:math:`\beta_2`). Defaults to 0.999.
+        epsilon (float, optional): A small float constant value for numerical stability (:math:`\epsilon`). Defaults to 1e-8.
+        do_bias_correction (bool, optional): Whether to do the bias correction. Defaults to False.
+        loss_scale_factor (Optional[float], optional): The scale factor of loss. Defaults to None.
+        grad_clipping (Optional[ClipGradientConf], optional): The gradient clipping strategy. Defaults to None.
+        train_step_lbn (Optional[Text], optional): The logical blob name of the train step counter. Defaults to None.
+        loss_scale_policy (Optional[LossScalePolicy]): The policy of loss scale.
+        variables (Optional[Union[Sequence[Text], Callable[[], Sequence[Text]]]], optional): The maintained variables. Defaults to GetVariablesForCurrentJob.
+
+    For example:
+
+    ..
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + with flow.scope.placement("gpu", "0:0"): + logits = lenet(images, train=True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + + # Set learning rate as 0.001 + lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.001]) + # Set Adam optimizer + flow.optimizer.Adam(lr_scheduler, do_bias_correction=False).minimize(loss) + + return loss + """ + + def __init__( + self, + lr_scheduler: LrScheduler, + beta1=0.9, + beta2=0.999, + epsilon=1e-8, + do_bias_correction=False, + loss_scale_factor: Optional[float] = None, + grad_clipping: Optional[ClipGradientConf] = None, + train_step_lbn: Optional[Text] = None, + loss_scale_policy: Optional[LossScalePolicy] = None, + variables: Optional[ + Union[Sequence[Text], Callable[[], Sequence[Text]]] + ] = GetVariablesForCurrentJob, + ): + super().__init__( + loss_scale_factor, train_step_lbn, loss_scale_policy, + ) + self.lr_scheduler = lr_scheduler + self.grad_clipping = grad_clipping + self.beta1 = beta1 + self.beta2 = beta2 + self.epsilon = epsilon + self.do_bias_correction = do_bias_correction + self.variables = variables + + def _AddOptimizerConfInTrainConf(self, train_conf) -> None: + optimizer_conf = train_conf.mutable_optimizer_conf().Add() + self.lr_scheduler.SetLrFieldsInOptimizerConf(optimizer_conf) + if self.grad_clipping is not None: + optimizer_conf.mutable_clip_conf().CopyFrom(self.grad_clipping.clip_conf) + optimizer_conf.mutable_adam_conf().set_beta1(self.beta1) + optimizer_conf.mutable_adam_conf().set_beta2(self.beta2) + optimizer_conf.mutable_adam_conf().set_epsilon(self.epsilon) + optimizer_conf.mutable_adam_conf().set_do_bias_correction( + self.do_bias_correction + ) + for variable in self.Variables(): + optimizer_conf.add_variable_op_names(variable) + + +@oneflow_export("optimizer.AdamW") +class AdamW(Optimizer): + r"""The optimizer of the Adam-weight-decay algorithm. + + If we use L2 regularization, + + it will be invalid due to the adaptive learning rate in Adam optimizer + + (More details please refer to `Adam-weight-decay <https://www.fast.ai/2018/07/02/adam-weight-decay/>`_). + + So we use Adam-weight-decay algorithm to solve this problem. + + With bias correction, the equation of parameters updating is: + + .. math:: + + & V_t = \beta_1*V_{t-1} + (1-\beta_1)*grad + + & S_t = \beta_2*S_{t-1} + (1-\beta_2)*{grad} \odot {grad} + + & \hat{V_t} = \frac{V_t}{1-\beta_1^t} + + & \hat{S_t} = \frac{S_t}{1-\beta_2^t} + + & \hat{g} = learning\_rate*(\frac{\hat{V_t}}{\sqrt{\hat{S_t}}+\epsilon}+\lambda*param_{old}) + + & param_{new} = param_{old} - \hat{g} + + Without bias correction, the equation of parameters updating is: + + .. math:: + + & V_t = \beta_1*V_{t-1} + (1-\beta_1)*grad + + & S_t = \beta_2*S_{t-1} + (1-\beta_2)*{grad} \odot {grad} + + & \hat{g} = learning\_rate*(\frac{{V_t}}{\sqrt{{S_t}}+\epsilon}+\lambda*param_{old}) + + & param_{new} = param_{old} - \hat{g} + + Args: + lr_scheduler (LrScheduler): The scheduler of learning rate. + beta1 (float, optional): The exponential weighted average decay rate for the 1st-moment estimates (:math:`\beta_1`). Defaults to 0.9. 
+        beta2 (float, optional): The exponential weighted average decay rate for the 2nd-moment estimates (:math:`\beta_2`). Defaults to 0.999.
+        epsilon (float, optional): A small float constant value for numerical stability (:math:`\epsilon`). Defaults to 1e-8.
+        do_bias_correction (bool, optional): Whether to do the bias correction. Defaults to False.
+        loss_scale_factor (Optional[float], optional): The scale factor of loss. Defaults to None.
+        weight_decay (Optional[float], optional): The weight decay factor (In the equation is :math:`\lambda`). Defaults to None.
+        weight_decay_includes (Optional[Union[Sequence[Text], Text]], optional): The name of the model parameters that use weight decay. Defaults to None.
+        weight_decay_excludes (Optional[Union[Sequence[Text], Text]], optional): The name of the model parameters that do not use weight decay. Defaults to None.
+        grad_clipping (Optional[ClipGradientConf], optional): The gradient clipping strategy. Defaults to None.
+        train_step_lbn (Optional[Text], optional): The logical blob name of the train step counter. Defaults to None.
+        loss_scale_policy (Optional[LossScalePolicy]): The policy of loss scale.
+        variables (Optional[Union[Sequence[Text], Callable[[], Sequence[Text]]]], optional): The maintained variables. Defaults to GetVariablesForCurrentJob.
+
+    Note:
+
+        Only one of `weight_decay_includes` and `weight_decay_excludes` can be set. If both are None,
+        all the model parameters will use weight decay.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function(type="train")
+        def train_job(
+            images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float),
+            labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32),
+        ) -> tp.Numpy:
+            with flow.scope.placement("gpu", "0:0"):
+                logits = lenet(images, train=True)
+                loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
+                    labels, logits, name="softmax_loss"
+                )
+
+            # Set learning rate as 0.001
+            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.001])
+            # Set AdamW optimizer, weight_decay factor is 0.00005
+            flow.optimizer.AdamW(lr_scheduler,
+                                 do_bias_correction=False, weight_decay=0.00005).minimize(loss)
+
+            return loss
+
+    """
+
+    def __init__(
+        self,
+        lr_scheduler: LrScheduler,
+        beta1=0.9,
+        beta2=0.999,
+        epsilon=1e-8,
+        do_bias_correction=False,
+        loss_scale_factor: Optional[float] = None,
+        weight_decay: Optional[float] = None,
+        weight_decay_includes: Optional[Union[Sequence[Text], Text]] = None,
+        weight_decay_excludes: Optional[Union[Sequence[Text], Text]] = None,
+        grad_clipping: Optional[ClipGradientConf] = None,
+        train_step_lbn: Optional[Text] = None,
+        loss_scale_policy: Optional[LossScalePolicy] = None,
+        variables: Optional[
+            Union[Sequence[Text], Callable[[], Sequence[Text]]]
+        ] = GetVariablesForCurrentJob,
+    ):
+        super().__init__(
+            loss_scale_factor, train_step_lbn, loss_scale_policy,
+        )
+        self.lr_scheduler = lr_scheduler
+        self.grad_clipping = grad_clipping
+        self.beta1 = beta1
+        self.beta2 = beta2
+        self.epsilon = epsilon
+        self.do_bias_correction = do_bias_correction
+        self.weight_decay = weight_decay
+        if isinstance(weight_decay_includes, str):
+            weight_decay_includes = [weight_decay_includes]
+        if isinstance(weight_decay_excludes, str):
+            weight_decay_excludes = [weight_decay_excludes]
+        self.weight_decay_includes = weight_decay_includes
+        self.weight_decay_excludes = weight_decay_excludes
+        self.variables = variables
+
+    def _AddOptimizerConfInTrainConf(self, train_conf) -> None:
+        optimizer_conf =
train_conf.mutable_optimizer_conf().Add() + self.lr_scheduler.SetLrFieldsInOptimizerConf(optimizer_conf) + if self.grad_clipping is not None: + optimizer_conf.mutable_clip_conf().CopyFrom(self.grad_clipping.clip_conf) + optimizer_conf.mutable_adam_conf().set_beta1(self.beta1) + optimizer_conf.mutable_adam_conf().set_beta2(self.beta2) + optimizer_conf.mutable_adam_conf().set_epsilon(self.epsilon) + optimizer_conf.mutable_adam_conf().set_do_bias_correction( + self.do_bias_correction + ) + if self.weight_decay is not None: + optimizer_conf.mutable_weight_decay_conf().set_weight_decay_rate( + self.weight_decay + ) + assert not ( + self.weight_decay_excludes is not None + and self.weight_decay_includes is not None + ) + if self.weight_decay_includes is not None: + for weight_decay_include in self.weight_decay_includes: + optimizer_conf.mutable_weight_decay_conf().mutable_includes().add_pattern( + weight_decay_include + ) + elif self.weight_decay_excludes is not None: + for weight_decay_exclude in self.weight_decay_excludes: + optimizer_conf.mutable_weight_decay_conf().mutable_excludes().add_pattern( + weight_decay_exclude + ) + for variable in self.Variables(): + optimizer_conf.add_variable_op_names(variable) + + +@oneflow_export("optimizer.RMSProp") +class RMSProp(Optimizer): + r"""The optimizer of the RMSProp algorithm. + + This algorithm uses mean squared gradient to adjust the learning rate. + + The equation of parameters updating is: + + if centered: + + .. math:: + + & mg_t = mg * \beta_1 + (1 - \beta_1) * grad + + & denom_t = S_t - mg_t * mg_t + + else: + + .. math:: + + denom_t = S_t + + .. math:: + + param_{new} = param_{old} - \frac{learning\_rate}{\sqrt{denom_t+\epsilon}} \odot grad + + Args: + lr_scheduler (LrScheduler): The scheduler of learning rate. + decay_rate (float, optional): The decay factor (:math:`\beta_1`). Defaults to 0.99. + epsilon (float, optional): A small float constant value for numerical stability (:math:`\epsilon`). Defaults to 1e-8. + centered (bool, optional): If `True`, gradients are normalized by the estimated + variance of the gradient; if False, by the uncentered second moment. + Setting this to `True` may help with training, but is slightly more + expensive in terms of computation and memory. Defaults to `False`. + loss_scale_factor (Optional[float], optional): The scale factor of loss. Defaults to None. + grad_clipping (Optional[ClipGradientConf], optional): The gradient clipping strategy. Defaults to None. + train_step_lbn (Optional[Text], optional): [description]. Defaults to None. + loss_scale_policy (Optional[LossScalePolicy]): The policy of loss scale. + variables(Optional[ + Union[Sequence[Text], Callable[[], Sequence[Text]]] + ]): maintained variables. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + with flow.scope.placement("gpu", "0:0"): + logits = lenet(images, train=True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + # Set learning rate as 0.001 + lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.001]) + # Set RMSProp optimizer + flow.optimizer.RMSProp(lr_scheduler).minimize(loss) + + return loss + + """ + + def __init__( + self, + lr_scheduler: LrScheduler, + decay_rate: float = 0.99, + epsilon: float = 1e-8, + centered: bool = False, + loss_scale_factor: Optional[float] = None, + grad_clipping: Optional[ClipGradientConf] = None, + train_step_lbn: Optional[Text] = None, + loss_scale_policy: Optional[LossScalePolicy] = None, + variables: Optional[ + Union[Sequence[Text], Callable[[], Sequence[Text]]] + ] = GetVariablesForCurrentJob, + ): + super().__init__( + loss_scale_factor, train_step_lbn, loss_scale_policy, + ) + self.lr_scheduler = lr_scheduler + self.grad_clipping = grad_clipping + self.decay_rate = decay_rate + self.epsilon = epsilon + self.centered = centered + self.variables = variables + + def _AddOptimizerConfInTrainConf(self, train_conf) -> None: + optimizer_conf = train_conf.mutable_optimizer_conf().Add() + self.lr_scheduler.SetLrFieldsInOptimizerConf(optimizer_conf) + if self.grad_clipping is not None: + optimizer_conf.mutable_clip_conf().CopyFrom(self.grad_clipping.clip_conf) + optimizer_conf.mutable_rmsprop_conf().set_decay_rate(self.decay_rate) + optimizer_conf.mutable_rmsprop_conf().set_centered(self.centered) + optimizer_conf.mutable_rmsprop_conf().set_epsilon(self.epsilon) + for variable in self.Variables(): + optimizer_conf.add_variable_op_names(variable) + + +@oneflow_export("optimizer.LARS") +class LARS(Optimizer): + r"""The optimizer of the LARS algorithm. + + The equation of parameters updating is: + + .. math:: + + & local\_learning\_rate = learning\_rate*lars\_coeff*\frac{\lVert{parm_{old}\rVert}}{\epsilon+\lVert{grad\rVert}+weight_decay*\lVert{parm_{old}\rVert}} + + & momentum_t = \beta*momentum_{t-1} + local\_learning\_rate*(grad) + + & param_{new} = param_{old} - momentum_t - local_learning_rate * weight_decay * param_{old} + + Args: + lr_scheduler (LrScheduler): The scheduler of learning rate. + momentum_beta (float, optional): The momentum factor (:math:`\beta`). Defaults to 0.9. + epsilon (float, optional): A small float constant value for numerical stability (:math:`\epsilon`). Defaults to 1e-9. + lars_coefficient (float, optional): The coefficient factor, it defines how much we trust the layer to change its weights (:math:`lars\_coeff`). Defaults to 0.0001. + loss_scale_factor (Optional[float], optional): The scale factor of loss. Defaults to None. + weight_decay (Optional[float], optional): The weight decay factor (In the equation is :math:`\lambda`). Defaults to None. + weight_decay_includes (Optional[Union[Sequence[Text], Text]], optional): The name of the model parameters that use weight decay. Defaults to None. + weight_decay_excludes (Optional[Union[Sequence[Text], Text]], optional): The name of the model parameters that do not use weight decay. Defaults to None. 
+        grad_clipping (Optional[ClipGradientConf], optional): The gradient clipping strategy. Defaults to None.
+        train_step_lbn (Optional[Text], optional): The logical blob name of the train step counter. Defaults to None.
+        loss_scale_policy (Optional[LossScalePolicy]): The policy of loss scale.
+        variables(Optional[
+            Union[Sequence[Text], Callable[[], Sequence[Text]]]
+        ]): The variables maintained (updated) by this optimizer. Defaults to all variables of the current job.
+
+    Note:
+
+        Only one of `weight_decay_includes` and `weight_decay_excludes` can be set. If both are None,
+        all the model parameters will use weight decay.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function(type="train")
+        def train_job(
+            images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float),
+            labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32),
+        ) -> tp.Numpy:
+            with flow.scope.placement("gpu", "0:0"):
+                logits = lenet(images, train=True)
+                loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
+                    labels, logits, name="softmax_loss"
+                )
+            # Set learning rate as 0.1
+            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.1])
+            # Set LARS optimizer, momentum factor is 0.9
+            flow.optimizer.LARS(lr_scheduler, momentum_beta=0.9).minimize(loss)
+
+            return loss
+
+    """
+
+    def __init__(
+        self,
+        lr_scheduler: LrScheduler,
+        momentum_beta: float = 0.9,
+        epsilon: float = 1e-9,
+        lars_coefficient: float = 0.0001,
+        loss_scale_factor: Optional[float] = None,
+        weight_decay: Optional[float] = None,
+        weight_decay_includes: Optional[Union[Sequence[Text], Text]] = None,
+        weight_decay_excludes: Optional[Union[Sequence[Text], Text]] = None,
+        grad_clipping: Optional[ClipGradientConf] = None,
+        train_step_lbn: Optional[Text] = None,
+        loss_scale_policy: Optional[LossScalePolicy] = None,
+        variables: Optional[
+            Union[Sequence[Text], Callable[[], Sequence[Text]]]
+        ] = GetVariablesForCurrentJob,
+    ):
+        super().__init__(
+            loss_scale_factor, train_step_lbn, loss_scale_policy,
+        )
+        self.lr_scheduler = lr_scheduler
+        self.grad_clipping = grad_clipping
+        self.momentum_beta = momentum_beta
+        self.epsilon = epsilon
+        self.lars_coefficient = lars_coefficient
+        self.weight_decay = weight_decay
+        if isinstance(weight_decay_includes, str):
+            weight_decay_includes = [weight_decay_includes]
+        if isinstance(weight_decay_excludes, str):
+            weight_decay_excludes = [weight_decay_excludes]
+        self.weight_decay_includes = weight_decay_includes
+        self.weight_decay_excludes = weight_decay_excludes
+        self.variables = variables
+
+    def _AddOptimizerConfInTrainConf(self, train_conf) -> None:
+        optimizer_conf = train_conf.mutable_optimizer_conf().Add()
+        self.lr_scheduler.SetLrFieldsInOptimizerConf(optimizer_conf)
+        if self.grad_clipping is not None:
+            optimizer_conf.mutable_clip_conf().CopyFrom(self.grad_clipping.clip_conf)
+        optimizer_conf.mutable_lars_conf().set_momentum_beta(self.momentum_beta)
+        optimizer_conf.mutable_lars_conf().set_epsilon(self.epsilon)
+        optimizer_conf.mutable_lars_conf().set_lars_coefficient(self.lars_coefficient)
+        if self.weight_decay is not None:
+            optimizer_conf.mutable_weight_decay_conf().set_weight_decay_rate(
+                self.weight_decay
+            )
+            assert not (
+                self.weight_decay_excludes is not None
+                and self.weight_decay_includes is not None
+            )
+            if self.weight_decay_includes is not None:
+                for weight_decay_include in self.weight_decay_includes:
+                    optimizer_conf.mutable_weight_decay_conf().mutable_includes().add_pattern(
+                        weight_decay_include
+                    )
+            elif self.weight_decay_excludes is not None:
+                for weight_decay_exclude in self.weight_decay_excludes:
+                    optimizer_conf.mutable_weight_decay_conf().mutable_excludes().add_pattern(
+                        weight_decay_exclude
+                    )
+        for variable in self.Variables():
+            optimizer_conf.add_variable_op_names(variable)
+
+
+@oneflow_export("optimizer.LazyAdam")
+class LazyAdam(Optimizer):
+    r"""
+    The optimizer of the LazyAdam algorithm.
+
+    This algorithm can adjust the learning rate of each parameter dynamically according to the 1st-moment estimates and the 2nd-moment estimates of the gradient.
+
+    The difference between the Adam and LazyAdam optimizers is that LazyAdam only updates the elements that have gradients in the current batch, which makes it faster than the Adam optimizer.
+
+    .. math::
+
+        & V_t = \beta_1 * V_{t-1} + (1 - \beta_1) * grad
+
+        & S_t = \beta_2 * S_{t-1} + (1 - \beta_2) * {grad} \odot {grad}
+
+        & \hat{g} = learning\_rate * \frac{{V_t}}{\sqrt{{S_t}} + \epsilon}
+
+        & param_{new} = param_{old} - \hat{g}
+
+    Args:
+        lr_scheduler (LrScheduler): The scheduler of learning rate.
+        beta1 (float, optional): The exponential weighted average decay rate for the 1st-moment estimates (:math:`\beta_1`). Defaults to 0.9.
+        beta2 (float, optional): The exponential weighted average decay rate for the 2nd-moment estimates (:math:`\beta_2`). Defaults to 0.999.
+        epsilon (float, optional): A small float constant value for numerical stability (:math:`\epsilon`). Defaults to 1e-8.
+        loss_scale_factor (Optional[float], optional): The scale factor of loss. Defaults to None.
+        grad_clipping (Optional[ClipGradientConf], optional): The gradient clipping strategy. Defaults to None.
+        train_step_lbn (Optional[Text], optional): The logical blob name of the train step counter. Defaults to None.
+        loss_scale_policy (Optional[LossScalePolicy]): The policy of loss scale.
+        variables(Optional[
+            Union[Sequence[Text], Callable[[], Sequence[Text]]]
+        ]): The variables maintained (updated) by this optimizer. Defaults to all variables of the current job.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function(type="train")
+        def train_job(
+            images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float),
+            labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32),
+        ) -> tp.Numpy:
+            with flow.scope.placement("gpu", "0:0"):
+                logits = lenet(images, train=True)
+                loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
+                    labels, logits, name="softmax_loss"
+                )
+            # Set learning rate as 0.001
+            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.001])
+            # Set LazyAdam optimizer
+            flow.optimizer.LazyAdam(lr_scheduler).minimize(loss)
+
+            return loss
+
+    """
+
+    def __init__(
+        self,
+        lr_scheduler: LrScheduler,
+        beta1: float = 0.9,
+        beta2: float = 0.999,
+        epsilon: float = 1e-8,
+        loss_scale_factor: Optional[float] = None,
+        grad_clipping: Optional[ClipGradientConf] = None,
+        train_step_lbn: Optional[Text] = None,
+        loss_scale_policy: Optional[LossScalePolicy] = None,
+        variables: Optional[
+            Union[Sequence[Text], Callable[[], Sequence[Text]]]
+        ] = GetVariablesForCurrentJob,
+    ):
+        super().__init__(
+            loss_scale_factor, train_step_lbn, loss_scale_policy,
+        )
+        self.lr_scheduler = lr_scheduler
+        self.grad_clipping = grad_clipping
+        self.beta1 = beta1
+        self.beta2 = beta2
+        self.epsilon = epsilon
+        self.variables = variables
+
+    def _AddOptimizerConfInTrainConf(self, train_conf) -> None:
+        optimizer_conf = train_conf.mutable_optimizer_conf().Add()
+        self.lr_scheduler.SetLrFieldsInOptimizerConf(optimizer_conf)
+        if self.grad_clipping is not None:
+            optimizer_conf.mutable_clip_conf().CopyFrom(self.grad_clipping.clip_conf)
+        optimizer_conf.mutable_lazy_adam_conf().set_beta1(self.beta1)
+        optimizer_conf.mutable_lazy_adam_conf().set_beta2(self.beta2)
+        optimizer_conf.mutable_lazy_adam_conf().set_epsilon(self.epsilon)
+        for variable in self.Variables():
+            optimizer_conf.add_variable_op_names(variable)
+
+
+@oneflow_export("optimizer.LAMB")
+class LAMB(Optimizer):
+
+    r"""The optimizer of the LAMB algorithm.
+
+    Args:
+        lr_scheduler (LrScheduler): The scheduler of learning rate.
+        beta1 (float, optional): The exponential weighted average decay rate for the 1st-moment estimates (:math:`\beta_1`). Defaults to 0.9.
+        beta2 (float, optional): The exponential weighted average decay rate for the 2nd-moment estimates (:math:`\beta_2`). Defaults to 0.999.
+        epsilon (float, optional): A small float constant value for numerical stability (:math:`\epsilon`). Defaults to 1e-6.
+        loss_scale_factor (Optional[float], optional): The scale factor of loss. Defaults to None.
+        weight_decay (Optional[float], optional): The weight decay factor (:math:`\lambda` in the equation). Defaults to None.
+        weight_decay_includes (Optional[Union[Sequence[Text], Text]], optional): The names of the model parameters that use weight decay. Defaults to None.
+        weight_decay_excludes (Optional[Union[Sequence[Text], Text]], optional): The names of the model parameters that do not use weight decay. Defaults to None.
+        grad_clipping (Optional[ClipGradientConf], optional): The gradient clipping strategy. Defaults to None.
+        train_step_lbn (Optional[Text], optional): The logical blob name of the train step counter. Defaults to None.
+        loss_scale_policy (Optional[LossScalePolicy]): The policy of loss scale.
+        variables(Optional[
+            Union[Sequence[Text], Callable[[], Sequence[Text]]]
+        ]): The variables maintained (updated) by this optimizer. Defaults to all variables of the current job.
+
+    Note:
+
+        Only one of `weight_decay_includes` and `weight_decay_excludes` can be set. If both are None,
+        all the model parameters will use weight decay.
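+
+    For example (a minimal usage sketch mirroring the AdamW example above; `lenet` and
+    `BATCH_SIZE` are assumed to be defined as in that example):
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+
+        @flow.global_function(type="train")
+        def train_job(
+            images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float),
+            labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32),
+        ) -> tp.Numpy:
+            with flow.scope.placement("gpu", "0:0"):
+                logits = lenet(images, train=True)
+                loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
+                    labels, logits, name="softmax_loss"
+                )
+            # Set learning rate as 0.001
+            lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.001])
+            # Set LAMB optimizer with a small weight decay factor
+            flow.optimizer.LAMB(lr_scheduler, weight_decay=0.01).minimize(loss)
+
+            return loss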
+
+    """
+
+    def __init__(
+        self,
+        lr_scheduler: LrScheduler,
+        beta1: float = 0.9,
+        beta2: float = 0.999,
+        epsilon: float = 1e-6,
+        loss_scale_factor: Optional[float] = None,
+        weight_decay: Optional[float] = None,
+        weight_decay_includes: Optional[Union[Sequence[Text], Text]] = None,
+        weight_decay_excludes: Optional[Union[Sequence[Text], Text]] = None,
+        grad_clipping: Optional[ClipGradientConf] = None,
+        train_step_lbn: Optional[Text] = None,
+        loss_scale_policy: Optional[LossScalePolicy] = None,
+        variables: Optional[
+            Union[Sequence[Text], Callable[[], Sequence[Text]]]
+        ] = GetVariablesForCurrentJob,
+    ):
+        super().__init__(
+            loss_scale_factor, train_step_lbn, loss_scale_policy,
+        )
+        self.lr_scheduler = lr_scheduler
+        self.grad_clipping = grad_clipping
+        self.beta1 = beta1
+        self.beta2 = beta2
+        self.epsilon = epsilon
+        self.weight_decay = weight_decay
+        if isinstance(weight_decay_includes, str):
+            weight_decay_includes = [weight_decay_includes]
+        if isinstance(weight_decay_excludes, str):
+            weight_decay_excludes = [weight_decay_excludes]
+        self.weight_decay_includes = weight_decay_includes
+        self.weight_decay_excludes = weight_decay_excludes
+        self.variables = variables
+
+    def _AddOptimizerConfInTrainConf(self, train_conf) -> None:
+        optimizer_conf = train_conf.mutable_optimizer_conf().Add()
+        self.lr_scheduler.SetLrFieldsInOptimizerConf(optimizer_conf)
+        if self.grad_clipping is not None:
+            optimizer_conf.mutable_clip_conf().CopyFrom(self.grad_clipping.clip_conf)
+        optimizer_conf.mutable_lamb_conf().set_beta1(self.beta1)
+        optimizer_conf.mutable_lamb_conf().set_beta2(self.beta2)
+        optimizer_conf.mutable_lamb_conf().set_epsilon(self.epsilon)
+        if self.weight_decay is not None:
+            optimizer_conf.mutable_weight_decay_conf().set_weight_decay_rate(
+                self.weight_decay
+            )
+            assert not (
+                self.weight_decay_excludes is not None
+                and self.weight_decay_includes is not None
+            )
+            if self.weight_decay_includes is not None:
+                for weight_decay_include in self.weight_decay_includes:
+                    optimizer_conf.mutable_weight_decay_conf().mutable_includes().add_pattern(
+                        weight_decay_include
+                    )
+            elif self.weight_decay_excludes is not None:
+                for weight_decay_exclude in self.weight_decay_excludes:
+                    optimizer_conf.mutable_weight_decay_conf().mutable_excludes().add_pattern(
+                        weight_decay_exclude
+                    )
+        for variable in self.Variables():
+            optimizer_conf.add_variable_op_names(variable)
+
+
+@oneflow_export("optimizer.CombinedOptimizer")
+class CombinedOptimizer(Optimizer):
+    r"""
+    Combined optimizer for the multi-optimizer case.
+
+    Args:
+        optimizers (Sequence[Optimizer]): The optimizers to work together.
+        loss_scale_factor (Optional[float], optional): The scale factor of loss. Defaults to None.
+        train_step_lbn (Optional[Text], optional): The logical blob name of the train step counter. Defaults to None.
+        loss_scale_policy (Optional[LossScalePolicy]): The policy of loss scale.
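+
+    For example (a minimal sketch rather than a runnable test; it assumes `loss` and
+    `lr_scheduler` are built as in the optimizer examples above, and that the listed
+    variable names exist in the current job):
+
+    .. code-block:: python
+
+        # Each sub-optimizer owns a disjoint subset of the job's variables
+        sgd = flow.optimizer.SGD(
+            lr_scheduler, momentum=0.9, variables=["conv1-weight", "conv1-bias"]
+        )
+        adam = flow.optimizer.Adam(lr_scheduler, variables=["dense1-weight"])
+        flow.optimizer.CombinedOptimizer([sgd, adam]).minimize(loss)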
+
+    A full example: see test_multi_optimizer.py.
+    """
+
+    def __init__(
+        self,
+        optimizers: Sequence[Optimizer],
+        loss_scale_factor: Optional[float] = None,
+        train_step_lbn: Optional[Text] = None,
+        loss_scale_policy: Optional[LossScalePolicy] = None,
+    ):
+        super().__init__(
+            loss_scale_factor, train_step_lbn, loss_scale_policy,
+        )
+        for optimizer in optimizers:
+            assert not isinstance(
+                optimizer, CombinedOptimizer
+            ), "Forbid constructing CombinedOptimizer recursively"
+            assert optimizer.train_step_lbn is None, (
+                "Only one train step lbn is allowed among multiple optimizers; "
+                "please set this parameter in CombinedOptimizer"
+            )
+            assert optimizer.loss_scale_policy is None, (
+                "Only one loss scale policy is allowed among multiple optimizers; "
+                "please set this parameter in CombinedOptimizer"
+            )
+        self.optimizers = optimizers
+
+    def Variables(self) -> List[Text]:
+        if not self._variables_list_init:
+            self.variables = []
+            for optimizer in self.optimizers:
+                # Flatten each optimizer's variable list into a single list
+                self.variables.extend(optimizer.Variables())
+            self._variables_list_init = True
+
+        return self.variables
+
+    def _SanityCheck(self):
+        all_variables = set(GetVariablesForCurrentJob())
+        union_set = set()
+        total = 0
+        for optimizer in self.optimizers:
+            s = set(optimizer.Variables())
+            total += len(s)
+            union_set = union_set.union(s)
+
+        assert union_set.issubset(all_variables)
+        # The variable sets must be pairwise disjoint: the union's size equals
+        # the sum of the sizes only if no variable appears in two optimizers.
+        assert (
+            total == len(union_set)
+        ), "Do not allow overlap of variables between multi optimizers"
+
+    def _AddOptimizerConfInTrainConf(self, train_conf) -> None:
+        self._SanityCheck()
+        for optimizer in self.optimizers:
+            optimizer._AddOptimizerConfInTrainConf(train_conf)
diff --git a/oneflow/compatible_single_client_python/ops/pad.py b/oneflow/compatible_single_client_python/ops/pad.py
new file mode 100644
index 0000000000000000000000000000000000000000..120a8f0b0ee3044ee096667081aa5b8631dd2cbd
--- /dev/null
+++ b/oneflow/compatible_single_client_python/ops/pad.py
@@ -0,0 +1,540 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+from typing import Optional, Sequence, Union
+
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.framework import id_util as id_util
+from oneflow.compatible_single_client_python.framework import (
+    remote_blob as remote_blob_util,
+)
+from oneflow.compatible_single_client_python.oneflow_export import (
+    oneflow_export,
+    stable_api,
+)
+import oneflow._oneflow_internal
+
+
+@oneflow_export("pad")
+def pad(
+    x: oneflow._oneflow_internal.BlobDesc,
+    paddings: Sequence[int],
+    constant_value: Union[int, float] = 0,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator pads the input blob with a constant value specified by the user. The amount of padding is set with the parameter `paddings`.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): The input Blob.
+        paddings (Sequence[int]): A sequence of (before, after) pairs specifying the padding width of each dimension; its length must equal the length of `x.shape`.
+        constant_value (Union[int, float], optional): The constant value to pad. Defaults to 0.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Raises:
+        ValueError: The parameter `paddings` must be a tuple or a list.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The Blob after padding.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+        import numpy as np
+
+
+        @flow.global_function()
+        def pad_Job(x: tp.Numpy.Placeholder((3, 3))
+        ) -> tp.Numpy:
+            return flow.pad(x,
+                            paddings=((2, 2), (1, 1)),
+                            constant_value=5)
+
+
+        x = np.array([[1, 1, 1],
+                      [1, 1, 1],
+                      [1, 1, 1]]).astype(np.float32)
+        out = pad_Job(x)
+
+        # out [[5. 5. 5. 5. 5.]
+        #      [5. 5. 5. 5. 5.]
+        #      [5. 1. 1. 1. 5.]
+        #      [5. 1. 1. 1. 5.]
+        #      [5. 1. 1. 1. 5.]
+        #      [5. 5. 5. 5. 5.]
+        #      [5. 5. 5. 5. 5.]]
+
+    """
+    padding_before = []
+    padding_after = []
+    if isinstance(paddings, (list, tuple)):
+        assert len(paddings) == len(x.shape), ValueError(
+            "paddings must have the same length as the input dims"
+        )
+        for p in paddings:
+            assert isinstance(p, (list, tuple)) and len(p) == 2, ValueError(
+                "each element of paddings must be a tuple or a list of length 2"
+            )
+            padding_before.append(p[0])
+            padding_after.append(p[1])
+    else:
+        raise ValueError("paddings must be a tuple or a list.")
+    if x.dtype in [
+        flow.float32,
+        flow.float16,
+        flow.float64,
+    ]:
+        floating_constant_value = float(constant_value)
+        integral_constant_value = int(0)
+    else:
+        floating_constant_value = float(0)
+        integral_constant_value = int(constant_value)
+    return (
+        flow.user_op_builder(name if name is not None else id_util.UniqueStr("Pad_"))
+        .Op("pad")
+        .Input("x", [x])
+        .Output("y")
+        .Attr("padding_before", padding_before)
+        .Attr("padding_after", padding_after)
+        .Attr("floating_constant_value", floating_constant_value)
+        .Attr("integral_constant_value", integral_constant_value)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("pad_grad")
+def pad_grad(
+    x: oneflow._oneflow_internal.BlobDesc,
+    paddings: Sequence[int],
+    constant_value: Union[int, float] = 0,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """The gradient op of `pad`: crops the regions given by `paddings` from the input. Mainly used by automatic differentiation."""
+    padding_before = []
+    padding_after = []
+    if isinstance(paddings, (list, tuple)):
+        assert len(paddings) == len(x.shape), ValueError(
+            "paddings must have the same length as the input dims"
+        )
+        for p in paddings:
+            assert isinstance(p, (list, tuple)) and len(p) == 2, ValueError(
+                "each element of paddings must be a tuple or a list of length 2"
+            )
+            padding_before.append(p[0])
+            padding_after.append(p[1])
+    else:
+        raise ValueError("paddings must be a tuple or a list.")
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("PadGrad_")
+        )
+        .Op("pad_grad")
+        .Input("dy", [x])
+        .Output("dx")
+        .Attr("padding_before", padding_before)
+        .Attr("padding_after", padding_after)
+        .Attr("floating_constant_value", float(constant_value))
+        .Attr("integral_constant_value", int(constant_value))
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("same_padding")
+def same_padding(
+    x: oneflow._oneflow_internal.BlobDesc,
+    padding: str,
+    data_format: str,
+    kernel_size: Sequence[int],
+    strides: Sequence[int],
+    dilation_rate: Sequence[int],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator does the padding in "SAME" mode: it computes the pad width from `kernel_size`, `strides` and `dilation_rate` so that the feature map keeps its size after convolution or other operations.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): The input blob.
+        padding (str): The padding mode. It should be "SAME_UPPER" or "SAME_LOWER".
+        data_format (str): The data format of the input Blob. If the string starts with "NC", the data format is `channels first`; otherwise it is `channels last`.
+        kernel_size (Sequence[int]): The kernel size of the operation. Its type should be tuple or list.
+        strides (Sequence[int]): The strides of the operation. Its type should be tuple or list.
+        dilation_rate (Sequence[int]): The dilation rate of the operation. Its type should be tuple or list.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The Blob after padding.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+        import numpy as np
+
+
+        @flow.global_function()
+        def same_pad_Job(x: tp.Numpy.Placeholder((1, 1, 3, 3))
+        ) -> tp.Numpy:
+            return flow.same_padding(x,
+                                     padding="SAME_UPPER",
+                                     data_format="NCHW",
+                                     kernel_size=(3, 3),
+                                     strides=(1, 1),
+                                     dilation_rate=(1, 1))
+
+
+        x = np.ones(shape=(1, 1, 3, 3)).astype(np.float32)
+        out = same_pad_Job(x)
+
+        # out [[[[0. 0. 0. 0. 0.]
+        #        [0. 1. 1. 1. 0.]
+        #        [0. 1. 1. 1. 0.]
+        #        [0. 1. 1. 1. 0.]
+        #        [0. 0. 0. 0. 0.]]]]
+
+    """
+    assert isinstance(padding, str) and (
+        padding.upper() == "SAME_LOWER" or padding.upper() == "SAME_UPPER"
+    ), 'padding must be "SAME_LOWER" or "SAME_UPPER".'
+    channel_pos = "channels_first" if data_format.startswith("NC") else "channels_last"
+    assert isinstance(kernel_size, (list, tuple))
+    assert isinstance(strides, (list, tuple))
+    assert isinstance(dilation_rate, (list, tuple))
+    num_spatial_dims = len(x.shape) - 2
+    assert len(kernel_size) == num_spatial_dims
+    assert len(strides) == num_spatial_dims
+    assert len(dilation_rate) == num_spatial_dims
+
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("SamePadding_")
+        )
+        .Op("same_padding")
+        .Input("x", [x])
+        .Output("y")
+        .Attr("padding", padding.lower())
+        .Attr("data_format", channel_pos)
+        .Attr("kernel_size", kernel_size)
+        .Attr("strides", strides)
+        .Attr("dilation_rate", dilation_rate)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("reflection_pad2d")
+@stable_api
+def reflection_pad2d(
+    x: oneflow._oneflow_internal.BlobDesc,
+    padding: Union[int, tuple, list],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Pads the input tensor using the reflection of the input boundary.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): The input blob; only the "NCHW" format is supported.
+        padding (Union[int, tuple, list]): The boundary of padding. If it is an int, the same padding is used on all four sides; if it is a 4-element tuple or list, it is interpreted as (padding_left, padding_right, padding_top, padding_bottom).
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The padded Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+        import numpy as np
+
+
+        @flow.global_function()
+        def pad_Job(x: tp.Numpy.Placeholder((1, 2, 3, 3))
+        ) -> tp.Numpy:
+            return flow.reflection_pad2d(x, padding=[2, 2, 1, 1])
+
+
+        x = np.arange(18).reshape((1, 2, 3, 3)).astype(np.float32)
+        out = pad_Job(x)
+
+        # out [[[[ 5.  4.  3.  4.  5.  4.  3.]
+        #        [ 2.  1.  0.  1.  2.  1.  0.]
+        #        [ 5.  4.  3.  4.  5.  4.  3.]
+        #        [ 8.  7.  6.  7.  8.  7.  6.]
+        #        [ 5.  4.  3.  4.  5.  4.  3.]]
+
+        #       [[14. 13. 12. 13. 14. 13. 12.]
+        #        [11. 10.  9. 10. 11. 10.  9.]
+        #        [14. 13. 12. 13. 14. 13. 12.]
+        #        [17. 16. 15. 16. 17. 16. 15.]
+        #        [14. 13. 12. 13. 14. 13. 12.]]]]
+
+    """
+    H, W = x.shape[2], x.shape[3]
+    if isinstance(padding, (tuple, list)):
+        assert len(padding) == len(x.shape), ValueError(
+            "the padding boundary must have the same length as the input dims"
+        )
+        assert (
+            padding[2] < H and padding[3] < H and padding[0] < W and padding[1] < W
+        ), ValueError(
+            "Padding size should be less than the corresponding input dimension!"
+        )
+        boundary = [padding[0], padding[1], padding[2], padding[3]]
+    elif isinstance(padding, int):
+        assert padding < H and padding < W, ValueError(
+            "Padding size should be less than the corresponding input dimension!"
+        )
+        boundary = [padding, padding, padding, padding]
+    else:
+        raise ValueError("padding must be an int, a list or a tuple!")
+
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("Reflection_Pad2d_")
+        )
+        .Op("reflection_pad2d")
+        .Input("x", [x])
+        .Output("y")
+        .Attr("padding", list(boundary))
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("replication_pad2d")
+def replication_pad2d(
+    x: oneflow._oneflow_internal.BlobDesc,
+    padding: Union[int, tuple, list],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Pads the input tensor using the replication of the input boundary.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): The input blob; only the "NCHW" format is supported.
+        padding (Union[int, tuple, list]): The boundary of padding. If it is an int, the same padding is used on all four sides; if it is a 4-element tuple or list, it is interpreted as (padding_left, padding_right, padding_top, padding_bottom).
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The padded Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+        import numpy as np
+
+
+        @flow.global_function()
+        def pad_Job(x: tp.Numpy.Placeholder((1, 2, 3, 3))
+        ) -> tp.Numpy:
+            return flow.replication_pad2d(x, padding=[2, 2, 1, 1])
+
+
+        x = np.arange(18).reshape((1, 2, 3, 3)).astype(np.float32)
+        out = pad_Job(x)
+
+        # out [[[[ 0.  0.  0.  1.  2.  2.  2.]
+        #        [ 0.  0.  0.  1.  2.  2.  2.]
+        #        [ 3.  3.  3.  4.  5.  5.  5.]
+        #        [ 6.  6.  6.  7.  8.  8.  8.]
+        #        [ 6.  6.  6.  7.  8.  8.  8.]]
+
+        #       [[ 9.  9.  9. 10. 11. 11. 11.]
+        #        [ 9.  9.  9. 10. 11. 11. 11.]
+        #        [12. 12. 12. 13. 14. 14. 14.]
+        #        [15. 15. 15. 16. 17. 17. 17.]
+        #        [15. 15. 15. 16. 17. 17. 17.]]]]
+
+    """
+    H, W = x.shape[2], x.shape[3]
+    if isinstance(padding, (tuple, list)):
+        assert len(padding) == len(x.shape), ValueError(
+            "the padding boundary must have the same length as the input dims"
+        )
+        boundary = [padding[0], padding[1], padding[2], padding[3]]
+    elif isinstance(padding, int):
+        boundary = [padding, padding, padding, padding]
+    else:
+        raise ValueError("padding must be an int, a list or a tuple!")
+
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("Replication_Pad2d_")
+        )
+        .Op("replication_pad2d")
+        .Input("x", [x])
+        .Output("y")
+        .Attr("padding", list(boundary))
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("constant_pad2d")
+def constant_pad2d(
+    x: oneflow._oneflow_internal.BlobDesc,
+    padding: Union[int, tuple, list],
+    constant_value: Union[int, float] = 0,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Pads the input tensor using an input constant value.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): The input blob; only the "NCHW" format is supported.
+        padding (Union[int, tuple, list]): The boundary of padding. If it is an int, the same padding is used on all four sides; if it is a 4-element tuple or list, it is interpreted as (padding_left, padding_right, padding_top, padding_bottom).
+        constant_value (Union[int, float]): The constant value used for padding. Defaults to 0.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The padded Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+        import numpy as np
+
+
+        @flow.global_function()
+        def pad_Job(x: tp.Numpy.Placeholder((1, 2, 3, 3))
+        ) -> tp.Numpy:
+            return flow.constant_pad2d(x, padding=[2, 2, 1, 1], constant_value=1.5)
+
+
+        x = np.arange(18).reshape((1, 2, 3, 3)).astype(np.float32)
+        out = pad_Job(x)
+
+        # out [[[[ 1.5  1.5  1.5  1.5  1.5  1.5  1.5]
+        #        [ 1.5  1.5  0.   1.   2.   1.5  1.5]
+        #        [ 1.5  1.5  3.   4.   5.   1.5  1.5]
+        #        [ 1.5  1.5  6.   7.   8.   1.5  1.5]
+        #        [ 1.5  1.5  1.5  1.5  1.5  1.5  1.5]]
+
+        #       [[ 1.5  1.5  1.5  1.5  1.5  1.5  1.5]
+        #        [ 1.5  1.5  9.  10.  11.   1.5  1.5]
+        #        [ 1.5  1.5 12.  13.  14.   1.5  1.5]
+        #        [ 1.5  1.5 15.  16.  17.   1.5  1.5]
+        #        [ 1.5  1.5  1.5  1.5  1.5  1.5  1.5]]]]
+
+    """
+    H, W = x.shape[2], x.shape[3]
+    if isinstance(padding, (tuple, list)):
+        assert len(padding) == len(x.shape), ValueError(
+            "the padding boundary must have the same length as the input dims"
+        )
+        boundary = [padding[0], padding[1], padding[2], padding[3]]
+    elif isinstance(padding, int):
+        boundary = [padding, padding, padding, padding]
+    else:
+        raise ValueError("padding must be an int, a list or a tuple!")
+
+    if x.dtype in [
+        flow.float32,
+        flow.float16,
+        flow.float64,
+    ]:
+        floating_value = float(constant_value)
+        integral_value = int(0)
+    else:
+        floating_value = float(0)
+        integral_value = int(constant_value)
+
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("Constant_Pad2d_")
+        )
+        .Op("constant_pad2d")
+        .Input("x", [x])
+        .Output("y")
+        .Attr("padding", list(boundary))
+        .Attr("floating_value", floating_value)
+        .Attr("integral_value", integral_value)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("zero_pad2d")
+def zero_pad2d(
+    x: oneflow._oneflow_internal.BlobDesc,
+    padding: Union[int, tuple, list],
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Pads the input tensor using zeros.
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): The input blob; only the "NCHW" format is supported.
+        padding (Union[int, tuple, list]): The boundary of padding. If it is an int, the same padding is used on all four sides; if it is a 4-element tuple or list, it is interpreted as (padding_left, padding_right, padding_top, padding_bottom).
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The padded Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+        import numpy as np
+
+
+        @flow.global_function()
+        def pad_Job(x: tp.Numpy.Placeholder((1, 2, 3, 3))
+        ) -> tp.Numpy:
+            return flow.zero_pad2d(x, padding=[2, 2, 1, 1])
+
+
+        x = np.arange(18).reshape((1, 2, 3, 3)).astype(np.float32)
+        out = pad_Job(x)
+
+        # out [[[[ 0.  0.  0.  0.  0.  0.  0.]
+        #        [ 0.  0.  0.  1.  2.  0.  0.]
+        #        [ 0.  0.  3.  4.  5.  0.  0.]
+        #        [ 0.  0.  6.  7.  8.  0.  0.]
+        #        [ 0.  0.  0.  0.  0.  0.  0.]]
+
+        #       [[ 0.  0.  0.  0.  0.  0.  0.]
+        #        [ 0.  0.  9. 10. 11.  0.  0.]
+        #        [ 0.  0. 12. 13. 14.  0.  0.]
+        #        [ 0.  0. 15. 16. 17.  0.  0.]
+        #        [ 0.  0.  0.  0.  0.  0.  0.]]]]
+
+    """
+    if name is None:
+        name = id_util.UniqueStr("Zero_Pad2d_")
+    return constant_pad2d(x, padding, 0.0, name)
diff --git a/oneflow/compatible_single_client_python/ops/partial_fc_sample.py b/oneflow/compatible_single_client_python/ops/partial_fc_sample.py
new file mode 100644
index 0000000000000000000000000000000000000000..510f77b748aafbbf23ccc48bb8bd1d22f658fa94
--- /dev/null
+++ b/oneflow/compatible_single_client_python/ops/partial_fc_sample.py
@@ -0,0 +1,61 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+import os
+
+from oneflow.compatible import single_client as flow
+from oneflow.core.operator import op_conf_pb2 as op_conf_util
+from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util
+from oneflow.compatible_single_client_python.framework import (
+    distribute as distribute_util,
+)
+from oneflow.compatible_single_client_python.framework import id_util as id_util
+from oneflow.compatible_single_client_python.framework import (
+    remote_blob as remote_blob_util,
+)
+from oneflow.compatible_single_client_python.oneflow_export import oneflow_export
+from typing import Optional, Union
+import oneflow._oneflow_internal
+
+
+@oneflow_export("distributed_partial_fc_sample")
+def distributed_partial_fc_sample(
+    weight: oneflow._oneflow_internal.BlobDesc,
+    label: oneflow._oneflow_internal.BlobDesc,
+    num_sample: int,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """Samples a subset of `num_sample` rows of `weight` (evenly split across the
+    parallel ranks) for partial FC softmax computation, returning the remapped
+    label, the sampled label and the sampled weight.
+    """
+    parallel_num = flow.current_scope().device_parallel_desc_symbol.parallel_num
+    assert num_sample % parallel_num == 0
+    assert weight.shape[0] % parallel_num == 0
+    return (
+        flow.user_op_builder(
+            name
+            if name is not None
+            else id_util.UniqueStr("DistributedPartialFcSample_")
+        )
+        .Op("distributed_partial_fc_sample")
+        .Input("weight", [weight])
+        .Input("label", [label])
+        .Attr("num_sample", num_sample)
+        .Output("mapped_label")
+        .Output("sampled_label")
+        .Output("sampled_weight")
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()
+    )
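+
+
+# A minimal usage sketch (illustration only; `num_classes`, `embedding_size` and
+# the variable name are assumptions, not part of this file):
+#
+#     weight = flow.get_variable(
+#         "fc7-weight",
+#         shape=(num_classes, embedding_size),
+#         initializer=flow.random_normal_initializer(),
+#     )
+#     (mapped_label, sampled_label, sampled_weight) = flow.distributed_partial_fc_sample(
+#         weight, label, num_sample=num_classes // 10,
+#     )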
+""" +from __future__ import absolute_import +from typing import Optional, Sequence +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.job import initializer_conf_pb2 as initializer_conf_util +from oneflow.core.job import regularizer_conf_pb2 as regularizer_conf_util +from oneflow.compatible_single_client_python.framework import ( + distribute as distribute_util, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +import oneflow._oneflow_internal + + +@oneflow_export("layers.prelu") +def prelu( + inputs: oneflow._oneflow_internal.BlobDesc, + alpha_initializer: Optional[initializer_conf_util.InitializerConf] = None, + alpha_regularizer: Optional[regularizer_conf_util.RegularizerConf] = None, + shared_axes: Optional[Sequence[int]] = None, + trainable: bool = True, + name: str = "PRelu", + model_distribute: oneflow._oneflow_internal.distribute.Distribute = oneflow._oneflow_internal.distribute.broadcast(), +) -> oneflow._oneflow_internal.BlobDesc: + r"""The Prelu(Parametric Rectified Linear Unit) activation. + + The :math:`\alpha` is a parameter that can be trained in network + + The equation is + + .. math:: + + out = max(0, x) + \alpha*min(0, x) + + Args: + inputs (oneflow._oneflow_internal.BlobDesc): The input Blob. + alpha_initializer (Optional[initializer_conf_util.InitializerConf], optional): The initializer of alpha. Defaults to None. + alpha_regularizer (Optional[regularizer_conf_util.RegularizerConf], optional): The regularizer of alpha. Defaults to None. + shared_axes (Optional[Sequence[int]], optional): The axis along which to share learnable parameters for the prelu activation function. Defaults to None. + trainable (bool, optional): Whether to train the parameter :math:`\alpha`. Defaults to True. + name (str, optional): The name for the operation. Defaults to "PRelu". + model_distribute (oneflow._oneflow_internal.distribute.Distribute, optional): Define the way to ditribute the model. Defaults to oneflow._oneflow_internal.distribute.broadcast(). + + Returns: + oneflow._oneflow_internal.BlobDesc: The activated Blob + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + BATCH_SIZE = 100 + + + def lenet(data, train=False): + initializer = flow.truncated_normal(0.1) + conv1 = flow.layers.conv2d( + data, + 32, + 5, + padding="SAME", + name="conv1", + kernel_initializer=initializer, + ) + prelu1 = flow.layers.prelu(conv1, + alpha_initializer=initializer, + shared_axes=[2, 3], + name="Prelu1") + pool1 = flow.nn.max_pool2d( + prelu1, ksize=2, strides=2, padding="SAME", name="pool1", data_format="NCHW" + ) + conv2 = flow.layers.conv2d( + pool1, + 64, + 5, + padding="SAME", + name="conv2", + kernel_initializer=initializer, + ) + prelu2 = flow.layers.prelu(conv2, + alpha_initializer=initializer, + shared_axes=[2, 3], + name="Prelu2") + pool2 = flow.nn.max_pool2d( + prelu2, ksize=2, strides=2, padding="SAME", name="pool2", data_format="NCHW" + ) + reshape = flow.reshape(pool2, [pool2.shape[0], -1]) + hidden = flow.layers.dense( + reshape, + 512, + activation=flow.nn.relu, + kernel_initializer=initializer, + name="dense1", + ) + if train: + hidden = flow.nn.dropout(hidden, rate=0.5, name="dropout") + return flow.layers.dense(hidden, 10, kernel_initializer=initializer, name="dense2") + + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + with flow.scope.placement("gpu", "0:0"): + logits = lenet(images, train=True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, logits, name="softmax_loss" + ) + + lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.1]) + flow.optimizer.SGD(lr_scheduler, momentum=0.9).minimize(loss) + return loss + + """ + alpha_shape = list(inputs.shape[1:]) + if shared_axes is not None: + for i in shared_axes: + assert i >= 1 and i < len(inputs.shape) + alpha_shape[i - 1] = 1 + + if alpha_initializer is None: + alpha_initializer = flow.constant_initializer(0) + + with flow.scope.namespace(name): + alpha = flow.get_variable( + name="alpha", + shape=alpha_shape, + dtype=inputs.dtype, + initializer=alpha_initializer, + regularizer=alpha_regularizer, + trainable=trainable, + distribute=model_distribute, + reuse=False, + ) + + op = ( + flow.user_op_builder(name) + .Op("prelu") + .Input("x", [inputs]) + .Input("alpha", [alpha]) + .Output("y") + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() diff --git a/oneflow/compatible_single_client_python/ops/quantize_ops.py b/oneflow/compatible_single_client_python/ops/quantize_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..e12e3f90a314ab2a85f1ddc91ad3575b9c9500fd --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/quantize_ops.py @@ -0,0 +1,365 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import +from typing import Tuple, Optional +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +import oneflow._oneflow_internal + + +@oneflow_export("quantization.min_max_observer") +def min_max_observer( + input: oneflow._oneflow_internal.BlobDesc, + quantization_bit: int = 8, + quantization_scheme: str = "symmetric", + quantization_formula: str = "google", + per_layer_quantization: bool = True, + name: Optional[str] = None, +) -> Tuple[oneflow._oneflow_internal.BlobDesc, oneflow._oneflow_internal.BlobDesc]: + r"""Compute the quantization parameters of the input tensor. + + First compute the max and min values of input tensor: + + .. math:: + + & max\_value = max(input) + + & min\_value = min(input) + + Then compute the scale and zero_point with the following equations: + + if quantization_scheme == "symmetric": + + .. math:: + + & denom = 2^{quantization\_to\_bit - 1} - 1 + + & scale = max(|max\_value|,|min\_value|) / denom + + & zero\_point = 0 + + elif quantization_scheme == "affine": + + .. math:: + + & denom = 2^{quantization\_to\_bit} - 1 + + & scale = (max\_value - min\_value) / denom + + & zero\_point = -min\_value / scale + + If per_layer_quantization is False, then the shape of scale and zero_point will be (input.shape[0],). + + Args: + input (oneflow._oneflow_internal.BlobDesc): input tensor. + quantization_bit (int): Quantize input to uintX / intX, X can be in range [2, 8]. Defaults to 8. + quantization_scheme (str): "symmetric" or "affine", quantize to signed / unsigned integer. Defaults to "symmetric". + quantization_formula (str): Support "google" or "cambricon". + per_layer_quantization (bool): True or False, means per-layer / per-channel quantization. Defaults to True. + name (Optional[str]): This operator's name. Defaults to None. + + Returns: + Tuple[oneflow._oneflow_internal.BlobDesc, oneflow._oneflow_internal.BlobDesc]: The scale and zero_point of input tensor. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="predict", function_config=flow.FunctionConfig()) + def QuantizeJob( + input: tp.Numpy.Placeholder(input_shape, dtype=type_name_to_flow_type[dtype]) + ): tp.Numpy + with flow.scope.placement(device_type, "0:0"): + scale, zero_point = flow.quantization.min_max_observer( + input, quantization_bit=8, + quantization_scheme="symmetric", + quantization_formula="google", + per_layer_quantization=True + ) + return scale, zero_point + + input = (np.random.random(input_shape) - 0.5).astype(type_name_to_np_type[dtype]) + scale, zero_point = QuantizeJob(input) + + """ + if quantization_formula == "cambricon" and not per_layer_quantization: + raise NotImplementedError( + "per-channel mode is not supported in cambricon scheme" + ) + + scale, zero_point = ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("MinMaxObserver_") + ) + .Op("min_max_observer") + .Input("in", [input]) + .Output("scale") + .Output("zero_point") + .Attr("quantization_bit", quantization_bit) + .Attr("quantization_scheme", quantization_scheme) + .Attr("quantization_formula", quantization_formula) + .Attr("per_layer_quantization", per_layer_quantization) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + + return scale, zero_point + + +@oneflow_export("quantization.moving_average_min_max_observer") +def moving_average_min_max_observer( + input: oneflow._oneflow_internal.BlobDesc, + quantization_bit: int = 8, + quantization_scheme: str = "symmetric", + quantization_formula: str = "google", + momentum: float = 0.95, + name: Optional[str] = None, +) -> Tuple[oneflow._oneflow_internal.BlobDesc, oneflow._oneflow_internal.BlobDesc]: + r"""Compute the quantization parameters based on the moving average of the input tensor's min and max values. + + First compute the moving\_max and moving\_min value of input tensor: + + if quantization_scheme == "symmetric": + + .. math:: + + & moving\_max = moving\_max * momentum + |max(input)| * (1 - momentum) + + & moving\_min = moving\_max + + elif quantization_scheme == "affine": + + .. math:: + + & moving\_max = moving\_max * momentum + max(input) * (1 - momentum) + + & moving\_min = moving\_min * momentum + min(input) * (1 - momentum) + + The moving average of min and max values are initialized as the first batch of input `Blob`'s min and max. + + Then compute the scale and zero_point with the following equations: + + if quantization_scheme == "symmetric": + + .. math:: + + & denom = 2^{quantization\_to\_bit - 1} - 1 + + & scale = moving\_max / denom + + & zero\_point = 0 + + elif quantization_scheme == "affine": + + .. math:: + + & denom = 2^{quantization\_to\_bit} - 1 + + & scale = (moving\_max - moving\_min) / denom + + & zero\_point = -moving\_min / scale + + Args: + input (oneflow._oneflow_internal.BlobDesc): input tensor. + quantization_bit (int): Quantize input to uintX / intX, X can be in range [2, 8]. Defaults to 8. + quantization_scheme (str): "symmetric" or "affine", quantize to signed / unsigned integer. Defaults to "symmetric". + quantization_formula (str): Support "google" or "cambricon". + momentum (float): Smoothing parameter for exponential moving average operation. Defaults to 0.95. + name (Optional[str]): This operator's name. Defaults to None. 
+ + Returns: + Tuple[oneflow._oneflow_internal.BlobDesc, oneflow._oneflow_internal.BlobDesc]: The scale and zero_point of input tensor. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="predict", function_config=flow.FunctionConfig()) + def QuantizeJob( + input: tp.Numpy.Placeholder(input_shape, dtype=type_name_to_flow_type[dtype]) + ): tp.Numpy + with flow.scope.placement(device_type, "0:0"): + scale, zero_point = flow.quantization.moving_average_min_max_observer( + input, quantization_bit=8, + quantization_scheme="symmetric", + quantization_formula="google", + momentum=0.95 + ) + return scale, zero_point + + input = (np.random.random(input_shape) - 0.5).astype(type_name_to_np_type[dtype]) + scale, zero_point = QuantizeJob(input) + + """ + op_name = ( + name if name is not None else id_util.UniqueStr("MovingAverageMinMaxObserver_") + ) + + training = True if flow.current_global_function_desc().IsTrainable() else False + + with flow.scope.namespace(op_name): + moving_max = flow.get_variable( + "moving_max", + shape=(1,), + dtype=input.dtype, + initializer=flow.zeros_initializer(input.dtype), + trainable=False, + ) + moving_min = flow.get_variable( + "moving_min", + shape=(1,), + dtype=input.dtype, + initializer=flow.zeros_initializer(input.dtype), + trainable=False, + ) + current_train_step = flow.get_variable( + "current_train_step", + shape=(1,), + dtype=flow.int64, + initializer=flow.zeros_initializer(flow.int64), + trainable=False, + ) + stop_update_after_iters = 1 + scale, zero_point = ( + flow.user_op_builder(op_name) + .Op("moving_average_min_max_observer") + .Input("in", [input]) + .Input("current_train_step", [current_train_step]) + .Input("moving_max", [moving_max]) + .Input("moving_min", [moving_min]) + .Output("scale") + .Output("zero_point") + .Attr("training", training) + .Attr("stop_update_after_iters", stop_update_after_iters) + .Attr("quantization_bit", quantization_bit) + .Attr("quantization_scheme", quantization_scheme) + .Attr("quantization_formula", quantization_formula) + .Attr("momentum", momentum) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + + return scale, zero_point + + +@oneflow_export("quantization.fake_quantization") +def fake_quantization( + input: oneflow._oneflow_internal.BlobDesc, + scale: oneflow._oneflow_internal.BlobDesc, + zero_point: oneflow._oneflow_internal.BlobDesc, + quantization_bit: int = 8, + quantization_scheme: str = "symmetric", + quantization_formula: str = "google", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""Simulate the quantize and dequantize operations in training time. + + The output will be computed as: + + if quantization_scheme == "symmetric": + + .. math:: + + & quant\_max = 2^{quantization\_to\_bit - 1} - 1 + + & quant\_min = -quant\_max + + & clamp(round(x / scale), quant\_min, quant\_max) * scale + + elif quantization_scheme == "affine": + + .. math:: + + & quant\_max = 2^{quantization\_to\_bit} - 1 + + & quant\_min = 0 + + & (clamp(round(x / scale + zero\_point), quant\_min, quant\_max) - zero\_point) * scale + + Args: + input (oneflow._oneflow_internal.BlobDesc): input tensor. + scale (oneflow._oneflow_internal.BlobDesc): Computed by min_max_observer or moving_average_min_max_observer op. + zero_point (oneflow._oneflow_internal.BlobDesc): Computed by min_max_observer or moving_average_min_max_observer op. 
+ quantization_bit (int): Quantize input to uintX / intX, X can be in range [2, 8]. Defaults to 8. + quantization_scheme (str): "symmetric" or "affine", quantize to signed / unsigned integer. Defaults to "symmetric". + quantization_formula (str): Support "google" or "cambricon". + name (Optional[str]): This operator's name. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: Input tensor after quantize and dequantize operations. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + @flow.global_function(type="predict", function_config=flow.FunctionConfig()) + def QuantizeJob( + input: tp.Numpy.Placeholder(input_shape, dtype=type_name_to_flow_type[dtype]) + ): tp.Numpy + with flow.scope.placement(device_type, "0:0"): + scale, zero_point = flow.quantization.min_max_observer( + input, quantization_bit=8, + quantization_scheme="symmetric", + quantization_formula="google", + per_layer_quantization=True + ) + fake_quantize_out = flow.quantization.fake_quantization( + input, scale, zero_point, + quantization_bit=8, + quantization_scheme="symmetric", + quantization_formula="google" + ) + return fake_quantize_out + + input = (np.random.random(input_shape) - 0.5).astype(type_name_to_np_type[dtype]) + fake_quantize_out = QuantizeJob(input) + + """ + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("Fake_Quantization_") + ) + .Op("fake_quantization") + .Input("in", [input]) + .Input("scale", [scale]) + .Input("zero_point", [zero_point]) + .Output("out") + .Attr("quantization_bit", quantization_bit) + .Attr("quantization_scheme", quantization_scheme) + .Attr("quantization_formula", quantization_formula) + .Build() + .InferAndTryRun() + .SoleOutputBlob() + ) diff --git a/oneflow/compatible_single_client_python/ops/random_ops.py b/oneflow/compatible_single_client_python/ops/random_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..0d16a2b3eba536e1f5dab7b665c5bceebfb25364 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/random_ops.py @@ -0,0 +1,114 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import +from typing import Optional +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import module as module_util +import oneflow._oneflow_internal + + +@oneflow_export("random.bernoulli") +def Bernoulli( + x: oneflow._oneflow_internal.BlobDesc, + seed: Optional[int] = None, + dtype: Optional[flow.dtype] = None, + name: str = "Bernoulli", +) -> oneflow._oneflow_internal.BlobDesc: + """This operator returns a Blob with binaray random numbers (0 / 1) from a Bernoulli distribution. + + Args: + x (oneflow._oneflow_internal.BlobDesc): The input Blob. + seed (Optional[int], optional): The random seed. Defaults to None. + dtype (Optional[flow.dtype], optional): The data type. Defaults to None. + name (str, optional): The name for the operation. Defaults to "Bernoulli". + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def bernoulli_Job(x: tp.Numpy.Placeholder(shape=(3, 3), dtype=flow.float32), + ) -> tp.Numpy: + out = flow.random.bernoulli(x) + return out + + + x = np.array([[0.25, 0.45, 0.3], + [0.55, 0.32, 0.13], + [0.75, 0.15, 0.1]]).astype(np.float32) + out = bernoulli_Job(x) + + # Because our random seed is not fixed, so the return value is different each time. + # out [[1. 0. 0.] + # [0. 0. 1.] + # [0. 0. 0.]] + + """ + assert isinstance(name, str) + if dtype is None: + dtype = x.dtype + if seed is not None: + assert name is not None + module = flow.find_or_create_module( + name, lambda: BernoulliModule(dtype=dtype, random_seed=seed, name=name), + ) + return module(x) + + +class BernoulliModule(module_util.Module): + def __init__( + self, dtype: flow.dtype, random_seed: Optional[int], name: str, + ): + module_util.Module.__init__(self, name) + seed, has_seed = flow.random.gen_seed(random_seed) + self.op_module_builder = ( + flow.user_op_module_builder("bernoulli") + .InputSize("in", 1) + .Output("out") + .Attr("dtype", dtype) + .Attr("has_seed", has_seed) + .Attr("seed", seed) + .CheckAndComplete() + ) + self.op_module_builder.user_op_module.InitOpKernel() + + def forward(self, x: oneflow._oneflow_internal.BlobDesc): + if self.call_seq_no == 0: + name = self.module_name + else: + name = id_util.UniqueStr("Bernoulli_") + + return ( + self.op_module_builder.OpName(name) + .Input("in", [x]) + .Build() + .InferAndTryRun() + .SoleOutputBlob() + ) diff --git a/oneflow/compatible_single_client_python/ops/random_util.py b/oneflow/compatible_single_client_python/ops/random_util.py new file mode 100644 index 0000000000000000000000000000000000000000..96c2c68087511b1a37070a7d518d7c47c4326e1e --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/random_util.py @@ -0,0 +1,47 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +import typing +import random +import sys + +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + + +@oneflow_export("random.gen_seed") +def api_gen_random_seed(seed: typing.Optional[int] = None): + api = enable_if.unique([consistent_gen_random_seed, mirrored_gen_random_seed]) + return api(seed) + + +@enable_if.condition(hob.consistent_view_enabled) +def consistent_gen_random_seed(seed=None): + if seed is None: + seed = random.randint(-sys.maxsize, sys.maxsize) + + return seed, True + + +@enable_if.condition(hob.mirrored_view_enabled) +def mirrored_gen_random_seed(seed=None): + if seed is None: + seed = -1 + has_seed = False + else: + has_seed = True + + return seed, has_seed diff --git a/oneflow/compatible_single_client_python/ops/reduce_mean.py b/oneflow/compatible_single_client_python/ops/reduce_mean.py new file mode 100644 index 0000000000000000000000000000000000000000..d44107646b108e580c91a1d1366a38e98dd776b4 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/reduce_mean.py @@ -0,0 +1,93 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import collections +from typing import Optional, Union + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal + + +@oneflow_export("math.reduce_mean") +def reduce_mean( + input_blob: oneflow._oneflow_internal.BlobDesc, + axis: Optional[Union[collections.Sized, int]] = None, + keepdims: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator computes the mean of input Blob along the specified axis + + Args: + input_blob (oneflow._oneflow_internal.BlobDesc): A Blob + axis (Optional[Union[collections.Sized, int]], optional): The dimension along which the mean value is computed. Defaults to None. + keepdims (bool, optional): Whether to keep the reduced dimension in the output Blob. Defaults to False. + name (Optional[str], optional): The name for the operation. Defaults to None. 
+ + Returns: + oneflow._oneflow_internal.BlobDesc: The result of average on the specified axis of input Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reduce_mean_Job(x: tp.Numpy.Placeholder((3, 3)) + ) -> tp.Numpy: + return flow.math.reduce_mean(x, axis=1, keepdims=True) + + + x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(np.float32) + out = reduce_mean_Job(x) + + # out [[2.] + # [5.] + # [8.]] + + """ + reduce_sum = flow.math.reduce_sum( + input_blob, axis=axis, keepdims=keepdims, name=name + ) + if input_blob.is_dynamic: + reduce_count = flow.math.reduced_shape_elem_cnt( + input_blob, axis=axis, dtype=input_blob.dtype + ) + return reduce_sum / reduce_count + else: + if axis is None: + axes = [] + else: + axes = list(axis) if isinstance(axis, collections.Sized) else [axis] + reduce_count = 1 + if len(axes) == 0: + for dim in input_blob.shape: + reduce_count *= dim + else: + for i in axes: + reduce_count *= input_blob.shape[i] + return flow.math.multiply(reduce_sum, 1.0 / reduce_count) diff --git a/oneflow/compatible_single_client_python/ops/reduce_ops.py b/oneflow/compatible_single_client_python/ops/reduce_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..49204c4eb9f9046baf523bb2868098022b31817c --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/reduce_ops.py @@ -0,0 +1,627 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import os +from typing import Optional, Sequence, Sized, Union + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + interpret_util as interpret_util, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal + + +def _gen_unique_name_if_need(name, default_name): + if name is None: + return id_util.UniqueStr(default_name) + + assert isinstance(name, str), name + return name + + +def _check_axis(axis, shape): + if axis is None: + axis = list(range(len(shape))) + + if isinstance(axis, int): + axis = [axis] + + assert isinstance(axis, (list, tuple)), "Invalid axis {}".format(axis) + for x in axis: + if x < 0: + x += len(shape) + assert x >= 0 and x < len(shape), "Invalid axis {}, len(shape): {}".format( + axis, len(shape) + ) + + return axis + + +def _do_reduce(x, name, op_type_name, keepdims, axis): + op = ( + flow.user_op_builder(name) + .Op(op_type_name) + .Input("input_tensor", [x]) + .Output("output_tensor") + .Attr("axis", axis) + .Attr("keepdims", keepdims) + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() + + +@oneflow_export("math.reduce_sum") +def reduce_sum( + input_tensor: oneflow._oneflow_internal.BlobDesc, + axis: Optional[Union[int, Sequence[int]]] = None, + keepdims: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator computes the sum of elements across dimensions of a tensor + + Args: + input_tensor (oneflow._oneflow_internal.BlobDesc): A Blob + axis (Optional[Union[int, Sequence[int]]], optional): The dimension along which the sum value is computed. Defaults to None. + keepdims (bool, optional): Whether to keep the reduced dimension in the output Blob. Defaults to False. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result of sum on the specified axis of input Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reduce_sum_Job(x: tp.Numpy.Placeholder((3, 3)) + ) -> tp.Numpy: + return flow.math.reduce_sum(x, axis=1, keepdims=True) + + + x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]).astype(np.float32) + out = reduce_sum_Job(x) + + # out [[ 6.] + # [15.] 
+
+
+@oneflow_export("math.reduce_any")
+def reduce_any(
+    x: oneflow._oneflow_internal.BlobDesc,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims: bool = False,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the `logical or` of input Blob along the specified axis
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        axis (Optional[Union[int, Sequence[int]]], optional): The dimension along which the logical or value is computed. Defaults to None.
+        keepdims (bool, optional): Whether to keep the reduced dimension in the output Blob. Defaults to False.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result of logical or on the specified axis of input Blob
+
+    Note:
+
+        The input Blob dtype is int8
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def reduce_any_Job(x: tp.Numpy.Placeholder((3, 3), dtype=flow.int8)
+        ) -> tp.Numpy:
+            return flow.math.reduce_any(x, axis=1, keepdims=True)
+
+
+        x = np.array([[1, 0, 0], [0, 0, 0], [1, 0, 1]]).astype(np.int8)
+        out = reduce_any_Job(x)
+
+        # out [[1]
+        #      [0]
+        #      [1]]
+
+    """
+    name = _gen_unique_name_if_need(name, "ReduceAny_")
+    axis = _check_axis(axis, x.shape)
+    if len(axis) == 0:
+        return flow.math.not_equal(x, flow.constant_scalar(value=0.0, dtype=x.dtype))
+    return _do_reduce(x, name, "reduce_any", keepdims, axis)
+
+
+@oneflow_export("math.reduce_min")
+def reduce_min(
+    x: oneflow._oneflow_internal.BlobDesc,
+    axis: Optional[Union[int, Sequence[int]]] = None,
+    keepdims: bool = False,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator computes the minimum value of input Blob along the specified axis
+
+    Args:
+        x (oneflow._oneflow_internal.BlobDesc): A Blob
+        axis (Optional[Union[int, Sequence[int]]], optional): The dimension along which the minimum value is computed. Defaults to None.
+        keepdims (bool, optional): Whether to keep the reduced dimension in the output Blob. Defaults to False.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result of minimum value on the specified axis of input Blob
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def reduce_min_Job(x: tp.Numpy.Placeholder((3, 3))
+        ) -> tp.Numpy:
+            return flow.math.reduce_min(x, axis=1, keepdims=True)
+
+
+        x = np.array([[2, 1, 3], [5, 3, 6], [7, 4, 9]]).astype(np.float32)
+        out = reduce_min_Job(x)
+
+        # out [[1.]
+        #      [3.]
+ # [4.]] + + """ + name = _gen_unique_name_if_need(name, "ReduceMin_") + axis = _check_axis(axis, x.shape) + if len(axis) == 0: + return x + return _do_reduce(x, name, "reduce_min", keepdims, axis) + + +@oneflow_export("math.reduce_max") +def reduce_max( + x: oneflow._oneflow_internal.BlobDesc, + axis: Optional[Union[int, Sequence[int]]] = None, + keepdims: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator computes the maximum value of input Blob along the specified axis + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + axis (Optional[Union[int, Sequence[int]]], optional): The dimension along which the maximum value is computed. Defaults to None. + keepdims (bool, optional): Whether to keep the reduced dimension in the output Blob. Defaults to False. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result of maximum value on the specified axis of input Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reduce_max_Job(x: tp.Numpy.Placeholder((3, 3)) + ) -> tp.Numpy: + return flow.math.reduce_max(x, axis=1, keepdims=True) + + + x = np.array([[2, 1, 4], [5, 3, 7], [7, 4, 9]]).astype(np.float32) + out = reduce_max_Job(x) + + # out [[4.] + # [7.] + # [9.]] + + """ + name = _gen_unique_name_if_need(name, "ReduceMax_") + axis = _check_axis(axis, x.shape) + if len(axis) == 0: + return x + return _do_reduce(x, name, "reduce_max", keepdims, axis) + + +@oneflow_export("math.reduce_prod") +def reduce_prod( + x: oneflow._oneflow_internal.BlobDesc, + axis: Optional[Union[int, Sequence[int]]] = None, + keepdims: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator computes the product of input Blob along the specified axis + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + axis (Optional[Union[int, Sequence[int]]], optional): The dimension along which the product is computed. Defaults to None. + keepdims (bool, optional): Whether to keep the reduced dimension in the output Blob. Defaults to False. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result of product value on the specified axis of input Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reduce_product_Job(x: tp.Numpy.Placeholder((3, 3)) + ) -> tp.Numpy: + return flow.math.reduce_prod(x, axis=1, keepdims=True) + + + x = np.array([[1, 2, 3], [3, 4, 5], [6, 3, 2]]).astype(np.float32) + out = reduce_product_Job(x) + + # out [[ 6.] + # [60.] 
+ # [36.]] + + """ + name = _gen_unique_name_if_need(name, "ReduceProd_") + axis = _check_axis(axis, x.shape) + if len(axis) == 0: + return x + return _do_reduce(x, name, "reduce_prod", keepdims, axis) + + +@oneflow_export("math.reduce_all") +def reduce_all( + x: oneflow._oneflow_internal.BlobDesc, + axis: Optional[Union[int, Sequence[int]]] = None, + keepdims: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator computes the `logical and` of input Blob along the specified axis + + Args: + x (oneflow._oneflow_internal.BlobDesc): A Blob + axis (Optional[Union[int, Sequence[int]]], optional): The dimension along which the logical and value is computed. Defaults to None. + keepdims (bool, optional): Whether to keep the reduced dimension in the output Blob. Defaults to False. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result of logical and value on the specified axis of input Blob + + Note: + + The input Blob dtype is int8 + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reduce_all_Job(x: tp.Numpy.Placeholder((3, 3), dtype=flow.int8) + ) -> tp.Numpy: + return flow.math.reduce_all(x, axis=1, keepdims=True) + + + x = np.array([[1, 0, 0], [0, 0, 0], [1, 1, 1]]).astype(np.int8) + out = reduce_all_Job(x) + + # out [[0] + # [0] + # [1]] + + """ + name = _gen_unique_name_if_need(name, "ReduceAll_") + axis = _check_axis(axis, x.shape) + if len(axis) == 0: + return flow.math.not_equal(x, flow.constant_scalar(value=0.0, dtype=x.dtype)) + return _do_reduce(x, name, "reduce_all", keepdims, axis) + + +@oneflow_export("math.reduce_euclidean_norm") +def reduce_euclidean_norm( + input_tensor: oneflow._oneflow_internal.BlobDesc, + axis: Optional[Union[int, Sequence[int]]] = None, + keepdims: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the Euclidean norm of input Blob along the specified axis + + The equation is: + + .. math:: + + out=\sqrt{\sum_{t=0}^{n} x_{t}^2} + + Args: + input_tensor (oneflow._oneflow_internal.BlobDesc): A Blob + axis (Optional[Union[int, Sequence[int]]], optional): The dimension along which the Euclidean norm is computed. Defaults to None. + keepdims (bool, optional): Whether to keep the reduced dimension in the output Blob. Defaults to False. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result of Euclidean norm on the specified axis of input Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reduce_euclidean_norm_Job(x: tp.Numpy.Placeholder((3, 2)) + ) -> tp.Numpy: + return flow.math.reduce_euclidean_norm(x, axis=1, keepdims=True) + + + x = np.array([[3, 4], [5, 12], [8, 15]]).astype(np.float32) + out = reduce_euclidean_norm_Job(x) + + # out [[ 5.] + # [13.] 
+ # [17.]] + + """ + name = _gen_unique_name_if_need(name, "ReduceEuclideanNorm_") + return flow.math.sqrt( + flow.math.reduce_sum( + flow.math.square(input_tensor, name + "_square"), + axis, + keepdims, + name + "_reduce_sum", + ), + name + "_sqrt", + ) + + +@oneflow_export("math.reduce_logsumexp") +def reduce_logsumexp( + input_tensor: oneflow._oneflow_internal.BlobDesc, + axis: Optional[Union[int, Sequence[int]]] = None, + keepdims: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the log of exponential sum of input Blob along the specified axis + + + The equation is: + + .. math:: + + out = log(\sum_{t=0}^{t=n} e^{x_{t}}) + + Args: + input_tensor (oneflow._oneflow_internal.BlobDesc): A Blob + axis (Optional[Union[int, Sequence[int]]], optional): The dimension along which the log of exponential sum is computed. Defaults to None. + keepdims (bool, optional): Whether to keep the reduced dimension in the output Blob. Defaults to False. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result of log of exponential sum on the specified axis of input Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reduce_logsumexp_Job(x: tp.Numpy.Placeholder((3, 2)) + ) -> tp.Numpy: + return flow.math.reduce_logsumexp(x, axis=1, keepdims=True) + + + x = np.array([[0, 0], [1, 1], [2, 2]]).astype(np.float32) + out = reduce_logsumexp_Job(x) + + # out [[0.6931472] + # [1.6931472] + # [2.6931472]] + + """ + name = _gen_unique_name_if_need(name, "ReduceLogSumExp_") + axis = _check_axis(axis, input_tensor.shape) + return flow.math.log( + flow.math.reduce_sum( + flow.math.exp(input_tensor, name + "_exp"), + axis, + keepdims, + name + "_reduce_sum", + ), + name + "_log", + ) + + +@oneflow_export("math.reduce_std") +def reduce_std( + input_tensor: oneflow._oneflow_internal.BlobDesc, + axis: Optional[Union[int, Sequence[int]]] = None, + keepdims: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the standard deviation of input Blob along the specified axis + + The equation is: + + .. math:: + + out=\sqrt{\frac{1}{n}*\sum_{i=1}^{n}(x_i-mean)^2} + + Args: + input_tensor (oneflow._oneflow_internal.BlobDesc): A Blob + axis (Optional[Union[int, Sequence[int]]], optional): The dimension along which the standard deviation is computed. Defaults to None. + keepdims (bool, optional): Whether to keep the reduced dimension in the output Blob. Defaults to False. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result of standard deviation on the specified axis of input Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reduce_std_Job(x: tp.Numpy.Placeholder((3, 3)) + ) -> tp.Numpy: + return flow.math.reduce_std(x, axis=1, keepdims=True) + + + x = np.array([[0, 5, 10], [5, 5, 5], [12, 3, 0]]).astype(np.float32) + out = reduce_std_Job(x) + + # out [[4.0824833] + # [0. 
] + # [5.0990195]] + + """ + name = _gen_unique_name_if_need(name, "ReduceStd_") + axis = _check_axis(axis, input_tensor.shape) + if isinstance(axis, list) and len(axis) == 0: + return flow.zeros_like( + input_tensor, dtype=input_tensor.dtype, name=name + "_zeros_like" + ) + return flow.math.sqrt( + flow.math.reduce_variance( + input_tensor, axis, keepdims, name + "_reduce_variance" + ), + name + "_sqrt", + ) + + +@oneflow_export("math.reduce_variance") +def reduce_variance( + input_tensor: oneflow._oneflow_internal.BlobDesc, + axis: Optional[Union[int, Sequence[int]]] = None, + keepdims: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator computes the variance of input Blob along the specified axis + + The equation is: + + .. math:: + + out=\frac{1}{n}*\sum_{i=1}^{n}(x_i-mean)^2 + + Args: + input_tensor (oneflow._oneflow_internal.BlobDesc): A Blob + axis (Optional[Union[int, Sequence[int]]], optional): The dimension along which the variance is computed. Defaults to None. + keepdims (bool, optional): Whether to keep the reduced dimension in the output Blob. Defaults to False. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result of variance on the specified axis of input Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def reduce_variance_Job(x: tp.Numpy.Placeholder((3, 3)) + ) -> tp.Numpy: + return flow.math.reduce_variance(x, axis=1, keepdims=True) + + + x = np.array([[0, 5, 10], [5, 5, 5], [12, 3, 0]]).astype(np.float32) + out = reduce_variance_Job(x) + + # out [[16.666668] + # [ 0. ] + # [26. ]] + + """ + name = _gen_unique_name_if_need(name, "ReduceVariance_") + axis = _check_axis(axis, input_tensor.shape) + if isinstance(axis, list) and len(axis) == 0: + return flow.zeros_like( + input_tensor, dtype=input_tensor.dtype, name=name + "_zeros_like" + ) + return flow.math.subtract( + flow.math.reduce_mean( + flow.math.square(input_tensor, name + "_square_minuend"), + axis, + keepdims, + name + "_reduce_mean_minuend", + ), + flow.math.square( + flow.math.reduce_mean( + input_tensor, axis, keepdims, name + "_reduce_mean_subtrahend" + ), + name + "_square_subtrahend", + ), + name + "_subtract", + ) diff --git a/oneflow/compatible_single_client_python/ops/regularizer_util.py b/oneflow/compatible_single_client_python/ops/regularizer_util.py new file mode 100644 index 0000000000000000000000000000000000000000..fe36aed18ef74b184e4c3dbda6d0abb91e14b3ba --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/regularizer_util.py @@ -0,0 +1,158 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.job import regularizer_conf_pb2 as regularizer_conf_util +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + + +@oneflow_export("regularizers.l1_l2") +def l1_l2_regularizer( + l1: float = 0.01, l2: float = 0.01 +) -> regularizer_conf_util.RegularizerConf: + """This operator creates a L1 and L2 weight regularizer. + + Args: + l1 (float, optional): The L1 regularization coefficient. Defaults to 0.01. + l2 (float, optional): The L2 regularization coefficient. Defaults to 0.01. + + Returns: + regularizer_conf_util.RegularizerConf: A regularizer that can be used in other layers or operators. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def conv2d_l1_l2_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32)) + ) -> tp.Numpy: + initializer = flow.truncated_normal(0.1) + regularizer = flow.regularizers.l1_l2(l1=0.001, l2=0.001) + conv2d = flow.layers.conv2d( + x, + filters=128, + kernel_size=3, + strides=1, + padding='SAME', + kernel_initializer=initializer, + kernel_regularizer=regularizer, + name="Conv2d" + ) + return conv2d + + + x = np.random.randn(1, 256, 32, 32).astype(np.float32) + out = conv2d_l1_l2_Job(x) + + """ + regularizer = regularizer_conf_util.RegularizerConf() + setattr(regularizer.l1_l2_conf, "l1", l1) + setattr(regularizer.l1_l2_conf, "l2", l2) + return regularizer + + +@oneflow_export("regularizers.l1") +def l1_regularizer(l: float = 0.01) -> regularizer_conf_util.RegularizerConf: + """This operator creates a L1 weight regularizer. + + Args: + l (float, optional): The L1 regularization coefficient. Defaults to 0.01. + + Returns: + regularizer_conf_util.RegularizerConf: A regularizer that can be used in other layers or operators. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def conv2d_l1_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32)) + ) -> tp.Numpy: + initializer = flow.truncated_normal(0.1) + regularizer = flow.regularizers.l1(l=0.001) + conv2d = flow.layers.conv2d( + x, + filters=128, + kernel_size=3, + strides=1, + padding='SAME', + kernel_initializer=initializer, + kernel_regularizer=regularizer, + name="Conv2d" + ) + return conv2d + + + x = np.random.randn(1, 256, 32, 32).astype(np.float32) + out = conv2d_l1_Job(x) + + """ + return l1_l2_regularizer(l1=l, l2=0.0) + + +@oneflow_export("regularizers.l2") +def l2_regularizer(l: float = 0.01) -> regularizer_conf_util.RegularizerConf: + """This operator creates a L2 weight regularizer. + + Args: + l (float, optional): The L2 regularization coefficient. Defaults to 0.01. + + Returns: + regularizer_conf_util.RegularizerConf: A regularizer that can be used in other layers or operators. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def conv2d_l2_Job(x: tp.Numpy.Placeholder((1, 256, 32, 32)) + ) -> tp.Numpy: + initializer = flow.truncated_normal(0.1) + regularizer = flow.regularizers.l2(l=0.001) + conv2d = flow.layers.conv2d( + x, + filters=128, + kernel_size=3, + strides=1, + padding='SAME', + kernel_initializer=initializer, + kernel_regularizer=regularizer, + name="Conv2d" + ) + return conv2d + + + x = np.random.randn(1, 256, 32, 32).astype(np.float32) + out = conv2d_l2_Job(x) + + """ + return l1_l2_regularizer(l1=0.0, l2=l) diff --git a/oneflow/compatible_single_client_python/ops/sort_ops.py b/oneflow/compatible_single_client_python/ops/sort_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..654bc8bc591dcaf94a8071a02c25f9a444aa63ab --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/sort_ops.py @@ -0,0 +1,181 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from typing import Optional + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + stable_api, +) +from oneflow.compatible_single_client_python.ops.transpose_util import ( + get_perm_when_transpose_axis_to_last_dim, +) +from oneflow.compatible_single_client_python.ops.transpose_util import get_inversed_perm +import oneflow._oneflow_internal + + +def _sort_at_last_dim( + input: oneflow._oneflow_internal.BlobDesc, + direction: str = "ASCENDING", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + assert direction in ["ASCENDING", "DESCENDING"] + return ( + flow.user_op_builder(name if name is not None else id_util.UniqueStr("Sort_")) + .Op("sort") + .Input("in", [input]) + .Output("out") + .Attr("direction", direction) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("sort") +@stable_api +def sort( + input: oneflow._oneflow_internal.BlobDesc, + axis: int = -1, + direction: str = "ASCENDING", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator sorts the input Blob at specified axis. + + Args: + input (oneflow._oneflow_internal.BlobDesc): A Blob + axis (int, optional): dimension to be sorted. Defaults to the last dim (-1) + direction (str, optional): The direction in which to sort the Blob values. If the direction is "ASCENDING", The order of input will be sorted as ascending, else, the order of input will be sorted as descending. Defaults to "ASCENDING". + name (Optional[str], optional): The name for the operation. Defaults to None. 
+ + Returns: + oneflow._oneflow_internal.BlobDesc: The sorted Blob + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def sort_Job(x: tp.Numpy.Placeholder((5, )) + ) -> tp.Numpy: + return flow.sort(input=x) + + x = np.array([10, 2, 9, 3, 7]).astype("float32") + out = sort_Job(x) + + # out [ 2. 3. 7. 9. 10.] + + """ + assert direction in ["ASCENDING", "DESCENDING"] + name = name if name is not None else id_util.UniqueStr("Sort_") + num_axes = len(input.shape) + axis = axis if axis >= 0 else axis + num_axes + assert 0 <= axis < num_axes, "axis out of range" + if axis == num_axes - 1: + return _sort_at_last_dim(input, direction, name) + else: + perm = get_perm_when_transpose_axis_to_last_dim(num_axes, axis) + x = flow.transpose(input, perm, False, True, name + "_transpose") + x = _sort_at_last_dim(x, direction, name) + return flow.transpose( + x, get_inversed_perm(perm), False, True, name + "_inverse_transpose" + ) + + +def _argsort_at_last_dim( + input: oneflow._oneflow_internal.BlobDesc, + direction: str = "ASCENDING", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + assert direction in ["ASCENDING", "DESCENDING"] + return ( + flow.user_op_builder( + name if name is not None else id_util.UniqueStr("ArgSort_") + ) + .Op("arg_sort") + .Input("in", [input]) + .Output("out") + .Attr("direction", direction) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("argsort") +@stable_api +def argsort( + input: oneflow._oneflow_internal.BlobDesc, + axis: int = -1, + direction: str = "ASCENDING", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator sorts the input Blob at specified axis and return the indices of the sorted Blob. + + Args: + input (oneflow._oneflow_internal.BlobDesc): A Blob + axis (int, optional): dimension to be sorted. Defaults to the last dim (-1) + direction (str, optional): The direction in which to sort the Blob values. If the direction is "ASCENDING", The order of input will be sorted as ascending, else, the order of input will be sorted as descending. Defaults to "ASCENDING". + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The indices of the sorted Blob + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def argsort_Job(x: tp.Numpy.Placeholder((5, )) + ) -> tp.Numpy: + return flow.argsort(input=x) + + x = np.array([10, 2, 9, 3, 7]).astype("float32") + out = argsort_Job(x) + + # out [1 3 4 2 0] + + """ + assert direction in ["ASCENDING", "DESCENDING"] + name = name if name is not None else id_util.UniqueStr("ArgSort_") + num_axes = len(input.shape) + axis = axis if axis >= 0 else axis + num_axes + assert 0 <= axis < num_axes, "axis out of range" + if axis == num_axes - 1: + return _argsort_at_last_dim(input, direction, name) + else: + perm = get_perm_when_transpose_axis_to_last_dim(num_axes, axis) + x = flow.transpose(input, perm, False, True, name + "_transpose") + x = _argsort_at_last_dim(x, direction, name) + return flow.transpose( + x, get_inversed_perm(perm), False, True, name + "_inverse_transpose" + ) diff --git a/oneflow/compatible_single_client_python/ops/summary_ops.py b/oneflow/compatible_single_client_python/ops/summary_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..2c80183268bd7acc76ddd060204e7ada01d72a04 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/summary_ops.py @@ -0,0 +1,147 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.ops import ( + user_op_builder as user_op_builder, +) + +from oneflow.compatible import single_client as flow + + +@oneflow_export("summary.scalar") +def write_scalar(value, step, tag, name=None): + r"""Write scalar to log file + + Args: + value: A 'Blob' with 1 value and dtype in (flow.float, flow.double, flow.int64, flow.int32) + step: A 'Blob' with 1 value and dtype is 'flow.int64' + tag: A 'Blob' with 1 value and dtype is 'flow.int8' + name: This operator's name + """ + if name is None: + name = id_util.UniqueStr("WriteScalar_") + ( + flow.user_op_builder(name) + .Op("summary_write_scalar") + .Input("in", [value]) + .Input("step", [step]) + .Input("tag", [tag]) + .Build() + .InferAndTryRun() + ) + + +@oneflow_export("summary.create_summary_writer") +def create_summary_writer(logdir, name=None): + r"""Create a summary writer object + + Args: + logdir: log dir + name: This operator's name + """ + if name is None: + name = id_util.UniqueStr("CreateWriter_") + ( + flow.user_op_builder(name) + .Op("create_summary_writer") + .Attr("logdir", logdir) + .Build() + .InferAndTryRun() + ) + + +@oneflow_export("summary.flush_summary_writer") +def flush_summary_writer(name=None): + r"""Flush the summary writer + + Args: + name: This operator's name + """ + if name is None: + name = id_util.UniqueStr("FlushWriter_") + (flow.user_op_builder(name).Op("flush_summary_writer").Build().InferAndTryRun()) + + +@oneflow_export("summary.histogram") +def write_histogram(value, step, tag, name=None): + r"""Write histogram to log file + + Args: + value: A 'Blob' with dtype in (flow.float, flow.double, flow.int64, flow.int32, flow.int8, flow.uint8) + step: A 'Blob' with 1 value and dtype is 'flow.int64' + tag: A 'Blob' with 1 value and dtype is 'flow.int8' + name: This operator's name + """ + if name is None: + name = id_util.UniqueStr("WriteHistogram_") + ( + flow.user_op_builder(name) + .Op("summary_write_histogram") + .Input("in", [value]) + .Input("step", [step]) + .Input("tag", [tag]) + .Build() + .InferAndTryRun() + ) + + +@oneflow_export("summary.pb") +def write_pb(value, step=None, name=None): + r"""Write raw protobuf data to log file + + Args: + value: A 'Blob' with dtype in 'flow.int8' + step: A 'Blob' with 1 value and dtype is 'flow.int64' + name: This operator's name + """ + if name is None: + name = id_util.UniqueStr("WritePb_") + ( + flow.user_op_builder(name) + .Op("summary_write_pb") + .Input("in", [value]) + .Input("step", [step]) + .Build() + .InferAndTryRun() + ) + + +@oneflow_export("summary.image") +def write_image(value, step=None, tag=None, name=None): + r"""Write image to log file + + Args: + value: A 'Blob' with dtype in 'flow.uint8' + step: A 'Blob' with 1 value and dtype is 'flow.int64' + tag: A 'Blob' with 1 value and dtype is 'flow.int8' + name: This operator's name + """ + if name is None: + name = id_util.UniqueStr("WriteImage_") + if tag is None: + tag = "image" + ( + flow.user_op_builder(name) + .Op("summary_write_image") + .Input("in", [value]) + .Input("step", [step]) + .Input("tag", [tag]) + .Build() + .InferAndTryRun() + ) diff --git a/oneflow/compatible_single_client_python/ops/tensor_buffer_ops.py b/oneflow/compatible_single_client_python/ops/tensor_buffer_ops.py new file mode 100644 index 
0000000000000000000000000000000000000000..5839c7686628bad0376b35ac8428c995c95ea531 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/tensor_buffer_ops.py @@ -0,0 +1,254 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +import functools +import operator + +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.oneflow_export import ( + oneflow_export, + stable_api, +) +from typing import Optional, Sequence, List + + +@oneflow_export("tensor_buffer_to_tensor") +@stable_api +def tensor_buffer_to_tensor( + x: oneflow._oneflow_internal.BlobDesc, + dtype: flow.dtype, + instance_shape: Sequence[int], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator converts the Blob's type from TensorBuffer to Tensor. + Some operator's output data type is `TensorBuffer`, you can use this operator to convert back + to `Tensor`. + + Refer to `Concept Explanation <https://docs.oneflow.org/basics_topics/concept_explanation.html#3tensorbuffer-tensorlist>`_ + for more about TensorBuffer. + + + Args: + x (oneflow._oneflow_internal.BlobDesc): Input `Blob`. + dtype (flow.dtype): The data dtype. + instance_shape (Sequence[int]): The shape of each TensorBuffer instance. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: A `Blob`. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import numpy as np + import oneflow.compatible.single_client.typing as tp + + + @flow.global_function() + def tensor_buffer_to_tensor_Job(x: tp.Numpy.Placeholder(shape=(4, 16, 64, 64), dtype=flow.float32), + ) -> tp.Numpy: + x = flow.tensor_to_tensor_buffer(x, + instance_dims=2) + return flow.tensor_buffer_to_tensor(x, + instance_shape=(64, 64), + dtype=flow.float) + + x = np.random.randn(4, 16, 64, 64).astype(np.float32) + out = tensor_buffer_to_tensor_Job(x) + + # out.shape (4, 16, 64, 64) + + """ + if name is None: + name = id_util.UniqueStr("TensorBufferToTensor_") + return ( + flow.user_op_builder(name) + .Op("tensor_buffer_to_tensor") + .Input("in", [x]) + .Output("out") + .Attr("dtype", dtype) + .Attr("instance_shape", instance_shape) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("tensor_to_tensor_buffer") +@stable_api +def tensor_to_tensor_buffer( + x: oneflow._oneflow_internal.BlobDesc, + instance_dims: int, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator converts the Blob's type from Tensor to TensorBuffer. + + Refer to `Concept Explanation <https://docs.oneflow.org/basics_topics/concept_explanation.html#3tensorbuffer-tensorlist>`_ + for more about TensorBuffer. + + + Args: + x (oneflow._oneflow_internal.BlobDesc): Input `Blob`. 
+        instance_dims (int): The dimensions of dynamic tensor instance.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import numpy as np
+        import oneflow.compatible.single_client.typing as tp
+
+
+        @flow.global_function()
+        def tensor_buffer_to_tensor_Job(x: tp.Numpy.Placeholder(shape=(4, 16, 64, 64), dtype=flow.float32),
+        ) -> tp.Numpy:
+            x = flow.tensor_to_tensor_buffer(x,
+                                             instance_dims=2)
+            return flow.tensor_buffer_to_tensor(x,
+                                                instance_shape=(64, 64),
+                                                dtype=flow.float)
+
+        x = np.random.randn(4, 16, 64, 64).astype(np.float32)
+        out = tensor_buffer_to_tensor_Job(x)
+
+        # out.shape (4, 16, 64, 64)
+
+    """
+    if name is None:
+        name = id_util.UniqueStr("TensorToTensorBuffer_")
+    return (
+        flow.user_op_builder(name)
+        .Op("tensor_to_tensor_buffer")
+        .Input("in", [x])
+        .Output("out")
+        .Attr("instance_dims", instance_dims)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("gen_tensor_buffer")
+@stable_api
+def gen_tensor_buffer(
+    shape: Sequence[int],
+    shape_list: Sequence[Sequence[int]],
+    value_list: Sequence[float],
+    data_type: Optional[flow.dtype] = flow.float32,
+    dynamic_out: Optional[bool] = False,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    r"""This operator generates a tensor buffer blob.
+
+    Args:
+        shape (Sequence[int]): shape of output blob
+        shape_list (Sequence[Sequence[int]]): shapes for tensor buffer in output blob
+        value_list (Sequence[float]): values for tensor buffer in output blob
+        data_type (Optional[flow.dtype]): data type for tensor buffer in output blob
+        dynamic_out (Optional[bool]): if output is a dynamic blob
+        name (Optional[str]): The name for the operation. Defaults to None.
+
+    Returns:
+        BlobDesc: The result Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+
+        @flow.global_function(function_config=flow.FunctionConfig())
+        def GenTensorBufferJob():
+            with flow.scope.placement("cpu", "0:0"):
+                x = flow.gen_tensor_buffer((2,), [(2, 1), (1, 2)], [0.0, 1.0])
+                y = flow.tensor_buffer_to_list_of_tensors(x, (100, 100), flow.float, True)
+                return y
+
+        # y_0.shape (2, 1), y_1.shape (1, 2)
+
+    """
+    return (
+        flow.user_op_builder(
+            name if name is not None else id_util.UniqueStr("GenTensorBuffer_")
+        )
+        .Op("gen_tensor_buffer")
+        .Output("out")
+        .Attr("shape", shape)
+        .Attr("shape_list", shape_list)
+        .Attr("value_list", value_list)
+        .Attr("data_type", data_type)
+        .Attr("dynamic_out", dynamic_out)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("tensor_buffer_to_list_of_tensors")
+@stable_api
+def tensor_buffer_to_list_of_tensors(
+    x: oneflow._oneflow_internal.BlobDesc,
+    out_shape: Sequence[int],
+    out_dtype: flow.dtype,
+    dynamic_out: Optional[bool] = False,
+    name: Optional[str] = None,
+) -> List[oneflow._oneflow_internal.BlobDesc]:
+    r"""This operator converts a Blob of TensorBuffer to a list of Tensors. Every element in x will be converted
+    to a Tensor, and the output will be flattened into a list.
+
+    Args:
+        x (BlobDesc): Input `Blob`, data type must be tensor buffer.
+        out_shape (Sequence[int]): max shape for a tensor buffer in x
+        out_dtype (flow.dtype): output data type
+        dynamic_out (Optional[bool]): if output is a dynamic blob. Defaults to False.
+        name (Optional[str]): The name for the operation. Defaults to None.
+ + Returns: + List[BlobDesc]: result blobs + + For example: + + .. code-block:: python + + # the same with `gen_tensor_buffer` op + + """ + return ( + flow.user_op_builder( + name + if name is not None + else id_util.UniqueStr("TensorBufferToListOfTensors_") + ) + .Op("tensor_buffer_to_list_of_tensors") + .Input("in", [x]) + .Output("out", functools.reduce(operator.mul, x.shape, 1)) + .Attr("out_dtype", out_dtype) + .Attr("out_shape", out_shape) + .Attr("dynamic_out", dynamic_out) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) diff --git a/oneflow/python/test/ops/test_generator.py b/oneflow/compatible_single_client_python/ops/test_generator.py similarity index 100% rename from oneflow/python/test/ops/test_generator.py rename to oneflow/compatible_single_client_python/ops/test_generator.py diff --git a/oneflow/compatible_single_client_python/ops/transpose_util.py b/oneflow/compatible_single_client_python/ops/transpose_util.py new file mode 100644 index 0000000000000000000000000000000000000000..bed101cb50e67e664cfce00e4f5d3b98b401ee2d --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/transpose_util.py @@ -0,0 +1,40 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from typing import Sequence + + +def is_perm(perm: Sequence[int],) -> bool: + return list(range(len(perm))) == sorted(list(perm)) + + +# get the perm when you want to transpose specified axis to the last dimension +def get_perm_when_transpose_axis_to_last_dim(num_axes: int, axis: int,) -> tuple: + axis = axis if axis >= 0 else axis + num_axes + assert 0 <= axis < num_axes, "axis out of range" + perm = [dim if dim < axis else dim + 1 for dim in range(num_axes - 1)] + perm.append(axis) + return tuple(perm) + + +# x == transpose(transpose(x, perm), get_inversed_perm(perm)) +def get_inversed_perm(perm: Sequence[int],) -> tuple: + assert is_perm(perm) + inversed_perm = [-1] * len(perm) + for i in range(len(perm)): + inversed_perm[perm[i]] = i + return tuple(inversed_perm) diff --git a/oneflow/compatible_single_client_python/ops/two_stage_reduce.py b/oneflow/compatible_single_client_python/ops/two_stage_reduce.py new file mode 100644 index 0000000000000000000000000000000000000000..85940cf715ad4fc55a38c82594f99475968db41c --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/two_stage_reduce.py @@ -0,0 +1,174 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +from typing import Optional, Sequence, Union + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework import ( + distribute as distribute_util, +) +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.ops import ( + user_op_builder as user_op_builder, +) +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal + + +@oneflow_export("math.two_stage_reduce_max") +def api_two_stage_reduce_max( + x: oneflow._oneflow_internal.BlobDesc, + axis: Optional[Union[int, Sequence[int]]] = None, + keepdims: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + func = enable_if.unique([two_stage_reduce_max]) + return func(x, axis=axis, keepdims=keepdims, name=name) + + +@enable_if.condition(hob.in_global_mode) +def two_stage_reduce_max(x, axis=None, keepdims=False, name=None): + name = name if name is not None else id_util.UniqueStr("ReduceMax_") + return two_stage_reduce(x, axis, keepdims, "reduce_max", name) + + +@oneflow_export("math.two_stage_reduce_min") +def api_two_stage_reduce_min( + x: oneflow._oneflow_internal.BlobDesc, + axis: Optional[Union[int, Sequence[int]]] = None, + keepdims: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + func = enable_if.unique([two_stage_reduce_min]) + return func(x, axis=axis, keepdims=keepdims, name=name) + + +@enable_if.condition(hob.in_global_mode) +def two_stage_reduce_min(x, axis=None, keepdims=False, name=None): + name = name if name is not None else id_util.UniqueStr("ReduceMin_") + return two_stage_reduce(x, axis, keepdims, "reduce_min", name) + + +def two_stage_reduce(x, axis=None, keepdims=False, op_type_name=None, name=None): + + assert check_x_dictribute(x, axis) + axis = _check_axis(axis, x.shape) + + device_stage_out_list = [] + device_stage_count_list = [] + distribute_axis = x.distribute.axis + x_list = flow.advanced.distribute_split(x, axis=distribute_axis) + parallel_desc_symbol = flow.current_scope().device_parallel_desc_symbol + device_tag = parallel_desc_symbol.device_tag + parallel_id = 0 + for ( + machine_id, + device_ids, + ) in parallel_desc_symbol.machine_id2device_id_list.items(): + for device_id in device_ids: + with flow.scope.placement( + device_tag, "@" + str(machine_id) + ":" + str(device_id) + ): + device_stage_out, device_stage_count = reduce_device_stage( + x_list[parallel_id], + axis, + op_type_name + "_device_stage", + name + "_device_stage" + str(parallel_id), + ) + device_stage_out_list.append(device_stage_out) + device_stage_count_list.append(device_stage_count) + parallel_id += 1 + device_stage_out = flow.advanced.distribute_concat( + device_stage_out_list, axis=distribute_axis + ) + device_stage_count = flow.advanced.distribute_concat( + device_stage_count_list, axis=distribute_axis + ) + + device_stage_out = device_stage_out.with_distribute(flow.distribute.broadcast()) + device_stage_count = device_stage_count.with_distribute(flow.distribute.broadcast()) + + out = reduce_global_stage( + device_stage_out, + device_stage_count, + axis, + keepdims, + op_type_name + 
"_global_stage", + name + "_global_stage", + ) + return out + + +def reduce_device_stage(x, axis, op_name, name): + out, mask, count = ( + flow.user_op_builder(name) + .Op(op_name) + .Input("in", [x]) + .Output("out") + .Output("mask") + .Output("count") + .Attr("axis", axis) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + return out, count + + +def reduce_global_stage(x, device_count, axis, keepdims, op_name, name): + out, mask = ( + flow.user_op_builder(name) + .Op(op_name) + .Input("in", [x]) + .Input("device_count", [device_count]) + .Output("out") + .Output("mask") + .Attr("axis", axis) + .Attr("keepdims", keepdims) + .Build() + .InferAndTryRun() + .RemoteBlobList() + ) + return out + + +def _check_axis(axis, shape): + if axis is None: + axis = list(range(len(shape))) + + if isinstance(axis, int): + axis = [axis] + + assert isinstance(axis, (list, tuple)), "Invalid axis {}".format(axis) + for x in axis: + if x < 0: + x += len(shape) + assert x >= 0 and x < len(shape), "Invalid axis {}".format(axis) + + return axis + + +def check_x_dictribute(x, axis): + for i in axis: + if x.distribute is oneflow._oneflow_internal.distribute.split(i): + return True + return False diff --git a/oneflow/compatible_single_client_python/ops/user_data_ops.py b/oneflow/compatible_single_client_python/ops/user_data_ops.py new file mode 100644 index 0000000000000000000000000000000000000000..afcd548ccfed075f2d7afd24c37be07f5437df3b --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/user_data_ops.py @@ -0,0 +1,2475 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from __future__ import absolute_import + +from oneflow.compatible import single_client as flow +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import module as module_util +import oneflow._oneflow_internal +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from typing import Optional, Sequence, Union +import random +import sys +import traceback + + +@oneflow_export("data.OFRecordRawDecoder", "data.ofrecord_raw_decoder") +def OFRecordRawDecoder( + input_blob: oneflow._oneflow_internal.BlobDesc, + blob_name: str, + shape: Sequence[int], + dtype: flow.dtype, + dim1_varying_length: bool = False, + truncate: bool = False, + auto_zero_padding: bool = False, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + if auto_zero_padding: + print( + """WARNING: auto_zero_padding has been deprecated, Please use truncate instead. 
+ """ + ) + if name is None: + name = id_util.UniqueStr("OFRecordRawDecoder_") + return ( + flow.user_op_builder(name) + .Op("ofrecord_raw_decoder") + .Input("in", [input_blob]) + .Output("out") + .Attr("name", blob_name) + .Attr("shape", shape) + .Attr("data_type", dtype) + .Attr("dim1_varying_length", dim1_varying_length) + .Attr("truncate", truncate or auto_zero_padding) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("data.OFRecordBytesDecoder", "data.ofrecord_bytes_decoder") +def OFRecordBytesDecoder( + input_blob: oneflow._oneflow_internal.BlobDesc, + blob_name: str, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + if name is None: + name = id_util.UniqueStr("OFRecordBytesDecoder_") + return ( + flow.user_op_builder(name) + .Op("ofrecord_bytes_decoder") + .Input("in", [input_blob]) + .Output("out") + .Attr("name", blob_name) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export( + "data.OFRecordImageDecoderRandomCrop", "data.ofrecord_image_decoder_random_crop" +) +def api_ofrecord_image_decoder_random_crop( + input_blob: oneflow._oneflow_internal.BlobDesc, + blob_name: str, + color_space: str = "BGR", + num_attempts: int = 10, + seed: Optional[int] = None, + random_area: Sequence[float] = [0.08, 1.0], + random_aspect_ratio: Sequence[float] = [0.75, 1.333333], + name: str = "OFRecordImageDecoderRandomCrop", +) -> oneflow._oneflow_internal.BlobDesc: + """This operator is an image decoder with random crop. + + Args: + input_blob (oneflow._oneflow_internal.BlobDesc): The input Blob + blob_name (str): The name of the Blob + color_space (str, optional): The color space, such as "RGB", "BGR". Defaults to "BGR". + num_attempts (int, optional): The maximum number of random cropping attempts. Defaults to 10. + seed (Optional[int], optional): The random seed. Defaults to None. + random_area (Sequence[float], optional): The random cropping area. Defaults to [0.08, 1.0]. + random_aspect_ratio (Sequence[float], optional): The random scaled ratio. Defaults to [0.75, 1.333333]. + name (str, optional): The name for the operation. Defaults to "OFRecordImageDecoderRandomCrop". + + Returns: + oneflow._oneflow_internal.BlobDesc: The random cropped Blob + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + from typing import Tuple + + + @flow.global_function(type="predict") + def ofrecord_reader_job() -> Tuple[tp.Numpy, tp.Numpy]: + batch_size = 16 + color_space = "RGB" + # our ofrecord file path is "./dataset/part-0" + ofrecord = flow.data.ofrecord_reader( + "./imgdataset", + batch_size=batch_size, + data_part_num=1, + part_name_suffix_length=-1, + part_name_prefix='part-', + random_shuffle=True, + shuffle_after_epoch=True, + ) + image = flow.data.OFRecordImageDecoderRandomCrop( + ofrecord, "encoded", color_space=color_space + ) + res_image, scale, new_size = flow.image.Resize( + image, target_size=(224, 224) + ) + label = flow.data.OFRecordRawDecoder( + ofrecord, "class/label", shape=(1, ), dtype=flow.int32 + ) + + return res_image, label + + if __name__ == "__main__": + images, labels = ofrecord_reader_job() + # images.shape (16, 224, 224, 3) + + """ + assert isinstance(name, str) + if seed is not None: + assert name is not None + module = flow.find_or_create_module( + name, + lambda: OFRecordImageDecoderRandomCropModule( + blob_name=blob_name, + color_space=color_space, + num_attempts=num_attempts, + random_seed=seed, + random_area=random_area, + random_aspect_ratio=random_aspect_ratio, + name=name, + ), + ) + return module(input_blob) + + +class OFRecordImageDecoderRandomCropModule(module_util.Module): + def __init__( + self, + blob_name: str, + color_space: str, + num_attempts: int, + random_seed: Optional[int], + random_area: Sequence[float], + random_aspect_ratio: Sequence[float], + name: str, + ): + module_util.Module.__init__(self, name) + seed, has_seed = flow.random.gen_seed(random_seed) + self.op_module_builder = ( + flow.user_op_module_builder("ofrecord_image_decoder_random_crop") + .InputSize("in", 1) + .Output("out") + .Attr("name", blob_name) + .Attr("color_space", color_space) + .Attr("num_attempts", num_attempts) + .Attr("random_area", random_area) + .Attr("random_aspect_ratio", random_aspect_ratio) + .Attr("has_seed", has_seed) + .Attr("seed", seed) + .CheckAndComplete() + ) + self.op_module_builder.user_op_module.InitOpKernel() + + def forward(self, input: oneflow._oneflow_internal.BlobDesc): + if self.call_seq_no == 0: + name = self.module_name + else: + name = id_util.UniqueStr("OFRecordImageDecoderRandomCrop_") + + return ( + self.op_module_builder.OpName(name) + .Input("in", [input]) + .Build() + .InferAndTryRun() + .SoleOutputBlob() + ) + + +@oneflow_export("data.OFRecordImageDecoder", "data.ofrecord_image_decoder") +def OFRecordImageDecoder( + input_blob: oneflow._oneflow_internal.BlobDesc, + blob_name: str, + color_space: str = "BGR", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator is an image decoder. + + Args: + input_blob (oneflow._oneflow_internal.BlobDesc): The input Blob + blob_name (str): The name of the input Blob + color_space (str, optional): The color space, such as "RGB", "BGR". Defaults to "BGR". + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + from typing import Tuple + + + @flow.global_function(type="predict") + def image_decoder_job() -> Tuple[tp.Numpy, tp.Numpy]: + batch_size = 16 + color_space = "RGB" + # our ofrecord file path is "./dataset/part-0" + ofrecord = flow.data.ofrecord_reader( + "./imgdataset", + batch_size=batch_size, + data_part_num=1, + part_name_suffix_length=-1, + part_name_prefix='part-', + random_shuffle=True, + shuffle_after_epoch=True, + ) + image = flow.data.OFRecordImageDecoder( + ofrecord, "encoded", color_space=color_space + ) + res_image, scale, new_size = flow.image.Resize( + image, target_size=(224, 224) + ) + label = flow.data.OFRecordRawDecoder( + ofrecord, "class/label", shape=(1, ), dtype=flow.int32 + ) + + return res_image, label + + if __name__ == "__main__": + images, labels = image_decoder_job() + # image.shape (16, 224, 224, 3) + + """ + if name is None: + name = id_util.UniqueStr("OFRecordImageDecoder_") + return ( + flow.user_op_builder(name) + .Op("ofrecord_image_decoder") + .Input("in", [input_blob]) + .Output("out") + .Attr("name", blob_name) + .Attr("color_space", color_space) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("image.Resize", "image.resize", "image_resize") +def api_image_resize( + image: oneflow._oneflow_internal.BlobDesc, + target_size: Union[int, Sequence[int]] = None, + min_size: Optional[int] = None, + max_size: Optional[int] = None, + keep_aspect_ratio: bool = False, + resize_side: str = "shorter", + channels: int = 3, + dtype: Optional[flow.dtype] = None, + interpolation_type: str = "auto", + name: Optional[str] = None, + # deprecated params, reserve for backward compatible + color_space: Optional[str] = None, + interp_type: Optional[str] = None, + resize_shorter: int = 0, + resize_x: int = 0, + resize_y: int = 0, +) -> Union[ + oneflow._oneflow_internal.BlobDesc, Sequence[oneflow._oneflow_internal.BlobDesc] +]: + r"""Resize images to target size. + + Args: + image: A `Tensor` consists of images to be resized. + target_size: A list or tuple when `keep_aspect_ratio` is false or an int when `keep_aspect_ratio` is true. When `keep_aspect_ratio` is false, `target_size` has a form of `(target_width, target_height)` that image will resize to. When `keep_aspect_ratio` is true, the longer side or shorter side of the image will be resized to target size. + min_size: An int, optional. Only works when `keep_aspect_ratio` is true and `resize_side` is "longer". If `min_size` is not None, the shorter side must be greater than or equal to `min_size`. Default is None. + max_size: An int, optional. Only works when `keep_aspect_ratio` is true and `resize_side` is "shorter". If `max_size` is not None, the longer side must be less than or equal to `max_size`. Default is None. + keep_aspect_ratio: A bool. If is false, indicate that image will be resized to fixed width and height, otherwise image will be resized keeping aspect ratio. + resize_side: A str of "longer" or "shorter". Only works when `keep_aspect_ratio` is True. If `resize_side` is "longer", the longer side of image will be resized to `target_size`. If `resize_side` is "shorter", the shorter side of image will be resized to `target_size`. + channels: An int. how many channels an image has + dtype: `oneflow.compatible.single_client.dtype`. Indicate output resized image data type. 
+ interpolation_type: A str of "auto", "bilinear", "nearest_neighbor", "bicubic" or "area". Indicate interpolation method used to resize image. + name: A str, optional. Name for the operation. + color_space: Deprecated, a str of "RGB", "BGR" or "GRAY". Please use `channels` instead. + interp_type: Deprecated, s str of "Linear", "Cubic" or "NN". Please use `interpolation_type` instead. + resize_shorter: Deprecated, a int. Indicate target size that the shorter side of image will resize to. Please use `target_size` and `resize_side` instead. + resize_x: Deprecated, a int. Indicate the target size that the width of image will resize to. Please use `target_size` instead. + resize_y: Deprecated, a int. Indicate the target size that the height of image will resize to. Please use `target_size` instead. + + Returns: + Tuple of resized images `Blob`, width and height scales `Blob` and new width and height `Blob` + (new width and height `Blob` will be None when keep_aspect_ratio is false). + If deprecated params are used, a single resized images `Blob` will be returned. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + from typing import Tuple + + + @flow.global_function(type="predict") + def ofrecord_reader_job() -> Tuple[tp.Numpy, tp.Numpy]: + batch_size = 16 + color_space = "RGB" + # our ofrecord file path is "./dataset/part-0" + ofrecord = flow.data.ofrecord_reader( + "./imgdataset", + batch_size=batch_size, + data_part_num=1, + part_name_suffix_length=-1, + part_name_prefix='part-', + random_shuffle=True, + shuffle_after_epoch=True, + ) + image = flow.data.OFRecordImageDecoderRandomCrop( + ofrecord, "encoded", color_space=color_space + ) + res_image, scale, new_size = flow.image.Resize( + image, target_size=(224, 224) + ) + label = flow.data.OFRecordRawDecoder( + ofrecord, "class/label", shape=(1, ), dtype=flow.int32 + ) + + return res_image, label + + if __name__ == "__main__": + images, labels = ofrecord_reader_job() + # image.shape (16, 224, 224, 3) + + """ + # process deprecated params + deprecated_param_used = False + if color_space is not None: + print("WARNING: color_space has been deprecated. Please use channels instead.") + print(traceback.format_stack()[-2]) + deprecated_param_used = True + assert isinstance(color_space, str) + if color_space.upper() == "RGB" or color_space.upper() == "BGR": + channels = 3 + elif color_space.upper() == "GRAY": + channels = 1 + else: + raise ValueError("invalid color_space") + + if interp_type is not None: + print( + "WARNING: interp_type has been deprecated. Please use interpolation_type instead." + ) + print(traceback.format_stack()[-2]) + deprecated_param_used = True + assert isinstance(interp_type, str) + if interp_type == "Linear": + interpolation_type = "bilinear" + elif interp_type == "NN": + interpolation_type = "nearest_neighbor" + elif interp_type == "Cubic": + interpolation_type = "bicubic" + else: + raise ValueError("invalid interp_type") + + if resize_x > 0 and resize_y > 0: + print( + "WARNING: resize_x and resize_y has been deprecated. Please use target_size instead." + ) + print(traceback.format_stack()[-2]) + deprecated_param_used = True + target_size = (resize_x, resize_y) + keep_aspect_ratio = False + + if resize_shorter > 0: + print( + "WARNING: resize_shorter has been deprecated. Please use target_size instead." 
+ ) + print(traceback.format_stack()[-2]) + deprecated_param_used = True + target_size = resize_shorter + keep_aspect_ratio = True + resize_side = "shorter" + + if name is None: + name = id_util.UniqueStr("ImageResize_") + + if keep_aspect_ratio: + if not isinstance(target_size, int): + raise ValueError( + "target_size must be an int when keep_aspect_ratio is True" + ) + + if min_size is None: + min_size = 0 + + if max_size is None: + max_size = 0 + + if resize_side == "shorter": + resize_longer = False + elif resize_side == "longer": + resize_longer = True + else: + raise ValueError('resize_side must be "shorter" or "longer"') + + op = ( + flow.user_op_builder(name) + .Op("image_resize_keep_aspect_ratio") + .Input("in", [image]) + .Output("out") + .Output("size") + .Output("scale") + .Attr("target_size", target_size) + .Attr("min_size", min_size) + .Attr("max_size", max_size) + .Attr("resize_longer", resize_longer) + .Attr("interpolation_type", interpolation_type) + .Build() + ) + res_image, new_size, scale = op.InferAndTryRun().RemoteBlobList() + scale = flow.tensor_buffer_to_tensor( + scale, dtype=flow.float32, instance_shape=(2,) + ) + new_size = flow.tensor_buffer_to_tensor( + new_size, dtype=flow.int32, instance_shape=(2,) + ) + + else: + if ( + not isinstance(target_size, (list, tuple)) + or len(target_size) != 2 + or not all(isinstance(size, int) for size in target_size) + ): + raise ValueError( + "target_size must be a form like (width, height) when keep_aspect_ratio is False" + ) + + if dtype is None: + dtype = flow.uint8 + + target_w, target_h = target_size + op = ( + flow.user_op_builder(name) + .Op("image_resize_to_fixed") + .Input("in", [image]) + .Output("out") + .Output("scale") + .Attr("target_width", target_w) + .Attr("target_height", target_h) + .Attr("channels", channels) + .Attr("data_type", dtype) + .Attr("interpolation_type", interpolation_type) + .Build() + ) + res_image, scale = op.InferAndTryRun().RemoteBlobList() + new_size = None + + if deprecated_param_used: + return res_image + + return res_image, scale, new_size + + +@oneflow_export("image.target_resize", "image_target_resize") +def api_image_target_resize( + images: oneflow._oneflow_internal.BlobDesc, + target_size: int, + min_size: Optional[int] = None, + max_size: Optional[int] = None, + resize_side: str = "shorter", + interpolation_type: str = "auto", + name: Optional[str] = None, +) -> Sequence[oneflow._oneflow_internal.BlobDesc]: + """This operator resizes image to target size. + + Args: + images (oneflow._oneflow_internal.BlobDesc): The input Blob. Its type should be `kTensorBuffer`. More details please refer to the code example. + target_size (int): An int, the target size. + min_size (Optional[int], optional): If `min_size` is not None, the shorter side must be greater than or equal to `min_size`. Default is None. Defaults to None. + max_size (Optional[int], optional): If `max_size` is not None, the longer side must be less than or equal to `max_size`. Defaults to None. + resize_side (str, optional): A str of "longer" or "shorter". Only works when `keep_aspect_ratio` is True. If `resize_side` is "longer", the longer side of image will be resized to `target_size`. If `resize_side` is "shorter", the shorter side of image will be resized to `target_size`. Defaults to "shorter". + interpolation_type (str, optional): A str of "auto", "bilinear", "nearest_neighbor", "bicubic" or "area". Indicate interpolation method used to resize image. Defaults to "auto". 
+ name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + Sequence[oneflow._oneflow_internal.BlobDesc]: A Sequence includes the result Blob. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + from typing import Tuple + import numpy as np + import cv2 + + + def _read_images_by_cv(image_files): + images = [cv2.imread(image_file).astype(np.single) for image_file in image_files] + return [np.expand_dims(image, axis=0) for image in images] + + + def _get_images_static_shape(images): + image_shapes = [image.shape for image in images] + image_static_shape = np.amax(image_shapes, axis=0) + assert isinstance( + image_static_shape, np.ndarray + ), "image_shapes: {}, image_static_shape: {}".format( + str(image_shapes), str(image_static_shape) + ) + image_static_shape = image_static_shape.tolist() + assert image_static_shape[0] == 1, str(image_static_shape) + image_static_shape[0] = len(image_shapes) + return image_static_shape + + def _of_image_target_resize(images, image_static_shape, target_size, max_size): + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + func_config.default_logical_view(flow.scope.mirrored_view()) + + @flow.global_function(function_config=func_config) + def image_target_resize_job(images_def: tp.ListListNumpy.Placeholder(shape=image_static_shape, dtype=flow.float) + ) -> Tuple[tp.ListListNumpy, tp.ListNumpy, tp.ListNumpy]: + # The input Blob type should be "kTensorBuffer" + # So we use oneflow.compatible.single_client.tensor_list_to_tensor_buffer to convert + images_buffer = flow.tensor_list_to_tensor_buffer(images_def) + + resized_images_buffer, size, scale = flow.image_target_resize( + images_buffer, + target_size=target_size, + max_size=max_size, + resize_side="shorter", + ) + # We convert back to "tensorlist" type + resized_images = flow.tensor_buffer_to_tensor_list( + resized_images_buffer, + shape=(target_size, max_size, image_static_shape[-1]), + dtype=flow.float, + ) + return resized_images, size, scale + + resized_images, size, scale = image_target_resize_job([images]) + resized_image = resized_images[0] + size = size[0] + scale = scale[0] + + return resized_images, size, scale + + if __name__ == "__main__": + img = _read_images_by_cv(['./img/1.jpg']) + img_shape = _get_images_static_shape(img) # In example is [1, 349, 367, 3] + target_size = 256 + max_size = 512 + resized_images, size, scale = _of_image_target_resize(img, tuple(img_shape), target_size, max_size) + # Here the shorter side is "349", we resize it to target_size(256) + # The scale is 256 / 349 = 0.73 + # The longer side will be resized to 367 * scale = 269 + # get the first element from the resized_images (its type is `list.list`) + print(resized_images[0][0].shape) # (1, 256, 269, 3) + + """ + if name is None: + name = id_util.UniqueStr("ImageTargetResize_") + + res_image, scale, new_size = api_image_resize( + images, + target_size=target_size, + min_size=min_size, + max_size=max_size, + keep_aspect_ratio=True, + resize_side=resize_side, + interpolation_type=interpolation_type, + name=name, + ) + return res_image, new_size, scale + + +@oneflow_export("image.CropMirrorNormalize", "image.crop_mirror_normalize") +def CropMirrorNormalize( + input_blob: oneflow._oneflow_internal.BlobDesc, + mirror_blob: Optional[oneflow._oneflow_internal.BlobDesc] = None, + color_space: str = "BGR", + output_layout: str = "NCHW", + crop_h: int = 0, + crop_w: 
int = 0, + crop_pos_y: float = 0.5, + crop_pos_x: float = 0.5, + mean: Sequence[float] = [0.0], + std: Sequence[float] = [1.0], + output_dtype: flow.dtype = flow.float, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator performs the cropping, normalization, and horizontal flip for input Blob. + + If `crop_h` and `crop_w` are provided, the image cropping position is specified by "crop_pos_y" and "crop_pos_x". + + The position is computed as follows: + + .. math:: + + & crop_x = crop\_pos\_x*(Width-crop\_w) + + & crop_y = crop\_pos\_y*(Height-crop\_h) + + The `Width` and `Height` is the width and height of input Blob. + + Args: + input_blob (oneflow._oneflow_internal.BlobDesc): The input Blob. + mirror_blob (Optional[oneflow._oneflow_internal.BlobDesc], optional): The operation for horizontal flip, if it is `None`, the operator will not perform the horizontal flip. Defaults to None. + color_space (str, optional): The color space for input Blob. Defaults to "BGR". + output_layout (str, optional): The output format. Defaults to "NCHW". + crop_h (int, optional): The image cropping window height. Defaults to 0. + crop_w (int, optional): The image cropping window width. Defaults to 0. + crop_pos_y (float, optional): The vertical position of the image cropping window, the value range is normalized to (0.0, 1.0). Defaults to 0.5. + crop_pos_x (float, optional): The horizontal position of the image cropping window, the value range is normalized to (0.0, 1.0). Defaults to 0.5. + mean (Sequence[float], optional): The mean value for normalization. Defaults to [0.0]. + std (Sequence[float], optional): The standard deviation values for normalization. Defaults to [1.0]. + output_dtype (flow.dtype, optional): The datatype of output Blob. Defaults to flow.float. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Raises: + NotImplementedError: The data type of input Blob should be `tensor_buffer` or `uint8` + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + from typing import Tuple + + + @flow.global_function(type="predict") + def crop_mirror_job() -> Tuple[tp.Numpy, tp.Numpy]: + batch_size = 1 + color_space = "RGB" + # our ofrecord file path is "./dataset/part-0" + ofrecord = flow.data.ofrecord_reader( + "./imgdataset", + batch_size=batch_size, + data_part_num=1, + part_name_suffix_length=-1, + part_name_prefix='part-', + shuffle_after_epoch=True, + ) + image = flow.data.OFRecordImageDecoder( + ofrecord, "encoded", color_space=color_space + ) + res_image, scale, new_size = flow.image.Resize( + image, target_size=(512, 512) + ) + label = flow.data.OFRecordRawDecoder( + ofrecord, "class/label", shape=(1, ), dtype=flow.int32 + ) + rng = flow.random.CoinFlip(batch_size=batch_size) + normal = flow.image.CropMirrorNormalize( + res_image, + mirror_blob=rng, + color_space=color_space, + crop_h= 256, + crop_w= 256, + crop_pos_y=0.5, + crop_pos_x=0.5, + mean=[123.68, 116.779, 103.939], + std=[58.393, 57.12, 57.375], + output_dtype=flow.float, + ) + + return normal, label + + if __name__ == "__main__": + images, labels = crop_mirror_job() + # images.shape (1, 3, 256, 256) + + """ + if name is None: + name = id_util.UniqueStr("CropMirrorNormalize_") + op_type_name = "" + if input_blob.dtype is flow.tensor_buffer: + op_type_name = "crop_mirror_normalize_from_tensorbuffer" + elif input_blob.dtype is flow.uint8: + op_type_name = "crop_mirror_normalize_from_uint8" + else: + print( + "ERROR! oneflow.compatible.single_client.data.crop_mirror_normalize op", + " NOT support input data type : ", + input_blob.dtype, + ) + raise NotImplementedError + + op = flow.user_op_builder(name).Op(op_type_name).Input("in", [input_blob]) + if mirror_blob is not None: + op = op.Input("mirror", [mirror_blob]) + return ( + op.Output("out") + .Attr("color_space", color_space) + .Attr("output_layout", output_layout) + .Attr("mean", mean) + .Attr("std", std) + .Attr("crop_h", crop_h) + .Attr("crop_w", crop_w) + .Attr("crop_pos_y", crop_pos_y) + .Attr("crop_pos_x", crop_pos_x) + .Attr("output_dtype", output_dtype) + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + +@oneflow_export("image.random_crop", "image_random_crop") +def api_image_random_crop( + input_blob: oneflow._oneflow_internal.BlobDesc, + num_attempts: int = 10, + seed: Optional[int] = None, + random_area: Sequence[float] = None, + random_aspect_ratio: Sequence[float] = None, + name: str = "ImageRandomCrop", +) -> oneflow._oneflow_internal.BlobDesc: + """This operator crops the input image randomly. + + Args: + input_blob (oneflow._oneflow_internal.BlobDesc): The input Blob. + num_attempts (int, optional): The maximum number of random cropping attempts. Defaults to 10. + seed (Optional[int], optional): The random seed. Defaults to None. + random_area (Sequence[float], optional): The random cropping area. Defaults to None. + random_aspect_ratio (Sequence[float], optional): The random scaled ratio. Defaults to None. + name (str, optional): The name for the operation. Defaults to "ImageRandomCrop". + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + import cv2 + + + def _read_images_by_cv(image_files): + images = [cv2.imread(image_file).astype(np.single) for image_file in image_files] + return [np.expand_dims(image, axis=0) for image in images] + + + def _get_images_static_shape(images): + image_shapes = [image.shape for image in images] + image_static_shape = np.amax(image_shapes, axis=0) + assert isinstance( + image_static_shape, np.ndarray + ), "image_shapes: {}, image_static_shape: {}".format( + str(image_shapes), str(image_static_shape) + ) + image_static_shape = image_static_shape.tolist() + assert image_static_shape[0] == 1, str(image_static_shape) + image_static_shape[0] = len(image_shapes) + return image_static_shape + + def _of_image_random_crop(images, image_static_shape): + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + func_config.default_logical_view(flow.scope.mirrored_view()) + + @flow.global_function(function_config=func_config) + def image_random_crop_job(images_def: tp.ListListNumpy.Placeholder(shape=image_static_shape, dtype=flow.float) + ) -> tp.ListListNumpy: + # The input Blob type should be "kTensorBuffer" + # So we use oneflow.compatible.single_client.tensor_list_to_tensor_buffer to convert + images_buffer = flow.tensor_list_to_tensor_buffer(images_def) + # Do the random crop + random_crop_buffer = flow.image.random_crop( + images_buffer, + random_area=[0.15, 0.80], + random_aspect_ratio=[0.75, 1.55], + ) + # We convert back to "tensorlist" type + random_crop_images = flow.tensor_buffer_to_tensor_list( + random_crop_buffer, + shape=(image_static_shape[1], image_static_shape[2], image_static_shape[-1]), + dtype=flow.float, + ) + return random_crop_images + + random_crop_images = image_random_crop_job([images]) + + return random_crop_images + + if __name__ == "__main__": + img = _read_images_by_cv(['./img/1.jpg']) + img_shape = _get_images_static_shape(img) # In example is (1, 234, 346, 3) + random_crop_images = _of_image_random_crop(img, tuple(img_shape)) + # random_crop_images.shape is (234, 346, 3) + + """ + assert isinstance(name, str) + if seed is not None: + assert name is not None + if random_area is None: + random_area = [0.08, 1.0] + if random_aspect_ratio is None: + random_aspect_ratio = [0.75, 1.333333] + module = flow.find_or_create_module( + name, + lambda: ImageRandomCropModule( + num_attempts=num_attempts, + random_seed=seed, + random_area=random_area, + random_aspect_ratio=random_aspect_ratio, + name=name, + ), + ) + return module(input_blob) + + +class ImageRandomCropModule(module_util.Module): + def __init__( + self, + num_attempts: int, + random_seed: Optional[int], + random_area: Sequence[float], + random_aspect_ratio: Sequence[float], + name: str, + ): + module_util.Module.__init__(self, name) + seed, has_seed = flow.random.gen_seed(random_seed) + self.op_module_builder = ( + flow.user_op_module_builder("image_random_crop") + .InputSize("in", 1) + .Output("out") + .Attr("num_attempts", num_attempts) + .Attr("random_area", random_area) + .Attr("random_aspect_ratio", random_aspect_ratio) + .Attr("has_seed", has_seed) + .Attr("seed", seed) + .CheckAndComplete() + ) + self.op_module_builder.user_op_module.InitOpKernel() + + def forward(self, input: oneflow._oneflow_internal.BlobDesc): + if self.call_seq_no == 0: + name = self.module_name + else: + name = id_util.UniqueStr("ImageRandomCrop_") + + return ( + 
self.op_module_builder.OpName(name) + .Input("in", [input]) + .Build() + .InferAndTryRun() + .SoleOutputBlob() + ) + + +@oneflow_export("random.CoinFlip", "random.coin_flip") +def api_coin_flip( + batch_size: int = 1, + seed: Optional[int] = None, + probability: float = 0.5, + name: str = "CoinFlip", +) -> oneflow._oneflow_internal.BlobDesc: + """This operator performs the horizontal flip. + + Args: + batch_size (int, optional): The batch size. Defaults to 1. + seed (Optional[int], optional): The random seed. Defaults to None. + probability (float, optional): The flip probability. Defaults to 0.5. + name (str, optional): The name for the operation. Defaults to "CoinFlip". + + Returns: + oneflow._oneflow_internal.BlobDesc: [description] + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + from typing import Tuple + + + @flow.global_function(type="predict") + def coin_flip_job() -> Tuple[tp.Numpy, tp.Numpy]: + batch_size = 1 + color_space = "RGB" + # our ofrecord file path is "./dataset/part-0" + ofrecord = flow.data.ofrecord_reader( + "./imgdataset", + batch_size=batch_size, + data_part_num=1, + part_name_suffix_length=-1, + part_name_prefix='part-', + shuffle_after_epoch=True, + ) + image = flow.data.OFRecordImageDecoder( + ofrecord, "encoded", color_space=color_space + ) + res_image, scale, new_size = flow.image.Resize( + image, target_size=(512, 512) + ) + label = flow.data.OFRecordRawDecoder( + ofrecord, "class/label", shape=(1, ), dtype=flow.int32 + ) + coin_flip = flow.random.CoinFlip( + batch_size=batch_size, + probability=0.8 + ) + normal = flow.image.CropMirrorNormalize( + res_image, + mirror_blob=coin_flip, + color_space=color_space, + crop_h= 256, + crop_w= 256, + crop_pos_y=0.5, + crop_pos_x=0.5, + mean=[123.68, 116.779, 103.939], + std=[58.393, 57.12, 57.375], + output_dtype=flow.float, + ) + + return normal, label + + if __name__ == "__main__": + images, labels = coin_flip_job() + + """ + assert isinstance(name, str) + if seed is not None: + assert name is not None + module = flow.find_or_create_module( + name, + lambda: CoinFlipModule( + batch_size=batch_size, probability=probability, random_seed=seed, name=name, + ), + ) + return module() + + +class CoinFlipModule(module_util.Module): + def __init__( + self, + batch_size: str, + probability: float, + random_seed: Optional[int], + name: str, + ): + module_util.Module.__init__(self, name) + seed, has_seed = flow.random.gen_seed(random_seed) + self.op_module_builder = ( + flow.user_op_module_builder("coin_flip") + .Output("out") + .Attr("batch_size", batch_size) + .Attr("probability", probability) + .Attr("has_seed", has_seed) + .Attr("seed", seed) + .CheckAndComplete() + ) + self.op_module_builder.user_op_module.InitOpKernel() + + def forward(self): + if self.call_seq_no == 0: + name = self.module_name + else: + name = id_util.UniqueStr("CoinFlip_") + + return ( + self.op_module_builder.OpName(name) + .Build() + .InferAndTryRun() + .SoleOutputBlob() + ) + + +@oneflow_export("image.decode", "image_decode") +def image_decode( + images_bytes_buffer: oneflow._oneflow_internal.BlobDesc, + dtype: flow.dtype = flow.uint8, + color_space: str = "BGR", + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator decode the image. + + Args: + images_bytes_buffer (oneflow._oneflow_internal.BlobDesc): The input Blob. Its type should be `kTensorBuffer`. More details please refer to the code example. 
+ dtype (flow.dtype, optional): The data type. Defaults to flow.uint8. + color_space (str, optional): The color space. Defaults to "BGR". + name (Optional[str], optional): The name for the opreation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The decoded image list. + + For example: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + from PIL import Image + + + def _of_image_decode(images): + image_files = [open(im, "rb") for im in images] + images_bytes = [imf.read() for imf in image_files] + static_shape = (len(images_bytes), max([len(bys) for bys in images_bytes])) + for imf in image_files: + imf.close() + + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + func_config.default_logical_view(flow.scope.mirrored_view()) + + @flow.global_function(function_config=func_config) + def image_decode_job( + images_def: tp.ListListNumpy.Placeholder(shape=static_shape, dtype=flow.int8) + )->tp.ListListNumpy: + # convert to tensor buffer + images_buffer = flow.tensor_list_to_tensor_buffer(images_def) + decoded_images_buffer = flow.image_decode(images_buffer) + # Remember to set a shape + # convert back to tensor list + return flow.tensor_buffer_to_tensor_list( + decoded_images_buffer, shape=(640, 640, 3), dtype=flow.uint8 + ) + + images_np_arr = [ + np.frombuffer(bys, dtype=np.byte).reshape(1, -1) for bys in images_bytes + ] + decoded_images = image_decode_job([images_np_arr]) + return decoded_images[0] + + + if __name__ == "__main__": + img = _of_image_decode(['./img/1.jpg']) + print(img[0].shape) # Our image shape is (1, 349, 367, 3) + + """ + # TODO: check color_space valiad + if name is None: + name = id_util.UniqueStr("ImageDecode_") + + op = ( + flow.user_op_builder(name) + .Op("image_decode") + .Input("in", [images_bytes_buffer]) + .Output("out") + .Attr("color_space", color_space) + .Attr("data_type", dtype) + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() + + +@oneflow_export("image.batch_align", "image_batch_align") +def image_batch_align( + images: oneflow._oneflow_internal.BlobDesc, + shape: Sequence[int], + dtype: flow.dtype, + alignment: int, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + r"""This operator aligns the shape for a batch of images. + + The aligned shape is computed as: + + .. math:: + + & shape_{width} = int(\frac{(shape_{width}+alignment-1)}{alignment})*alignment + + & shape_{height} = int(\frac{(shape_{height}+alignment-1)}{alignment})*alignment + + Args: + images (oneflow._oneflow_internal.BlobDesc): The images. + shape (Sequence[int]): The maximum static shape of input images. + dtype (flow.dtype): The data type. + alignment (int): The align factor. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. 
code-block:: python + + import cv2 + import numpy as np + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def _of_image_batch_align(images, input_shape, output_shape, alignment): + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + func_config.default_logical_view(flow.scope.mirrored_view()) + + @flow.global_function(function_config=func_config) + def image_batch_align_job( + images_def: tp.ListListNumpy.Placeholder(shape=input_shape, dtype=flow.float) + ) -> tp.ListNumpy: + # Convert to tensor buffer + images_buffer = flow.tensor_list_to_tensor_buffer(images_def) + image = flow.image_batch_align( + images_buffer, shape=output_shape[1:], dtype=flow.float, alignment=alignment + ) + return image + + image = image_batch_align_job([images]) + return image[0] + + + def _read_images_by_cv(image_files): + images = [cv2.imread(image_file).astype(np.single) for image_file in image_files] + return [np.expand_dims(image, axis=0) for image in images] + + + def _get_images_static_shape(images): + image_shapes = [image.shape for image in images] + image_static_shape = np.amax(image_shapes, axis=0) + assert isinstance( + image_static_shape, np.ndarray + ), "image_shapes: {}, image_static_shape: {}".format( + str(image_shapes), str(image_static_shape) + ) + image_static_shape = image_static_shape.tolist() + assert image_static_shape[0] == 1, str(image_static_shape) + image_static_shape[0] = len(image_shapes) + return image_static_shape + + def _roundup(x, n): + # compute the aligned shape + return int((x + n - 1) / n) * n + + if __name__ == "__main__": + img = _read_images_by_cv(['./img/1.jpg', './img/2.jpg', './img/3.jpg']) + img_shape = _get_images_static_shape(img) # In example is [3, 349, 367, 3] + alignment = 16 # alignment factor + aligned_image_shape = [ + img_shape[0], + _roundup(img_shape[1], alignment), + _roundup(img_shape[2], alignment), + img_shape[3], + ] + image = _of_image_batch_align(img, tuple(img_shape), aligned_image_shape, alignment) + + """ + if name is None: + name = id_util.UniqueStr("ImageBatchAlign_") + + op = ( + flow.user_op_builder(name) + .Op("image_batch_align") + .Input("in", [images]) + .Output("out") + .Attr("shape", shape) + .Attr("data_type", dtype) + .Attr("alignment", alignment) + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() + + +@oneflow_export("image.normalize", "image_normalize") +def image_normalize( + image: oneflow._oneflow_internal.BlobDesc, + std: Sequence[float], + mean: Sequence[float], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator normalizes the image. + + Args: + image (oneflow._oneflow_internal.BlobDesc): The input image. + std (Sequence[float]): The standard deviation of the images. + mean (Sequence[float]): The mean value of the images. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. 
code-block:: python + + import cv2 + import numpy as np + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def _of_image_normalize(images, image_shape, std, mean): + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + func_config.default_logical_view(flow.scope.mirrored_view()) + + @flow.global_function(function_config=func_config) + def image_normalize_job( + images_def: tp.ListListNumpy.Placeholder(shape=image_shape, dtype=flow.float) + ) -> tp.ListListNumpy: + # Convert to tensor buffer + images_buffer = flow.tensor_list_to_tensor_buffer(images_def) + # Normalize the imagess + norm_images = flow.image_normalize(images_buffer, std, mean) + # Convert back to tensor list + return flow.tensor_buffer_to_tensor_list( + norm_images, shape=image_shape[1:], dtype=flow.float + ) + + image_tensor = image_normalize_job([images]) + return image_tensor[0] + + + def _read_images_by_cv(image_files): + images = [cv2.imread(image_file).astype(np.single) for image_file in image_files] + return [np.expand_dims(image, axis=0) for image in images] + + + def _get_images_static_shape(images): + image_shapes = [image.shape for image in images] + image_static_shape = np.amax(image_shapes, axis=0) + assert isinstance( + image_static_shape, np.ndarray + ), "image_shapes: {}, image_static_shape: {}".format( + str(image_shapes), str(image_static_shape) + ) + image_static_shape = image_static_shape.tolist() + assert image_static_shape[0] == 1, str(image_static_shape) + image_static_shape[0] = len(image_shapes) + return image_static_shape + + if __name__ == "__main__": + img = _read_images_by_cv(['./img/1.jpg', './img/2.jpg', './img/3.jpg']) + img_shape = _get_images_static_shape(img) # In example is [3, 349, 367, 3] + image = _of_image_normalize(img, + tuple(img_shape), + std=(102.9801, 115.9465, 122.7717), + mean=(1.0, 1.0, 1.0)) + + """ + if name is None: + name = id_util.UniqueStr("ImageNormalize_") + + assert isinstance(std, (list, tuple)) + assert isinstance(mean, (list, tuple)) + + op = ( + flow.user_op_builder(name) + .Op("image_normalize") + .Input("in", [image]) + .Output("out") + .Attr("std", std) + .Attr("mean", mean) + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() + + +@oneflow_export("image.flip", "image_flip") +def image_flip( + image: oneflow._oneflow_internal.BlobDesc, + flip_code: Union[int, oneflow._oneflow_internal.BlobDesc], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator flips the images. + + The flip code corresponds to the different flip mode: + + 0 (0x00): Non Flip + + 1 (0x01): Horizontal Flip + + 16 (0x10): Vertical Flip + + 17 (0x11): Both Horizontal and Vertical Flip + + Args: + image (oneflow._oneflow_internal.BlobDesc): The input images. + flip_code (Union[int, oneflow._oneflow_internal.BlobDesc]): The flip code. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. 
code-block:: python + + import cv2 + import numpy as np + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def _of_image_flip(images, image_shape, flip_code): + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + func_config.default_logical_view(flow.scope.mirrored_view()) + + @flow.global_function(function_config=func_config) + def image_flip_job( + images_def: tp.ListListNumpy.Placeholder(shape=image_shape, dtype=flow.float) + ) -> tp.ListListNumpy: + images_buffer = flow.tensor_list_to_tensor_buffer(images_def) + flip_images = flow.image_flip(images_buffer, flip_code) + return flow.tensor_buffer_to_tensor_list( + flip_images, shape=image_shape[1:], dtype=flow.float + ) + + image_tensor = image_flip_job([images]) + return image_tensor[0] + + + def _read_images_by_cv(image_files): + images = [cv2.imread(image_file).astype(np.single) for image_file in image_files] + return [np.expand_dims(image, axis=0) for image in images] + + + def _get_images_static_shape(images): + image_shapes = [image.shape for image in images] + image_static_shape = np.amax(image_shapes, axis=0) + assert isinstance( + image_static_shape, np.ndarray + ), "image_shapes: {}, image_static_shape: {}".format( + str(image_shapes), str(image_static_shape) + ) + image_static_shape = image_static_shape.tolist() + assert image_static_shape[0] == 1, str(image_static_shape) + image_static_shape[0] = len(image_shapes) + return image_static_shape + + if __name__ == "__main__": + img = _read_images_by_cv(['./img/1.jpg', './img/2.jpg', './img/3.jpg']) + img_shape = _get_images_static_shape(img) # In example is [3, 349, 367, 3] + image = _of_image_flip(img, + tuple(img_shape), + flip_code=1) + + """ + assert isinstance(image, oneflow._oneflow_internal.BlobDesc) + + if name is None: + name = id_util.UniqueStr("ImageFlip_") + + if not isinstance(flip_code, oneflow._oneflow_internal.BlobDesc): + assert isinstance(flip_code, int) + flip_code = flow.constant( + flip_code, + shape=(image.shape[0],), + dtype=flow.int8, + name="{}_FlipCode_".format(name), + ) + else: + assert image.shape[0] == flip_code.shape[0] + + op = ( + flow.user_op_builder(name) + .Op("image_flip") + .Input("in", [image]) + .Input("flip_code", [flip_code]) + .Output("out") + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() + + +@oneflow_export("detection.object_bbox_flip", "object_bbox_flip") +def object_bbox_flip( + bbox: oneflow._oneflow_internal.BlobDesc, + image_size: oneflow._oneflow_internal.BlobDesc, + flip_code: Union[int, oneflow._oneflow_internal.BlobDesc], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator flips the object bounding box. + + The flip code corresponds to the different flip mode: + + 0 (0x00): Non Flip + + 1 (0x01): Horizontal Flip + + 16 (0x10): Vertical Flip + + 17 (0x11): Both Horizontal and Vertical Flip + + Args: + bbox (oneflow._oneflow_internal.BlobDesc): The bounding box. + image_size (oneflow._oneflow_internal.BlobDesc): The size of input image. + flip_code (Union[int, oneflow._oneflow_internal.BlobDesc]): The flip code. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. 
code-block:: python + + import numpy as np + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def _of_object_bbox_flip(bbox_list, image_size, flip_code): + bbox_shape = _get_bbox_static_shape(bbox_list) + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + func_config.default_logical_view(flow.scope.mirrored_view()) + + @flow.global_function(function_config=func_config) + def object_bbox_flip_job( + bbox_def: tp.ListListNumpy.Placeholder( + shape=tuple(bbox_shape), dtype=flow.float + ), + image_size_def: tp.ListNumpy.Placeholder( + shape=image_size.shape, dtype=flow.int32 + ), + ) -> tp.ListListNumpy: + bbox_buffer = flow.tensor_list_to_tensor_buffer(bbox_def) + flip_bbox = flow.object_bbox_flip(bbox_buffer, image_size_def, flip_code) + return flow.tensor_buffer_to_tensor_list( + flip_bbox, shape=bbox_shape[1:], dtype=flow.float + ) + + input_bbox_list = [np.expand_dims(bbox, axis=0) for bbox in bbox_list] + bbox_tensor = object_bbox_flip_job([input_bbox_list], [image_size]) + return bbox_tensor[0] + + + def _get_bbox_static_shape(bbox_list): + bbox_shapes = [bbox.shape for bbox in bbox_list] + bbox_static_shape = np.amax(bbox_shapes, axis=0) + assert isinstance( + bbox_static_shape, np.ndarray + ), "bbox_shapes: {}, bbox_static_shape: {}".format( + str(bbox_shapes), str(bbox_static_shape) + ) + bbox_static_shape = bbox_static_shape.tolist() + bbox_static_shape.insert(0, len(bbox_list)) + return bbox_static_shape + + if __name__ == "__main__": + bbox = np.array([[[20.0, 40.0, 80.0, 160.0], + [30.0, 50.0, 70.0, 100.0]]]).astype(np.single) # [x1, y1, x2, y2] + image_size = np.array([[480, 620]]).astype(np.int32) + bbox_flip = _of_object_bbox_flip(bbox, + image_size, + flip_code=1) # Horizontal Flip + print(bbox_flip[0][0]) + + # [[399. 40. 459. 160.] + # [409. 50. 449. 100.]] + """ + assert isinstance(bbox, oneflow._oneflow_internal.BlobDesc) + assert isinstance(image_size, oneflow._oneflow_internal.BlobDesc) + assert bbox.shape[0] == image_size.shape[0] + + if name is None: + name = id_util.UniqueStr("ObjectBboxFlip_") + + if not isinstance(flip_code, oneflow._oneflow_internal.BlobDesc): + assert isinstance(flip_code, int) + flip_code = flow.constant( + flip_code, + shape=(bbox.shape[0],), + dtype=flow.int8, + name="{}_FlipCode".format(name), + ) + else: + assert bbox.shape[0] == flip_code.shape[0] + + op = ( + flow.user_op_builder(name) + .Op("object_bbox_flip") + .Input("bbox", [bbox]) + .Input("image_size", [image_size]) + .Input("flip_code", [flip_code]) + .Output("out") + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() + + +@oneflow_export("detection.object_bbox_scale", "object_bbox_scale") +def object_bbox_scale( + bbox: oneflow._oneflow_internal.BlobDesc, + scale: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator scales the input image and the corresponding bounding box. It returns the scaled bounding box. + + Args: + bbox (oneflow._oneflow_internal.BlobDesc): The bounding box. + scale (oneflow._oneflow_internal.BlobDesc): The scale factor. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + .. 
code-block:: python + + import numpy as np + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import cv2 + from typing import Tuple + + + def _read_images_by_cv(image_files): + images = [cv2.imread(image_file).astype(np.single) for image_file in image_files] + return images + + + def _get_images_static_shape(images): + image_shapes = [image.shape for image in images] + image_static_shape = np.amax(image_shapes, axis=0) + assert isinstance( + image_static_shape, np.ndarray + ), "image_shapes: {}, image_static_shape: {}".format( + str(image_shapes), str(image_static_shape) + ) + image_static_shape = image_static_shape.tolist() + image_static_shape.insert(0, len(image_shapes)) + return image_static_shape + + + def _get_bbox_static_shape(bbox_list): + bbox_shapes = [bbox.shape for bbox in bbox_list] + bbox_static_shape = np.amax(bbox_shapes, axis=0) + assert isinstance( + bbox_static_shape, np.ndarray + ), "bbox_shapes: {}, bbox_static_shape: {}".format( + str(bbox_shapes), str(bbox_static_shape) + ) + bbox_static_shape = bbox_static_shape.tolist() + bbox_static_shape.insert(0, len(bbox_list)) + return bbox_static_shape + + + def _of_target_resize_bbox_scale(images, bbox_list, target_size, max_size): + image_shape = _get_images_static_shape(images) + bbox_shape = _get_bbox_static_shape(bbox_list) + + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + func_config.default_logical_view(flow.scope.mirrored_view()) + + @flow.global_function(function_config=func_config) + def target_resize_bbox_scale_job( + image_def: tp.ListListNumpy.Placeholder( + shape=tuple(image_shape), dtype=flow.float + ), + bbox_def: tp.ListListNumpy.Placeholder( + shape=tuple(bbox_shape), dtype=flow.float + ), + ) -> Tuple[tp.ListListNumpy, tp.ListNumpy]: + images_buffer = flow.tensor_list_to_tensor_buffer(image_def) + resized_images_buffer, new_size, scale = flow.image_target_resize( + images_buffer, target_size=target_size, max_size=max_size + ) + bbox_buffer = flow.tensor_list_to_tensor_buffer(bbox_def) + scaled_bbox = flow.object_bbox_scale(bbox_buffer, scale) + scaled_bbox_list = flow.tensor_buffer_to_tensor_list( + scaled_bbox, shape=bbox_shape[1:], dtype=flow.float + ) + return scaled_bbox_list, new_size + + input_image_list = [np.expand_dims(image, axis=0) for image in images] + input_bbox_list = [np.expand_dims(bbox, axis=0) for bbox in bbox_list] + output_bbox_list, output_image_size = target_resize_bbox_scale_job( + [input_image_list], [input_bbox_list] + ) + return output_bbox_list[0], output_image_size[0] + + + if __name__ == "__main__": + images = _read_images_by_cv(['./img/1.jpg', './img/2.jpg']) + bbox = np.array([[[20.0, 40.0, 80.0, 160.0], + [30.0, 50.0, 70.0, 100.0]], + [[26.0, 40.0, 86.0, 160.0], + [36.0, 56.0, 76.0, 106.0]]]).astype(np.single) # [x1, y1, x2, y2] + bbox, size = _of_target_resize_bbox_scale(images, bbox, 280, 350) + print(bbox[0]) + print(bbox[1]) + + # [[[ 16.0218 32.09169 64.0872 128.36676 ] + # [ 24.032698 40.114613 56.076298 80.229225]]] + + # [[[ 24.186047 37.170418 80. 
148.68167 ] + # [ 33.488373 52.038586 70.69768 98.5016 ]]] + + """ + assert isinstance(bbox, oneflow._oneflow_internal.BlobDesc) + assert isinstance(scale, oneflow._oneflow_internal.BlobDesc) + assert bbox.shape[0] == scale.shape[0] + + if name is None: + name = id_util.UniqueStr("ObjectBboxScale_") + + op = ( + flow.user_op_builder(name) + .Op("object_bbox_scale") + .Input("bbox", [bbox]) + .Input("scale", [scale]) + .Output("out") + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() + + +@oneflow_export( + "detection.object_segmentation_polygon_flip", "object_segmentation_polygon_flip" +) +def object_segm_poly_flip( + poly: oneflow._oneflow_internal.BlobDesc, + image_size: oneflow._oneflow_internal.BlobDesc, + flip_code: Union[int, oneflow._oneflow_internal.BlobDesc], + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator flips the segmentation points in image. + + The flip code corresponds to the different flip mode: + + 0 (0x00): Non Flip + + 1 (0x01): Horizontal Flip + + 16 (0x10): Vertical Flip + + 17 (0x11): Both Horizontal and Vertical Flip + + Args: + poly (oneflow._oneflow_internal.BlobDesc): The poly segmentation points. + image_size (oneflow._oneflow_internal.BlobDesc): The image size. + flip_code (Union[int, oneflow._oneflow_internal.BlobDesc]): The filp code. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob + + For example: + + .. code-block:: python + + import numpy as np + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import cv2 + + + def _read_images_by_cv(image_files): + images = [cv2.imread(image_file).astype(np.single) for image_file in image_files] + return [np.expand_dims(image, axis=0) for image in images] + + + def _of_object_segm_poly_flip(poly_list, image_size, flip_code): + poly_shape = _get_segm_poly_static_shape(poly_list) + + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + func_config.default_logical_view(flow.scope.mirrored_view()) + + @flow.global_function(function_config=func_config) + def object_segm_poly_flip_job( + poly_def: tp.ListListNumpy.Placeholder( + shape=tuple(poly_shape), dtype=flow.float + ), + image_size_def: tp.ListNumpy.Placeholder( + shape=image_size.shape, dtype=flow.int32 + ), + ) -> tp.ListListNumpy: + poly_buffer = flow.tensor_list_to_tensor_buffer(poly_def) + flip_poly = flow.object_segmentation_polygon_flip( + poly_buffer, image_size_def, flip_code + ) + return flow.tensor_buffer_to_tensor_list( + flip_poly, shape=poly_shape[1:], dtype=flow.float + ) + + input_poly_list = [np.expand_dims(poly, axis=0) for poly in poly_list] + poly_tensor = object_segm_poly_flip_job([input_poly_list], [image_size]) + return poly_tensor[0] + + + def _get_segm_poly_static_shape(poly_list): + poly_shapes = [poly.shape for poly in poly_list] + poly_static_shape = np.amax(poly_shapes, axis=0) + assert isinstance( + poly_static_shape, np.ndarray + ), "poly_shapes: {}, poly_static_shape: {}".format( + str(poly_shapes), str(poly_static_shape) + ) + poly_static_shape = poly_static_shape.tolist() + poly_static_shape.insert(0, len(poly_list)) + return poly_static_shape + + if __name__ == "__main__": + segm_poly_list = [] + segmentations = [[[20.0, 40.0], [80.0, 160.0], [100.0, 210.0]], # Image 1 segmentation point + [[25.0, 45.0], [85.0, 165.0], [105.0, 215.0]]] # Image 2 segmentation point + for segmentation in segmentations: + 
polygon = [] + for seg in segmentation: + polygon.extend(seg) + poly_array = np.array(polygon, dtype=np.single).reshape(-1, 2) # Reshape it + segm_poly_list.append(poly_array) + + image_size = np.array([[480, 620], # Image 1 size + [640, 640]]).astype(np.int32) # Image 2 size + of_segm_poly_list = _of_object_segm_poly_flip( + segm_poly_list, image_size, flip_code=1 + ) # Horizontal Flip + print(of_segm_poly_list[0]) + print(of_segm_poly_list[1]) + + # of_segm_poly_list[0] + # [[[460. 40.] + # [400. 160.] + # [380. 210.]]] + + # of_segm_poly_list[1] + # [[[615. 45.] + # [555. 165.] + # [535. 215.]]] + + """ + assert isinstance(poly, oneflow._oneflow_internal.BlobDesc) + assert isinstance(image_size, oneflow._oneflow_internal.BlobDesc) + assert poly.shape[0] == image_size.shape[0] + + if name is None: + name = id_util.UniqueStr("ObjectSegmPolyFilp_") + + if not isinstance(flip_code, oneflow._oneflow_internal.BlobDesc): + assert isinstance(flip_code, int) + flip_code = flow.constant( + flip_code, + shape=(poly.shape[0],), + dtype=flow.int8, + name="{}_FlipCode".format(name), + ) + else: + assert poly.shape[0] == flip_code.shape[0] + + op = ( + flow.user_op_builder(name) + .Op("object_segmentation_polygon_flip") + .Input("poly", [poly]) + .Input("image_size", [image_size]) + .Input("flip_code", [flip_code]) + .Output("out") + .Build() + ) + return op.InferAndTryRun().SoleOutputBlob() + + +@oneflow_export( + "detection.object_segmentation_polygon_scale", "object_segmentation_polygon_scale" +) +def object_segm_poly_scale( + poly: oneflow._oneflow_internal.BlobDesc, + scale: oneflow._oneflow_internal.BlobDesc, + name: Optional[str] = None, +) -> oneflow._oneflow_internal.BlobDesc: + """This operator scales the segmentation points in the images. + + Args: + poly (oneflow._oneflow_internal.BlobDesc): The poly segmentation points. + scale (oneflow._oneflow_internal.BlobDesc): The image scale. + name (Optional[str], optional): The name for the operation. Defaults to None. + + Returns: + oneflow._oneflow_internal.BlobDesc: The result Blob. + + For example: + + .. 
code-block:: python + + import numpy as np + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import cv2 + from typing import Tuple + + + def _read_images_by_cv(image_files): + images = [cv2.imread(image_file).astype(np.single) for image_file in image_files] + return images + + + def _get_images_static_shape(images): + image_shapes = [image.shape for image in images] + image_static_shape = np.amax(image_shapes, axis=0) + assert isinstance( + image_static_shape, np.ndarray + ), "image_shapes: {}, image_static_shape: {}".format( + str(image_shapes), str(image_static_shape) + ) + image_static_shape = image_static_shape.tolist() + image_static_shape.insert(0, len(image_shapes)) + return image_static_shape + + + def _get_segm_poly_static_shape(poly_list): + poly_shapes = [poly.shape for poly in poly_list] + poly_static_shape = np.amax(poly_shapes, axis=0) + assert isinstance( + poly_static_shape, np.ndarray + ), "poly_shapes: {}, poly_static_shape: {}".format( + str(poly_shapes), str(poly_static_shape) + ) + poly_static_shape = poly_static_shape.tolist() + poly_static_shape.insert(0, len(poly_list)) + return poly_static_shape + + + def _get_bbox_static_shape(bbox_list): + bbox_shapes = [bbox.shape for bbox in bbox_list] + bbox_static_shape = np.amax(bbox_shapes, axis=0) + assert isinstance( + bbox_static_shape, np.ndarray + ), "bbox_shapes: {}, bbox_static_shape: {}".format( + str(bbox_shapes), str(bbox_static_shape) + ) + bbox_static_shape = bbox_static_shape.tolist() + bbox_static_shape.insert(0, len(bbox_list)) + return bbox_static_shape + + + def _of_object_segm_poly_scale(images, poly_list, target_size, max_size): + image_shape = _get_images_static_shape(images) + print(image_shape) + poly_shape = _get_segm_poly_static_shape(poly_list) + print("Poly shape is ", poly_shape) + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + func_config.default_logical_view(flow.scope.mirrored_view()) + + @flow.global_function(function_config=func_config) + def object_segm_poly_scale_job( + image_def: tp.ListListNumpy.Placeholder( + shape=tuple(image_shape), dtype=flow.float + ), + poly_def: tp.ListListNumpy.Placeholder( + shape=tuple(poly_shape), dtype=flow.float + ), + ) -> Tuple[tp.ListListNumpy, tp.ListNumpy]: + images_buffer = flow.tensor_list_to_tensor_buffer(image_def) + resized_images_buffer, new_size, scale = flow.image_target_resize( + images_buffer, target_size=target_size, max_size=max_size + ) + poly_buffer = flow.tensor_list_to_tensor_buffer(poly_def) + scaled_poly = flow.object_segmentation_polygon_scale(poly_buffer, scale) + scaled_poly_list = flow.tensor_buffer_to_tensor_list( + scaled_poly, shape=poly_shape[1:], dtype=flow.float + ) + return scaled_poly_list, new_size + + input_image_list = [np.expand_dims(image, axis=0) for image in images] + input_poly_list = [np.expand_dims(poly, axis=0) for poly in poly_list] + + output_poly_list, output_image_size = object_segm_poly_scale_job( + [input_image_list], [input_poly_list] + ) + + return output_poly_list[0], output_image_size + + if __name__ == "__main__": + images = _read_images_by_cv(['./img/1.jpg', './img/2.jpg']) + segm_poly_list = [] + segmentations = [[[20.0, 40.0], [80.0, 160.0], [100.0, 210.0]], # Image 1 segmentation point + [[25.0, 45.0], [85.0, 165.0], [105.0, 215.0]]] # Image 2 segmentation point + + for segmentation in segmentations: + polygon = [] + for seg in segmentation: + polygon.extend(seg) + poly_array = np.array(polygon, 
dtype=np.single).reshape(-1, 2)  # Reshape it
+            segm_poly_list.append(poly_array)
+
+        bbox, size = _of_object_segm_poly_scale(images, segm_poly_list, 280, 350)
+
+    """
+    assert isinstance(poly, oneflow._oneflow_internal.BlobDesc)
+    assert isinstance(scale, oneflow._oneflow_internal.BlobDesc)
+    assert poly.shape[0] == scale.shape[0]
+
+    if name is None:
+        name = id_util.UniqueStr("ObjectSegmPolyScale_")
+
+    op = (
+        flow.user_op_builder(name)
+        .Op("object_segmentation_polygon_scale")
+        .Input("poly", [poly])
+        .Input("scale", [scale])
+        .Output("out")
+        .Build()
+    )
+    return op.InferAndTryRun().SoleOutputBlob()
+
+
+@oneflow_export(
+    "detection.object_segmentation_polygon_to_mask",
+    "object_segmentation_polygon_to_mask",
+)
+def object_segm_poly_to_mask(
+    poly: oneflow._oneflow_internal.BlobDesc,
+    poly_index: oneflow._oneflow_internal.BlobDesc,
+    image_size: oneflow._oneflow_internal.BlobDesc,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator converts the poly segment points to the segment mask array.
+
+    Args:
+        poly (oneflow._oneflow_internal.BlobDesc): The poly segment points.
+        poly_index (oneflow._oneflow_internal.BlobDesc): The poly segment index.
+        image_size (oneflow._oneflow_internal.BlobDesc): The input image size.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob.
+
+    .. code-block:: python
+
+        import numpy as np
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+        import cv2
+        from typing import Tuple
+
+
+        def _read_images_by_cv(image_files):
+            images = [cv2.imread(image_file).astype(np.single) for image_file in image_files]
+            return images
+
+
+        def _get_images_static_shape(images):
+            image_shapes = [image.shape for image in images]
+            image_static_shape = np.amax(image_shapes, axis=0)
+            assert isinstance(
+                image_static_shape, np.ndarray
+            ), "image_shapes: {}, image_static_shape: {}".format(
+                str(image_shapes), str(image_static_shape)
+            )
+            image_static_shape = image_static_shape.tolist()
+            image_static_shape.insert(0, len(image_shapes))
+            return image_static_shape
+
+
+        def _get_segm_poly_static_shape(poly_list, poly_index_list):
+            assert len(poly_list) == len(poly_index_list)
+            num_images = len(poly_list)
+            max_poly_elems = 0
+            for poly, poly_index in zip(poly_list, poly_index_list):
+                assert len(poly.shape) == 2
+                assert len(poly_index.shape) == 2, str(poly_index.shape)
+                assert poly.shape[0] == poly_index.shape[0]
+                assert poly.shape[1] == 2
+                assert poly_index.shape[1] == 3
+                max_poly_elems = max(max_poly_elems, poly.shape[0])
+            return [num_images, max_poly_elems, 2], [num_images, max_poly_elems, 3]
+
+        def _segm_poly_to_tensor(img_segm_poly_list):
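+            # Added comments: this helper flattens the per-image, per-object
+            # polygon lists into a (num_points, 2) float point array and a
+            # (num_points, 3) int32 index array whose rows are
+            # [point_idx_within_polygon, polygon_idx, object_idx].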
+            poly_array_list = []
+            poly_index_array_list = []
+            for img_idx, segm_poly_list in enumerate(img_segm_poly_list):
+                img_poly_elem_list = []
+                img_poly_index_list = []
+
+                for obj_idx, poly_list in enumerate(segm_poly_list):
+                    for poly_idx, poly in enumerate(poly_list):
+                        img_poly_elem_list.extend(poly)
+                        for pt_idx, pt in enumerate(poly):
+                            if pt_idx % 2 == 0:
+                                img_poly_index_list.append([pt_idx / 2, poly_idx, obj_idx])
+
+                img_poly_array = np.array(img_poly_elem_list, dtype=np.single).reshape(-1, 2)
+                assert img_poly_array.size > 0, segm_poly_list
+                poly_array_list.append(img_poly_array)
+
+                img_poly_index_array = np.array(img_poly_index_list, dtype=np.int32)
+                assert img_poly_index_array.size > 0, segm_poly_list
+                poly_index_array_list.append(img_poly_index_array)
+
+            return poly_array_list, poly_index_array_list
+
+
+        def _of_poly_to_mask_pipeline(
+            images, poly_list, poly_index_list, num_segms_list, target_size, max_size
+        ):
+            print(len(images))
+            print(len(poly_list))
+
+            assert len(images) == len(poly_list)
+            assert len(poly_list) == len(poly_index_list)
+            image_shape = _get_images_static_shape(images)
+            poly_shape, poly_index_shape = _get_segm_poly_static_shape(
+                poly_list, poly_index_list
+            )
+            max_num_segms = max(num_segms_list)
+
+            func_config = flow.FunctionConfig()
+            func_config.default_logical_view(flow.scope.mirrored_view())
+            func_config.default_data_type(flow.float)
+
+            @flow.global_function(function_config=func_config)
+            def poly_to_mask_job(
+                image_def: tp.ListListNumpy.Placeholder(
+                    shape=tuple(image_shape), dtype=flow.float
+                ),
+                poly_def: tp.ListListNumpy.Placeholder(
+                    shape=tuple(poly_shape), dtype=flow.float
+                ),
+                poly_index_def: tp.ListListNumpy.Placeholder(
+                    shape=tuple(poly_index_shape), dtype=flow.int32
+                ),
+            ) -> Tuple[tp.ListListNumpy, tp.ListListNumpy]:
+                images_buffer = flow.tensor_list_to_tensor_buffer(image_def)
+                resized_images_buffer, new_size, scale = flow.image_target_resize(
+                    images_buffer, target_size=target_size, max_size=max_size
+                )
+                poly_buffer = flow.tensor_list_to_tensor_buffer(poly_def)
+                poly_index_buffer = flow.tensor_list_to_tensor_buffer(poly_index_def)
+                scaled_poly_buffer = flow.object_segmentation_polygon_scale(poly_buffer, scale)
+                mask_buffer = flow.object_segmentation_polygon_to_mask(
+                    scaled_poly_buffer, poly_index_buffer, new_size
+                )
+                mask_list = flow.tensor_buffer_to_tensor_list(
+                    mask_buffer, shape=(max_num_segms, target_size, max_size), dtype=flow.int8
+                )
+                scaled_poly_list = flow.tensor_buffer_to_tensor_list(
+                    scaled_poly_buffer, shape=poly_shape[1:], dtype=flow.float
+                )
+                return mask_list, scaled_poly_list
+
+            input_image_list = [np.expand_dims(image, axis=0) for image in images]
+            input_poly_list = [np.expand_dims(poly, axis=0) for poly in poly_list]
+            input_poly_index_list = [
+                np.expand_dims(poly_index, axis=0) for poly_index in poly_index_list
+            ]
+
+            output_mask_list, output_poly_list = poly_to_mask_job(
+                [input_image_list], [input_poly_list], [input_poly_index_list]
+            )
+
+            return output_mask_list[0], output_poly_list[0]
+
+        if __name__ == "__main__":
+            images = _read_images_by_cv(['./img/1.jpg', './img/2.jpg'])
+            segm_poly_list = []
+
+            segmentations = [[[20.0, 40.0, 80.0, 160.0, 100.0, 210.0, 120.0, 215.0]],  # Image 1 segmentation points
+                             [[24.0, 42.0, 86.0, 168.0, 103.0, 223.0, 125.0, 235.0]]]  # Image 2 segmentation points
+
+            for segmentation in segmentations:
+                polygon = []
+                for seg in segmentation:
+                    polygon.extend(seg)
+
+                poly_array = np.array(polygon, dtype=np.single).reshape(-1, 2)  # Reshape it
+                segm_poly_list.append([poly_array])
+
+            poly_list, poly_index_list = _segm_poly_to_tensor(segm_poly_list)
+            num_segms_list = [len(segm_poly_list) for segm_poly_list in segm_poly_list]
+            target_size = 280
+            max_size = 350
+            of_mask_list, of_scaled_poly_list = _of_poly_to_mask_pipeline(
+                images, poly_list, poly_index_list, num_segms_list, target_size, max_size
+            )
+            of_mask_list = [
+                mask_array.reshape(-1, mask_array.shape[-2], mask_array.shape[-1])
+                for mask_array in of_mask_list
+            ]  # reshape it
+
+    """
+    assert isinstance(poly, oneflow._oneflow_internal.BlobDesc)
+    assert isinstance(poly_index, oneflow._oneflow_internal.BlobDesc)
+    assert isinstance(image_size, oneflow._oneflow_internal.BlobDesc)
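+    # poly is a tensor buffer of (num_points, 2) polygon vertices, poly_index a
+    # matching (num_points, 3) buffer of [point_idx, polygon_idx, object_idx]
+    # rows, and image_size a (batch_size, 2) tensor; the batch dimensions must
+    # agree, which the asserts below check.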
+    assert poly.shape[0] == poly_index.shape[0]
+    assert poly.shape[0] == image_size.shape[0]
+
+    if name is None:
+        name = id_util.UniqueStr("ObjectSegmPolyToMask_")
+
+    op = (
+        flow.user_op_builder(name)
+        .Op("object_segmentation_polygon_to_mask")
+        .Input("poly", [poly])
+        .Input("poly_index", [poly_index])
+        .Input("image_size", [image_size])
+        .Output("out")
+        .Build()
+    )
+    return op.InferAndTryRun().SoleOutputBlob()
+
+
+@oneflow_export("data.coco_reader")
+def api_coco_reader(
+    annotation_file: str,
+    image_dir: str,
+    batch_size: int,
+    shuffle: bool = True,
+    random_seed: Optional[int] = None,
+    group_by_aspect_ratio: bool = True,
+    stride_partition: bool = True,
+    remove_images_without_annotations: bool = True,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    assert name is not None
+    module = flow.find_or_create_module(
+        name,
+        lambda: COCOReader(
+            annotation_file=annotation_file,
+            image_dir=image_dir,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            random_seed=random_seed,
+            group_by_aspect_ratio=group_by_aspect_ratio,
+            remove_images_without_annotations=remove_images_without_annotations,
+            stride_partition=stride_partition,
+            name=name,
+        ),
+    )
+    return module()
+
+
+class COCOReader(module_util.Module):
+    def __init__(
+        self,
+        annotation_file: str,
+        image_dir: str,
+        batch_size: int,
+        shuffle: bool = True,
+        random_seed: Optional[int] = None,
+        group_by_aspect_ratio: bool = True,
+        remove_images_without_annotations: bool = True,
+        stride_partition: bool = True,
+        name: Optional[str] = None,
+    ):
+        assert name is not None
+        if random_seed is None:
+            random_seed = random.randrange(sys.maxsize)
+        module_util.Module.__init__(self, name)
+        self.op_module_builder = (
+            flow.consistent_user_op_module_builder("COCOReader")
+            .Output("image")
+            .Output("image_id")
+            .Output("image_size")
+            .Output("gt_bbox")
+            .Output("gt_label")
+            .Output("gt_segm")
+            .Output("gt_segm_index")
+            .Attr("session_id", flow.current_scope().session_id)
+            .Attr("annotation_file", annotation_file)
+            .Attr("image_dir", image_dir)
+            .Attr("batch_size", batch_size)
+            .Attr("shuffle_after_epoch", shuffle)
+            .Attr("random_seed", random_seed)
+            .Attr("group_by_ratio", group_by_aspect_ratio)
+            .Attr(
+                "remove_images_without_annotations", remove_images_without_annotations
+            )
+            .Attr("stride_partition", stride_partition)
+            .CheckAndComplete()
+        )
+        self.op_module_builder.user_op_module.InitOpKernel()
+
+    def forward(self):
+        if self.call_seq_no == 0:
+            name = self.module_name
+        else:
+            name = id_util.UniqueStr("COCOReader")
+        return (
+            self.op_module_builder.OpName(name)
+            .Build()
+            .InferAndTryRun()
+            .RemoteBlobList()
+        )
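+
+# A minimal usage sketch for flow.data.coco_reader above (comment only; the
+# dataset paths are illustrative assumptions):
+#
+#   import oneflow.compatible.single_client as flow
+#
+#   @flow.global_function()
+#   def coco_job():
+#       (image, image_id, image_size, gt_bbox, gt_label,
+#        gt_segm, gt_segm_index) = flow.data.coco_reader(
+#           annotation_file="/dataset/coco/annotations/instances_val2017.json",
+#           image_dir="/dataset/coco/val2017",
+#           batch_size=2,
+#           name="COCOReader",
+#       )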
+@oneflow_export("data.ofrecord_image_classification_reader")
+def ofrecord_image_classification_reader(
+    ofrecord_dir: str,
+    image_feature_name: str,
+    label_feature_name: str,
+    batch_size: int = 1,
+    data_part_num: int = 1,
+    part_name_prefix: str = "part-",
+    part_name_suffix_length: int = -1,
+    random_shuffle: bool = False,
+    shuffle_buffer_size: int = 1024,
+    shuffle_after_epoch: bool = False,
+    color_space: str = "BGR",
+    decode_buffer_size_per_thread: int = 32,
+    num_decode_threads_per_machine: Optional[int] = None,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    """This operator creates a reader for image classification tasks.
+
+    Args:
+        ofrecord_dir (str): The directory of the ofrecord files.
+        image_feature_name (str): The name of the image feature.
+        label_feature_name (str): The name of the label feature.
+        batch_size (int, optional): The batch size. Defaults to 1.
+        data_part_num (int, optional): The number of data parts. Defaults to 1.
+        part_name_prefix (str, optional): The prefix of the data part name. Defaults to "part-".
+        part_name_suffix_length (int, optional): The length of the data part name suffix. Defaults to -1.
+        random_shuffle (bool, optional): Whether to randomly shuffle the data. Defaults to False.
+        shuffle_buffer_size (int, optional): The buffer size for shuffling data. Defaults to 1024.
+        shuffle_after_epoch (bool, optional): Whether to shuffle the data after each epoch. Defaults to False.
+        color_space (str, optional): The color space. Defaults to "BGR".
+        decode_buffer_size_per_thread (int, optional): The decode buffer size per thread. Defaults to 32.
+        num_decode_threads_per_machine (Optional[int], optional): The number of decode threads per machine. Defaults to None.
+        name (Optional[str], optional): The name for the operation. Defaults to None.
+
+    Returns:
+        oneflow._oneflow_internal.BlobDesc: The result Blob.
+
+    For example:
+
+    .. code-block:: python
+
+        import oneflow.compatible.single_client as flow
+        import oneflow.compatible.single_client.typing as tp
+        from typing import Tuple
+
+
+        @flow.global_function(type="predict")
+        def image_classifier_job() -> Tuple[tp.Numpy, tp.Numpy]:
+            image, label = flow.data.ofrecord_image_classification_reader(
+                ofrecord_dir="./imgdataset",
+                image_feature_name="encoded",
+                label_feature_name="class/label",
+                batch_size=8,
+                data_part_num=1,
+                part_name_prefix="part-",
+                part_name_suffix_length=-1,
+                random_shuffle=False,
+                shuffle_after_epoch=False,
+                color_space="RGB",
+                decode_buffer_size_per_thread=16,
+            )
+            res_image, scale, new_size = flow.image.Resize(
+                image, target_size=(224, 224)
+            )
+            return res_image, label
+
+
+        if __name__ == "__main__":
+            images, labels = image_classifier_job()
+            # images.shape (8, 224, 224, 3)
+
+    """
+    if name is None:
+        name = id_util.UniqueStr("OFRecordImageClassificationReader_")
+    (image, label) = (
+        flow.user_op_builder(name)
+        .Op("ofrecord_image_classification_reader")
+        .Output("image")
+        .Output("label")
+        .Attr("data_dir", ofrecord_dir)
+        .Attr("data_part_num", data_part_num)
+        .Attr("batch_size", batch_size)
+        .Attr("part_name_prefix", part_name_prefix)
+        .Attr("random_shuffle", random_shuffle)
+        .Attr("shuffle_buffer_size", shuffle_buffer_size)
+        .Attr("shuffle_after_epoch", shuffle_after_epoch)
+        .Attr("part_name_suffix_length", part_name_suffix_length)
+        .Attr("color_space", color_space)
+        .Attr("image_feature_name", image_feature_name)
+        .Attr("label_feature_name", label_feature_name)
+        .Attr("decode_buffer_size_per_thread", decode_buffer_size_per_thread)
+        .Attr("num_decode_threads_per_machine", num_decode_threads_per_machine or 0)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()
+    )
+    label = flow.tensor_buffer_to_tensor(label, dtype=flow.int32, instance_shape=[1])
+    label = flow.squeeze(label, axis=[-1])
+    return image, label
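+
+# A minimal usage sketch for the OneRec decoder below (comment only; it assumes
+# a OneRec reader such as flow.data.onerec_reader supplies the encoded input
+# blob -- substitute whatever reader is actually available):
+#
+#   import oneflow.compatible.single_client as flow
+#   import oneflow.compatible.single_client.typing as tp
+#
+#   @flow.global_function()
+#   def onerec_decode_job() -> tp.Numpy:
+#       records = flow.data.onerec_reader(files=["./data/part-0.onerec"], batch_size=8)
+#       labels = flow.data.onerec_decoder(
+#           records, key="labels", dtype=flow.int32, shape=(1,)
+#       )
+#       return labels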
+@oneflow_export("data.OneRecDecoder", "data.onerec_decoder")
+def OneRecDecoder(
+    input_blob,
+    key,
+    dtype,
+    shape,
+    is_dynamic=False,
+    reshape=None,
+    batch_padding=None,
+    name=None,
+):
+    if name is None:
+        name = id_util.UniqueStr("OneRecDecoder_")
+    if reshape is not None:
+        has_reshape = True
+    else:
+        has_reshape = False
+        reshape = shape
+    if batch_padding is not None:
+        has_batch_padding = True
+    else:
+        has_batch_padding = False
+        batch_padding = shape
+    return (
+        flow.user_op_builder(name)
+        .Op("onerec_decoder")
+        .Input("in", [input_blob])
+        .Output("out")
+        .Attr("key", key)
+        .Attr("data_type", dtype)
+        .Attr("static_shape", shape)
+        .Attr("is_dynamic", is_dynamic)
+        .Attr("has_reshape", has_reshape)
+        .Attr("reshape", reshape)
+        .Attr("has_batch_padding", has_batch_padding)
+        .Attr("batch_padding", batch_padding)
+        .Build()
+        .InferAndTryRun()
+        .RemoteBlobList()[0]
+    )
+
+
+@oneflow_export("data.megatron_gpt_mmap_data_loader", "data.MegatronGPTMMapDataLoader")
+def gpt_data_loader(
+    data_file_prefix: str,
+    seq_length: int,
+    num_samples: int,
+    batch_size: int,
+    dtype: flow.dtype = flow.int64,
+    shuffle: bool = True,
+    random_seed: Optional[int] = None,
+    split_sizes: Optional[Sequence[str]] = None,
+    split_index: Optional[int] = None,
+    parallel_distribution: Optional[Sequence[str]] = None,
+    start_from_saved_progress: bool = False,
+    name: Optional[str] = None,
+) -> oneflow._oneflow_internal.BlobDesc:
+    if name is None:
+        name = (
+            "gpt_data_loader"
+            if start_from_saved_progress
+            else id_util.UniqueStr("gpt_data_loader_")
+        )
+
+    # consider exporting this as a parameter
+    label_length = 1
+
+    if parallel_distribution is None:
+        parallel_distribution = []
+
+    if split_index is None:
+        split_index = 0
+
+    if split_sizes is None:
+        split_sizes = (1,)
+
+    if split_index >= len(split_sizes):
+        raise ValueError(
+            "split index {} is out of range, split_sizes {}".format(
+                split_index, split_sizes
+            )
+        )
+
+    if random_seed is None:
+        from datetime import datetime
+
+        random_seed = int(datetime.utcnow().timestamp())
+
+    def distribute_to_str(dist):
+        if dist is None:
+            return ""
+        elif type(dist) is str:
+            return dist
+        elif type(dist) is oneflow._oneflow_internal.distribute.SplitDistribute:
+            return "S({})".format(dist.axis)
+        elif type(dist) is oneflow._oneflow_internal.distribute.BroadcastDistribute:
+            return "B"
+        else:
+            raise ValueError("unsupported distribute")
+
+    parallel_distribution = list(map(distribute_to_str, parallel_distribution))
+
+    if start_from_saved_progress:
+        iteration_name = "{}-iteration-sq{}-sa{}-bs{}-sd{}-sp{}-spi{}-{}".format(
+            name,
+            seq_length,
+            num_samples,
+            batch_size,
+            random_seed,
+            "_".join([str(s) for s in split_sizes]),
+            split_index,
+            "_".join(
+                [
+                    "S{}".format(p[2:-1]) if p.startswith("S") else p
+                    for p in parallel_distribution
+                ]
+            ),
+        )
+        iteration = flow.get_variable(
+            name=iteration_name,
+            shape=(1,),
+            dtype=flow.int64,
+            initializer=flow.constant_initializer(0, flow.int64),
+            model_name="iteration",
+            reuse=False,
+        )
+
+    op_builder = flow.user_op_builder(name).Op("megatron_gpt_mmap_data_loader")
+    if start_from_saved_progress:
+        op_builder.Input("iteration", [iteration])
+
+    op = (
+        op_builder.Output("out")
+        .Attr("data_file_prefix", data_file_prefix)
+        .Attr("seq_length", seq_length)
+        .Attr("label_length", label_length)
+        .Attr("num_samples", num_samples)
+        .Attr("batch_size", batch_size)
+        .Attr("dtype", dtype)
+        .Attr("shuffle", shuffle)
+        .Attr("random_seed", random_seed)
+        .Attr("split_sizes", split_sizes)
+        .Attr("split_index", split_index)
+        .Attr("parallel_distribution", parallel_distribution)
+        .Build()
+    )
+
+    return op.InferAndTryRun().SoleOutputBlob()
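+
+# A minimal usage sketch for the GPT data loader above (comment only; the
+# data_file_prefix and the sizes are illustrative assumptions):
+#
+#   import oneflow.compatible.single_client as flow
+#   import oneflow.compatible.single_client.typing as tp
+#
+#   @flow.global_function()
+#   def gpt_loader_job() -> tp.Numpy:
+#       return flow.data.megatron_gpt_mmap_data_loader(
+#           data_file_prefix="./data/gpt_sample_dataset_text_document",
+#           seq_length=1024,
+#           num_samples=648,
+#           batch_size=8,
+#           dtype=flow.int64,
+#           shuffle=True,
+#       )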
diff --git a/oneflow/compatible_single_client_python/ops/user_op_builder.py b/oneflow/compatible_single_client_python/ops/user_op_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..94c38b6edca5a8d7e5cd8a4181287bfa8945735e
--- /dev/null
+++ b/oneflow/compatible_single_client_python/ops/user_op_builder.py
@@ -0,0 +1,582 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from oneflow.compatible_single_client_python.framework import (
+    interpret_util as interpret_util,
+)
+from oneflow.compatible_single_client_python.framework import (
+    remote_blob as remote_blob_util,
+)
+from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util
+from oneflow.compatible_single_client_python.framework import (
+    compile_context as compile_context,
+)
+from oneflow.compatible_single_client_python.framework import distribute as distribute
+from oneflow.compatible_single_client_python.framework import hob as hob
+from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if
+from oneflow.core.operator import op_conf_pb2 as op_conf_util
+from oneflow.core.framework import user_op_attr_pb2 as attr_value_pb
+from oneflow._oneflow_internal.oneflow.core.framework import (
+    user_op_attr as user_op_attr_cfg,
+)
+from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_util
+from oneflow._oneflow_internal.oneflow.core.common import shape as shape_cfg
+from oneflow._oneflow_internal.oneflow.core.common import data_type as data_type_cfg
+from oneflow.compatible import single_client as flow
+from oneflow.compatible_single_client_python.oneflow_export import oneflow_export
+from oneflow.compatible_single_client_python.experimental import (
+    name_scope as name_scope,
+)
+from oneflow.core.eager import eager_symbol_pb2 as eager_symbol_util
+from oneflow.compatible_single_client_python.eager import (
+    eager_blob_util as eager_blob_util,
+)
+import random
+from oneflow.compatible_single_client_python.eager import gradient_util as gradient_util
+import oneflow._oneflow_internal
+import traceback
+
+from google.protobuf import text_format
+
+blob_register = oneflow._oneflow_internal.GetDefaultBlobRegister()
+
+
+class UserOp(object):
+    def __init__(self, op_name, op_type_name=None):
+        self.op_conf_ = op_conf_util.OperatorConf()
+        self.op_conf_.name = op_name
+        if op_type_name is not None:
+            self.op_conf_.user_conf.op_type_name = op_type_name
+        device_tag = flow.current_scope().device_parallel_desc_symbol.device_tag
+        self.op_conf_.device_tag = device_tag
+        self.output_arg_key_list_ = []
+
+    @property
+    def op_conf(self):
+        return self.op_conf_
+
+    def InferAndTryRun(self):
+        raise NotImplementedError
+
+    def MakeRemoteBlob(self, lbi):
+        raise NotImplementedError
+
+    def RemoteBlobList(self):
+        remote_blob_list = []
+        for k in self.op_conf_.user_conf.output:
+            if k not in self.output_arg_key_list_:
+                raise ValueError(
+                    "output_arg_name {} of {} op is not set in python op builder".format(
+                        k, self.op_conf_.name
+                    )
+                )
+
+        for output_arg_name in
self.output_arg_key_list_: + assert output_arg_name in self.op_conf_.user_conf.output + for i in range(len(self.op_conf_.user_conf.output[output_arg_name].s)): + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = self.op_conf_.name + lbi.blob_name = "{}_{}".format(output_arg_name, i) + remote_blob_obj = self.MakeRemoteBlob(lbi) + remote_blob_list.append(remote_blob_obj) + if flow.eager_execution_enabled(): + gradient_util.GetDefaultBackwardBlobRegister().TrySetObject4BlobName( + remote_blob_obj.logical_blob_name, remote_blob_obj.blob_object + ) + + return tuple(remote_blob_list) + + def RemoteBlobDict(self): + remote_blob_dict = {} + for k in self.op_conf_.user_conf.output: + if k not in self.output_arg_key_list_: + raise ValueError( + "output_arg_name {} of {} op is not set in python op builder".format( + k, self.op_conf_.name + ) + ) + + for output_arg_name in self.output_arg_key_list_: + assert output_arg_name in self.op_conf_.user_conf.output + if output_arg_name not in remote_blob_dict: + remote_blob_dict[output_arg_name] = [] + for i in range(len(self.op_conf_.user_conf.output[output_arg_name].s)): + lbi = logical_blob_id_util.LogicalBlobId() + lbi.op_name = self.op_conf_.name + lbi.blob_name = "{}_{}".format(output_arg_name, i) + remote_blob_dict[output_arg_name].append(self.MakeRemoteBlob(lbi)) + + return remote_blob_dict + + def SoleOutputBlob(self): + blobs = self.RemoteBlobList() + assert len(blobs) == 1 + return blobs[0] + + +class UserOpModule(object): + @property + def opkernel_object(self): + return self.opkernel_object_ + + def set_opkernel_object(self, opkernel_object): + assert not hasattr(self, "opkernel_object_") + self.opkernel_object_ = opkernel_object + + def InitOpKernel(self): + raise NotImplementedError + + +@oneflow_export("user_op_builder") +def api_user_op_builder(op_name): + r"""Build a wrapper of user op. + + For instance:: + def myargmax( + input: oneflow._oneflow_internal.BlobDesc) -> oneflow._oneflow_internal.BlobDesc: + return ( + flow.user_op_builder("myargmax") + .Op("argmax") + .Input("in", [input]) + .Output("out") + .Build() + .InferAndTryRun() + .RemoteBlobList()[0] + ) + + Args: + op_name (str): name of new user op + + Returns: + UserOpConfBuilder: `UserOpConfBuilder` object used to build a wrapper of user op. 
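+
+    A second sketch showing an attribute set through the same builder (the
+    "top_k" op type and its "k" attribute are assumptions for illustration;
+    substitute an op type registered in your build)::
+
+        def mytopk(
+            input: oneflow._oneflow_internal.BlobDesc, k: int
+        ) -> oneflow._oneflow_internal.BlobDesc:
+            return (
+                flow.user_op_builder("mytopk")
+                .Op("top_k")
+                .Input("in", [input])
+                .Output("out")
+                .Attr("k", k)
+                .Build()
+                .InferAndTryRun()
+                .RemoteBlobList()[0]
+            )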
+ """ + api = enable_if.unique([lazy_user_op_builder, eager_user_op_builder]) + return api(op_name) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def lazy_user_op_builder(op_name): + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + op_name = name_scope.GetJobNameScopePrefix(job_name) + op_name + return UserOpConfBuilder(LazyUserOp, op_name, None) + + +class LazyUserOp(UserOp): + def __init__(self, op_name, op_type_name): + UserOp.__init__(self, op_name, op_type_name) + + def InferAndTryRun(self): + compile_context.CurJobAddOp(self.op_conf_) + return self + + def MakeRemoteBlob(self, lbi): + return remote_blob_util.RemoteBlob(lbi) + + +@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled) +def eager_user_op_builder(op_name): + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + op_name = name_scope.GetJobNameScopePrefix(job_name) + op_name + return UserOpConfBuilder(EagerUserOp, op_name, None) + + +class EagerUserOp(UserOp): + def __init__(self, op_name, op_type_name): + UserOp.__init__(self, op_name, op_type_name) + + def InferAndTryRun(self): + interpret_util.Forward(self.op_conf_) + return self + + def MakeRemoteBlob(self, lbi): + return remote_blob_util.EagerLogicalBlob(lbi) + + +@oneflow_export("consistent_user_op_builder") +def api_consistent_user_op_builder(op_name): + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + op_name = name_scope.GetJobNameScopePrefix(job_name) + op_name + return UserOpConfBuilder(ConsistentUserOp, op_name, None) + + +class ConsistentUserOp(UserOp): + def __init__(self, op_name, op_type_name): + UserOp.__init__(self, op_name, op_type_name) + + def InferAndTryRun(self): + interpret_util.ConsistentForward(self.op_conf_) + return self + + def MakeRemoteBlob(self, lbi): + return remote_blob_util.RemoteBlob(lbi) + + +class UserOpConfBuilder(object): + def __init__(self, user_op_or_module_class, op_name, op_type_name): + self.user_op_ = user_op_or_module_class(op_name, op_type_name) + + def CheckAndComplete(self): + assert self.user_op_.op_conf_.user_conf.op_type_name != "" + self.user_op_.op_conf_ = c_api_util.CheckAndCompleteUserOpConf( + self.user_op_.op_conf_ + ) + return self + + def Build(self): + r"""Build op when in/output and other attribute set up. 
+
+        Returns:
+            self
+        """
+        return self.CheckAndComplete().user_op_
+
+    def OpName(self, op_name):
+        job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName()
+        op_name = name_scope.GetJobNameScopePrefix(job_name) + op_name
+
+        self.user_op_.op_conf_.name = op_name
+        user_conf = self.user_op_.op_conf_.user_conf
+
+        def GetLbn(output_name, i):
+            return "{}/{}_{}".format(op_name, output_name, i)
+
+        for output_name, output in user_conf.output.items():
+            output.s[:] = [GetLbn(output_name, i) for i in range(len(output.s))]
+        return self
+
+    def Op(self, op_type_name):
+        r"""Set the op type name.
+
+        Args:
+            op_type_name (string): op type name
+
+        Returns:
+            self
+        """
+        self.user_op_.op_conf_.user_conf.op_type_name = op_type_name
+        return self
+
+    def Input(self, input_name, input_blob_list):
+        r"""Set input blob of op
+
+        Args:
+            input_name (str): input name of blob
+            input_blob_list : list of blobs
+
+        Returns:
+            self
+        """
+        assert isinstance(input_blob_list, (tuple, list))
+        input_conf = self.user_op_.op_conf_.user_conf.input
+        input_conf[input_name].ClearField("s")
+        for input_blob in input_blob_list:
+            # assert type(input_blob) is blob_desc.BlobDesc
+            input_conf[input_name].s.append(input_blob.unique_name)
+        return self
+
+    def InputSize(self, input_name, input_blob_size):
+        input_conf = self.user_op_.op_conf_.user_conf.input
+        assert input_blob_size >= 0
+        assert input_name not in input_conf
+        for i in range(input_blob_size):
+            unique_name = "%s/%s_%s" % (self.user_op_.op_conf_.name, input_name, i)
+            input_conf[input_name].s.append(unique_name)
+        return self
+
+    def Output(self, output_name, num=1):
+        r"""Set output blob of op
+
+        Args:
+            output_name (str): name of output blob
+            num (int, optional): Defaults to 1.
+
+        Returns:
+            self
+        """
+        assert isinstance(num, int) and num >= 1
+        out_lbns = []
+        for i in range(num):
+            lbn = "{}/{}_{}".format(self.user_op_.op_conf_.name, output_name, i)
+            out_lbns.append(lbn)
+        self.user_op_.op_conf_.user_conf.output[output_name].s[:] = out_lbns
+        self.user_op_.output_arg_key_list_.append(output_name)
+        return self
+
+    def Attr(self, attr_name, attr_value, attr_type_name=None):
+        r"""Set value of op's attribute.
+
+        Args:
+            attr_name (str): attribute name of op
+            attr_value (Any): attribute value of op
+
+        Raises:
+            ValueError: raised when the value is not identical to the op's attribute type.
+
+        Returns:
+            UserOpConfBuilder: self
+        """
+        if attr_type_name is not None:
+            print(
+                """WARNING: Argument 'attr_type_name' of UserOpConfBuilder.Attr has been deprecated. Please remove it.
+ + For instance: + - .Attr("out_num", out_num, "AttrTypeInt64") + + .Attr("out_num", out_num) + """ + ) + print(traceback.format_stack()[-2]) + + attribute = user_op_attr_cfg.AttrValue() + assert isinstance(attr_name, str) + attr_type = oneflow._oneflow_internal.GetUserOpAttrType( + self.user_op_.op_conf_.user_conf.op_type_name, attr_name + ) + if attr_type == user_op_attr_cfg.kAtInt32: + assert isinstance(attr_value, int) + attribute.set_at_int32(attr_value) + elif attr_type == user_op_attr_cfg.kAtInt64: + assert isinstance(attr_value, int) + attribute.set_at_int64(attr_value) + elif attr_type == user_op_attr_cfg.kAtBool: + assert isinstance(attr_value, bool) + attribute.set_at_bool(attr_value) + elif attr_type == user_op_attr_cfg.kAtFloat: + assert isinstance(attr_value, (float, int)) + attribute.set_at_float(attr_value) + elif attr_type == user_op_attr_cfg.kAtDouble: + assert isinstance(attr_value, (float, int)) + attribute.set_at_double(attr_value) + elif attr_type == user_op_attr_cfg.kAtString: + assert isinstance(attr_value, str) + attribute.set_at_string(attr_value) + elif attr_type == user_op_attr_cfg.kAtShape: + assert isinstance(attr_value, (tuple, list)) + attribute_mutable_at_shape = attribute.mutable_at_shape() + for x in attr_value: + assert isinstance(x, int) + attribute_mutable_at_shape.add_dim(x) + elif attr_type == user_op_attr_cfg.kAtDataType: + assert attr_value in flow.dtypes() + attr_value = oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype( + attr_value + ) + assert isinstance(attr_value, int) + attribute.set_at_data_type(data_type_cfg.DataType(attr_value)) + elif attr_type == user_op_attr_cfg.kAtListInt32: + assert isinstance(attr_value, (tuple, list)) + attribute_mutable_at_list_int32 = attribute.mutable_at_list_int32() + for x in attr_value: + assert isinstance(x, int) + attribute_mutable_at_list_int32.add_val(x) + elif attr_type == user_op_attr_cfg.kAtListInt64: + assert isinstance(attr_value, (tuple, list)) + attribute_mutable_at_list_int64 = attribute.mutable_at_list_int64() + for x in attr_value: + assert isinstance(x, int) + attribute_mutable_at_list_int64.add_val(x) + elif attr_type == user_op_attr_cfg.kAtListFloat: + assert isinstance(attr_value, (tuple, list)) + attribute_mutable_at_list_float = attribute.mutable_at_list_float() + for x in attr_value: + assert isinstance(x, (float, int)) + attribute_mutable_at_list_float.add_val(x) + elif attr_type == user_op_attr_cfg.kAtListDataType: + assert isinstance(attr_value, (tuple, list)) + attribute_mutable_at_list_data_type = attribute.mutable_at_list_data_type() + for x in attr_value: + assert x in flow.dtypes() + x = oneflow._oneflow_internal.deprecated.GetProtoDtype4OfDtype(x) + assert isinstance(x, int) + attribute_mutable_at_list_data_type.add_val(data_type_cfg.DataType(x)) + elif attr_type == user_op_attr_cfg.kAtListShape: + assert isinstance(attr_value, (tuple, list)) + attribute_mutable_at_list_shape = ( + attribute.mutable_at_list_shape().mutable_val() + ) + for x in attr_value: + assert isinstance(x, (tuple, list)) + shape = shape_cfg.ShapeProto() + for dim in x: + assert isinstance(dim, int) + shape.add_dim(dim) + attribute_mutable_at_list_shape.Add().CopyFrom(shape) + elif attr_type == user_op_attr_cfg.kAtListString: + assert isinstance(attr_value, (tuple, list)) + attribute_mutable_at_list_string = attribute.mutable_at_list_string() + for x in attr_value: + assert isinstance(x, str) + attribute_mutable_at_list_string.add_val(x) + else: + raise ValueError("Invalid op attribute type 
{}".format(attr_type)) + + self.user_op_.op_conf_.user_conf.attr[attr_name].CopyFrom( + text_format.Parse(str(attribute), attr_value_pb.AttrValue()) + ) + return self + + +@oneflow_export("user_op_module_builder") +def api_user_op_module_builder(op_type_name): + api = enable_if.unique( + [lazy_user_op_module_builder, eager_logical_user_op_module_builder] + ) + return api(op_type_name) + + +class UserOpModuleBuilder(UserOpConfBuilder): + def __init__(self, *args, **kwargs): + UserOpConfBuilder.__init__(self, *args, **kwargs) + self.user_op_module.op_conf.scope_symbol_id = flow.current_scope().symbol_id + + @property + def user_op_module(self): + return self.user_op_ + + def Op(self, op_type_name): + raise ValueError( + "user op module builder of {} can't call '.Op(op_type_name)' method".format( + op_type_name + ) + ) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def lazy_user_op_module_builder(op_type_name): + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + op_name = name_scope.GetJobNameScopePrefix(job_name) + op_type_name + return UserOpModuleBuilder(LazyUserOpModule, op_name, op_type_name) + + +@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled) +def eager_logical_user_op_module_builder(op_type_name): + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + op_name = name_scope.GetJobNameScopePrefix(job_name) + op_type_name + return UserOpModuleBuilder(EagerLogicalUserOpModule, op_name, op_type_name) + + +class LazyUserOpModule(UserOpModule, UserOp): + def __init__(self, op_name, op_type_name): + UserOp.__init__(self, op_name, op_type_name) + + def InitOpKernel(self): + self.set_opkernel_object(None) + + def InferAndTryRun(self): + assert hob.in_global_mode(None) + compile_context.CurJobAddOp(self.op_conf_) + return self + + def MakeRemoteBlob(self, lbi): + return remote_blob_util.RemoteBlob(lbi) + + +class EagerLogicalUserOpModule(UserOpModule, UserOp): + def __init__(self, op_name, op_type_name): + UserOp.__init__(self, op_name, op_type_name) + + def InitOpKernel(self): + def BuildInstruction(builder): + if not isinstance( + self.op_conf, + oneflow._oneflow_internal.oneflow.core.operator.op_conf.OperatorConf, + ): + cfg_op_conf = oneflow._oneflow_internal.deprecated.MakeOpConfByString( + str(self.op_conf) + ) + self.set_opkernel_object(builder.NewOpKernelObject(cfg_op_conf)) + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildInstruction) + + def InferAndTryRun(self): + assert hob.in_global_mode(None) + interpret_util.OpKernelForward(self.op_conf, self.opkernel_object) + return self + + def MakeRemoteBlob(self, lbi): + return remote_blob_util.EagerLogicalBlob(lbi) + + +@oneflow_export("consistent_user_op_module_builder") +def api_consistent_user_op_module_builder(op_type_name): + api = enable_if.unique( + [ + lazy_consistent_user_op_module_builder, + eager_consistent_user_op_module_builder, + ] + ) + return api(op_type_name) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def lazy_consistent_user_op_module_builder(op_type_name): + job_name = oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + op_name = name_scope.GetJobNameScopePrefix(job_name) + op_type_name + return UserOpModuleBuilder(LazyConsistentUserOpModule, op_name, op_type_name) + + +@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled) +def eager_consistent_user_op_module_builder(op_type_name): + job_name = 
oneflow._oneflow_internal.JobBuildAndInferCtx_GetCurrentJobName() + op_name = name_scope.GetJobNameScopePrefix(job_name) + op_type_name + return UserOpModuleBuilder(EagerConsistentUserOpModule, op_name, op_type_name) + + +class LazyConsistentUserOpModule(UserOpModule, UserOp): + def __init__(self, op_name, op_type_name): + UserOp.__init__(self, op_name, op_type_name) + + def InitOpKernel(self): + self.set_opkernel_object(None) + + def InferAndTryRun(self): + assert hob.in_global_mode(None) + compile_context.CurJobAddConsistentOp(self.op_conf_) + return self + + def MakeRemoteBlob(self, lbi): + return remote_blob_util.RemoteBlob(lbi) + + +class EagerConsistentUserOpModule(UserOpModule, UserOp): + def __init__(self, op_name, op_type_name): + UserOp.__init__(self, op_name, op_type_name) + + def InitOpKernel(self): + def BuildInstruction(builder): + if not isinstance( + self.op_conf, + oneflow._oneflow_internal.oneflow.core.operator.op_conf.OperatorConf, + ): + cfg_op_conf = oneflow._oneflow_internal.deprecated.MakeOpConfByString( + str(self.op_conf) + ) + self.set_opkernel_object(builder.NewOpKernelObject(cfg_op_conf)) + + oneflow._oneflow_internal.deprecated.LogicalRun(BuildInstruction) + + def InferAndTryRun(self): + assert hob.in_global_mode(None) + interpret_util.OpKernelConsistentForward(self.op_conf, self.opkernel_object) + return self + + def MakeRemoteBlob(self, lbi): + return remote_blob_util.EagerLogicalBlob(lbi) diff --git a/oneflow/compatible_single_client_python/ops/util/__init__.py b/oneflow/compatible_single_client_python/ops/util/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..617c36dd235e8d878fa6d1cf56a0ad5905a6f738 --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/util/__init__.py @@ -0,0 +1,15 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" diff --git a/oneflow/compatible_single_client_python/ops/util/custom_op_module.py b/oneflow/compatible_single_client_python/ops/util/custom_op_module.py new file mode 100644 index 0000000000000000000000000000000000000000..0c43b1802d1334386146ac45a9e6c0ca55b0a17b --- /dev/null +++ b/oneflow/compatible_single_client_python/ops/util/custom_op_module.py @@ -0,0 +1,179 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +from __future__ import absolute_import + +import importlib.util +import os +import os.path +import shutil +import subprocess as sp +import sys +import sysconfig +import numpy + +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python.framework import ( + sysconfig as oneflow_sysconfig, +) +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal + + +def run_cmd(cmd, cwd=None): + if cwd: + res = sp.run(cmd, cwd=cwd, shell=True, stdout=sp.PIPE, stderr=sp.STDOUT) + else: + res = sp.run(cmd, shell=True, stdout=sp.PIPE, stderr=sp.STDOUT) + out = res.stdout.decode("utf8") + if res.returncode != 0: + err_msg = "Run cmd failed: {}, output: {}".format(cmd, out) + raise Exception(err_msg) + if len(out) and out[-1] == "\n": + out = out[:-1] + return out + + +def compile(compiler, flags, link, inputs, output): + if os.path.exists(output): + return True + if isinstance(inputs, list): + cmd = "{} {} {} {} -o {}".format( + compiler, " ".join(inputs), flags, link, output + ) + else: + cmd = "{} {} {} {} -o {}".format(compiler, inputs, flags, link, output) + run_cmd(cmd) + return True + + +def get_cflags(): + return " ".join(oneflow_sysconfig.get_compile_flags()) + + +def get_lflags(): + return ( + " ".join(oneflow_sysconfig.get_link_flags()) + + " -Wl,-rpath " + + oneflow_sysconfig.get_lib() + ) + + +class PythonKernelRegistry(object): + """A helper class to store python kernel module + """ + + def __init__(self): + self.kernels_ = {} + + def Register(self, op_module_name, module): + self.kernels_[op_module_name] = module + + +_python_kernel_reg = PythonKernelRegistry() + + +@oneflow_export("experimental.custom_op_module") +class CustomOpModule(object): + def __init__(self, op_module_name, module_path=""): + self.op_module_name_ = op_module_name + self.api = None + self.so_path_ = "" + self.objs_ = [] + self.has_api_ = False + self.has_def_ = False + self.has_py_kernel_ = False + self.has_cpu_kernel_ = False + self.has_gpu_kernel_ = False + self.got_so_ = False + + module_path = os.path.normpath(module_path) + pwd_path = os.getcwd() + if module_path != "." 
and module_path != pwd_path:
+            module_folder = os.path.join(module_path, self.op_module_name_)
+            pwd_folder = os.path.join(pwd_path, self.op_module_name_)
+            if os.path.exists(pwd_folder):
+                shutil.rmtree(pwd_folder)
+            shutil.copytree(module_folder, pwd_folder)
+
+        self.src_prefix_ = os.path.join(
+            pwd_path, self.op_module_name_, self.op_module_name_
+        )
+
+        out_path = os.path.join(pwd_path, self.op_module_name_, "out")
+        if not os.path.exists(out_path):
+            os.makedirs(out_path)
+        self.out_prefix_ = os.path.join(out_path, self.op_module_name_)
+
+    def py_api(self):
+        assert os.path.exists("{}_py_api.py".format(self.src_prefix_))
+        spec = importlib.util.spec_from_file_location(
+            self.op_module_name_, "{}_py_api.py".format(self.src_prefix_)
+        )
+        self.api = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(self.api)
+        return self
+
+    def cpp_def(self):
+        flags = "-std=c++11 -c -fPIC -O2 " + get_cflags()
+        compile(
+            "g++",
+            flags,
+            get_lflags(),
+            "{}_cpp_def.cpp".format(self.src_prefix_),
+            "{}_cpp_def.o".format(self.out_prefix_),
+        )
+        self.objs_.append("{}_cpp_def.o".format(self.out_prefix_))
+        self.has_def_ = True
+        return self
+
+    def py_kernel(self):
+        assert os.path.exists("{}_py_kernel.py".format(self.src_prefix_))
+        spec = importlib.util.spec_from_file_location(
+            self.op_module_name_, "{}_py_kernel.py".format(self.src_prefix_)
+        )
+        kernel = importlib.util.module_from_spec(spec)
+        spec.loader.exec_module(kernel)
+        _python_kernel_reg.Register(self.op_module_name_, kernel)
+        oneflow._oneflow_internal.RegisterPyKernelCaller(self.op_module_name_)
+        self.has_py_kernel_ = True
+        return self
+
+    def cpp_kernel(self):
+        flags = "-std=c++11 -c -fPIC -O2 " + get_cflags()
+        compile(
+            "g++",
+            flags,
+            "",
+            "{}_cpp_kernel.cpp".format(self.src_prefix_),
+            "{}_cpp_kernel.o".format(self.out_prefix_),
+        )
+        self.objs_.append("{}_cpp_kernel.o".format(self.out_prefix_))
+        self.has_cpu_kernel_ = True
+        return self
+
+    def gpu_kernel(self):
+        raise NotImplementedError
+
+    def build_load(self):
+        if len(self.objs_) > 0:
+            flags = "-std=c++11 -shared -fPIC " + get_cflags()
+            compile(
+                "g++", flags, get_lflags(), self.objs_, "{}.so".format(self.out_prefix_)
+            )
+            self.got_so_ = True
+            self.so_path_ = self.out_prefix_ + ".so"
+
+        flow.config.load_library_now(self.so_path_)
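+
+# A minimal usage sketch (comment only; the "my_relu" module name and its file
+# layout are illustrative assumptions, not files shipped with this change):
+#
+#   import oneflow.compatible.single_client as flow
+#
+#   module = flow.experimental.custom_op_module("my_relu", "/path/to/op/modules")
+#   module.py_api().cpp_def().cpp_kernel().build_load()
+#   # After build_load(), the python API defined in my_relu_py_api.py is
+#   # reachable via module.api.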
+""" +from __future__ import absolute_import + +import uuid +from typing import Callable, Optional, Union + +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.compatible_single_client_python.framework import ( + compile_context as compile_context, +) +from oneflow.compatible_single_client_python.framework import id_util as id_util +from oneflow.compatible_single_client_python.framework import ( + local_blob as local_blob_util, +) +from oneflow.compatible_single_client_python.framework import ( + remote_blob as remote_blob_util, +) +from oneflow.compatible_single_client_python.framework import watcher as watcher_util +from oneflow.compatible_single_client_python.framework import typing as oft +from oneflow.compatible_single_client_python.framework import typing_util as oft_util +from oneflow.compatible_single_client_python.lib.core import enable_if as enable_if +from oneflow.compatible_single_client_python.framework import hob as hob +from oneflow.core.job.lbi_diff_watcher_info_pb2 import LbiAndDiffWatcherUuidPair +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +from oneflow.compatible_single_client_python import eager as eager_util +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from oneflow._oneflow_internal import ConsistentBlob, MirroredBlob +import inspect +import numpy as np + + +@oneflow_export("watch") +def Watch( + blob_watched: oneflow._oneflow_internal.BlobDesc, + handler_or_prompt: Optional[Union[Callable, str]] = None, +) -> None: + r"""Register callback for a blob. The callback function will be called after the computation produce the blob finishes. We can use it to watch the values of Blob. + + Args: + blob_watched: a `Blob` + handler_or_prompt: a function has an argument of a `Blob` + + For example: + + Example 1: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + def watch_handler(y: tp.Numpy): + print("out", y) + + + @flow.global_function() + def watch_Job() -> None: + init = flow.constant_initializer(2.5) + variable = flow.get_variable( + "variable-weight", + shape=(5, ), + initializer=init, + trainable=True + ) + flow.watch(variable, watch_handler) + + + checkpoint = flow.train.CheckPoint() + checkpoint.init() + watch_Job() + + # out [2.5 2.5 2.5 2.5 2.5] + + Example 2: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + def watch_handler(y: tp.Numpy): + print("out", y) + + + @flow.global_function() + def watch_Job(x: tp.Numpy.Placeholder((1, 3, 2, 2)) + ) -> None: + initializer = flow.truncated_normal(0.1) + conv2d = flow.layers.conv2d( + x, + filters=3, + kernel_size=1, + strides=1, + padding='SAME', + kernel_initializer=initializer, + name="Conv2d" + ) + + flow.watch(conv2d, watch_handler) + + + checkpoint = flow.train.CheckPoint() + checkpoint.init() + x = np.ones(shape=(1, 3, 2, 2)).astype(np.float32) + watch_Job(x) + + # out [[[[ 0.03757111 0.03757111] + # [ 0.03757111 0.03757111]] + + # [[-0.36131713 -0.36131713] + # [-0.36131713 -0.36131713]] + + # [[-0.12266113 -0.12266113] + # [-0.12266113 -0.12266113]]]] + + """ + api = enable_if.unique([EagerWatch, LazyWatch]) + return api(blob_watched, handler_or_prompt) + + +@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled) +def EagerWatch(blob_watched, handler_or_prompt=None): + handler = _CheckOrMakeHandler(blob_watched, handler_or_prompt) + local_blob = local_blob_util.MakeLocalBlob4EagerBlob(blob_watched) + handler(oft_util.TransformWatchedBlob(local_blob, handler)) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def LazyWatch(blob_watched, handler_or_prompt=None): + handler = _CheckOrMakeHandler(blob_watched, handler_or_prompt) + if isinstance(blob_watched, ConsistentBlob): + LazyConsistentWatch(blob_watched, handler) + elif isinstance(blob_watched, MirroredBlob): + handlers = _MakeSubConsistentBlobHandlers(blob_watched, handler) + for consistent_blob, sub_handler in zip( + blob_watched.sub_consistent_blob_list, handlers + ): + assert isinstance(consistent_blob, ConsistentBlob) + LazyConsistentWatch(consistent_blob, sub_handler) + else: + raise NotImplementedError + + +def LazyConsistentWatch(blob_watched, handler): + handler_uuid = str(uuid.uuid1()) + op_conf = op_conf_util.OperatorConf() + op_conf.name = id_util.UniqueStr("ForeignWatch_") + setattr(op_conf.foreign_watch_conf, "in", blob_watched.unique_name) + op_conf.foreign_watch_conf.handler_uuid = handler_uuid + device_name = blob_watched.parallel_conf.device_name(0) + with flow.scope.placement("cpu", "0:0"): + compile_context.CurJobAddOp(op_conf) + watcher_util.BindUuidAndHandler(handler_uuid, blob_watched, handler) + + +@oneflow_export("watch_diff") +def WatchDiff( + blob_watched: oneflow._oneflow_internal.BlobDesc, + handler_or_prompt: Optional[Union[Callable, str]] = None, +) -> None: + r"""Register callback for gradient of a blob. The callback will be called after the computation produce the gradient blob finishes. + + Args: + blob_watched: a `Blob` + handler_or_prompt: a function has an argument of a `Blob` + + For example: + + Example 1: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + + + BATCH_SIZE = 20 + + def watch_diff_handler(blob: tp.Numpy): + print("watch_diff_handler:", blob, blob.shape, blob.dtype) + + @flow.global_function(type="train") + def train_job( + images: tp.Numpy.Placeholder((BATCH_SIZE, 1, 28, 28), dtype=flow.float), + labels: tp.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.int32), + ) -> tp.Numpy: + initializer = flow.truncated_normal(0.1) + with flow.scope.placement("gpu", "0:0"): + reshape = flow.reshape(images, [images.shape[0], -1]) + hidden = flow.layers.dense( + reshape, + 512, + activation=flow.nn.relu, + kernel_initializer=initializer, + name="hidden", + ) + logits = flow.layers.dense( + hidden, 10, kernel_initializer=initializer, name="output" + ) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits(labels, logits, name="softmax_loss") + + lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.1]) + flow.optimizer.SGD(lr_scheduler, momentum=0).minimize(loss) + flow.watch_diff(logits, watch_diff_handler) + return loss + + + if __name__ == "__main__": + checkpoint = flow.train.CheckPoint() + checkpoint.init() + (train_images, train_labels), (test_images, test_labels) = flow.data.load_mnist( + BATCH_SIZE + ) + for i, (images, labels) in enumerate(zip(train_images, train_labels)): + loss = train_job(images, labels) + + + # watch_diff_handler: [[-1.88834548e-01 2.71021971e-03 2.28271242e-02 7.17673637e-03 + # 4.10183379e-03 8.93106461e-02 2.23669074e-02 3.86103359e-03 + # 3.12465224e-02 5.23346756e-03] ..... + + Example 2: + + .. code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + BATCH_SIZE = 20 + + def watch_diff_handler(blob: tp.Numpy): + print("watch_diff_handler:", blob) + + + @flow.global_function(type="train") + def watch_matmul_diff_job( + images: tp.Numpy.Placeholder((3, 3), dtype=flow.float), + ) -> None: + with flow.scope.placement("cpu", "0:0"): + weight_initializer = flow.constant_initializer(2) + weight_shape = (3, BATCH_SIZE) + weight = flow.get_variable( + "matmultest-weight", + shape=weight_shape, + initializer=weight_initializer) + output = flow.linalg.matmul(images, weight) + + lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.1]) + flow.optimizer.SGD(lr_scheduler, momentum=0.9).minimize(output) + flow.watch_diff(weight, watch_diff_handler) + + + if __name__ == "__main__": + check_point = flow.train.CheckPoint() + check_point.init() + + x = np.array([[1, 1, 1], + [1, 1, 1], + [1, 1, 1]]).astype(np.float32) + watch_matmul_diff_job(x) + + # watch_diff_handler: [[3. 3. 3.] + # [3. 3. 3.] + # [3. 3. 3.]] + + Example 3: + + .. 
code-block:: python + + import oneflow.compatible.single_client as flow + import oneflow.compatible.single_client.typing as tp + import numpy as np + + + def watch_diff_handler(blob: tp.Numpy): + print("watch_diff_handler:", blob, blob.shape, blob.dtype) + + + @flow.global_function(type="train") + def watch_conv_diff_job( + images: tp.Numpy.Placeholder((1, 1, 4, 4), dtype=flow.float), + ) -> None: + with flow.scope.placement("gpu", "0:0"): + weight_shape = (1, 1, 3, 3) + weight_initializer = flow.truncated_normal(0.1) + weight = flow.get_variable( + name="conv-weight", + shape=weight_shape, + initializer=weight_initializer + ) + output = flow.nn.conv2d(images, weight, strides=1, padding="VALID") + + lr_scheduler = flow.optimizer.PiecewiseConstantScheduler([], [0.1]) + flow.optimizer.SGD(lr_scheduler, momentum=0.9).minimize(output) + flow.watch_diff(weight, watch_diff_handler) + + + if __name__ == "__main__": + check_point = flow.train.CheckPoint() + check_point.init() + + x = np.array([[[[ 1., 2., 3., 4.], + [ 5., 6., 7., 8.], + [ 9., 10., 11., 12.], + [13., 14., 15., 16.]]]]).astype(np.float32) + + watch_conv_diff_job(x) + + # watch_diff_handler: [[[[14. 18. 22.] + # [30. 34. 38.] + # [46. 50. 54.]]]] + + """ + api = enable_if.unique([EagerWatchDiff, LazyWatchDiff]) + return api(blob_watched, handler_or_prompt) + + +@enable_if.condition(hob.in_global_mode & hob.eager_execution_enabled) +def EagerWatchDiff(blob_watched, handler_or_prompt=None): + handler = _CheckOrMakeHandler(blob_watched, handler_or_prompt) + handler_uuid = str(uuid.uuid1()) + lbi_and_uuid = LbiAndDiffWatcherUuidPair() + # Copy cfg LBI to proto LBI + lbi_and_uuid.lbi.op_name = blob_watched.lbi.op_name() + lbi_and_uuid.lbi.blob_name = blob_watched.lbi.blob_name() + lbi_and_uuid.watcher_uuid = handler_uuid + c_api_util.CurJobBuildAndInferCtx_AddLbiAndDiffWatcherUuidPair(lbi_and_uuid) + uuid2watch_handler = session_ctx.GetDefaultSession().uuid2watch_handler + uuid2watch_handler[handler_uuid] = lambda x: EagerWatch(x, handler_or_prompt) + + +@enable_if.condition(hob.in_global_mode & ~hob.eager_execution_enabled) +def LazyWatchDiff(blob_watched, handler_or_prompt=None): + handler = _CheckOrMakeHandler(blob_watched, handler_or_prompt) + if isinstance(blob_watched, ConsistentBlob): + LazyConsistentWatchDiff(blob_watched, handler) + elif isinstance(blob_watched, MirroredBlob): + handlers = _MakeSubConsistentBlobHandlers(blob_watched, handler) + for consistent_blob, sub_handler in zip( + blob_watched.sub_consistent_blob_list, handlers + ): + assert isinstance(consistent_blob, ConsistentBlob) + LazyConsistentWatchDiff(consistent_blob, sub_handler) + else: + raise NotImplementedError + + +def LazyConsistentWatchDiff(blob_watched, handler): + handler_uuid = str(uuid.uuid1()) + lbi_and_uuid = LbiAndDiffWatcherUuidPair() + # Copy cfg LBI to proto LBI + lbi_and_uuid.lbi.op_name = blob_watched.lbi.op_name() + lbi_and_uuid.lbi.blob_name = blob_watched.lbi.blob_name() + lbi_and_uuid.watcher_uuid = handler_uuid + c_api_util.CurJobBuildAndInferCtx_AddLbiAndDiffWatcherUuidPair(lbi_and_uuid) + watcher_util.BindUuidAndHandler(handler_uuid, blob_watched, handler) + + +def _CheckOrMakeHandler(blob_watched, handler_or_prompt): + if callable(handler_or_prompt): + parameters = inspect.signature(handler_or_prompt).parameters + oft_util.CheckWatchCallbackParameterAnnotation(parameters) + annotation = parameters[list(parameters.keys())[0]].annotation + oft_util.CheckWatchedBlobByAnnotation(blob_watched, annotation) + return handler_or_prompt + prompt = 
handler_or_prompt + + def Handler(x: GetTypeAnnotation(blob_watched)): + if prompt is not None: + print(str(prompt)) + print(x) + + return Handler + + +def _MakeSubConsistentBlobHandlers(blob_watched, handler): + assert isinstance(blob_watched, MirroredBlob) + handler4parallel_id_and_local_blob = _MakeHandler4ParallelIdAndLocalBlob( + blob_watched, handler + ) + return [ + _WrapperHandler4ParallelIdAndLocalBlob(i, handler4parallel_id_and_local_blob) + for i in range(len(blob_watched.sub_consistent_blob_list)) + ] + + +def _WrapperHandler4ParallelIdAndLocalBlob( + parallel_id, handler4parallel_id_and_local_blob +): + return lambda local_blob: handler4parallel_id_and_local_blob( + parallel_id, local_blob + ) + + +def _MakeHandler4ParallelIdAndLocalBlob(blob_watched, handler): + parallel_id2consistent_local_blob = {} + len_sub_remote_blobs = len(blob_watched.sub_consistent_blob_list) + + def HandlerParallelIdAndLocalBlob(parallel_id, local_blob): + assert parallel_id not in parallel_id2consistent_local_blob + parallel_id2consistent_local_blob[parallel_id] = local_blob + if len(parallel_id2consistent_local_blob) != len_sub_remote_blobs: + return + local_blob_list = [ + parallel_id2consistent_local_blob[parallel_id] + for i in range(len_sub_remote_blobs) + ] + local_numpy = local_blob_list[0].numpy() + if len(local_blob_list) > 1: + print("WARNING: watch return tensor list will concat as axis = 0.") + local_numpy_list = [x.numpy() for x in local_blob_list] + local_numpy = np.concatenate(local_numpy_list, axis=0) + local_blob = local_blob_util.LocalBlob(local_numpy, blob_watched.is_dynamic) + handler(oft_util.TransformWatchedBlob(local_blob, handler)) + + return HandlerParallelIdAndLocalBlob + + +def GetTypeAnnotation(blob_watched): + # TODO(chengcheng): oft.Numpy support dynamic + if not blob_watched.is_dynamic: + return oft.Numpy + else: + return oft.ListNumpy diff --git a/oneflow/compatible_single_client_python/serving/__init__.py b/oneflow/compatible_single_client_python/serving/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/serving/inference_session.py b/oneflow/compatible_single_client_python/serving/inference_session.py new file mode 100644 index 0000000000000000000000000000000000000000..14e8a5ab72d735ecbf044eb54a7339ddaf087a8a --- /dev/null +++ b/oneflow/compatible_single_client_python/serving/inference_session.py @@ -0,0 +1,549 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import asyncio +import contextlib +import inspect +import numpy as np +import os +import enum +from google.protobuf import text_format as text_format + +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from oneflow._oneflow_internal.oneflow.core.job import job_conf as job_conf_proto_cfg + +from oneflow._oneflow_internal.oneflow.core.operator import ( + interface_blob_conf as interface_blob_conf_proto_cfg, +) +from oneflow._oneflow_internal.oneflow.core.common import shape as shape_proto_cfg +from oneflow._oneflow_internal.oneflow.core.common import data_type as dtype_proto_cfg +from oneflow._oneflow_internal.oneflow.core.job import sbp_parallel as sbp_parallel_cfg +from oneflow.core.job import job_conf_pb2 as job_conf_proto +from oneflow.core.operator import interface_blob_conf_pb2 as interface_blob_conf_proto +from oneflow.core.serving import saved_model_pb2 as saved_model_pb +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import ( + compile_context as compile_ctx, +) +from oneflow.compatible_single_client_python.framework import ( + session_util as session_util, +) +from oneflow.compatible_single_client_python.framework import ( + placement_util as placement_util, +) +from oneflow.compatible_single_client_python.framework import scope_util as scope_util +from oneflow.compatible_single_client_python.framework import ( + runtime_mode as runtime_mode, +) +from oneflow.compatible_single_client_python.framework import ( + job_instance as job_instance_util, +) +from oneflow.compatible_single_client_python.framework import ( + input_blob_def as input_blob_util, +) +from oneflow.compatible_single_client_python.framework import dtype as dtype_util +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + + +def _is_int(val): + try: + num = int(val) + except ValueError: + return False + return True + + +def _find_model_latest_version(saved_model_dir): + version_dirs = [] + for f in os.listdir(saved_model_dir): + if os.path.isdir(os.path.join(saved_model_dir, f)) and _is_int(f): + version_dirs.append(f) + + version_dirs.sort(reverse=True, key=lambda x: int(x)) + return version_dirs[0] + + +def _need_check_device_tag(op_conf): + if op_conf.HasField("return_conf"): + return False + + return op_conf.HasField("device_tag") + + +def _signature_proto_to_cfg(signature_proto, mut_signature_cfg): + assert isinstance(signature_proto, job_conf_proto.JobSignatureDef) + assert isinstance(mut_signature_cfg, job_conf_proto_cfg.JobSignatureDef) + + for input_name, input_def in signature_proto.inputs.items(): + input_def_cfg = job_conf_proto_cfg.JobInputDef() + input_def_cfg.mutable_lbi().set_op_name(input_def.lbi.op_name) + input_def_cfg.mutable_lbi().set_blob_name(input_def.lbi.blob_name) + _inferface_blob_conf_proto_to_cfg( + input_def.blob_conf, input_def_cfg.mutable_blob_conf() + ) + mut_signature_cfg.mutable_inputs()[input_name].CopyFrom(input_def_cfg) + + for output_name, output_def in signature_proto.outputs.items(): + output_def_cfg = job_conf_proto_cfg.JobOutputDef() + output_def_cfg.mutable_lbi().set_op_name(output_def.lbi.op_name) + output_def_cfg.mutable_lbi().set_blob_name(output_def.lbi.blob_name) + mut_signature_cfg.mutable_outputs()[output_name].CopyFrom(output_def_cfg) + + +def _inferface_blob_conf_proto_to_cfg( + inferface_blob_conf_proto, mut_inferface_blob_conf_cfg +): + assert isinstance( + inferface_blob_conf_proto, 
interface_blob_conf_proto.InterfaceBlobConf + ) + assert isinstance( + mut_inferface_blob_conf_cfg, interface_blob_conf_proto_cfg.InterfaceBlobConf + ) + + shape = shape_proto_cfg.ShapeProto() + for dim in inferface_blob_conf_proto.shape.dim: + shape.add_dim(dim) + + mut_inferface_blob_conf_cfg.mutable_shape().CopyFrom(shape) + dtype = dtype_proto_cfg.DataType(int(inferface_blob_conf_proto.data_type)) + mut_inferface_blob_conf_cfg.set_data_type(dtype) + + if inferface_blob_conf_proto.HasField("parallel_distribution"): + # TODO(guoran): Process Nd sbp, parallel_distribution_cfg CopyFrom parallel_distribution_proto + assert len(inferface_blob_conf_proto.parallel_distribution.sbp_parallel) == 1 + sbp_proto = inferface_blob_conf_proto.parallel_distribution.sbp_parallel[0] + if sbp_proto.HasField("split_parallel"): + split_axis = sbp_proto.split_parallel.axis + sbp = sbp_parallel_cfg.SbpParallel() + sbp.mutable_split_parallel().set_axis(split_axis) + mut_inferface_blob_conf_cfg.mutable_parallel_distribution().mutable_sbp_parallel().Add().CopyFrom( + sbp + ) + + mut_inferface_blob_conf_cfg.set_is_dynamic(inferface_blob_conf_proto.is_dynamic) + + +@oneflow_export("serving.ModelVersionPolicy") +class ModelVersionPolicy(enum.Enum): + LATEST = 1 + + +@oneflow_export("serving.SessionOption") +class SessionOption(object): + def __init__(self): + self.device_tag = "gpu" + self.device_num = 1 + self.is_mirrored_view = False + + +@oneflow_export("serving.InferenceSession") +class InferenceSession(object): + class SessionStatus(enum.Enum): + OPEN = 1 + RUNNING = 2 + CLOSED = 3 + + def __init__(self, option=None): + if option is None: + self.option_ = SessionOption() + else: + assert isinstance(option, SessionOption) + self.option_ = option + + self.is_mirrored_ = self.option_.is_mirrored_view + self.checkpoint_path_ = None + self.config_proto_ = None + self.job_name2job_conf_ = {} + self.inter_user_job_info_ = None + self.cur_job_name_ = None + self.inferface_name2info_ = {} + self.output_name2future_ = {} + self.job_futures_ = [] + self.status_ = None + + self._init_event_loop() + self.init() + + def __del__(self): + if self.status_ != self.SessionStatus.CLOSED: + self.close() + + def _init_event_loop(self): + self.event_loop_ = asyncio.get_event_loop() + if self.event_loop_.is_closed(): + asyncio.set_event_loop(asyncio.new_event_loop()) + self.event_loop_ = asyncio.get_event_loop() + + def init(self): + # env init + if not oneflow._oneflow_internal.IsEnvInited(): + flow.env.init() + + # session init + if not oneflow._oneflow_internal.IsSessionInited(): + self._make_config_proto() + session_util._TryCompleteConfigProto(self.config_proto_) + c_api_util.InitLazyGlobalSession(self.config_proto_) + + self.status_ = self.SessionStatus.OPEN + + def close(self): + self.event_loop_.run_until_complete(self.wait_for_all_jobs_finished()) + self.event_loop_.close() + + if self.status_ == self.SessionStatus.RUNNING: + oneflow._oneflow_internal.StopLazyGlobalSession() + oneflow._oneflow_internal.DestroyLazyGlobalSession() + elif self.status_ == self.SessionStatus.OPEN: + oneflow._oneflow_internal.DestroyLazyGlobalSession() + else: + pass + + self.status_ = self.SessionStatus.CLOSED + + def _check_status(self, *status): + check_success = False + for stat in status: + if self.status_ == stat: + check_success = True + break + + if not check_success: + caller_func_name = inspect.stack()[1].function + allowed_status = ",".join(str(stat) for stat in status) + raise ValueError( + "Calling {} is only allowed when status is {}, current 
status is {}".format( + caller_func_name, allowed_status, self.status_ + ) + ) + + def _make_config_proto(self): + if self.config_proto_ is None: + self.config_proto_ = session_util._GetDefaultConfigProto() + + if self.option_.device_tag == "gpu": + self.config_proto_.resource.gpu_device_num = self.option_.device_num + elif self.option_.device_tag == "cpu": + self.config_proto_.resource.cpu_device_num = self.option_.device_num + self.config_proto_.resource.gpu_device_num = 0 + else: + raise NotImplementedError( + "not supported device tag {}".format(self.option_.device_tag) + ) + + self.config_proto_.resource.enable_legacy_model_io = True + + def set_checkpoint_path(self, checkpoint_path): + self._check_status(self.SessionStatus.OPEN) + self.checkpoint_path_ = checkpoint_path + + def set_job_signature(self, job_name, signature): + assert isinstance(signature, job_conf_proto.JobSignatureDef) + job_conf = self._get_job_conf(job_name) + _signature_proto_to_cfg(signature, job_conf.mutable_signature()) + + def set_job_batch_size(self, job_name, batch_size): + self._check_status(self.SessionStatus.OPEN) + job_conf = self._get_job_conf(job_name) + for _, mut_input_def in job_conf.mutable_signature().mutable_inputs().items(): + mut_shape = mut_input_def.mutable_blob_conf().mutable_shape() + mut_shape.mutable_dim()[0] = batch_size + + def _get_job_conf(self, job_name): + if job_name in self.job_name2job_conf_: + return self.job_name2job_conf_[job_name] + else: + job_conf = job_conf_proto_cfg.JobConfigProto() + job_conf.set_job_name(job_name) + job_conf.mutable_predict_conf() + self.job_name2job_conf_[job_name] = job_conf + return job_conf + + @contextlib.contextmanager + def open(self, job_name, signature=None, batch_size=None): + self._check_status(self.SessionStatus.OPEN) + c_api_util.JobBuildAndInferCtx_Open(job_name) + + if signature is not None: + self.set_job_signature(job_name, signature) + + if isinstance(batch_size, int): + self.set_job_batch_size(job_name, batch_size) + + job_conf = self._get_job_conf(job_name) + c_api_util.CurJobBuildAndInferCtx_SetJobConf(job_conf) + + tag_and_dev_ids = placement_util.GetDefaultMachineDeviceIds( + self.config_proto_.resource + ) + scope = scope_util.MakeInitialScope( + job_conf, *tag_and_dev_ids, None, self.is_mirrored_ + ) + + with runtime_mode.ModeScope(runtime_mode.GLOBAL_MODE): + with scope_util.ScopeContext(scope): + self.cur_job_name_ = job_name + yield self + self.cur_job_name_ = None + + oneflow._oneflow_internal.JobBuildAndInferCtx_Close() + + def compile(self, op_list): + self._check_status(self.SessionStatus.OPEN) + scope = flow.current_scope() + device_tag = scope.device_parallel_desc_symbol.device_tag + for op_conf in op_list: + if _need_check_device_tag(op_conf) and op_conf.device_tag != device_tag: + print( + "WARNING: the device_tag of op {} is not equal to the device_tag of seesion's current scope" + " ({} vs. 
{})" + ", which may cause the op graph to be incompatible".format( + op_conf.name, op_conf.device_tag, device_tag + ) + ) + + compile_ctx.CurJobAddOp(op_conf) + + oneflow._oneflow_internal.CurJobBuildAndInferCtx_Complete() + oneflow._oneflow_internal.CurJobBuildAndInferCtx_Rebuild() + + def launch(self): + self._check_status(self.SessionStatus.OPEN) + oneflow._oneflow_internal.StartLazyGlobalSession() + self.inter_user_job_info_ = c_api_util.GetInterUserJobInfo() + self._run_load_checkpoint_job() + self.status_ = self.SessionStatus.RUNNING + + def load_saved_model( + self, + saved_model_dir, + model_version=ModelVersionPolicy.LATEST, + saved_model_meta_file_basename="saved_model", + graph_name=None, + signature_name=None, + ): + if not os.path.isdir(saved_model_dir): + raise ValueError("{} is not a valid directory".format(saved_model_dir)) + + if isinstance(model_version, int): + pass + elif model_version == ModelVersionPolicy.LATEST: + model_version = _find_model_latest_version(saved_model_dir) + else: + raise NotImplementedError + + saved_model_path = os.path.join(saved_model_dir, str(model_version)) + if not os.path.isdir(saved_model_path): + raise ValueError( + "version {} of saved model in dir {} do not exist".format( + model_version, saved_model_dir + ) + ) + + subfiles = list(os.listdir(saved_model_path)) + saved_model_meta_pb_filename = saved_model_meta_file_basename + ".pb" + saved_model_meta_prototxt_filename = ( + saved_model_meta_file_basename + ".prototxt" + ) + saved_model_proto = saved_model_pb.SavedModel() + if saved_model_meta_pb_filename in subfiles: + saved_model_meta_file_path = os.path.join( + saved_model_path, saved_model_meta_pb_filename + ) + with open(saved_model_meta_file_path, "rb") as f: + saved_model_proto.ParseFromString(f.read()) + elif saved_model_meta_prototxt_filename in subfiles: + saved_model_meta_file_path = os.path.join( + saved_model_path, saved_model_meta_prototxt_filename + ) + with open(saved_model_meta_file_path, "rt") as f: + text_format.Merge(f.read(), saved_model_proto) + else: + raise ValueError( + "saved model meta file {} do not exist in {}".format( + saved_model_meta_file_basename, saved_model_path + ) + ) + # set checkpoint + self.set_checkpoint_path( + os.path.join(saved_model_path, saved_model_proto.checkpoint_dir) + ) + # get signature + signature = None + if graph_name is None: + graph_name = saved_model_proto.default_graph_name + else: + if graph_name not in saved_model_proto.graphs: + raise ValueError("graph {} do not exist".format(graph_name)) + graph_def = saved_model_proto.graphs[graph_name] + if signature_name is None and graph_def.HasField("default_signature_name"): + signature_name = graph_def.default_signature_name + if signature_name is not None: + if signature_name not in graph_def.signatures: + raise ValueError("signature {} do not exist".format(signature_name)) + else: + signature = graph_def.signatures[signature_name] + + # compile job + with self.open(graph_name, signature): + self.compile(graph_def.op_list) + + def print_job_set(self): + self._check_status(self.SessionStatus.OPEN, self.SessionStatus.RUNNING) + job_set = c_api_util.GetJobSet() + for job in job_set.job: + print("job_name:", job.job_conf.job_name) + for op_conf in job.net.op: + print("\top_name:", op_conf.name) + + def list_jobs(self): + self._check_status(self.SessionStatus.RUNNING) + return list(self.job_name2job_conf_.keys()) + + def list_inputs(self): + self._check_status(self.SessionStatus.RUNNING) + input_names = [] + for ( + input_name, + _, + ) in 
self.inter_user_job_info_.input_or_var_op_name2push_job_name.items(): + input_names.append(input_name) + return tuple(input_names) + + def list_outputs(self): + self._check_status(self.SessionStatus.RUNNING) + output_names = [] + for ( + output_name, + _, + ) in self.inter_user_job_info_.output_or_var_op_name2pull_job_name.items(): + output_names.append(output_name) + return tuple(output_names) + + def input_info(self, input_name, job_name=None): + return self._get_op_blob_info(job_name, input_name, "out") + + def output_info(self, output_name, job_name=None): + return self._get_op_blob_info(job_name, output_name, "in") + + def _get_op_blob_info(self, job_name, op_name, blob_name): + self._check_status(self.SessionStatus.OPEN, self.SessionStatus.RUNNING) + if op_name in self.inferface_name2info_: + return self.inferface_name2info_[op_name] + + job_name = job_name or self.cur_job_name_ + if job_name is None: + raise ValueError("please specify job_name") + + lbn = oneflow._oneflow_internal.JobBuildAndInferCtx_GetOpBlobLbn( + job_name, op_name, blob_name + ) + shape = c_api_util.JobBuildAndInferCtx_GetStaticShape(job_name, lbn) + dtype = c_api_util.JobBuildAndInferCtx_GetDataType(job_name, lbn) + dtype = dtype_util.convert_proto_dtype_to_oneflow_dtype(dtype) + # TODO: other info + info = dict(shape=shape, dtype=dtype) + self.inferface_name2info_[op_name] = info + return info + + def run(self, job_name, **kwargs): + self._check_status(self.SessionStatus.RUNNING) + return self.event_loop_.run_until_complete(self.async_run(job_name, **kwargs)) + + async def async_run(self, job_name, **kwargs): + self._check_status(self.SessionStatus.RUNNING) + self._run_push_jobs(**kwargs) + job_inst = job_instance_util.MakeUserJobInstance(job_name) + self._run_job(job_inst) + output_futures = tuple(self._run_pull_jobs(job_name).values()) + return await asyncio.gather(*output_futures) + + def _run_job(self, job_inst): + future = self.event_loop_.create_future() + + def job_finish_cb(_): + self.event_loop_.call_soon_threadsafe(future.set_result, None) + + job_inst.AddPostFinishCallback(job_finish_cb) + oneflow._oneflow_internal.LaunchJob(job_inst) + self.job_futures_.append(future) + + def _run_push_jobs(self, **kwargs): + for ( + input_name, + push_job_name, + ) in self.inter_user_job_info_.input_or_var_op_name2push_job_name.items(): + if input_name not in kwargs: + raise ValueError('input "{}" is absent'.format(input_name)) + + input_numpy = kwargs[input_name] + if not isinstance(input_numpy, np.ndarray): + raise ValueError('input "{}" requires numpy.ndarray'.format(input_name)) + + push_fn = input_blob_util._MakePushNdarrayCallback(input_numpy) + push_job_inst = job_instance_util.MakePushJobInstance( + push_job_name, input_name, push_fn + ) + self._run_job(push_job_inst) + + def _run_pull_jobs(self, user_job_name): + output_futures = {} + for ( + output_name, + pull_job_name, + ) in self.inter_user_job_info_.output_or_var_op_name2pull_job_name.items(): + future = self.event_loop_.create_future() + pull_fn = self._make_pull_job_cb(output_name, user_job_name, future) + pull_job_inst = job_instance_util.MakePullJobInstance( + pull_job_name, output_name, pull_fn + ) + self._run_job(pull_job_inst) + output_futures[output_name] = future + + return output_futures + + def _make_pull_job_cb(self, output_name, user_job_name, future): + output_lbn = oneflow._oneflow_internal.JobBuildAndInferCtx_GetOpBlobLbn( + user_job_name, output_name, "out" + ) + split_axis = 
c_api_util.JobBuildAndInferCtx_GetSplitAxisFromProducerView( + user_job_name, output_lbn + ) + + def pull_fn(ofblob): + ndarray = ofblob.CopyToNdarray() + self.event_loop_.call_soon_threadsafe(future.set_result, ndarray) + + return pull_fn + + def _run_load_checkpoint_job(self): + if self.checkpoint_path_ is None: + raise ValueError("checkpoint path not set") + + def copy_model_load_path(ofblob): + ofblob.CopyFromNdarray( + np.frombuffer(self.checkpoint_path_.encode("ascii"), dtype=np.int8) + ) + + load_checkpoint_job_inst = job_instance_util.MakeJobInstance( + self.inter_user_job_info_.global_model_load_job_name, + push_cb=copy_model_load_path, + ) + self._run_job(load_checkpoint_job_inst) + + async def wait_for_all_jobs_finished(self): + await asyncio.gather(*self.job_futures_) + self.job_futures_ = [] diff --git a/oneflow/compatible_single_client_python/serving/saved_model_builder.py b/oneflow/compatible_single_client_python/serving/saved_model_builder.py new file mode 100644 index 0000000000000000000000000000000000000000..e615732ae04996f1280a98714d4dbd4a808da697 --- /dev/null +++ b/oneflow/compatible_single_client_python/serving/saved_model_builder.py @@ -0,0 +1,348 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
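With inference_session.py now complete, the intended end-to-end flow is roughly the following sketch; the saved-model path and the input name `image` are assumptions for illustration, not part of this diff:

```python
import numpy as np
import oneflow.compatible.single_client as flow

option = flow.serving.SessionOption()
option.device_tag = "cpu"  # default is "gpu"
sess = flow.serving.InferenceSession(option)
sess.load_saved_model("/path/to/saved_model")  # defaults to ModelVersionPolicy.LATEST
sess.launch()  # starts the lazy session and runs the load-checkpoint job

print(sess.list_inputs(), sess.list_outputs())
x = np.random.rand(1, 3, 224, 224).astype(np.float32)
# run() drives the push/user/pull jobs on the internal asyncio loop and blocks
outputs = sess.run(sess.list_jobs()[0], image=x)  # "image" is an assumed input name
sess.close()
```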
+""" + +import os +import typing +from google.protobuf import text_format + +from oneflow.compatible import single_client as flow +import oneflow._oneflow_internal +from oneflow.compatible_single_client_python.framework import c_api_util as c_api_util +from oneflow.compatible_single_client_python.framework import ( + session_context as session_ctx, +) +from oneflow.core.serving import saved_model_pb2 as saved_model_pb +from oneflow.core.job import job_conf_pb2 as job_conf_pb +from oneflow.core.register import logical_blob_id_pb2 as logical_blob_id_pb +from oneflow.core.operator import interface_blob_conf_pb2 as interface_blob_conf_pb +from oneflow.core.job import sbp_parallel_pb2 as sbp_parallel_pb +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + + +@oneflow_export("saved_model.ModelBuilder") +class ModelBuilder(object): + DEFAULT_CHECKPOINT_DIR = "variables" + DEFAULT_SAVED_MODEL_FILE_BASENAME = "saved_model" + + def __init__(self, save_path: str): + if not isinstance(save_path, str): + raise ValueError( + "param 'save_path' must be str, but got {}".format(save_path) + ) + + self.version_ = None + self.checkpoint_dir_ = self.DEFAULT_CHECKPOINT_DIR + self.saved_model_dir_ = save_path + self.saved_model_pb_filename_ = "{}.pb".format( + self.DEFAULT_SAVED_MODEL_FILE_BASENAME + ) + self.saved_model_pbtxt_filename_ = "{}.prototxt".format( + self.DEFAULT_SAVED_MODEL_FILE_BASENAME + ) + self.saved_model_proto_ = saved_model_pb.SavedModel() + self.graph_builders_ = {} + + @property + def proto(self): + return self.saved_model_proto_ + + def ModelName(self, model_name: str): + assert isinstance(model_name, str) + self.proto.name = model_name + return self + + def Version(self, version: int): + assert isinstance(version, int) + self.version_ = version + return self + + def AddFunction(self, func): + func_name = func.__name__ + if func_name in self.graph_builders_: + raise ValueError("function with name {} already exists".format(func_name)) + graph_builder = GraphBuilder(func_name, self) + self.graph_builders_[func_name] = graph_builder + if not self.proto.HasField("default_graph_name"): + self.proto.default_graph_name = func_name + return graph_builder + + def _check_input_output_name_conflict(self): + name_set = set() + lbn_set = set() + + def check_name_conflict(name, interface_def): + if name in name_set: + raise ValueError("input conflict, {} already exist".format(name)) + name_set.add(name) + lbn = Lbi2Lbn(interface_def.lbi) + if lbn in lbn_set: + raise ValueError( + "input conflict, {} already bind to other input".format(lbn) + ) + lbn_set.add(lbn) + + for _, graph_def in self.proto.graphs.items(): + for _, signature_def in graph_def.signatures.items(): + for input_name, input_def in signature_def.inputs.items(): + check_name_conflict(input_name, input_def) + for output_name, output_def in signature_def.outputs.items(): + check_name_conflict(output_name, output_def) + + @session_ctx.try_init_default_session + def Save(self, save_model_before_graph_complete: bool = True): + self._check_input_output_name_conflict() + for _, graph_builder in self.graph_builders_.items(): + if not graph_builder.finished: + graph_builder.Finish() + + sess = session_ctx.GetDefaultSession() + for graph_name, graph_def in self.proto.graphs.items(): + job = sess.Job( + graph_name + if save_model_before_graph_complete + else graph_name + "_after_complete" + ) + graph_def.op_list.extend(list(job.net.op)) + + if not os.path.exists(self.saved_model_dir_): + 
os.makedirs(self.saved_model_dir_) + + if self.version_ is None: + raise ValueError("model version is not set") + + version_dir = os.path.join(self.saved_model_dir_, str(self.version_)) + if os.path.exists(version_dir): + raise ValueError( + 'Directory of model "{}" version "{}" already exists.'.format( + self.saved_model_dir_, self.version_ + ) + ) + + os.makedirs(version_dir) + self.proto.version = self.version_ + + checkpoint_path = os.path.join(version_dir, self.checkpoint_dir_) + flow.checkpoint.save(checkpoint_path) + self.proto.checkpoint_dir = self.checkpoint_dir_ + + saved_model_pb_path = os.path.join(version_dir, self.saved_model_pb_filename_) + with open(saved_model_pb_path, "wb") as writer: + writer.write(self.saved_model_proto_.SerializeToString()) + + saved_model_pbtxt_path = os.path.join( + version_dir, self.saved_model_pbtxt_filename_ + ) + with open(saved_model_pbtxt_path, "wt") as writer: + writer.write(text_format.MessageToString(self.saved_model_proto_)) + + +@oneflow_export("saved_model.GraphBuilder") +class GraphBuilder(object): + def __init__(self, name: str, model_builder: typing.Optional[ModelBuilder] = None): + if not isinstance(name, str): + raise ValueError("param 'name' must be str, but got {}".format(name)) + + if not isinstance(model_builder, ModelBuilder) and model_builder is not None: + raise ValueError( + "param 'model_builder' must be an instance of ModelBuilder or None" + ) + + if model_builder is not None: + if name in model_builder.proto.graphs: + raise ValueError( + "graph function ({}) is already added to model ({})".format( + name, model_builder.proto.name + ) + ) + + self.proto_ = model_builder.proto.graphs[name] + self.owner_ = model_builder + else: + self.proto_ = saved_model_pb.GraphDef() + self.owner_ = None + + self.name_ = name + self.finished_ = False + self.signature_builders_ = {} + + @property + def name(self): + return self.name_ + + @property + def proto(self): + return self.proto_ + + @property + def finished(self): + return self.finished_ + + def AddSignature(self, signature_name: str): + assert isinstance(signature_name, str) + if signature_name in self.signature_builders_: + raise ValueError("signature name {} already exists".format(signature_name)) + signature_builder = SignatureBuilder(signature_name, self) + self.signature_builders_[signature_name] = signature_builder + if not self.proto.HasField("default_signature_name"): + self.proto.default_signature_name = signature_name + return signature_builder + + def Finish(self): + assert self.finished is False + for _, signature_def in self.proto.signatures.items(): + for _, input_def in signature_def.inputs.items(): + input_lbn = Lbi2Lbn(input_def.lbi) + oneflow._oneflow_internal.JobBuildAndInferCtx_CheckLbnValidAndExist( + self.name, input_lbn + ) + GetInterfaceBlobConf(self.name, input_lbn, input_def.blob_conf) + + for _, output_def in signature_def.outputs.items(): + oneflow._oneflow_internal.JobBuildAndInferCtx_CheckLbnValidAndExist( + self.name, Lbi2Lbn(output_def.lbi) + ) + + self.finished_ = True + + def OwnerModelBuilder(self): + return self.owner_ + + def AsDefault(self): + if self.owner_ is not None: + self.owner_.proto.default_graph_name = self.name + + return self + + +@oneflow_export("saved_model.SignatureBuilder") +class SignatureBuilder(object): + def __init__(self, name: str, graph_builder: typing.Optional[GraphBuilder] = None): + if not isinstance(name, str): + raise ValueError("param 'name' must be str, but got {}".format(name)) + + if not isinstance(graph_builder, 
GraphBuilder) and graph_builder is not None: + raise ValueError( + "param 'graph_builder' must be an instance of GraphBuilder or None" + ) + + if graph_builder is not None: + if name in graph_builder.proto.signatures: + raise ValueError( + "signature ({}) already exists in graph ({})".format( + name, graph_builder.name, + ) + ) + + self.proto_ = graph_builder.proto.signatures[name] + self.owner_ = graph_builder + else: + self.proto_ = job_conf_pb.JobSignatureDef() + self.owner_ = None + + self.name_ = name + + @property + def name(self): + return self.name_ + + @property + def proto(self): + return self.proto_ + + def Input(self, input_name: str, lbn: str): + assert isinstance(input_name, str) + assert isinstance(lbn, str) + assert "/" in lbn + + if input_name in self.proto.inputs: + raise ValueError( + "input_name ({}) already exists in signature ({}) of graph ({})".format( + input_name, self.name, self.owner_.name + ) + ) + + input_def = self.proto.inputs[input_name] + Lbn2Lbi(lbn, input_def.lbi) + return self + + def Output(self, output_name: str, lbn: str): + assert isinstance(output_name, str) + assert isinstance(lbn, str) + assert "/" in lbn + + if output_name in self.proto.outputs: + raise ValueError( + "output_name ({}) already exists in signature ({}) of graph ({})".format( + output_name, self.name, self.owner_.name + ) + ) + + output_def = self.proto.outputs[output_name] + Lbn2Lbi(lbn, output_def.lbi) + return self + + def OwnerGraphBuilder(self): + return self.owner_ + + def AsDefault(self): + if self.owner_ is not None: + self.owner_.proto.default_signature_name = self.name + + return self + + +def GetInterfaceBlobConf(job_name, lbn, blob_conf=None): + assert isinstance(job_name, str) + assert isinstance(lbn, str) + if blob_conf is None: + blob_conf = interface_blob_conf_pb.InterfaceBlobConf() + else: + assert isinstance(blob_conf, interface_blob_conf_pb.InterfaceBlobConf) + + shape = c_api_util.JobBuildAndInferCtx_GetStaticShape(job_name, lbn) + dtype = c_api_util.JobBuildAndInferCtx_GetDataType(job_name, lbn) + split_axis = c_api_util.JobBuildAndInferCtx_GetSplitAxisFromProducerView( + job_name, lbn + ) + is_dynamic = c_api_util.JobBuildAndInferCtx_IsDynamic(job_name, lbn) + + blob_conf.shape.dim.extend(shape) + blob_conf.data_type = dtype + if split_axis is not None: + sbp_parallel = sbp_parallel_pb.SbpParallel() + sbp_parallel.split_parallel.axis = split_axis + blob_conf.parallel_distribution.sbp_parallel.extend([sbp_parallel]) + + blob_conf.is_dynamic = is_dynamic + return blob_conf + + +def Lbn2Lbi(lbn, lbi=None): + assert isinstance(lbn, str) + assert "/" in lbn, 'invalid lbn "{}"'.format(lbn) + + [op_name, blob_name] = lbn.split("/") + if lbi is None: + lbi = logical_blob_id_pb.LogicalBlobId() + + lbi.op_name = op_name + lbi.blob_name = blob_name + return lbi + + +def Lbi2Lbn(lbi): + assert isinstance(lbi, logical_blob_id_pb.LogicalBlobId) + return "{}/{}".format(lbi.op_name, lbi.blob_name) diff --git a/oneflow/compatible_single_client_python/summary/__init__.py b/oneflow/compatible_single_client_python/summary/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/compatible_single_client_python/summary/summary_graph.py b/oneflow/compatible_single_client_python/summary/summary_graph.py new file mode 100644 index 0000000000000000000000000000000000000000..507619dae81d7573bec6f43b596ead5992b05766 --- /dev/null +++ 
b/oneflow/compatible_single_client_python/summary/summary_graph.py @@ -0,0 +1,69 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import os +from oneflow.core.summary import projector_pb2 as projector_pb2 +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import oneflow._oneflow_internal +import time +import logging + +from oneflow.compatible import single_client as flow + + +@oneflow_export("summary.Graph") +class Graph(object): + r"""The class of Graph + + This class can write 'computing_graph' or 'structure_graph' into log file + """ + + def __init__(self, logdir=None): + r"""Create a Graph object + + Args: + logdir: The log dir + + Raises: + Exception: If log dir is None or illegal + """ + if logdir is None: + raise Exception("logdir should not be None!") + logdir += "/graph" + if not os.path.exists(logdir): + os.makedirs(logdir) + self.logdir_ = logdir + self.structure_graph_filename_ = None + self.compute_graph_filename_ = None + + def write_structure_graph(self): + if (self.structure_graph_filename_ is not None) and ( + os.path.exists(self.structure_graph_filename_) + ): + raise OSError("You must create only one structure graph log file!") + + self.structure_graph_filename_ = self.logdir_ + "/structure_graph.json" + struct_graph_str = oneflow._oneflow_internal.GetSerializedStructureGraph() + with open(self.structure_graph_filename_, "w", encoding="utf-8") as f: + f.write(str(struct_graph_str)) + f.flush() + + @property + def logdir(self): + return self.logdir_ + + @property + def structure_graph_filename(self): + return self.structure_graph_filename_ diff --git a/oneflow/compatible_single_client_python/summary/summary_hparams.py b/oneflow/compatible_single_client_python/summary/summary_hparams.py new file mode 100644 index 0000000000000000000000000000000000000000..23b9b7a194b1cb2e25ac3373efd13625f40b12e9 --- /dev/null +++ b/oneflow/compatible_single_client_python/summary/summary_hparams.py @@ -0,0 +1,356 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
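For saved_model_builder.py above, the builder chain is meant to be used roughly as follows; `predict_job` and the lbns `Input_0/out` / `fc1000/out` are hypothetical stand-ins for an already-compiled single-client job and its blob names, not part of this diff:

```python
import oneflow.compatible.single_client as flow

# Assumes `predict_job` is an existing @flow.global_function job.
builder = flow.saved_model.ModelBuilder("/path/to/export")
builder.ModelName("mymodel").Version(1)
graph = builder.AddFunction(predict_job)  # graph name defaults to the function name
sig = graph.AddSignature("predict")
sig.Input("image", "Input_0/out")   # lbn format is "op_name/blob_name"
sig.Output("score", "fc1000/out")
builder.Save()  # finishes graphs, saves the checkpoint plus saved_model.pb/.prototxt
```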
+""" +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np +import six +import hashlib +import json +import time + +from oneflow.core.summary import plugin_data_pb2 as plugin_data_pb2 +from oneflow.core.summary import summary_pb2 as summary_pb2 +from oneflow.core.summary import event_pb2 as event_pb2 +from oneflow.core.summary import tensor_pb2 as tensor_pb2 +from oneflow.core.summary import projector_pb2 as projector_pb2 +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export + + +from oneflow.compatible import single_client as flow + + +@oneflow_export("summary.text") +def text(text, tag=None): + r"""Add a text list to Summary + + Args: + text: A str list + tag: The tag of summary + + Returns: + A protobuf message [Summary] + """ + if isinstance(text, (tuple, list)) and len(text) > 0: + if not isinstance(tag, str) or tag is None: + tag = "text" + text_size = len(text) + tensor_shape = tensor_pb2.TensorShapeProto() + dim = tensor_shape.dim.add() + dim.size = text_size + + tensor = tensor_pb2.TensorProto( + dtype=tensor_pb2.DT_STRING, tensor_shape=tensor_shape, + ) + for idx in range(text_size): + tensor.string_val.append(text[idx].encode("utf-8")) + summary = summary_pb2.Summary() + value = summary.value.add( + tag=tag, + metadata=summary_pb2.SummaryMetadata( + plugin_data=summary_pb2.SummaryMetadata.PluginData(plugin_name="text") + ), + tensor=tensor, + ) + return summary + + +def _get_tensor(values, dtype=None, shape=None): + array = np.empty(shape, dtype=np.float) + tensor_shape = tensor_pb2.TensorShapeProto() + dim = tensor_shape.dim.add() + dim.size = 0 + + tensor_proto = tensor_pb2.TensorProto( + dtype=tensor_pb2.DT_FLOAT, tensor_shape=tensor_shape, + ) + proto_values = array.ravel() + tensor_proto.float_val.extend([np.asscalar(x) for x in proto_values]) + return tensor_proto + + +@oneflow_export("summary.hparams") +def hparams(hparams): + r"""Add hparams to Summary + + Args: + hparams: A dict of Hparams + + Raises: + TypeError: If the type of hparam not in (str, int, float, bool) + TypeError: If the type of metric not in (float, int) + + Returns: + A protobuf message [Summary] + """ + hparams, metrics = _get_hparams_dict(hparams) + jparams = json.dumps(hparams, sort_keys=True, separators=(",", ":")) + group_name = hashlib.sha256(jparams.encode("utf-8")).hexdigest() + + session_start_info = plugin_data_pb2.SessionStartInfo( + group_name=group_name, start_time_secs=time.time(), + ) + for key in sorted(hparams): + value = hparams[key] + if isinstance(value, str): + session_start_info.hparams[key].string_value = value + elif isinstance(value, (float, int)): + session_start_info.hparams[key].number_value = value + elif isinstance(value, bool): + session_start_info.hparams[key].bool_value = value + else: + raise TypeError("the type of value: %r is not supported!" % value) + for key in metrics: + value = metrics[key] + if isinstance(value, (float, int)): + session_start_info.metrics[key].number_value = value + else: + raise TypeError("the type of value: %r is not supported!" 
% value) + + summary = summary_pb2.Summary() + summary_metadata = _get_metadata( + plugin_data_pb2.HParamsPluginData(session_start_info=session_start_info) + ) + summary.value.add( + tag="_hparams_/session_start_info", + metadata=summary_metadata, + tensor=_get_tensor([], tensor_pb2.DT_FLOAT, (0,)), + ) + return summary + + +def _get_metadata(hparams_plugin_data): + plugin_data = plugin_data_pb2.HParamsPluginData() + plugin_data.CopyFrom(hparams_plugin_data) + plugin_data.version = 0 + return summary_pb2.SummaryMetadata( + plugin_data=summary_pb2.SummaryMetadata.PluginData( + plugin_name="hparams", content=plugin_data.SerializeToString() + ) + ) + + +def _get_hparams_dict(hparams): + hparams_dict = {} + metrics_dict = {} + for (key, value) in hparams.items(): + if isinstance(key, Metric): + if key.name in metrics_dict: + raise ValueError("the key already exists: %r" % (key.name,)) + metrics_dict[key.name] = _get_value(value) + continue + if isinstance(key, HParam): + key = key.name + if key in hparams_dict or key in metrics_dict: + raise ValueError("the key already exists: %r" % (key,)) + hparams_dict[key] = _get_value(value) + return hparams_dict, metrics_dict + + +def _get_value(value): + if isinstance(value, np.generic): + return value.item() + else: + return value + + +@oneflow_export("summary.Hparam") +class HParam(object): + r"""The class of Hparam + + This class describes the name and the type of Hparam + """ + + def __init__(self, name, dtype=None): + r"""Create a Hparam object + + Args: + name: Hparam name + dtype: Hparam type + + Raises: + ValueError: If Hparam type not in (IntegerRange, RealRange, ValueSet) + """ + self.name_ = name + self.dtype_ = dtype + if not isinstance(self.dtype_, (IntegerRange, RealRange, ValueSet, type(None))): + raise ValueError( + "Hparam dtype must be one of (IntegerRange, RealRange, ValueSet): %r" + % (self.dtype_,) + ) + + @property + def name(self): + return self.name_ + + @property + def dtype(self): + return self.dtype_ + + +@oneflow_export("summary.IntegerRange") +class IntegerRange(object): + r"""The class of IntegerRange + + This class takes an integer range between min_value and max_value + """ + + def __init__(self, min_value, max_value): + r"""Create an 'IntegerRange' object + + Args: + min_value: The min value of the range + max_value: The max value of the range + + Raises: + TypeError: If 'min_value' or 'max_value' is not an int + ValueError: If 'min_value' > 'max_value' + """ + if not isinstance(max_value, int): + raise TypeError("max_value is not an integer value: %r" % (max_value,)) + if not isinstance(min_value, int): + raise TypeError("min_value is not an integer value: %r" % (min_value,)) + if min_value > max_value: + raise ValueError( + "max_value must be bigger than min_value: %r > %r" % (min_value, max_value) + ) + self.min_value_ = min_value + self.max_value_ = max_value + + @property + def min_value(self): + return self.min_value_ + + @property + def max_value(self): + return self.max_value_ + + +@oneflow_export("summary.RealRange") +class RealRange(object): + r"""The class of RealRange + + This class takes a real-number range between min_value and max_value + """ + + def __init__(self, min_value, max_value): + r"""Create a 'RealRange' object + + Args: + min_value: The min value of the range + max_value: The max value of the range + + Raises: + TypeError: If 'min_value' or 'max_value' is not a float + ValueError: If 'min_value' > 'max_value' + """ + if not isinstance(max_value, float): + raise TypeError("max_value is not a float value: %r" % (max_value,)) + if not isinstance(min_value, float): + raise TypeError("min_value 
is not a float value: %r" % (min_value,)) + if min_value > max_value: + raise ValueError( + "max_value must be bigger than min_value: %r > %r" % (min_value, max_value) + ) + self.min_value_ = min_value + self.max_value_ = max_value + + @property + def min_value(self): + return self.min_value_ + + @property + def max_value(self): + return self.max_value_ + + +@oneflow_export("summary.ValueSet") +class ValueSet(object): + r"""The class of ValueSet + + This class takes a list of values + """ + + def __init__(self, values, dtype=None): + r"""Create a ValueSet object + + Args: + values: a list of values + dtype: the value type + + Raises: + ValueError: If the value type is not in (int, float, bool, str) + TypeError: If the values in the list are not all of the same type + """ + self.values_ = list(values) + if dtype is None: + if self.values_: + dtype = type(self.values_[0]) + if dtype not in (int, float, bool, str): + raise ValueError( + "Value type must be in (int, float, bool, str), %r is not supported!" + % (dtype,) + ) + self.dtype_ = dtype + for value in self.values_: + if not isinstance(value, self.dtype_): + raise TypeError( + "The type of value is not supported! value: %r type: %s" + % (value, self.dtype_.__name__) + ) + self.values_.sort() + + @property + def dtype(self): + return self.dtype_ + + @property + def values(self): + return list(self.values_) + + +@oneflow_export("summary.Metric") +class Metric(object): + r"""The class of Metric + + This class takes an 'int' or 'float' value + """ + + def __init__(self, name, dtype=None): + r"""Create a Metric object + + Args: + name: Metric name + dtype: Value type + + Raises: + ValueError: If type is not 'int' or 'float' + """ + self.name_ = name + if dtype is None: + dtype = float + if dtype not in (int, float): + raise ValueError( + "Value type must be in (int, float), %r is not supported!" % (dtype,) + ) + self.dtype_ = dtype + + @property + def name(self): + return self.name_ + + @property + def dtype(self): + return self.dtype_ diff --git a/oneflow/compatible_single_client_python/summary/summary_projector.py b/oneflow/compatible_single_client_python/summary/summary_projector.py new file mode 100644 index 0000000000000000000000000000000000000000..1ce6b9bed90c1836bbba2bf96afa139a4991d6b8 --- /dev/null +++ b/oneflow/compatible_single_client_python/summary/summary_projector.py @@ -0,0 +1,160 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
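The hparams helpers in summary_hparams.py compose as in the following sketch (note the exported name is `flow.summary.Hparam`, not `HParam`); the concrete keys and values here are illustrative only:

```python
import oneflow.compatible.single_client as flow

hparams = {
    flow.summary.Hparam("lr", flow.summary.RealRange(1e-5, 1e-1)): 1e-3,
    flow.summary.Hparam("batch_size", flow.summary.IntegerRange(1, 512)): 64,
    flow.summary.Hparam("optimizer", flow.summary.ValueSet(["sgd", "adam"])): "sgd",
    flow.summary.Metric("accuracy"): 0.0,  # Metric dtype defaults to float
}
summary = flow.summary.hparams(hparams)  # returns a summary_pb2.Summary message
```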
+""" +import os +from oneflow.core.summary import projector_pb2 as projector_pb2 +from oneflow.compatible_single_client_python.oneflow_export import oneflow_export +import time + +from oneflow.compatible import single_client as flow + + +@oneflow_export("summary.Projector") +class Projector(object): + r"""The class of Projector + + This class can create an 'embedding_projector' or 'exception_projector' + """ + + def __init__(self, logdir=None): + r"""Create a Projector objector + + Args: + logdir: The log dir + + Raises: + Exception: If 'logdir' is None or illegal + """ + if logdir is None: + raise Exception("logdir should not be None!") + logdir += "/projector" + if not os.path.exists(logdir): + os.makedirs(logdir) + self.logdir_ = logdir + self.embedding_filename_ = None + self.exception_filename_ = None + + def create_embedding_projector(self): + if (self.embedding_filename_ is not None) and ( + os.path.exists(self.embedding_filename_) + ): + raise OSError("You must create only one embedding projector!") + self.embedding_filename_ = ( + self.logdir_ + "/projector." + str(int(time.time())) + ".log" + ) + + def create_exception_projector(self): + if (self.exception_filename_ is not None) and ( + os.path.exists(self.exception_filename_) + ): + raise OSError("You must create only one embedding projector!") + self.exception_filename_ = ( + self.logdir_ + "/projector.gradit." + str(int(time.time())) + ".log" + ) + + @property + def logdir(self): + return self.logdir_ + + @property + def exception_filename(self): + return self.exception_filename_ + + @property + def embedding_filename(self): + return self.embedding_filename_ + + def write_projector(self, filename=None, projector=None): + with open(filename, "wb") as f: + f.write(projector.SerializeToString()) + f.flush() + + def set_tensor(self, tensor: projector_pb2.Tensor, value): + for d in value.shape: + td = tensor.shape.dim.add() + td.size = d + tensor.dtype = str(value.dtype) + tensor.content = value.tobytes() + + def set_projector(self, pro, tag, step, value, label=None): + pro.tag = str(tag) + pro.step = step + pro.WALL_TIME = time.time() + self.set_tensor(pro.value, value) + if label is not None: + self.set_tensor(pro.label, label) + + def set_sample(self, sample, name, x, sample_type): + if name is not None: + sample.name = name + if sample_type == "image" or sample_type == "IMAGE": + sample.type = projector_pb2.Sample.SampleType.IMAGE + elif sample_type == "audio" or sample_type == "AUDIO": + sample.type = projector_pb2.Sample.SampleType.AUDIO + elif sample_type == "text" or sample_type == "TEXT": + sample.type = projector_pb2.Sample.SampleType.TEXT + else: + raise NotImplementedError + if x is not None: + self.set_tensor(sample.X, x) + + def embedding_projector( + self, + value=None, + label=None, + tag=None, + step=None, + sample_name=None, + sample_type=None, + x=None, + ): + if tag is None: + tag = "embedding_projector" + summary_projector = projector_pb2.SummaryProjector() + summary_projector.metadata.type = projector_pb2.MetaData.ProjectorType.EMBEDDING + projector = summary_projector.projector.add() + self.set_projector(pro=projector, tag=tag, step=step, value=value, label=label) + if (sample_name is not None) and (sample_type is not None): + self.set_sample( + sample=summary_projector.sample, + name=sample_name, + x=x, + sample_type=sample_type, + ) + self.write_projector(self.embedding_filename_, summary_projector) + + def exception_projector( + self, + value=None, + tag=None, + step=None, + sample_name=None, + 
sample_type=None, + x=None, + ): + if tag is None: + tag = "exception_projector" + summary_projector = projector_pb2.SummaryProjector() + summary_projector.metadata.type = projector_pb2.MetaData.ProjectorType.EXCEPTION + projector = summary_projector.projector.add() + self.set_projector(pro=projector, tag=tag, step=step, value=value) + if (sample_name is not None) and (sample_type is not None): + self.set_sample( + sample=summary_projector.sample, + name=sample_name, + x=x, + sample_type=sample_type, + ) + self.write_projector(self.exception_filename_, summary_projector) diff --git a/oneflow/compatible_single_client_python/test/README.md b/oneflow/compatible_single_client_python/test/README.md new file mode 100644 index 0000000000000000000000000000000000000000..e61ee2196433d9846e7321ed0a43992622ba578a --- /dev/null +++ b/oneflow/compatible_single_client_python/test/README.md @@ -0,0 +1,56 @@ +# Introduction to the test tooling + +Created: Oct 4, 2020 10:52 AM + +The op test code has been updated, mainly to solve the following problems: + +1. The old code introduced some fairly complex abstractions on top of Python's built-in unittest, which made it hard to run unit tests in parallel +2. A single script could not be run on its own, e.g. `python3 oneflow/compatible/single_client/python/test/ops/test_add.py` +3. All configuration for launching tests had to be passed on the command line, which is unfriendly to CI + +The new conventions: + +```python +@flow.unittest.skip_unless_1n1d() +class TestAdd(flow.unittest.TestCase): + def test_naive(test_case): + .... + + def test_broadcast(test_case): + .... + +if __name__ == "__main__": + unittest.main() +``` + +- Every `test_***` function must be written inside a class that inherits from `flow.unittest.TestCase` +- There must be an `if __name__ == "__main__":` block that calls `unittest.main()` +- A skip decorator such as `@flow.unittest.skip_unless_1n1d()` must be added, marking that the test case can only run with 1 node and 1 device. Note: the device count here covers not only how many GPUs oneflow uses, but also how many GPUs tensorflow/pytorch use inside the script +- The skip decorator can be placed on the class or on the method; when placed on the class, all test methods inside the class are skipped if the condition is not met +- No extra abstraction is introduced beyond the standard python unit test conventions; learn more at [https://docs.python.org/3/library/unittest.html](https://docs.python.org/3/library/unittest.html) + +How to run: + +- Run everything: enter the `oneflow/compatible/single_client/python/test/ops` directory and run `python3 -m unittest` + + ```bash + cd oneflow/compatible/single_client/python/test/ops + export ONEFLOW_TEST_DEVICE_NUM=1 + python3 -m unittest --failfast --verbose + ``` + + Or: + + ```bash + python3 -m unittest discover oneflow/compatible/single_client/python/test/ops + ``` + + See [https://docs.python.org/3/library/unittest.html](https://docs.python.org/3/library/unittest.html) for more usage + +- Set the environment variable `ONEFLOW_TEST_DEVICE_NUM` to filter scripts by how many devices they need; if it is not given, the default is 1 +- Multi-node scripts need the `ONEFLOW_TEST_NODE_LIST` and `ONEFLOW_TEST_MASTER_PORT` environment variables to specify the nodes' IP addresses and control port +- To run a single script, execute the file directly with the python3 binary; all python unittest command-line flags are accepted, e.g. `--failfast`, `--verbose` + + ```bash + python3 oneflow/compatible/single_client/python/test/ops/test_add.py --verbose + ``` diff --git a/oneflow/python/test/custom_ops/test_user_sigmoid.py b/oneflow/compatible_single_client_python/test/custom_ops/test_user_sigmoid.py similarity index 97% rename from oneflow/python/test/custom_ops/test_user_sigmoid.py rename to oneflow/compatible_single_client_python/test/custom_ops/test_user_sigmoid.py index 4cc75243df2c7a0c8117f2b778b6dd78af639477..0bf6381e627c069e1b4ac599d6d78bddb350383d 100644 --- a/oneflow/python/test/custom_ops/test_user_sigmoid.py +++ 
b/oneflow/compatible_single_client_python/test/custom_ops/test_user_sigmoid.py @@ -18,8 +18,8 @@ import os import numpy as np import math -import oneflow as flow -import oneflow.typing as oft +import oneflow.compatible.single_client as flow +import oneflow.compatible.single_client.typing as oft func_config = flow.FunctionConfig() diff --git a/oneflow/python/test/custom_ops/user_sigmoid/user_sigmoid_cpp_def.cpp b/oneflow/compatible_single_client_python/test/custom_ops/user_sigmoid/user_sigmoid_cpp_def.cpp similarity index 100% rename from oneflow/python/test/custom_ops/user_sigmoid/user_sigmoid_cpp_def.cpp rename to oneflow/compatible_single_client_python/test/custom_ops/user_sigmoid/user_sigmoid_cpp_def.cpp diff --git a/oneflow/python/test/custom_ops/user_sigmoid/user_sigmoid_py_api.py b/oneflow/compatible_single_client_python/test/custom_ops/user_sigmoid/user_sigmoid_py_api.py similarity index 96% rename from oneflow/python/test/custom_ops/user_sigmoid/user_sigmoid_py_api.py rename to oneflow/compatible_single_client_python/test/custom_ops/user_sigmoid/user_sigmoid_py_api.py index b3aca463b0a0d41b49ed39456372000c692ddaaf..5ad2d1c01d688b8a18b36eb69afd5eb8983b196f 100644 --- a/oneflow/python/test/custom_ops/user_sigmoid/user_sigmoid_py_api.py +++ b/oneflow/compatible_single_client_python/test/custom_ops/user_sigmoid/user_sigmoid_py_api.py @@ -17,7 +17,7 @@ from __future__ import absolute_import import os -import oneflow as flow +import oneflow.compatible.single_client as flow from typing import Union, Tuple, List, Optional, Sequence, Callable diff --git a/oneflow/python/test/custom_ops/user_sigmoid/user_sigmoid_py_kernel.py b/oneflow/compatible_single_client_python/test/custom_ops/user_sigmoid/user_sigmoid_py_kernel.py similarity index 100% rename from oneflow/python/test/custom_ops/user_sigmoid/user_sigmoid_py_kernel.py rename to oneflow/compatible_single_client_python/test/custom_ops/user_sigmoid/user_sigmoid_py_kernel.py diff --git a/oneflow/compatible_single_client_python/test/models/1node_test.py b/oneflow/compatible_single_client_python/test/models/1node_test.py new file mode 100644 index 0000000000000000000000000000000000000000..757ba4d462b6236a62a85f07507f3c56b842b008 --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/1node_test.py @@ -0,0 +1,61 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
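The rename hunks above all apply the same mechanical import substitution; for any test script moved under compatible_single_client_python the migration looks like this:

```python
# before (old single-client layout):
#   import oneflow as flow
#   import oneflow.typing as oft
# after (compatibility namespace introduced by this PR):
import oneflow.compatible.single_client as flow
import oneflow.compatible.single_client.typing as oft
```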
+""" +from oneflow.compatible import single_client as flow +import env_1node +import os + +from absl import app +from absl.testing import absltest +from test_1node_mixin import Test1NodeMixin +from cnns_tests import ( + TestAlexNetMixin, + TestResNet50Mixin, + TestVgg16Mixin, + TestInceptionV3Mixin, +) + + +class TestAlexNet(Test1NodeMixin, TestAlexNetMixin, absltest.TestCase): + pass + + +class TestResNet50(Test1NodeMixin, TestResNet50Mixin, absltest.TestCase): + pass + + +class TestVgg16(Test1NodeMixin, TestVgg16Mixin, absltest.TestCase): + pass + + +class TestInceptionV3(Test1NodeMixin, TestInceptionV3Mixin, absltest.TestCase): + pass + + +flow.unittest.register_test_cases( + scope=globals(), + directory=os.path.dirname(os.path.realpath(__file__)), + filter_by_num_nodes=lambda x: x == 1, + base_class=absltest.TestCase, +) + + +def main(argv): + env_1node.Init() + absltest.main() + + +if __name__ == "__main__": + app.run(main) diff --git a/oneflow/compatible_single_client_python/test/models/2node_test.py b/oneflow/compatible_single_client_python/test/models/2node_test.py new file mode 100644 index 0000000000000000000000000000000000000000..f11a213484a64ecaec2a7c341851fc4c40fac5f2 --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/2node_test.py @@ -0,0 +1,59 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import os + +import cnns_tests +import env_2node +import numpy +from oneflow.compatible import single_client as flow +from absl import app +from absl.testing import absltest +from test_2node_mixin import Test2NodeMixin + + +class TestAlexNet(Test2NodeMixin, cnns_tests.TestAlexNetMixin, absltest.TestCase): + pass + + +class TestResNet50(Test2NodeMixin, cnns_tests.TestResNet50Mixin, absltest.TestCase): + pass + + +class TestVgg16(Test2NodeMixin, cnns_tests.TestVgg16Mixin, absltest.TestCase): + pass + + +class TestInceptionV3( + Test2NodeMixin, cnns_tests.TestInceptionV3Mixin, absltest.TestCase +): + pass + + +flow.unittest.register_test_cases( + scope=globals(), + directory=os.path.dirname(os.path.realpath(__file__)), + filter_by_num_nodes=lambda x: x == 2, + base_class=absltest.TestCase, +) + + +def main(argv): + env_2node.Init() + absltest.main() + + +if __name__ == "__main__": + app.run(main) diff --git a/oneflow/compatible_single_client_python/test/models/README.md b/oneflow/compatible_single_client_python/test/models/README.md new file mode 100644 index 0000000000000000000000000000000000000000..bbbe7c196225478574eee96f24af1036a3beb1ff --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/README.md @@ -0,0 +1,78 @@ +# Usage: +## Test all cnn nets: +``` +python run_cnns_test.py +``` + +## Test all cnn nets on current node: +``` +python 1node_run_cnns_test.py +``` + +## Test a specific net: +### alexnet +``` +python run_cnns_test.py TestAlexNet +``` + +### resnet50 +``` +python run_cnns_test.py TestResNet50 +``` + +### inceptionv3 +``` +python run_cnns_test.py TestInceptionV3 +``` + +### vgg16 +``` +python run_cnns_test.py TestVgg16 +``` + +## Test a specific case for a specific net: + +### test alexnet on 1 gpu, 1 machine(node) +``` +python run_cnns_test.py TestAlexNet.test_1n1c +``` + +### test alexnet on 4 gpu, 1 machine(node) +``` +python run_cnns_test.py TestAlexNet.test_1n4c +``` + +### test alexnet on 4 gpu, 8 machine(node) +``` +python run_cnns_test.py TestAlexNet.test_2n8c + +``` + +## Loss report format +``` +====================================================================== +xx net loss report +====================================================================== +iter tensorflow oneflow-1n1c +0 6.932688 6.932688 +1 6.924820 6.924820 +2 6.917069 6.917069 +3 6.909393 6.909393 +4 6.901904 6.901904 +5 6.894367 6.894367 +6 6.886764 6.886764 +7 6.879305 6.879305 +8 6.872003 6.872003 +9 6.864939 6.864939 +``` + +## Test Bert on current node (1n1c, 1n4c): + +``` +python 1node_run_cnns_test.py test_bert +``` + +## Test Bert with specified distributed strategy +``` +python 2node_run_cnns_test.py test_bert.test_2n8c +``` diff --git a/oneflow/python/test/customized/ccrelu_alexnet.py b/oneflow/compatible_single_client_python/test/models/alexnet.py similarity index 87% rename from oneflow/python/test/customized/ccrelu_alexnet.py rename to oneflow/compatible_single_client_python/test/models/alexnet.py index 0a59cb01e8b0bb02d8895083d8567759aca694a8..f83d952dabdab7db964b052de7be7ff293580147 100644 --- a/oneflow/python/test/customized/ccrelu_alexnet.py +++ b/oneflow/compatible_single_client_python/test/models/alexnet.py @@ -18,9 +18,9 @@ import os from datetime import datetime import numpy -import oneflow as flow -import oneflow.core.operator.op_conf_pb2 as op_conf_util -import oneflow.core.job.initializer_conf_pb2 as initializer_conf_util +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from 
oneflow.core.job import initializer_conf_pb2 as initializer_conf_util # _DATA_DIR = "/dataset/imagenet_227/train/32" _DATA_DIR = "/dataset/PNGS/PNG227/of_record_repeated" @@ -32,7 +32,7 @@ NODE_LIST = "192.168.1.12,192.168.1.14" class DLNetSpec(object): - def __init__(self): + def __init__(self, enable_auto_mixed_precision): self.batch_size = 8 self.data_part_num = 32 self.eval_dir = _DATA_DIR @@ -42,6 +42,7 @@ class DLNetSpec(object): self.num_nodes = 1 self.gpu_num_per_node = 1 self.iter_num = 10 + self.enable_auto_mixed_precision = enable_auto_mixed_precision parser = argparse.ArgumentParser(description="flags for multi-node and resource") @@ -113,7 +114,7 @@ def _conv2d_layer( if activation is not None: if activation == op_conf_util.kRelu: - output = flow.math.relu(output) + output = flow.nn.relu(output) else: raise NotImplementedError @@ -124,15 +125,17 @@ def _data_load_layer(args, data_dir): node_num = args.num_nodes total_batch_size = args.batch_size * args.gpu_num_per_node * node_num rgb_mean = [123.68, 116.78, 103.94] - ofrecord = flow.data.ofrecord_reader( - data_dir, batch_size=total_batch_size, data_part_num=args.data_part_num - ) - image = flow.data.OFRecordImageDecoder(ofrecord, "encoded", color_space="RGB") - label = flow.data.OFRecordRawDecoder( - ofrecord, "class/label", shape=(), dtype=flow.int32 + (image, label) = flow.data.ofrecord_image_classification_reader( + data_dir, + batch_size=total_batch_size, + data_part_num=args.data_part_num, + image_feature_name="encoded", + label_feature_name="class/label", + color_space="RGB", + name="decode", ) - rsz = flow.image.Resize(image, resize_x=227, resize_y=227, color_space="RGB") - normal = flow.image.CropMirrorNormalize( + rsz = flow.image.resize(image, resize_x=227, resize_y=227, color_space="RGB") + normal = flow.image.crop_mirror_normalize( rsz, color_space="RGB", output_layout="NCHW", @@ -142,18 +145,6 @@ def _data_load_layer(args, data_dir): return label, normal -def ccrelu(x, name): - return ( - flow.user_op_builder(name) - .Op("ccrelu") - .Input("in", [x]) - .Output("out") - .Build() - .InferAndTryRun() - .RemoteBlobList()[0] - ) - - def alexnet(args, images, labels, trainable=True): conv1 = _conv2d_layer( args, "conv1", images, filters=64, kernel_size=11, strides=4, padding="VALID", @@ -192,8 +183,7 @@ def alexnet(args, images, labels, trainable=True): name="fc1", ) - # dropout1 = fc1 - dropout1 = ccrelu(fc1, "ccrelu_Fc1ToDropout1") + dropout1 = fc1 fc2 = flow.layers.dense( inputs=dropout1, @@ -229,27 +219,30 @@ def alexnet(args, images, labels, trainable=True): def main(args): flow.config.machine_num(args.num_nodes) flow.config.gpu_device_num(args.gpu_num_per_node) + flow.config.enable_legacy_model_io(True) func_config = flow.FunctionConfig() - func_config.default_distribute_strategy(flow.scope.consistent_view()) + func_config.default_logical_view(flow.scope.consistent_view()) func_config.default_data_type(flow.float) - func_config.train.primary_lr(0.00001) - func_config.train.model_update_conf(dict(naive_conf={})) func_config.cudnn_conv_force_fwd_algo(0) func_config.cudnn_conv_force_bwd_data_algo(1) func_config.cudnn_conv_force_bwd_filter_algo(1) + func_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision) - @flow.global_function(func_config) + @flow.global_function(type="train", function_config=func_config) def alexnet_train_job(): (labels, images) = _data_load_layer(args, args.train_dir) loss = alexnet(args, images, labels) - flow.losses.add_loss(loss) + flow.optimizer.SGD( + 
flow.optimizer.PiecewiseConstantScheduler([], [0.00001]), momentum=0 + ).minimize(loss) return loss func_config = flow.FunctionConfig() func_config.default_data_type(flow.float) + func_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision) - @flow.global_function(func_config) + @flow.global_function(function_config=func_config) def alexnet_eval_job(): with flow.scope.consistent_view(): (labels, images) = _data_load_layer(args, args.eval_dir) diff --git a/oneflow/compatible_single_client_python/test/models/alexnet_with_unpack.py b/oneflow/compatible_single_client_python/test/models/alexnet_with_unpack.py new file mode 100644 index 0000000000000000000000000000000000000000..a7b81e29a92a9022b8d57b74889fedf7989c995b --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/alexnet_with_unpack.py @@ -0,0 +1,390 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import argparse +import os +from datetime import datetime + +import numpy +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.job import initializer_conf_pb2 as initializer_conf_util + +# _DATA_DIR = "/dataset/imagenet_227/train/32" +_DATA_DIR = "/dataset/PNGS/PNG227/of_record_repeated" +_MODEL_SAVE_DIR = "./model_save-{}".format( + str(datetime.now().strftime("%Y-%m-%d-%H:%M:%S")) +) +_MODEL_LOAD = "/dataset/PNGS/cnns_model_for_test/alexnet/models/of_model_bk" +NODE_LIST = "192.168.1.12,192.168.1.14" + + +class DLNetSpec(object): + def __init__(self): + self.batch_size = 8 + self.data_part_num = 32 + self.eval_dir = _DATA_DIR + self.train_dir = _DATA_DIR + self.model_save_dir = _MODEL_SAVE_DIR + self.model_load_dir = _MODEL_LOAD + self.num_nodes = 1 + self.gpu_num_per_node = 1 + self.iter_num = 10 + self.num_unpack = 2 + + +parser = argparse.ArgumentParser(description="flags for multi-node and resource") +parser.add_argument("-nn", "--num_nodes", type=str, default=1, required=False) +parser.add_argument("-g", "--gpu_num_per_node", type=int, default=1, required=False) +parser.add_argument("-i", "--iter_num", type=int, default=10, required=False) +parser.add_argument( + "-m", "--multinode", default=False, action="store_true", required=False +) +parser.add_argument("-n", "--node_list", type=str, default=NODE_LIST, required=False) +parser.add_argument( + "-s", "--skip_scp_binary", default=False, action="store_true", required=False +) +parser.add_argument( + "-c", + "--scp_binary_without_uuid", + default=False, + action="store_true", + required=False, +) +parser.add_argument( + "-r", "--remote_by_hand", default=False, action="store_true", required=False +) +parser.add_argument("-e", "--eval_dir", type=str, default=_DATA_DIR, required=False) +parser.add_argument("-t", "--train_dir", type=str, default=_DATA_DIR, required=False) +parser.add_argument( + "-load", "--model_load_dir", type=str, default=_MODEL_LOAD, required=False +) +parser.add_argument( + "-save", "--model_save_dir", type=str, 
default=_MODEL_SAVE_DIR, required=False +) +parser.add_argument("-dn", "--data_part_num", type=int, default=32, required=False) +parser.add_argument("-b", "--batch_size", type=int, default=8, required=False) +parser.add_argument("-p", "--num_piece_in_batch", type=int, default=2, required=False) + + +def _conv2d_layer( + args, + name, + input, + filters, + kernel_size=3, + strides=1, + padding="SAME", + data_format="NCHW", + dilation_rate=1, + activation=op_conf_util.kRelu, + use_bias=False, + weight_initializer=flow.random_uniform_initializer(), + bias_initializer=flow.random_uniform_initializer(), +): + weight_shape = (filters, input.shape[1], kernel_size, kernel_size) + weight = flow.get_variable( + name + "-weight", + shape=weight_shape, + dtype=input.dtype, + initializer=weight_initializer, + ) + weight = flow.identity(weight) + weight = flow.repeat(weight, args.num_piece_in_batch) + output = flow.nn.conv2d( + input, weight, strides, padding, None, data_format, dilation_rate, name=name + ) + if use_bias: + bias = flow.get_variable( + name + "-bias", + shape=(filters,), + dtype=input.dtype, + initializer=bias_initializer, + ) + bias = flow.identity(bias) + bias = flow.repeat(bias, args.num_piece_in_batch) + output = flow.nn.bias_add(output, bias, data_format) + + if activation is not None: + if activation == op_conf_util.kRelu: + output = flow.math.relu(output) + else: + raise NotImplementedError + + return output + + +def _data_load_layer(args, data_dir): + node_num = args.num_nodes + total_batch_size = args.batch_size * args.gpu_num_per_node * node_num + rgb_mean = [123.68, 116.78, 103.94] + ofrecord = flow.data.ofrecord_reader( + data_dir, + batch_size=total_batch_size, + data_part_num=args.data_part_num, + name="decode", + ) + image = flow.data.ofrecord_image_decoder(ofrecord, "encoded", color_space="RGB") + label = flow.data.ofrecord_raw_decoder( + ofrecord, "class/label", shape=(), dtype=flow.int32 + ) + rsz = flow.image.resize(image, resize_x=227, resize_y=227, color_space="RGB") + normal = flow.image.crop_mirror_normalize( + rsz, + color_space="RGB", + output_layout="NCHW", + mean=rgb_mean, + output_dtype=flow.float, + ) + return ( + flow.unpack(label, args.num_piece_in_batch), + flow.unpack(normal, args.num_piece_in_batch), + ) + + +def _dense_layer( + inputs, + units, + activation=None, + use_bias=True, + kernel_initializer=None, + bias_initializer=None, + trainable=True, + name=None, +): + in_shape = inputs.shape + in_num_axes = len(in_shape) + assert in_num_axes >= 2 + + assert name is not None, "name is required (no id_util fallback is imported in this file)"; name_prefix = name + inputs = flow.reshape(inputs, (-1, in_shape[-1])) if in_num_axes > 2 else inputs + + weight = flow.get_variable( + name="{}-weight".format(name_prefix), + shape=(units, inputs.shape[1]), + dtype=inputs.dtype, + initializer=( + kernel_initializer + if kernel_initializer is not None + else flow.constant_initializer(0) + ), + trainable=trainable, + model_name="weight", + ) + weight = flow.identity(weight) + weight = flow.repeat(weight, args.num_piece_in_batch) + + out = flow.matmul( + a=inputs, b=weight, transpose_b=True, name="{}_matmul".format(name_prefix), + ) + if use_bias: + bias = flow.get_variable( + name="{}-bias".format(name_prefix), + shape=(units,), + dtype=inputs.dtype, + initializer=( + bias_initializer + if bias_initializer is not None + else flow.constant_initializer(0) + ), + trainable=trainable, + model_name="bias", + ) + + bias = flow.identity(bias) + bias = flow.repeat(bias, args.num_piece_in_batch) + + out =
flow.nn.bias_add(out, bias, name="{}_bias_add".format(name_prefix)) + out = ( + activation(out, name="{}_activation".format(name_prefix)) + if activation is not None + else out + ) + out = flow.reshape(out, in_shape[:-1] + (units,)) if in_num_axes > 2 else out + + return out + + +def alexnet(args, images, labels, trainable=True): + conv1 = _conv2d_layer( + args, "conv1", images, filters=64, kernel_size=11, strides=4, padding="VALID", + ) + + pool1 = flow.nn.avg_pool2d(conv1, 3, 2, "VALID", "NCHW", name="pool1") + + conv2 = _conv2d_layer(args, "conv2", pool1, filters=192, kernel_size=5) + + pool2 = flow.nn.avg_pool2d(conv2, 3, 2, "VALID", "NCHW", name="pool2") + + conv3 = _conv2d_layer(args, "conv3", pool2, filters=384) + + conv4 = _conv2d_layer(args, "conv4", conv3, filters=384) + + conv5 = _conv2d_layer(args, "conv5", conv4, filters=256) + + pool5 = flow.nn.avg_pool2d(conv5, 3, 2, "VALID", "NCHW", name="pool5") + + def _get_initializer(): + kernel_initializer = initializer_conf_util.InitializerConf() + kernel_initializer.truncated_normal_conf.std = 0.816496580927726 + return kernel_initializer + + if len(pool5.shape) > 2: + pool5 = flow.reshape(pool5, shape=(pool5.shape[0], -1)) + + fc1 = _dense_layer( + inputs=pool5, + units=4096, + activation=flow.math.relu, + use_bias=False, + kernel_initializer=_get_initializer(), + bias_initializer=False, + trainable=trainable, + name="fc1", + ) + + dropout1 = fc1 + + fc2 = _dense_layer( + inputs=dropout1, + units=4096, + activation=flow.math.relu, + use_bias=False, + kernel_initializer=_get_initializer(), + bias_initializer=False, + trainable=trainable, + name="fc2", + ) + + dropout2 = fc2 + + fc3 = _dense_layer( + inputs=dropout2, + units=1001, + activation=None, + use_bias=False, + kernel_initializer=_get_initializer(), + bias_initializer=False, + trainable=trainable, + name="fc3", + ) + + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, fc3, name="softmax_loss" + ) + + return loss + + +def main(args): + flow.config.machine_num(args.num_nodes) + flow.config.gpu_device_num(args.gpu_num_per_node) + + func_config = flow.FunctionConfig() + func_config.default_logical_view(flow.scope.consistent_view()) + func_config.default_data_type(flow.float) + func_config.cudnn_conv_force_fwd_algo(0) + func_config.cudnn_conv_force_bwd_data_algo(1) + func_config.cudnn_conv_force_bwd_filter_algo(1) + + @flow.global_function(type="train", function_config=func_config) + def alexnet_train_job(): + (labels, images) = _data_load_layer(args, args.train_dir) + loss = alexnet(args, images, labels) + flow.optimizer.SGD( + flow.optimizer.PiecewiseConstantScheduler([], [0.00001]), momentum=0 + ).minimize(loss) + return flow.pack(loss, args.num_piece_in_batch) + + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + + @flow.global_function(function_config=func_config) + def alexnet_eval_job(): + with flow.scope.consistent_view(): + (labels, images) = _data_load_layer(args, args.eval_dir) + loss = alexnet(args, images, labels) + return flow.pack(loss, args.num_piece_in_batch) + + check_point = flow.train.CheckPoint() + if not args.model_load_dir: + check_point.init() + else: + check_point.load(args.model_load_dir) + + num_nodes = args.num_nodes + print( + "Training alexnet: num_gpu_per_node = {}, num_nodes = {}.".format( + args.gpu_num_per_node, num_nodes + ) + ) + + print("{:>12} {:>12} {:>12}".format("iter", "loss type", "loss value")) + loss = [] + for i in range(args.iter_num): + train_loss = alexnet_train_job().get().mean() +
loss.append(train_loss) + + fmt_str = "{:>12} {:>12} {:>12.6f}" + print(fmt_str.format(i, "train loss:", train_loss)) + + # if (i + 1) % 10 == 0: + # eval_loss = alexnet_eval_job().get().mean() + # print( + # fmt_str.format( + # i, "eval loss:", eval_loss + # ) + # ) + if (i + 1) % 100 == 0: + check_point.save(_MODEL_SAVE_DIR + str(i)) + + # save loss to file + loss_file = "{}n{}c.npy".format( + str(num_nodes), str(args.gpu_num_per_node * num_nodes) + ) + loss_path = "./of_loss/alexnet" + if not os.path.exists(loss_path): + os.makedirs(loss_path) + numpy.save(os.path.join(loss_path, loss_file), loss) + + +if __name__ == "__main__": + args = parser.parse_args() + args.num_nodes = len(args.node_list.strip().split(",")) if args.multinode else 1 + flow.env.ctrl_port(9788) + if args.multinode: + flow.env.ctrl_port(12138) + nodes = [] + for n in args.node_list.strip().split(","): + addr_dict = {} + addr_dict["addr"] = n + nodes.append(addr_dict) + + flow.env.machine(nodes) + if args.remote_by_hand is False: + if args.scp_binary_without_uuid: + flow.deprecated.init_worker(scp_binary=True, use_uuid=False) + elif args.skip_scp_binary: + flow.deprecated.init_worker(scp_binary=False, use_uuid=False) + else: + flow.deprecated.init_worker(scp_binary=True, use_uuid=True) + + main(args) + if ( + args.multinode + and args.skip_scp_binary is False + and args.scp_binary_without_uuid is False + ): + flow.deprecated.delete_worker() diff --git a/oneflow/compatible_single_client_python/test/models/bert.py b/oneflow/compatible_single_client_python/test/models/bert.py new file mode 100644 index 0000000000000000000000000000000000000000..0b88309d44f702ba1edb53e804daa1ceaedc9fb7 --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/bert.py @@ -0,0 +1,415 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
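`alexnet_with_unpack.py` above exercises OneFlow's `flow.unpack`/`flow.pack` pair: the data loader unpacks one batch into `num_piece_in_batch` micro-batches, variables are wrapped in `flow.identity` + `flow.repeat` so every piece consumes the same weights, and the train job packs the per-piece losses back into a single batch-sized blob. A rough shape-level illustration in plain numpy (the real ops act on the lazy graph, not on eager arrays):
```
import numpy as np

batch_size, num_pieces = 8, 2

# "unpack": split one batch into num_pieces micro-batches along axis 0
batch = np.random.rand(batch_size, 3, 227, 227).astype(np.float32)
pieces = np.split(batch, num_pieces, axis=0)   # two pieces of shape (4, 3, 227, 227)

# each piece runs against the same (repeated) weights and yields a loss slice
losses = [piece.mean(axis=(1, 2, 3)) for piece in pieces]

# "pack": concatenate the per-piece results back into one batch-sized array
packed = np.concatenate(losses, axis=0)        # shape (8,)
assert packed.shape == (batch_size,)
```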
+""" +import math + +from oneflow.compatible import single_client as flow +from oneflow.core.common import data_type_pb2 as data_type_util +from oneflow.core.operator import op_conf_pb2 as op_conf_util + + +class BertBackbone(object): + def __init__( + self, + input_ids_blob, + input_mask_blob, + token_type_ids_blob, + vocab_size, + seq_length=512, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + initializer_range=0.02, + ): + + with flow.scope.namespace("bert"): + with flow.scope.namespace("embeddings"): + (self.embedding_output_, self.embedding_table_) = _EmbeddingLookup( + input_ids_blob=input_ids_blob, + vocab_size=vocab_size, + embedding_size=hidden_size, + initializer_range=initializer_range, + word_embedding_name="word_embeddings", + ) + self.embedding_output_ = _EmbeddingPostprocessor( + input_blob=self.embedding_output_, + seq_length=seq_length, + embedding_size=hidden_size, + use_token_type=True, + token_type_ids_blob=token_type_ids_blob, + token_type_vocab_size=type_vocab_size, + token_type_embedding_name="token_type_embeddings", + use_position_embeddings=True, + position_embedding_name="position_embeddings", + initializer_range=initializer_range, + max_position_embeddings=max_position_embeddings, + dropout_prob=hidden_dropout_prob, + ) + with flow.scope.namespace("encoder"): + addr_blob = _CreateAttentionMaskFromInputMask( + input_mask_blob, + from_seq_length=seq_length, + to_seq_length=seq_length, + ) + self.all_encoder_layers_ = _TransformerModel( + input_blob=self.embedding_output_, + addr_blob=addr_blob, + seq_length=seq_length, + hidden_size=hidden_size, + num_hidden_layers=num_hidden_layers, + num_attention_heads=num_attention_heads, + intermediate_size=intermediate_size, + intermediate_act_fn=GetActivation(hidden_act), + hidden_dropout_prob=hidden_dropout_prob, + attention_probs_dropout_prob=attention_probs_dropout_prob, + initializer_range=initializer_range, + do_return_all_layers=False, + ) + self.sequence_output_ = self.all_encoder_layers_[-1] + + def embedding_output(self): + return self.embedding_output_ + + def all_encoder_layers(self): + return self.all_encoder_layers_ + + def sequence_output(self): + return self.sequence_output_ + + def embedding_table(self): + return self.embedding_table_ + + +def CreateInitializer(std): + return flow.truncated_normal(std) + + +def _Gelu(in_blob): + return flow.math.gelu(in_blob) + + +def _TransformerModel( + input_blob, + addr_blob, + seq_length, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + intermediate_act_fn=_Gelu, + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + initializer_range=0.02, + do_return_all_layers=False, +): + + assert hidden_size % num_attention_heads == 0 + attention_head_size = int(hidden_size / num_attention_heads) + input_width = hidden_size + prev_output_blob = flow.reshape(input_blob, (-1, input_width)) + all_layer_output_blobs = [] + for layer_idx in range(num_hidden_layers): + with flow.scope.namespace("layer_%d" % layer_idx): + layer_input_blob = prev_output_blob + with flow.scope.namespace("attention"): + with flow.scope.namespace("self"): + attention_output_blob = _AttentionLayer( + from_blob=layer_input_blob, + to_blob=layer_input_blob, + addr_blob=addr_blob, + num_attention_heads=num_attention_heads, + size_per_head=attention_head_size, + 
attention_probs_dropout_prob=attention_probs_dropout_prob, + initializer_range=initializer_range, + do_return_2d_tensor=True, + from_seq_length=seq_length, + to_seq_length=seq_length, + ) + with flow.scope.namespace("output"): + attention_output_blob = _FullyConnected( + attention_output_blob, + input_size=num_attention_heads * attention_head_size, + units=hidden_size, + weight_initializer=CreateInitializer(initializer_range), + name="dense", + ) + attention_output_blob = _Dropout( + attention_output_blob, hidden_dropout_prob + ) + attention_output_blob = attention_output_blob + layer_input_blob + attention_output_blob = _LayerNorm( + attention_output_blob, hidden_size + ) + with flow.scope.namespace("intermediate"): + if callable(intermediate_act_fn): + act_fn = op_conf_util.kNone + else: + act_fn = intermediate_act_fn + intermediate_output_blob = _FullyConnected( + attention_output_blob, + input_size=num_attention_heads * attention_head_size, + units=intermediate_size, + activation=act_fn, + weight_initializer=CreateInitializer(initializer_range), + name="dense", + ) + if callable(intermediate_act_fn): + intermediate_output_blob = intermediate_act_fn( + intermediate_output_blob + ) + with flow.scope.namespace("output"): + layer_output_blob = _FullyConnected( + intermediate_output_blob, + input_size=intermediate_size, + units=hidden_size, + weight_initializer=CreateInitializer(initializer_range), + name="dense", + ) + layer_output_blob = _Dropout(layer_output_blob, hidden_dropout_prob) + layer_output_blob = layer_output_blob + attention_output_blob + layer_output_blob = _LayerNorm(layer_output_blob, hidden_size) + prev_output_blob = layer_output_blob + all_layer_output_blobs.append(layer_output_blob) + + input_shape = (-1, seq_length, hidden_size) + if do_return_all_layers: + final_output_blobs = [] + for layer_output_blob in all_layer_output_blobs: + final_output_blob = flow.reshape(layer_output_blob, input_shape) + final_output_blobs.append(final_output_blob) + return final_output_blobs + else: + final_output_blob = flow.reshape(prev_output_blob, input_shape) + return [final_output_blob] + + +def _AttentionLayer( + from_blob, + to_blob, + addr_blob, + num_attention_heads=1, + size_per_head=512, + query_act=op_conf_util.kNone, + key_act=op_conf_util.kNone, + value_act=op_conf_util.kNone, + attention_probs_dropout_prob=0.0, + initializer_range=0.02, + do_return_2d_tensor=False, + batch_size=None, + from_seq_length=None, + to_seq_length=None, +): + def TransposeForScores(input_blob, num_attention_heads, seq_length, width): + output_blob = flow.reshape( + input_blob, [-1, seq_length, num_attention_heads, width] + ) + output_blob = flow.transpose(output_blob, perm=[0, 2, 1, 3]) + return output_blob + + from_blob_2d = flow.reshape(from_blob, [-1, num_attention_heads * size_per_head]) + to_blob_2d = flow.reshape(to_blob, [-1, num_attention_heads * size_per_head]) + + query_blob = _FullyConnected( + from_blob_2d, + input_size=num_attention_heads * size_per_head, + units=num_attention_heads * size_per_head, + activation=query_act, + name="query", + weight_initializer=CreateInitializer(initializer_range), + ) + + key_blob = _FullyConnected( + to_blob_2d, + input_size=num_attention_heads * size_per_head, + units=num_attention_heads * size_per_head, + activation=key_act, + name="key", + weight_initializer=CreateInitializer(initializer_range), + ) + + value_blob = _FullyConnected( + to_blob_2d, + input_size=num_attention_heads * size_per_head, + units=num_attention_heads * size_per_head, + 
activation=value_act, + name="value", + weight_initializer=CreateInitializer(initializer_range), + ) + + query_blob = TransposeForScores( + query_blob, num_attention_heads, from_seq_length, size_per_head + ) + key_blob = TransposeForScores( + key_blob, num_attention_heads, to_seq_length, size_per_head + ) + + attention_scores_blob = flow.matmul(query_blob, key_blob, transpose_b=True) + attention_scores_blob = attention_scores_blob * ( + 1.0 / math.sqrt(float(size_per_head)) + ) + + attention_scores_blob = attention_scores_blob + addr_blob + attention_probs_blob = flow.nn.softmax(attention_scores_blob) + attention_probs_blob = _Dropout(attention_probs_blob, attention_probs_dropout_prob) + + value_blob = flow.reshape( + value_blob, [-1, to_seq_length, num_attention_heads, size_per_head] + ) + value_blob = flow.transpose(value_blob, perm=[0, 2, 1, 3]) + context_blob = flow.matmul(attention_probs_blob, value_blob) + context_blob = flow.transpose(context_blob, perm=[0, 2, 1, 3]) + + if do_return_2d_tensor: + context_blob = flow.reshape( + context_blob, [-1, num_attention_heads * size_per_head] + ) + else: + context_blob = flow.reshape( + context_blob, [-1, from_seq_length, num_attention_heads * size_per_head] + ) + return context_blob + + +def _FullyConnected( + input_blob, input_size, units, activation=None, name=None, weight_initializer=None +): + weight_blob = flow.get_variable( + name=name + "-weight", + shape=[input_size, units], + dtype=input_blob.dtype, + model_name="weight", + initializer=weight_initializer, + ) + bias_blob = flow.get_variable( + name=name + "-bias", + shape=[units], + dtype=input_blob.dtype, + model_name="bias", + initializer=flow.constant_initializer(0.0), + ) + output_blob = flow.matmul(input_blob, weight_blob) + output_blob = flow.nn.bias_add(output_blob, bias_blob) + return output_blob + + +def _Dropout(input_blob, dropout_prob): + if dropout_prob == 0.0: + return input_blob + return flow.nn.dropout(input_blob, rate=dropout_prob) + + +def _LayerNorm(input_blob, hidden_size): + return flow.layers.layer_norm( + input_blob, name="LayerNorm", begin_norm_axis=-1, begin_params_axis=-1 + ) + + +def _CreateAttentionMaskFromInputMask(to_mask_blob, from_seq_length, to_seq_length): + output = flow.cast(to_mask_blob, dtype=flow.float) + output = flow.reshape(output, [-1, 1, to_seq_length]) + zeros = flow.constant(0.0, dtype=flow.float, shape=[from_seq_length, to_seq_length]) + attention_mask_blob = zeros + output + attention_mask_blob = flow.reshape( + attention_mask_blob, [-1, 1, from_seq_length, to_seq_length] + ) + attention_mask_blob = flow.cast(attention_mask_blob, dtype=flow.float) + addr_blob = (attention_mask_blob - 1.0) * 10000.0 + + return addr_blob + + +def _EmbeddingPostprocessor( + input_blob, + seq_length, + embedding_size, + use_token_type=False, + token_type_ids_blob=None, + token_type_vocab_size=16, + token_type_embedding_name="token_type_embeddings", + use_position_embeddings=True, + position_embedding_name="position_embeddings", + initializer_range=0.02, + max_position_embeddings=512, + dropout_prob=0.1, +): + output = input_blob + + if use_token_type: + assert token_type_ids_blob is not None + token_type_table = flow.get_variable( + name=token_type_embedding_name, + shape=[token_type_vocab_size, embedding_size], + dtype=input_blob.dtype, + initializer=CreateInitializer(initializer_range), + ) + token_type_embeddings = flow.gather( + params=token_type_table, indices=token_type_ids_blob, axis=0 + ) + output = output + token_type_embeddings + + if 
use_position_embeddings: + position_table = flow.get_variable( + name=position_embedding_name, + shape=[1, max_position_embeddings, embedding_size], + dtype=input_blob.dtype, + initializer=CreateInitializer(initializer_range), + ) + assert seq_length <= max_position_embeddings + if seq_length != max_position_embeddings: + position_table = flow.slice( + position_table, begin=[None, 0, 0], size=[None, seq_length, -1] + ) + output = output + position_table + + output = _LayerNorm(output, embedding_size) + output = _Dropout(output, dropout_prob) + + return output + + +def _EmbeddingLookup( + input_ids_blob, + vocab_size, + embedding_size=128, + initializer_range=0.02, + word_embedding_name="word_embeddings", +): + embedding_table = flow.get_variable( + name=word_embedding_name, + shape=[vocab_size, embedding_size], + dtype=flow.float, + initializer=CreateInitializer(initializer_range), + ) + output = flow.gather(params=embedding_table, indices=input_ids_blob, axis=0) + return output, embedding_table + + +def GetActivation(name): + if name == "linear": + return None + elif name == "relu": + return flow.math.relu + elif name == "tanh": + return flow.math.tanh + elif name == "gelu": + return flow.math.gelu + else: + raise Exception("unsupported activation") diff --git a/oneflow/compatible_single_client_python/test/models/cnns_tests.py b/oneflow/compatible_single_client_python/test/models/cnns_tests.py new file mode 100644 index 0000000000000000000000000000000000000000..ec45d874c9dbfae475349154ef3e76b5d5990dfc --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/cnns_tests.py @@ -0,0 +1,205 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
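`_CreateAttentionMaskFromInputMask` above uses the standard additive-mask trick: valid positions contribute 0 to the attention scores and padded positions contribute -10000, so softmax assigns them (nearly) zero weight. A small numpy check of that arithmetic:
```
import numpy as np

mask = np.array([1.0, 1.0, 0.0])      # last position is padding
addr = (mask - 1.0) * 10000.0         # [0, 0, -10000], as computed above
scores = np.array([2.0, 1.0, 3.0]) + addr

probs = np.exp(scores - scores.max())
probs /= probs.sum()
print(probs)                          # ~[0.73, 0.27, 0.00]: the padded position is masked out
```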
+""" +import imp +import os +import sys + +import numpy +from oneflow.compatible import single_client as flow +from absl import app, flags + +FLAGS = flags.FLAGS + +flags.DEFINE_string("python_bin", "python3", "python binary program name or filepath.") +flags.DEFINE_boolean( + "enable_auto_mixed_precision", + False, + "automatically change float net to mixed precision net", +) + + +class TestNetMixin: + """ + Base Tester + """ + + def setUp(self): + self.net = "" + self.tf_loss_dir = "" + self.of_loss_dir = "" + self.num_iter = 10 + if os.getenv("ONEFLOW_TEST_CPU_ONLY"): + self.num_iter = 3 + self.set_params() + flow.clear_default_session() + + def set_params(self): + pass + + def assert_tolerance_4_mixed_precision(self): + raise AssertionError + + def run_net(self, num_gpu_per_node, num_node=1, node_list=""): + net_modudle = _Import(self.net) + spec = net_modudle.DLNetSpec(FLAGS.enable_auto_mixed_precision) + spec.num_nodes = num_node + spec.gpu_num_per_node = num_gpu_per_node + if os.getenv("ONEFLOW_TEST_CPU_ONLY"): + spec.iter_num = 3 + net_modudle.main(spec) + return + if num_node > 1: + os.system( + "{} {}.py -g {} -m -n {}".format( + FLAGS.python_bin, self.net, num_gpu_per_node, node_list + ) + ) + else: + os.system( + "{} {}.py -g {}".format(FLAGS.python_bin, self.net, num_gpu_per_node) + ) + + def load_tf_loss(self): + tf_loss = numpy.load(os.path.join(self.tf_loss_dir, "1n1c.npy")) + return tf_loss[0 : self.num_iter] + + def load_of_loss(self, test_type): + path = os.path.join(self.of_loss_dir, test_type + ".npy") + if os.path.exists(path): + of_loss = numpy.load(path) + else: + of_loss = numpy.zeros(self.num_iter) + return of_loss[0 : self.num_iter] + + def print_and_check_result(self, result_name): + if os.getenv("ONEFLOW_TEST_CPU_ONLY"): + if self.net == "resnet50": + print("WARNING: skipping check for resnet50 cpu due to GEMM NaN") + return + loss_dict = {} + loss_dict["tensorflow"] = self.load_tf_loss() + loss_dict["oneflow"] = self.load_of_loss(result_name) + + print("==".ljust(64, "=")) + print(" ".ljust(2, " ") + self.net + " loss report") + print("==".ljust(64, "=")) + fmt_str = "{:>6} {:>12} {:>12}" + print(fmt_str.format("iter", "tensorflow", "oneflow-" + result_name)) + for i in range(self.num_iter): + fmt_str = "{:>6} {:>12.6f} {:>12.6f}" + print( + fmt_str.format(i, loss_dict["tensorflow"][i], loss_dict["oneflow"][i]) + ) + if FLAGS.enable_auto_mixed_precision: + tolerance = self.assert_tolerance_4_mixed_precision() + rtol = tolerance["rtol"] + atol = tolerance["atol"] + print( + "assert tolerance for mixed_precision are: rtol", rtol, ", atol", atol + ) + self.assertTrue( + numpy.allclose( + loss_dict["tensorflow"], loss_dict["oneflow"], rtol=rtol, atol=atol + ) + ) + else: + self.assertTrue( + numpy.allclose(loss_dict["tensorflow"], loss_dict["oneflow"]) + ) + + +class TestAlexNetMixin(TestNetMixin): + """ + AlexNet Tester + """ + + def set_params(self): + self.net = "alexnet" + self.tf_loss_dir = os.path.join( + "/dataset/PNGS/cnns_model_for_test/tf_loss", self.net + ) + self.of_loss_dir = os.path.join("./of_loss", self.net) + + def assert_tolerance_4_mixed_precision(self): + return {"rtol": 1e-5, "atol": 1e-2} + + +class TestResNet50Mixin(TestNetMixin): + """ + AlexNet Tester + """ + + def set_params(self): + self.net = "resnet50" + self.tf_loss_dir = os.path.join( + "/dataset/PNGS/cnns_model_for_test/tf_loss", self.net + ) + self.of_loss_dir = os.path.join("./of_loss", self.net) + + def assert_tolerance_4_mixed_precision(self): + return {"rtol": 1e-8, "atol": 
1e-5} + + +class TestVgg16Mixin(TestNetMixin): + """ + Vgg16 Tester + """ + + def set_params(self): + self.net = "vgg16" + self.tf_loss_dir = os.path.join( + "/dataset/PNGS/cnns_model_for_test/tf_loss", self.net + ) + self.of_loss_dir = os.path.join("./of_loss", self.net) + + def assert_tolerance_4_mixed_precision(self): + return {"rtol": 1e-4, "atol": 1e-1} # big tolerance due to running ci on 1080ti + + +class TestInceptionV3Mixin(TestNetMixin): + """ + InceptionV3 Tester + """ + + def set_params(self): + self.net = "inceptionv3" + self.tf_loss_dir = os.path.join( + "/dataset/PNGS/cnns_model_for_test/tf_loss", self.net + ) + self.of_loss_dir = os.path.join("./of_loss", self.net) + + def assert_tolerance_4_mixed_precision(self): + return {"rtol": 1e-5, "atol": 1e-2} + + +def _Import(name, globals=None, locals=None, fromlist=None): + # Fast path: see if the module has already been imported. + try: + return sys.modules[name] + except KeyError: + pass + + # If any of the following calls raises an exception, + # there's a problem we can't handle -- let the caller handle it. + + fp, pathname, description = imp.find_module(name) + + try: + return imp.load_module(name, fp, pathname, description) + finally: + # Since we may exit via an exception, close fp explicitly. + if fp: + fp.close() diff --git a/oneflow/compatible_single_client_python/test/models/eager_1node_test.py b/oneflow/compatible_single_client_python/test/models/eager_1node_test.py new file mode 100644 index 0000000000000000000000000000000000000000..54263c3f9696f5a43d44deeb8a6629a095ae46ba --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/eager_1node_test.py @@ -0,0 +1,76 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
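`_Import` in `cnns_tests.py` above relies on the `imp` module, which is deprecated since Python 3.4 and removed in 3.12. For the narrow case used here, loading `<net>.py` by name from the test directory, an `importlib`-based equivalent might look like the sketch below (not part of the diff):
```
import importlib.util
import sys


def _import(name):
    # Fast path: module already imported.
    if name in sys.modules:
        return sys.modules[name]
    # Assumes the module sits in the working directory as "<name>.py",
    # matching how alexnet/resnet50/vgg16/inceptionv3 are loaded here.
    spec = importlib.util.spec_from_file_location(name, name + ".py")
    module = importlib.util.module_from_spec(spec)
    sys.modules[name] = module
    spec.loader.exec_module(module)
    return module
```
Unlike `imp.find_module`, this sketch does not search all of `sys.path`; that is sufficient for these tests, which are run from the models directory.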
+""" +from oneflow.compatible import single_client as flow +import env_1node +import os + +from absl import app +from absl.testing import absltest +from test_1node_mixin import Test1NodeMixin +from cnns_tests import ( + TestAlexNetMixin, + TestResNet50Mixin, + TestVgg16Mixin, + TestInceptionV3Mixin, +) + + +class TestAlexNet(Test1NodeMixin, TestAlexNetMixin, absltest.TestCase): + def setUp(self): + super().setUp() + flow.enable_eager_execution(True) + + +class TestResNet50(Test1NodeMixin, TestResNet50Mixin, absltest.TestCase): + def setUp(self): + super().setUp() + flow.enable_eager_execution(True) + + +class TestVgg16(Test1NodeMixin, TestVgg16Mixin, absltest.TestCase): + def setUp(self): + super().setUp() + flow.enable_eager_execution(True) + + +class TestInceptionV3(Test1NodeMixin, TestInceptionV3Mixin, absltest.TestCase): + def setUp(self): + super().setUp() + flow.enable_eager_execution(True) + + +class TestEagerMixin(object): + def setUp(self): + flow.clear_default_session() + flow.enable_eager_execution(True) + + +flow.unittest.register_test_cases( + scope=globals(), + directory=os.path.dirname(os.path.realpath(__file__)), + filter_by_num_nodes=lambda x: x == 1, + base_class=absltest.TestCase, + test_case_mixin=TestEagerMixin, +) + + +def main(argv): + env_1node.Init() + absltest.main() + + +if __name__ == "__main__": + app.run(main) diff --git a/oneflow/compatible_single_client_python/test/models/env_1node.py b/oneflow/compatible_single_client_python/test/models/env_1node.py new file mode 100644 index 0000000000000000000000000000000000000000..b0dfccda41a26c7b1cc5fb40ad1eeafe76c0aa62 --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/env_1node.py @@ -0,0 +1,20 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from oneflow.compatible import single_client as flow + + +def Init(): + flow.env.init() diff --git a/oneflow/compatible_single_client_python/test/models/env_2node.py b/oneflow/compatible_single_client_python/test/models/env_2node.py new file mode 100644 index 0000000000000000000000000000000000000000..b7b056ac9b0789ad5f01cdc4964132efa4cdb9fb --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/env_2node.py @@ -0,0 +1,32 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import atexit + +from oneflow.compatible import single_client as flow +from absl import flags + +FLAGS = flags.FLAGS +flags.DEFINE_string( + "nodes_list", "192.168.1.15,192.168.1.16", "nodes list seperated by comma" +) +flags.DEFINE_integer("ctrl_port", "9524", "control port") + + +def Init(): + flow.env.machine(FLAGS.nodes_list.split(",")) + flow.env.ctrl_port(FLAGS.ctrl_port) + flow.deprecated.init_worker(scp_binary=True, use_uuid=True) + atexit.register(flow.deprecated.delete_worker) diff --git a/oneflow/compatible_single_client_python/test/models/inceptionv3.py b/oneflow/compatible_single_client_python/test/models/inceptionv3.py new file mode 100644 index 0000000000000000000000000000000000000000..47cfebe60c6c2d6592c9eb6d56a0819c00ab7964 --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/inceptionv3.py @@ -0,0 +1,685 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import argparse +import os +from datetime import datetime + +import numpy +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util + +_DATA_DIR = "/dataset/PNGS/PNG299/of_record_repeated" +_EVAL_DIR = _DATA_DIR +_TRAIN_DIR = _DATA_DIR +_MODEL_LOAD = "/dataset/PNGS/cnns_model_for_test/inceptionv3/models/of_model" +_MODEL_SAVE_DIR = "./model_save-{}".format( + str(datetime.now().strftime("%Y-%m-%d-%H:%M:%S")) +) +NODE_LIST = "192.168.1.12,192.168.1.14" + + +class DLNetSpec(object): + def __init__(self, enable_auto_mixed_precision): + self.batch_size = 8 + self.data_part_num = 32 + self.eval_dir = _DATA_DIR + self.train_dir = _DATA_DIR + self.model_save_dir = _MODEL_SAVE_DIR + self.model_load_dir = _MODEL_LOAD + self.num_nodes = 1 + self.gpu_num_per_node = 1 + self.iter_num = 10 + self.enable_auto_mixed_precision = enable_auto_mixed_precision + + +parser = argparse.ArgumentParser(description="flags for multi-node and resource") +parser.add_argument("-g", "--gpu_num_per_node", type=int, default=1, required=False) +parser.add_argument("-i", "--iter_num", type=int, default=10, required=False) +parser.add_argument("-b", "--batch_size", type=int, default=8, required=False) +parser.add_argument( + "-m", "--multinode", default=False, action="store_true", required=False +) +parser.add_argument("-n", "--node_list", type=str, default=NODE_LIST, required=False) +parser.add_argument( + "-s", "--skip_scp_binary", default=False, action="store_true", required=False +) +parser.add_argument( + "-c", + "--scp_binary_without_uuid", + default=False, + action="store_true", + required=False, +) +parser.add_argument( + "-r", "--remote_by_hand", default=False, action="store_true", required=False +) +parser.add_argument("-e", "--eval_dir", type=str, default=_DATA_DIR, required=False) +parser.add_argument("-t", "--train_dir", type=str, default=_DATA_DIR, required=False) +parser.add_argument( + "-load", "--model_load_dir", type=str, default=_MODEL_LOAD, required=False +) +parser.add_argument( + "-save", "--model_save_dir", 
type=str, default=_MODEL_SAVE_DIR, required=False +) +parser.add_argument("-dn", "--data_part_num", type=int, default=32, required=False) + +# TODO: add this interface to oneflow.compatible.single_client.layers +def _conv2d_layer( + name, + input, + filters, + kernel_size=3, + strides=1, + padding="SAME", + data_format="NCHW", + dilation_rate=1, + activation=op_conf_util.kSigmoid, + use_bias=True, + weight_initializer=flow.random_uniform_initializer(), + bias_initializer=flow.constant_initializer(), +): + if isinstance(kernel_size, int): + kernel_size = (kernel_size, kernel_size) + else: + kernel_size = tuple(kernel_size) + weight_shape = (filters, input.shape[1]) + kernel_size + weight = flow.get_variable( + name + "-weight", + shape=weight_shape, + dtype=input.dtype, + initializer=weight_initializer, + ) + output = flow.nn.conv2d( + input, weight, strides, padding, None, data_format, dilation_rate, name=name + ) + if use_bias: + bias = flow.get_variable( + name + "-bias", + shape=(filters,), + dtype=input.dtype, + initializer=bias_initializer, + ) + output = flow.nn.bias_add(output, bias, data_format) + + if activation is not None: + if activation == op_conf_util.kRelu: + output = flow.math.relu(output) + elif activation == op_conf_util.kSigmoid: + output = flow.math.sigmoid(output) + else: + raise NotImplementedError + + return output + + +def _data_load_layer(args, data_dir): + node_num = args.num_nodes + total_batch_size = args.batch_size * args.gpu_num_per_node * node_num + rgb_mean = [123.68, 116.78, 103.94] + ofrecord = flow.data.ofrecord_reader( + data_dir, + batch_size=total_batch_size, + data_part_num=args.data_part_num, + name="decode", + ) + image = flow.data.ofrecord_image_decoder(ofrecord, "encoded", color_space="RGB") + label = flow.data.ofrecord_raw_decoder( + ofrecord, "class/label", shape=(), dtype=flow.int32 + ) + rsz = flow.image.resize(image, resize_x=299, resize_y=299, color_space="RGB") + normal = flow.image.crop_mirror_normalize( + rsz, + color_space="RGB", + output_layout="NCHW", + mean=rgb_mean, + output_dtype=flow.float, + ) + return normal, label + + +def InceptionA(in_blob, index): + with flow.scope.namespace("mixed_{}".format(index)): + with flow.scope.namespace("branch1x1"): + branch1x1 = _conv2d_layer( + "conv0", in_blob, filters=64, kernel_size=1, strides=1, padding="SAME" + ) + with flow.scope.namespace("branch5x5"): + branch5x5_1 = _conv2d_layer( + "conv0", in_blob, filters=48, kernel_size=1, strides=1, padding="SAME" + ) + branch5x5_2 = _conv2d_layer( + "conv1", + branch5x5_1, + filters=64, + kernel_size=5, + strides=1, + padding="SAME", + ) + with flow.scope.namespace("branch3x3dbl"): + branch3x3dbl_1 = _conv2d_layer( + "conv0", in_blob, filters=64, kernel_size=1, strides=1, padding="SAME" + ) + branch3x3dbl_2 = _conv2d_layer( + "conv1", + branch3x3dbl_1, + filters=96, + kernel_size=3, + strides=1, + padding="SAME", + ) + branch3x3dbl_3 = _conv2d_layer( + "conv2", + branch3x3dbl_2, + filters=96, + kernel_size=3, + strides=1, + padding="SAME", + ) + with flow.scope.namespace("branch_pool"): + branch_pool_1 = flow.nn.avg_pool2d( + in_blob, + ksize=3, + strides=1, + padding="SAME", + data_format="NCHW", + name="pool", + ) + branch_pool_2 = _conv2d_layer( + "conv", + branch_pool_1, + filters=32 if index == 0 else 64, + kernel_size=1, + strides=1, + padding="SAME", + ) + + inceptionA_bn = [] + inceptionA_bn.append(branch1x1) + inceptionA_bn.append(branch5x5_2) + inceptionA_bn.append(branch3x3dbl_3) + inceptionA_bn.append(branch_pool_2) + + mixed_concat = 
flow.concat(values=inceptionA_bn, axis=1, name="concat") + + return mixed_concat + + +def InceptionB(in_blob, index): + with flow.scope.namespace("mixed_{}".format(index)): + with flow.scope.namespace("branch3x3"): + branch3x3 = _conv2d_layer( + "conv0", in_blob, filters=384, kernel_size=3, strides=2, padding="VALID" + ) + with flow.scope.namespace("branch3x3dbl"): + branch3x3dbl_1 = _conv2d_layer( + "conv0", in_blob, filters=64, kernel_size=1, strides=1, padding="SAME" + ) + branch3x3dbl_2 = _conv2d_layer( + "conv1", + branch3x3dbl_1, + filters=96, + kernel_size=3, + strides=1, + padding="SAME", + ) + branch3x3dbl_3 = _conv2d_layer( + "conv2", + branch3x3dbl_2, + filters=96, + kernel_size=3, + strides=2, + padding="VALID", + ) + with flow.scope.namespace("branch_pool"): + branch_pool = flow.nn.max_pool2d( + in_blob, + ksize=3, + strides=2, + padding="VALID", + data_format="NCHW", + name="pool0", + ) + + inceptionB_bn = [] + inceptionB_bn.append(branch3x3) + inceptionB_bn.append(branch3x3dbl_3) + inceptionB_bn.append(branch_pool) + mixed_concat = flow.concat(values=inceptionB_bn, axis=1, name="concat") + + return mixed_concat + + +def InceptionC(in_blob, index, filters): + with flow.scope.namespace("mixed_{}".format(index)): + with flow.scope.namespace("branch1x1"): + branch1x1 = _conv2d_layer( + "conv0", in_blob, filters=192, kernel_size=1, strides=1, padding="SAME" + ) + with flow.scope.namespace("branch7x7"): + branch7x7_1 = _conv2d_layer( + "conv0", + in_blob, + filters=filters, + kernel_size=1, + strides=1, + padding="SAME", + ) + branch7x7_2 = _conv2d_layer( + "conv1", + branch7x7_1, + filters=filters, + kernel_size=[1, 7], + strides=1, + padding="SAME", + ) + branch7x7_3 = _conv2d_layer( + "conv2", + branch7x7_2, + filters=192, + kernel_size=[7, 1], + strides=[1, 1], + padding="SAME", + ) + with flow.scope.namespace("branch7x7dbl"): + branch7x7dbl_1 = _conv2d_layer( + "conv0", + in_blob, + filters=filters, + kernel_size=1, + strides=1, + padding="SAME", + ) + branch7x7dbl_2 = _conv2d_layer( + "conv1", + branch7x7dbl_1, + filters=filters, + kernel_size=[7, 1], + strides=1, + padding="SAME", + ) + branch7x7dbl_3 = _conv2d_layer( + "conv2", + branch7x7dbl_2, + filters=filters, + kernel_size=[1, 7], + strides=1, + padding="SAME", + ) + branch7x7dbl_4 = _conv2d_layer( + "conv3", + branch7x7dbl_3, + filters=filters, + kernel_size=[7, 1], + strides=1, + padding="SAME", + ) + branch7x7dbl_5 = _conv2d_layer( + "conv4", + branch7x7dbl_4, + filters=192, + kernel_size=[1, 7], + strides=1, + padding="SAME", + ) + with flow.scope.namespace("branch_pool"): + branch_pool_1 = flow.nn.avg_pool2d( + in_blob, + ksize=3, + strides=1, + padding="SAME", + data_format="NCHW", + name="pool", + ) + branch_pool_2 = _conv2d_layer( + "conv", + branch_pool_1, + filters=192, + kernel_size=[1, 1], + strides=1, + padding="SAME", + ) + + inceptionC_bn = [] + inceptionC_bn.append(branch1x1) + inceptionC_bn.append(branch7x7_3) + inceptionC_bn.append(branch7x7dbl_5) + inceptionC_bn.append(branch_pool_2) + mixed_concat = flow.concat(values=inceptionC_bn, axis=1, name="concat") + + return mixed_concat + + +def InceptionD(in_blob, index): + with flow.scope.namespace("mixed_{}".format(index)): + with flow.scope.namespace("branch3x3"): + branch3x3_1 = _conv2d_layer( + "conv0", in_blob, filters=192, kernel_size=1, strides=1, padding="SAME" + ) + branch3x3_2 = _conv2d_layer( + "conv1", + branch3x3_1, + filters=320, + kernel_size=3, + strides=2, + padding="VALID", + ) + with flow.scope.namespace("branch7x7x3"): + branch7x7x3_1 
= _conv2d_layer( + "conv0", in_blob, filters=192, kernel_size=1, strides=1, padding="SAME" + ) + branch7x7x3_2 = _conv2d_layer( + "conv1", + branch7x7x3_1, + filters=192, + kernel_size=[1, 7], + strides=1, + padding="SAME", + ) + branch7x7x3_3 = _conv2d_layer( + "conv2", + branch7x7x3_2, + filters=192, + kernel_size=[7, 1], + strides=1, + padding="SAME", + ) + branch7x7x3_4 = _conv2d_layer( + "conv3", + branch7x7x3_3, + filters=192, + kernel_size=3, + strides=2, + padding="VALID", + ) + with flow.scope.namespace("branch_pool"): + branch_pool = flow.nn.max_pool2d( + in_blob, + ksize=3, + strides=2, + padding="VALID", + data_format="NCHW", + name="pool", + ) + + inceptionD_bn = [] + inceptionD_bn.append(branch3x3_2) + inceptionD_bn.append(branch7x7x3_4) + inceptionD_bn.append(branch_pool) + + mixed_concat = flow.concat(values=inceptionD_bn, axis=1, name="concat") + + return mixed_concat + + +def InceptionE(in_blob, index): + with flow.scope.namespace("mixed_{}".format(index)): + with flow.scope.namespace("branch1x1"): + branch1x1 = _conv2d_layer( + "conv0", in_blob, filters=320, kernel_size=1, strides=1, padding="SAME" + ) + with flow.scope.namespace("branch3x3"): + branch3x3_1 = _conv2d_layer( + "conv0", in_blob, filters=384, kernel_size=1, strides=1, padding="SAME" + ) + branch3x3_2 = _conv2d_layer( + "conv1", + branch3x3_1, + filters=384, + kernel_size=[1, 3], + strides=1, + padding="SAME", + ) + branch3x3_3 = _conv2d_layer( + "conv2", + branch3x3_1, + filters=384, + kernel_size=[3, 1], + strides=[1, 1], + padding="SAME", + ) + inceptionE_1_bn = [] + inceptionE_1_bn.append(branch3x3_2) + inceptionE_1_bn.append(branch3x3_3) + concat_branch3x3 = flow.concat( + values=inceptionE_1_bn, axis=1, name="concat" + ) + with flow.scope.namespace("branch3x3dbl"): + branch3x3dbl_1 = _conv2d_layer( + "conv0", in_blob, filters=448, kernel_size=1, strides=1, padding="SAME" + ) + branch3x3dbl_2 = _conv2d_layer( + "conv1", + branch3x3dbl_1, + filters=384, + kernel_size=3, + strides=1, + padding="SAME", + ) + branch3x3dbl_3 = _conv2d_layer( + "conv2", + branch3x3dbl_2, + filters=384, + kernel_size=[1, 3], + strides=1, + padding="SAME", + ) + branch3x3dbl_4 = _conv2d_layer( + "conv3", + branch3x3dbl_2, + filters=384, + kernel_size=[3, 1], + strides=1, + padding="SAME", + ) + inceptionE_2_bn = [] + inceptionE_2_bn.append(branch3x3dbl_3) + inceptionE_2_bn.append(branch3x3dbl_4) + concat_branch3x3dbl = flow.concat( + values=inceptionE_2_bn, axis=1, name="concat" + ) + with flow.scope.namespace("branch_pool"): + branch_pool_1 = flow.nn.avg_pool2d( + in_blob, + ksize=3, + strides=1, + padding="SAME", + data_format="NCHW", + name="pool", + ) + branch_pool_2 = _conv2d_layer( + "conv", + branch_pool_1, + filters=192, + kernel_size=[1, 1], + strides=1, + padding="SAME", + ) + + inceptionE_total_bn = [] + inceptionE_total_bn.append(branch1x1) + inceptionE_total_bn.append(concat_branch3x3) + inceptionE_total_bn.append(concat_branch3x3dbl) + inceptionE_total_bn.append(branch_pool_2) + + concat_total = flow.concat(values=inceptionE_total_bn, axis=1, name="concat") + + return concat_total + + +def InceptionV3(images, labels, trainable=True): + conv0 = _conv2d_layer( + "conv0", images, filters=32, kernel_size=3, strides=2, padding="VALID" + ) + conv1 = _conv2d_layer( + "conv1", conv0, filters=32, kernel_size=3, strides=1, padding="VALID" + ) + conv2 = _conv2d_layer( + "conv2", conv1, filters=64, kernel_size=3, strides=1, padding="SAME" + ) + pool1 = flow.nn.max_pool2d( + conv2, ksize=3, strides=2, padding="VALID", 
data_format="NCHW", name="pool1" + ) + conv3 = _conv2d_layer( + "conv3", pool1, filters=80, kernel_size=1, strides=1, padding="VALID" + ) + conv4 = _conv2d_layer( + "conv4", conv3, filters=192, kernel_size=3, strides=1, padding="VALID" + ) + pool2 = flow.nn.max_pool2d( + conv4, ksize=3, strides=2, padding="VALID", data_format="NCHW", name="pool2" + ) + + # mixed_0 ~ mixed_2 + mixed_0 = InceptionA(pool2, 0) + mixed_1 = InceptionA(mixed_0, 1) + mixed_2 = InceptionA(mixed_1, 2) + + # mixed_3 + mixed_3 = InceptionB(mixed_2, 3) + + # mixed_4 ~ mixed_7 + mixed_4 = InceptionC(mixed_3, 4, 128) + mixed_5 = InceptionC(mixed_4, 5, 160) + mixed_6 = InceptionC(mixed_5, 6, 160) + mixed_7 = InceptionC(mixed_6, 7, 192) + + # mixed_8 + mixed_8 = InceptionD(mixed_7, 8) + + # mixed_9 ~ mixed_10 + mixed_9 = InceptionE(mixed_8, 9) + mixed_10 = InceptionE(mixed_9, 10) + + # pool3 + pool3 = flow.nn.avg_pool2d( + mixed_10, ksize=8, strides=1, padding="VALID", data_format="NCHW", name="pool3" + ) + + with flow.scope.namespace("logits"): + pool3 = flow.reshape(pool3, [pool3.shape[0], -1]) + # TODO: Need to transpose weight when converting model from TF to OF if + # you want to use layers.dense interface. + # fc1 = flow.layers.dense( + # pool3, + # 1001, + # activation=None, + # use_bias=False, + # kernel_initializer=flow.truncated_normal(0.816496580927726), + # bias_initializer=flow.constant_initializer(), + # name="fc1", + # ) + weight = flow.get_variable( + "fc1-weight", + shape=(pool3.shape[1], 1001), + dtype=flow.float, + initializer=flow.truncated_normal(0.816496580927726), + model_name="weight", + ) + bias = flow.get_variable( + "fc1-bias", + shape=(1001,), + dtype=flow.float, + initializer=flow.constant_initializer(), + model_name="bias", + ) + fc1 = flow.matmul(pool3, weight) + fc1 = flow.nn.bias_add(fc1, bias) + + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels=labels, logits=fc1, name="softmax_loss" + ) + + return loss + + +def main(args): + flow.config.machine_num(args.num_nodes) + flow.config.gpu_device_num(args.gpu_num_per_node) + func_config = flow.FunctionConfig() + func_config.default_logical_view(flow.scope.consistent_view()) + func_config.default_data_type(flow.float) + func_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision) + + @flow.global_function(type="train", function_config=func_config) + def TrainNet(): + (images, labels) = _data_load_layer(args, args.train_dir) + loss = InceptionV3(images, labels) + flow.optimizer.SGD( + flow.optimizer.PiecewiseConstantScheduler([], [0.0001]), momentum=0 + ).minimize(loss) + return loss + + check_point = flow.train.CheckPoint() + if not args.model_load_dir: + check_point.init() + else: + check_point.load(args.model_load_dir) + + num_nodes = args.num_nodes + print( + "Traning inceptionv3: num_gpu_per_node = {}, num_nodes = {}.".format( + args.gpu_num_per_node, num_nodes + ) + ) + + print("{:>12} {:>12} {:>12}".format("iter", "loss type", "loss value")) + loss = [] + for i in range(args.iter_num): + train_loss = TrainNet().get().mean() + loss.append(train_loss) + + fmt_str = "{:>12} {:>12} {:>12.6f}" + print(fmt_str.format(i, "train loss:", train_loss)) + + if (i + 1) % 100 == 0: + check_point.save(_MODEL_SAVE_DIR + str(i)) + + # save loss to file + loss_file = "{}n{}c.npy".format( + str(num_nodes), str(args.gpu_num_per_node * num_nodes) + ) + loss_path = "./of_loss/inceptionv3" + if not os.path.exists(loss_path): + os.makedirs(loss_path) + numpy.save(os.path.join(loss_path, loss_file), loss) + + +if __name__ == 
"__main__": + args = parser.parse_args() + if args.multinode: + flow.env.ctrl_port(12138) + nodes = [] + for n in args.node_list.strip().split(","): + addr_dict = {} + addr_dict["addr"] = n + nodes.append(addr_dict) + + flow.env.machine(nodes) + if args.remote_by_hand is False: + if args.scp_binary_without_uuid: + flow.deprecated.init_worker(scp_binary=True, use_uuid=False) + elif args.skip_scp_binary: + flow.deprecated.init_worker(scp_binary=False, use_uuid=False) + else: + flow.deprecated.init_worker(scp_binary=True, use_uuid=True) + + main(args) + if ( + args.multinode + and args.skip_scp_binary is False + and args.scp_binary_without_uuid is False + ): + flow.deprecated.delete_worker() diff --git a/oneflow/compatible_single_client_python/test/models/pretrain.py b/oneflow/compatible_single_client_python/test/models/pretrain.py new file mode 100644 index 0000000000000000000000000000000000000000..421cdac3b1e2bcbc19f694fd1f0ea41517731ad4 --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/pretrain.py @@ -0,0 +1,192 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import bert as bert_util +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util + + +def PreTrain( + input_ids_blob, + input_mask_blob, + token_type_ids_blob, + masked_lm_positions_blob, + masked_lm_ids_blob, + masked_lm_weights_blob, + next_sentence_label_blob, + vocab_size, + seq_length=512, + hidden_size=768, + num_hidden_layers=12, + num_attention_heads=12, + intermediate_size=3072, + hidden_act="gelu", + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + max_position_embeddings=512, + type_vocab_size=16, + max_predictions_per_seq=20, + initializer_range=0.02, +): + backbone = bert_util.BertBackbone( + input_ids_blob=input_ids_blob, + input_mask_blob=input_mask_blob, + token_type_ids_blob=token_type_ids_blob, + vocab_size=vocab_size, + seq_length=seq_length, + hidden_size=hidden_size, + num_hidden_layers=num_hidden_layers, + num_attention_heads=num_attention_heads, + intermediate_size=intermediate_size, + hidden_act=hidden_act, + hidden_dropout_prob=hidden_dropout_prob, + attention_probs_dropout_prob=attention_probs_dropout_prob, + max_position_embeddings=max_position_embeddings, + type_vocab_size=type_vocab_size, + initializer_range=initializer_range, + ) + + (lm_loss, _, _) = _AddMaskedLanguageModelLoss( + input_blob=backbone.sequence_output(), + output_weights_blob=backbone.embedding_table(), + positions_blob=masked_lm_positions_blob, + label_id_blob=masked_lm_ids_blob, + label_weight_blob=masked_lm_weights_blob, + seq_length=seq_length, + hidden_size=hidden_size, + vocab_size=vocab_size, + max_predictions_per_seq=max_predictions_per_seq, + hidden_act=bert_util.GetActivation(hidden_act), + initializer_range=initializer_range, + ) + pooled_output = PooledOutput( + backbone.sequence_output(), hidden_size, initializer_range + ) + (ns_loss, _, _) = _AddNextSentenceOutput( + 
input_blob=pooled_output, + label_blob=next_sentence_label_blob, + hidden_size=hidden_size, + initializer_range=initializer_range, + ) + with flow.scope.namespace("cls-loss"): + total_loss = lm_loss + ns_loss + return total_loss + + +def PooledOutput(sequence_output, hidden_size, initializer_range): + with flow.scope.namespace("bert-pooler"): + first_token_tensor = flow.slice(sequence_output, [None, 0, 0], [None, 1, -1]) + first_token_tensor = flow.reshape(first_token_tensor, [-1, hidden_size]) + pooled_output = bert_util._FullyConnected( + first_token_tensor, + input_size=hidden_size, + units=hidden_size, + weight_initializer=bert_util.CreateInitializer(initializer_range), + name="dense", + ) + pooled_output = flow.math.tanh(pooled_output) + return pooled_output + + +def _AddMaskedLanguageModelLoss( + input_blob, + output_weights_blob, + positions_blob, + label_id_blob, + label_weight_blob, + seq_length, + hidden_size, + vocab_size, + max_predictions_per_seq, + hidden_act, + initializer_range, +): + + with flow.scope.namespace("other"): + sum_label_weight_blob = flow.math.reduce_sum(label_weight_blob, axis=[-1]) + ones = sum_label_weight_blob * 0.0 + 1.0 + sum_label_weight_blob = flow.math.reduce_sum(sum_label_weight_blob) + batch_size = flow.math.reduce_sum(ones) + sum_label_weight_blob = sum_label_weight_blob / batch_size + with flow.scope.namespace("cls-predictions"): + input_blob = _GatherIndexes(input_blob, positions_blob, seq_length, hidden_size) + with flow.scope.namespace("transform"): + if callable(hidden_act): + act_fn = op_conf_util.kNone + else: + act_fn = hidden_act + input_blob = bert_util._FullyConnected( + input_blob, + input_size=hidden_size, + units=hidden_size, + activation=act_fn, + weight_initializer=bert_util.CreateInitializer(initializer_range), + name="dense", + ) + if callable(hidden_act): + input_blob = hidden_act(input_blob) + input_blob = bert_util._LayerNorm(input_blob, hidden_size) + output_bias = flow.get_variable( + name="output_bias", + shape=[vocab_size], + dtype=input_blob.dtype, + initializer=flow.constant_initializer(1.0), + ) + logit_blob = flow.matmul(input_blob, output_weights_blob, transpose_b=True) + logit_blob = flow.nn.bias_add(logit_blob, output_bias) + label_id_blob = flow.reshape(label_id_blob, [-1]) + pre_example_loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + logits=logit_blob, labels=label_id_blob + ) + pre_example_loss = flow.reshape(pre_example_loss, [-1, max_predictions_per_seq]) + numerator = pre_example_loss * label_weight_blob + with flow.scope.namespace("loss"): + numerator = flow.math.reduce_sum(numerator, axis=[-1]) + denominator = sum_label_weight_blob + 1e-5 + loss = numerator / denominator + return loss, pre_example_loss, logit_blob + + +def _GatherIndexes(sequence_blob, positions_blob, seq_length, hidden_size): + output = flow.gather( + params=sequence_blob, indices=positions_blob, axis=2, batch_dims=2 + ) + output = flow.reshape(output, [-1, hidden_size]) + return output + + +def _AddNextSentenceOutput(input_blob, label_blob, hidden_size, initializer_range): + with flow.scope.namespace("cls-seq_relationship"): + output_weight_blob = flow.get_variable( + name="output_weights", + shape=[2, hidden_size], + dtype=input_blob.dtype, + model_name="weight", + initializer=bert_util.CreateInitializer(initializer_range), + ) + output_bias_blob = flow.get_variable( + name="output_bias", + shape=[2], + dtype=input_blob.dtype, + model_name="bias", + initializer=flow.constant_initializer(0.0), + ) + logit_blob = 
flow.matmul(input_blob, output_weight_blob, transpose_b=True) + logit_blob = flow.nn.bias_add(logit_blob, output_bias_blob) + pre_example_loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + logits=logit_blob, labels=label_blob + ) + loss = pre_example_loss + return loss, pre_example_loss, logit_blob diff --git a/oneflow/compatible_single_client_python/test/models/resnet50.py b/oneflow/compatible_single_client_python/test/models/resnet50.py new file mode 100644 index 0000000000000000000000000000000000000000..f325c784749cab49fcb6d00d10cbdee113b133cb --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/resnet50.py @@ -0,0 +1,374 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import argparse +import os +from datetime import datetime + +import numpy +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util + +# import hook + +# DATA_DIR = "/dataset/PNGS/PNG228/of_record" +DATA_DIR = "/dataset/PNGS/PNG228/of_record_repeated" +MODEL_LOAD = "/dataset/PNGS/cnns_model_for_test/resnet50/models/of_model" +MODEL_SAVE = "./output/model_save-{}".format( + str(datetime.now().strftime("%Y-%m-%d-%H:%M:%S")) +) +NODE_LIST = "192.168.1.12,192.168.1.14" + +IMAGE_SIZE = 228 +BLOCK_COUNTS = [3, 4, 6, 3] +BLOCK_FILTERS = [256, 512, 1024, 2048] +BLOCK_FILTERS_INNER = [64, 128, 256, 512] + + +class DLNetSpec(object): + def __init__(self, enable_auto_mixed_precision): + self.batch_size = 8 + self.data_part_num = 32 + self.eval_dir = DATA_DIR + self.train_dir = DATA_DIR + self.model_save_dir = MODEL_SAVE + self.model_load_dir = MODEL_LOAD + self.num_nodes = 1 + self.gpu_num_per_node = 1 + self.iter_num = 10 + self.enable_auto_mixed_precision = enable_auto_mixed_precision + + +parser = argparse.ArgumentParser() +parser.add_argument("-g", "--gpu_num_per_node", type=int, default=1, required=False) +parser.add_argument("-i", "--iter_num", type=int, default=10, required=False) +parser.add_argument( + "-m", "--multinode", default=False, action="store_true", required=False +) +parser.add_argument("-n", "--node_list", type=str, default=NODE_LIST, required=False) +parser.add_argument( + "-s", "--skip_scp_binary", default=False, action="store_true", required=False +) +parser.add_argument( + "-c", + "--scp_binary_without_uuid", + default=False, + action="store_true", + required=False, +) +parser.add_argument( + "-r", "--remote_by_hand", default=False, action="store_true", required=False +) +parser.add_argument("-e", "--eval_dir", type=str, default=DATA_DIR, required=False) +parser.add_argument("-t", "--train_dir", type=str, default=DATA_DIR, required=False) +parser.add_argument( + "-load", "--model_load_dir", type=str, default=MODEL_LOAD, required=False +) +parser.add_argument( + "-save", "--model_save_dir", type=str, default=MODEL_SAVE, required=False +) +parser.add_argument("-dn", "--data_part_num", type=int, default=32, required=False) +parser.add_argument("-b", "--batch_size", type=int, 
default=8, required=False) + +g_output_key = [] +g_trainable = True + + +def _data_load(args, data_dir): + node_num = args.num_nodes + total_batch_size = args.batch_size * args.gpu_num_per_node * node_num + rgb_mean = [123.68, 116.78, 103.94] + ofrecord = flow.data.ofrecord_reader( + data_dir, + batch_size=total_batch_size, + data_part_num=args.data_part_num, + name="decode", + ) + image = flow.data.ofrecord_image_decoder(ofrecord, "encoded", color_space="RGB") + label = flow.data.ofrecord_raw_decoder( + ofrecord, "class/label", shape=(), dtype=flow.int32 + ) + rsz = flow.image.resize( + image, resize_x=IMAGE_SIZE, resize_y=IMAGE_SIZE, color_space="RGB" + ) + normal = flow.image.crop_mirror_normalize( + rsz, + color_space="RGB", + output_layout="NCHW", + mean=rgb_mean, + output_dtype=flow.float, + ) + return label, normal + + +def _conv2d( + name, + input, + filters, + kernel_size, + strides=1, + padding="SAME", + data_format="NCHW", + dilations=1, + weight_initializer=flow.variance_scaling_initializer(), +): + weight = flow.get_variable( + name + "-weight", + shape=(filters, input.shape[1], kernel_size, kernel_size), + dtype=input.dtype, + initializer=weight_initializer, + trainable=g_trainable, + ) + return flow.nn.conv2d( + input, weight, strides, padding, None, data_format, dilations, name=name + ) + + +def _batch_norm(inputs, name=None): + return flow.layers.batch_normalization( + inputs=inputs, + axis=1, + momentum=0.997, + epsilon=1e-5, + center=True, + scale=True, + trainable=g_trainable, + name=name, + ) + + +def conv2d_affine( + input, name, filters, kernel_size, strides, activation=op_conf_util.kNone +): + # input data_format must be NCHW, cannot check now + padding = "SAME" if strides > 1 or kernel_size > 1 else "VALID" + output = _conv2d(name, input, filters, kernel_size, strides, padding) + + return output + + +def bottleneck_transformation(input, block_name, filters, filters_inner, strides): + a = conv2d_affine( + input, + block_name + "_branch2a", + filters_inner, + 1, + 1, + activation=op_conf_util.kRelu, + ) + + b = conv2d_affine( + a, + block_name + "_branch2b", + filters_inner, + 1, # 1 for test origin 3 + strides, + activation=op_conf_util.kRelu, + ) + + c = conv2d_affine(b, block_name + "_branch2c", filters, 1, 1) + + return c + + +def residual_block(input, block_name, filters, filters_inner, strides_init): + if strides_init != 1 or block_name == "res2_0": + shortcut = conv2d_affine( + input, block_name + "_branch1", filters, 1, strides_init + ) + else: + shortcut = input + + bottleneck = bottleneck_transformation( + input, block_name, filters, filters_inner, strides_init + ) + + return flow.math.relu(shortcut + bottleneck) + + +def residual_stage(input, stage_name, counts, filters, filters_inner, stride_init=2): + output = input + for i in range(counts): + block_name = "%s_%d" % (stage_name, i) + output = residual_block( + output, block_name, filters, filters_inner, stride_init if i == 0 else 1, + ) + + return output + + +def resnet_conv_x_body(input, on_stage_end=lambda x: x): + output = input + for i, (counts, filters, filters_inner) in enumerate( + zip(BLOCK_COUNTS, BLOCK_FILTERS, BLOCK_FILTERS_INNER) + ): + stage_name = "res%d" % (i + 2) + output = residual_stage( + output, stage_name, counts, filters, filters_inner, 1 if i == 0 else 2, + ) + on_stage_end(output) + g_output_key.append(stage_name) + + return output + + +def resnet_stem(input): + conv1 = _conv2d("conv1", input, 64, 7, 2) + g_output_key.append("conv1") + + # for test + conv1_bn = conv1 + + pool1 = 
flow.nn.avg_pool2d( + conv1_bn, ksize=3, strides=2, padding="VALID", data_format="NCHW", name="pool1", + ) + g_output_key.append("pool1") + + return pool1 + + +def resnet50(args, data_dir): + (labels, images) = _data_load(args, data_dir) + g_output_key.append("input_img") + + with flow.scope.namespace("Resnet"): + stem = resnet_stem(images) + body = resnet_conv_x_body(stem, lambda x: x) + pool5 = flow.nn.avg_pool2d( + body, ksize=7, strides=1, padding="VALID", data_format="NCHW", name="pool5", + ) + g_output_key.append("pool5") + + fc1001 = flow.layers.dense( + flow.reshape(pool5, (pool5.shape[0], -1)), + units=1001, + use_bias=True, + kernel_initializer=flow.xavier_uniform_initializer(), + bias_initializer=flow.zeros_initializer(), + trainable=g_trainable, + name="fc1001", + ) + g_output_key.append("fc1001") + + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, fc1001, name="softmax_loss" + ) + g_output_key.append("cross_entropy") + + return loss + + +def _set_trainable(trainable): + global g_trainable + g_trainable = trainable + + +def main(args): + flow.config.machine_num(args.num_nodes) + flow.config.gpu_device_num(args.gpu_num_per_node) + + train_config = flow.FunctionConfig() + train_config.default_logical_view(flow.scope.consistent_view()) + train_config.default_data_type(flow.float) + train_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision) + + @flow.global_function(type="train", function_config=train_config) + def TrainNet(): + _set_trainable(True) + loss = resnet50(args, args.train_dir) + flow.optimizer.SGD( + flow.optimizer.PiecewiseConstantScheduler([], [0.0032]), momentum=0 + ).minimize(loss) + return loss + + eval_config = flow.FunctionConfig() + eval_config.default_data_type(flow.float) + eval_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision) + + @flow.global_function(function_config=eval_config) + def evaluate(): + with flow.scope.consistent_view(): + _set_trainable(False) + return resnet50(args, args.eval_dir) + + check_point = flow.train.CheckPoint() + check_point.load(MODEL_LOAD) + # if not args.model_load_dir: + # check_point.init() + # else: + # check_point.load(args.model_load_dir) + + loss = [] + + fmt_str = "{:>12} {:>12} {:.6f}" + print("{:>12} {:>12} {:>12}".format("iter", "loss type", "loss value")) + for i in range(args.iter_num): + train_loss = TrainNet().get().mean() + + loss.append(train_loss) + print(fmt_str.format(i, "train loss:", train_loss)) + + # output_dict = dict(zip(g_output_key, g_output)) + # hook.dump_tensor_to_file(output_dict, "./prob_output/iter_{}".format(i)) + + # if (i + 1) % 100 == 0: + # eval = evaluate().get().mean() + # print(fmt_str.format(i, "eval loss:", eval)) + + # check_point.save(MODEL_SAVE + "_" + str(i)) + + # save loss to file + loss_file = "{}n{}c.npy".format( + str(args.num_nodes), str(args.gpu_num_per_node * args.num_nodes) + ) + loss_path = "./of_loss/resnet50" + if not os.path.exists(loss_path): + os.makedirs(loss_path) + numpy.save(os.path.join(loss_path, loss_file), loss) + + +if __name__ == "__main__": + flow.env.log_dir("./output/log") + flow.env.ctrl_port(12138) + args = parser.parse_args() + if args.multinode: + flow.env.ctrl_port(12139) + nodes = [] + for n in args.node_list.strip().split(","): + addr_dict = {} + addr_dict["addr"] = n + nodes.append(addr_dict) + + flow.env.machine(nodes) + + if args.scp_binary_without_uuid: + flow.deprecated.init_worker(scp_binary=True, use_uuid=False) + elif args.skip_scp_binary: + flow.deprecated.init_worker(scp_binary=False, 
use_uuid=False)
+ else:
+ flow.deprecated.init_worker(scp_binary=True, use_uuid=True)
+ num_nodes = len(args.node_list.strip().split(",")) if args.multinode else 1
+ print(
+ "Training resnet50: num_gpu_per_node = {}, num_nodes = {}.".format(
+ args.gpu_num_per_node, num_nodes
+ )
+ )
+ main(args)
+ if (
+ args.multinode
+ and args.skip_scp_binary is False
+ and args.scp_binary_without_uuid is False
+ ):
+ flow.deprecated.delete_worker()
diff --git a/oneflow/compatible_single_client_python/test/models/run_cnns_test.py b/oneflow/compatible_single_client_python/test/models/run_cnns_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..1add9a5fb9100d09724b0402e94f39d66c48b488
--- /dev/null
+++ b/oneflow/compatible_single_client_python/test/models/run_cnns_test.py
@@ -0,0 +1,55 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import os
+
+import cnns_tests
+import env_2node
+import numpy
+from absl import app
+from absl.testing import absltest
+from test_1node_mixin import Test1NodeMixin
+from test_2node_mixin import Test2NodeMixin
+
+
+class TestNodeMixin(Test1NodeMixin, Test2NodeMixin):
+ pass
+
+
+class TestAlexNet(TestNodeMixin, cnns_tests.TestAlexNetMixin, absltest.TestCase):
+ pass
+
+
+class TestResNet50(TestNodeMixin, cnns_tests.TestResNet50Mixin, absltest.TestCase):
+ pass
+
+
+class TestVgg16(TestNodeMixin, cnns_tests.TestVgg16Mixin, absltest.TestCase):
+ pass
+
+
+class TestInceptionV3(
+ TestNodeMixin, cnns_tests.TestInceptionV3Mixin, absltest.TestCase
+):
+ pass
+
+
+def main(argv):
+ env_2node.Init()
+ absltest.main()
+
+
+if __name__ == "__main__":
+ app.run(main)
diff --git a/oneflow/compatible_single_client_python/test/models/test_1node_mixin.py b/oneflow/compatible_single_client_python/test/models/test_1node_mixin.py
new file mode 100644
index 0000000000000000000000000000000000000000..edc25350d8d42d254a3fb47690bbfcb99b6ecead
--- /dev/null
+++ b/oneflow/compatible_single_client_python/test/models/test_1node_mixin.py
@@ -0,0 +1,25 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
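[Editor's note] run_cnns_test.py above builds its test cases purely by multiple inheritance: Test1NodeMixin/Test2NodeMixin contribute the test_* entry points, while the cnns_tests.*Mixin classes are assumed to supply run_net() and print_and_check_result(). A minimal, self-contained sketch of that composition pattern, using plain unittest instead of absltest and a hypothetical FakeModelMixin in place of a real model mixin:

import unittest

class OneNodeMixin:
    # contributes a test entry point, like Test1NodeMixin in this patch
    def test_1n1c(self):
        self.run_net(1)
        self.print_and_check_result("1n1c")

class FakeModelMixin:
    # hypothetical stand-in for e.g. cnns_tests.TestAlexNetMixin
    def run_net(self, gpu_num, node_num=1):
        self.last_case = "{}n{}c".format(node_num, gpu_num * node_num)

    def print_and_check_result(self, case):
        assert case == self.last_case

class TestFakeModel(OneNodeMixin, FakeModelMixin, unittest.TestCase):
    pass

if __name__ == "__main__":
    unittest.main()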
+""" + + +class Test1NodeMixin: + def test_1n1c(self): + self.run_net(1) + self.print_and_check_result("1n1c") + + def test_1n4c(self): + self.run_net(4) + self.print_and_check_result("1n4c") diff --git a/oneflow/compatible_single_client_python/test/models/test_2node_mixin.py b/oneflow/compatible_single_client_python/test/models/test_2node_mixin.py new file mode 100644 index 0000000000000000000000000000000000000000..4edadae6f568caddf7122ff6f92bca8da41a46b1 --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/test_2node_mixin.py @@ -0,0 +1,24 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +from absl import flags + +FLAGS = flags.FLAGS + + +class Test2NodeMixin: + def test_2n8c(self): + self.run_net(4, 2, FLAGS.nodes_list) + self.print_and_check_result("2n8c") diff --git a/oneflow/compatible_single_client_python/test/models/test_alexnet_model.py b/oneflow/compatible_single_client_python/test/models/test_alexnet_model.py new file mode 100644 index 0000000000000000000000000000000000000000..776e9e11aa4285c6cdd595dcf7600b610be1d730 --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/test_alexnet_model.py @@ -0,0 +1,276 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import os +from datetime import datetime +import unittest + +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util +from oneflow.core.job import initializer_conf_pb2 as initializer_conf_util + + +_DATA_DIR = "/dataset/PNGS/PNG227/of_record_repeated" +_MODEL_SAVE_DIR = "./model_save-{}".format( + str(datetime.now().strftime("%Y-%m-%d-%H:%M:%S")) +) +_MODEL_LOAD = "/dataset/PNGS/cnns_model_for_test/alexnet/models/of_model_bk" + + +class DLNetSpec(object): + def __init__(self): + self.batch_size = 8 + self.data_part_num = 32 + self.eval_dir = _DATA_DIR + self.train_dir = _DATA_DIR + self.model_save_dir = _MODEL_SAVE_DIR + self.model_load_dir = _MODEL_LOAD + self.num_nodes = 1 + self.node_list = None + self.gpu_num_per_node = 1 + self.iter_num = 10 + + +global_specs = DLNetSpec() + + +class TrainData(flow.model.DataModule): + def __init__(self, specs): + super().__init__() + self.specs = specs + + def forward(self, *args): + return _data_load_layer(self.specs, self.specs.train_dir) + + +class ValData(flow.model.DataModule): + def __init__(self, specs): + super().__init__() + self.specs = specs + + def forward(self, *args): + return _data_load_layer(self.specs, self.specs.eval_dir) + + +class AlexNet(flow.model.Model): + def __init__(self, specs, *args, **kwargs): + super().__init__(*args, **kwargs) + self.specs = specs + + def forward(self, images, trainable=False): + conv1 = _conv2d_layer( + "conv1", images, filters=64, kernel_size=11, strides=4, padding="VALID", + ) + + pool1 = flow.nn.avg_pool2d(conv1, 3, 2, "VALID", "NCHW", name="pool1") + + conv2 = _conv2d_layer("conv2", pool1, filters=192, kernel_size=5) + + pool2 = flow.nn.avg_pool2d(conv2, 3, 2, "VALID", "NCHW", name="pool2") + + conv3 = _conv2d_layer("conv3", pool2, filters=384) + + conv4 = _conv2d_layer("conv4", conv3, filters=384) + + conv5 = _conv2d_layer("conv5", conv4, filters=256) + + pool5 = flow.nn.avg_pool2d(conv5, 3, 2, "VALID", "NCHW", name="pool5") + + def _get_initializer(): + kernel_initializer = initializer_conf_util.InitializerConf() + kernel_initializer.truncated_normal_conf.std = 0.816496580927726 + return kernel_initializer + + if len(pool5.shape) > 2: + pool5 = flow.reshape(pool5, shape=(pool5.shape[0], -1)) + + fc1 = flow.layers.dense( + inputs=pool5, + units=4096, + activation=flow.math.relu, + use_bias=False, + kernel_initializer=_get_initializer(), + bias_initializer=False, + trainable=trainable, + name="fc1", + ) + + dropout1 = fc1 + + fc2 = flow.layers.dense( + inputs=dropout1, + units=4096, + activation=flow.math.relu, + use_bias=False, + kernel_initializer=_get_initializer(), + bias_initializer=False, + trainable=trainable, + name="fc2", + ) + + dropout2 = fc2 + + fc3 = flow.layers.dense( + inputs=dropout2, + units=1001, + activation=None, + use_bias=False, + kernel_initializer=_get_initializer(), + bias_initializer=False, + trainable=trainable, + name="fc3", + ) + + return fc3 + + def training_step(self, batch, optimizer_idx): + assert optimizer_idx == 0 + images, labels = batch + fc3 = self(images, True) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, fc3, name="softmax_loss" + ) + return loss + + def validation_step(self, batch): + images, labels = batch + fc3 = self(images, False) + loss = flow.nn.sparse_softmax_cross_entropy_with_logits( + labels, fc3, name="softmax_loss" + ) + return loss + + def configure_optimizers(self): + return flow.optimizer.SGD( + flow.optimizer.PiecewiseConstantScheduler([], [0.00001]), 
momentum=0 + ) + + +class LossMoniter(flow.model.Callback): + def on_training_step_end(self, step_idx, outputs, optimizer_idx): + assert optimizer_idx == 0 + loss = outputs.mean() + fmt_str = "{:>12} {:>12} {:>12.6f}" + print(fmt_str.format(step_idx, "train loss:", loss)) + + def on_validation_step_end(self, step_idx, outputs): + loss = outputs.mean() + fmt_str = "{:>12} {:>12} {:>12.6f}" + print(fmt_str.format(step_idx, "validation loss:", loss)) + + +@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") +def test_1n1c(test_case): + + flow.env.ctrl_port(9788) + flow.config.machine_num(global_specs.num_nodes) + flow.config.gpu_device_num(global_specs.gpu_num_per_node) + + train_exe_config = flow.ExecutionConfig() + train_exe_config.default_logical_view(flow.scope.consistent_view()) + train_exe_config.default_data_type(flow.float) + train_config = flow.model.TrainingConfig() + train_config.config_execution(train_exe_config) + train_config.config_data(TrainData(global_specs)) + + val_exe_config = flow.ExecutionConfig() + val_exe_config.default_logical_view(flow.scope.consistent_view()) + val_exe_config.default_data_type(flow.float) + val_config = flow.model.ValidationConfig() + val_config.config_execution(val_exe_config) + val_config.config_data(ValData(global_specs)) + val_config.config_step_interval(10) + + ck_config = flow.model.CheckpointConfig() + ck_config.config_load(dirpath=global_specs.model_load_dir) + ck_config.config_save(dirpath=global_specs.model_save_dir, step_interval=10) + + loss_monitor_cb = LossMoniter() + + alexnet_md = AlexNet(global_specs, is_deprecated_function_style=True,) + + alexnet_md.fit( + training_config=train_config, + validation_config=val_config, + checkpoint_config=ck_config, + callbacks=[loss_monitor_cb], + max_steps=20, + ) + + +def _conv2d_layer( + name, + input, + filters, + kernel_size=3, + strides=1, + padding="SAME", + data_format="NCHW", + dilation_rate=1, + activation=op_conf_util.kRelu, + use_bias=False, + weight_initializer=flow.random_uniform_initializer(), + bias_initializer=flow.random_uniform_initializer(), +): + weight_shape = (filters, input.shape[1], kernel_size, kernel_size) + weight = flow.get_variable( + name + "-weight", + shape=weight_shape, + dtype=input.dtype, + initializer=weight_initializer, + ) + output = flow.nn.conv2d( + input, weight, strides, padding, None, data_format, dilation_rate, name=name + ) + if use_bias: + bias = flow.get_variable( + name + "-bias", + shape=(filters,), + dtype=input.dtype, + initializer=bias_initializer, + ) + output = flow.nn.bias_add(output, bias, data_format) + + if activation is not None: + if activation == op_conf_util.kRelu: + output = flow.nn.relu(output) + else: + raise NotImplementedError + + return output + + +def _data_load_layer(args, data_dir): + node_num = args.num_nodes + total_batch_size = args.batch_size * args.gpu_num_per_node * node_num + rgb_mean = [123.68, 116.78, 103.94] + (image, label) = flow.data.ofrecord_image_classification_reader( + data_dir, + batch_size=total_batch_size, + data_part_num=args.data_part_num, + image_feature_name="encoded", + label_feature_name="class/label", + color_space="RGB", + name="decode", + ) + rsz = flow.image.resize(image, target_size=[227, 227], color_space="RGB") + normal = flow.image.crop_mirror_normalize( + rsz, + color_space="RGB", + output_layout="NCHW", + mean=rgb_mean, + output_dtype=flow.float, + ) + return (normal, label) diff --git a/oneflow/compatible_single_client_python/test/models/test_bert.py 
b/oneflow/compatible_single_client_python/test/models/test_bert.py new file mode 100644 index 0000000000000000000000000000000000000000..8db83d90e5c1b8dbe96111d7d989baca70493448 --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/test_bert.py @@ -0,0 +1,277 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import copy +import sys + +import numpy as np +from oneflow.compatible import single_client as flow +from absl import flags +from pretrain import PreTrain +import unittest +import os + +FLAGS = flags.FLAGS +flags.DEFINE_string("data_dir", "/dataset/bert/bert_seq_len_128_repeat1024", "") +flags.DEFINE_string( + "model_load_dir", "/dataset/bert_regression_test/of_random_init_L-12_H-768_A-12", "" +) +flags.DEFINE_string("model_save_dir", "snapshots", "") +flags.DEFINE_float("lr", 1e-4, "learning rate") +flags.DEFINE_float("weight_decay_rate", 0.01, "") +flags.DEFINE_integer("batch_size", 24, "") +flags.DEFINE_integer("data_part_num", 8, "") +flags.DEFINE_integer("seq_length", 128, "") +flags.DEFINE_integer("max_predictions_per_seq", 20, "") +flags.DEFINE_integer("num_hidden_layers", 12, "") +flags.DEFINE_integer("num_attention_heads", 12, "") +flags.DEFINE_integer("max_position_embeddings", 512, "") +flags.DEFINE_integer("type_vocab_size", 2, "") +flags.DEFINE_integer("vocab_size", 30522, "") +flags.DEFINE_float("attention_probs_dropout_prob", 0.0, "") +flags.DEFINE_float("hidden_dropout_prob", 0.0, "") +flags.DEFINE_integer("hidden_size_per_head", 64, "") +FLAGS(sys.argv) + + +def _blob_conf(name, shape, dtype=flow.int32): + return flow.data.BlobConf( + name=name, shape=shape, dtype=dtype, codec=flow.data.RawCodec() + ) + + +def BertDecoder( + data_dir, batch_size=1, data_part_num=1, seq_length=128, max_predictions_per_seq=20 +): + ofrecord = flow.data.ofrecord_reader( + data_dir, batch_size=batch_size, data_part_num=data_part_num, name="decode", + ) + input_ids = flow.data.ofrecord_raw_decoder( + ofrecord, "input_ids", shape=(seq_length,), dtype=flow.int32 + ) + next_sentence_labels = flow.data.ofrecord_raw_decoder( + ofrecord, "next_sentence_labels", shape=(1,), dtype=flow.int32 + ) + input_mask = flow.data.ofrecord_raw_decoder( + ofrecord, "input_mask", shape=(seq_length,), dtype=flow.int32 + ) + segment_ids = flow.data.ofrecord_raw_decoder( + ofrecord, "segment_ids", shape=(seq_length,), dtype=flow.int32 + ) + masked_lm_ids = flow.data.ofrecord_raw_decoder( + ofrecord, "masked_lm_ids", shape=(max_predictions_per_seq,), dtype=flow.int32 + ) + masked_lm_positions = flow.data.ofrecord_raw_decoder( + ofrecord, + "masked_lm_positions", + shape=(max_predictions_per_seq,), + dtype=flow.int32, + ) + masked_lm_weights = flow.data.ofrecord_raw_decoder( + ofrecord, + "masked_lm_weights", + shape=(max_predictions_per_seq,), + dtype=flow.float, + ) + + return ( + input_ids, + next_sentence_labels, + input_mask, + segment_ids, + masked_lm_ids, + masked_lm_positions, + masked_lm_weights, + ) + + +def BuildPreTrainNet( + batch_size, + 
data_part_num, + seq_length=128, + max_position_embeddings=512, + num_hidden_layers=12, + num_attention_heads=12, + hidden_dropout_prob=0.1, + attention_probs_dropout_prob=0.1, + vocab_size=30522, + type_vocab_size=2, + max_predictions_per_seq=20, +): + + hidden_size = 64 * num_attention_heads + intermediate_size = hidden_size * 4 + if data_part_num == 1: + with flow.scope.placement("cpu", "0:0"): + decoders = BertDecoder( + FLAGS.data_dir, + batch_size, + data_part_num, + seq_length, + max_predictions_per_seq, + ) + else: + assert data_part_num > 1 + decoders = BertDecoder( + FLAGS.data_dir, + batch_size, + data_part_num, + seq_length, + max_predictions_per_seq, + ) + + input_ids = decoders[0] + next_sentence_labels = decoders[1] + input_mask = decoders[2] + token_type_ids = decoders[3] + masked_lm_ids = decoders[4] + masked_lm_positions = decoders[5] + masked_lm_weights = decoders[6] + return PreTrain( + input_ids, + input_mask, + token_type_ids, + masked_lm_positions, + masked_lm_ids, + masked_lm_weights, + next_sentence_labels, + vocab_size, + seq_length=seq_length, + hidden_size=hidden_size, + num_hidden_layers=num_hidden_layers, + num_attention_heads=num_attention_heads, + intermediate_size=intermediate_size, + hidden_act="gelu", + hidden_dropout_prob=hidden_dropout_prob, + attention_probs_dropout_prob=attention_probs_dropout_prob, + max_position_embeddings=max_position_embeddings, + type_vocab_size=type_vocab_size, + max_predictions_per_seq=max_predictions_per_seq, + initializer_range=0.02, + ) + + +def CreateOptimizer(): + lr_warmup = flow.optimizer.warmup.linear(1000, 0) + lr_scheduler = flow.optimizer.PolynomialScheduler( + FLAGS.lr, 100000, 0.0, warmup=lr_warmup + ) + return flow.optimizer.AdamW( + lr_scheduler, + epsilon=1e-6, + weight_decay=FLAGS.weight_decay_rate, + weight_decay_excludes=["bias", "LayerNorm", "layer_norm"], + grad_clipping=flow.optimizer.grad_clipping.by_global_norm(1.0), + ) + + +def PretrainJob(): + total_loss = BuildPreTrainNet( + batch_size=FLAGS.batch_size, + data_part_num=FLAGS.data_part_num, + seq_length=FLAGS.seq_length, + max_position_embeddings=FLAGS.max_position_embeddings, + num_hidden_layers=FLAGS.num_hidden_layers, + num_attention_heads=FLAGS.num_attention_heads, + hidden_dropout_prob=FLAGS.hidden_dropout_prob, + attention_probs_dropout_prob=FLAGS.attention_probs_dropout_prob, + vocab_size=FLAGS.vocab_size, + type_vocab_size=FLAGS.type_vocab_size, + max_predictions_per_seq=FLAGS.max_predictions_per_seq, + ) + opt = CreateOptimizer() + opt.minimize(total_loss) + return total_loss + + +func_config = flow.FunctionConfig() +func_config.default_logical_view(flow.scope.consistent_view()) +func_config.enable_auto_mixed_precision(FLAGS.enable_auto_mixed_precision) + + +@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") +def test_1n1c(test_case): + flow.config.enable_debug_mode(True) + flow.config.gpu_device_num(1) + pretrain_job = flow.global_function(type="train", function_config=func_config)( + PretrainJob + ) + check_point = flow.train.CheckPoint() + check_point.load(FLAGS.model_load_dir) + of_loss = [pretrain_job().get().mean() for _ in range(10)] + print(of_loss) + + +@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") +def test_1n4c(test_case): + flow.config.gpu_device_num(4) + pretrain_job = flow.global_function(type="train", function_config=func_config)( + PretrainJob + ) + check_point = flow.train.CheckPoint() + check_point.load(FLAGS.model_load_dir) + of_loss = [pretrain_job().get().mean() for _ 
in range(10)] + print(of_loss) + + +@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") +@flow.unittest.num_nodes_required(2) +def test_2n8c(test_case): + flow.config.gpu_device_num(4) + pretrain_job = flow.global_function(type="train", function_config=func_config)( + PretrainJob + ) + check_point = flow.train.CheckPoint() + check_point.load(FLAGS.model_load_dir) + of_loss = [pretrain_job().get().mean() for _ in range(10)] + print(of_loss) + + +def test_inplace(test_case): + test_case.assertTrue( + np.allclose(GetSeveralLossesAsNumpy(True), GetSeveralLossesAsNumpy(False)) + ) + + +def GetSeveralLossesAsNumpy(enable_inplace, num_iters=10): + flow.config.enable_debug_mode(True) + flow.config.gpu_device_num(1) + train_config = flow.FunctionConfig() + train_config.default_logical_view(flow.scope.consistent_view()) + train_config.enable_inplace(enable_inplace) + + @flow.global_function(type="train", function_config=train_config) + def PretrainJob(): + loss = BuildPreTrainNet( + batch_size=FLAGS.batch_size, + data_part_num=FLAGS.data_part_num, + seq_length=FLAGS.seq_length, + max_position_embeddings=FLAGS.max_position_embeddings, + num_hidden_layers=1, + num_attention_heads=FLAGS.num_attention_heads, + hidden_dropout_prob=FLAGS.hidden_dropout_prob, + attention_probs_dropout_prob=FLAGS.attention_probs_dropout_prob, + vocab_size=FLAGS.vocab_size, + type_vocab_size=FLAGS.type_vocab_size, + max_predictions_per_seq=FLAGS.max_predictions_per_seq, + ) + CreateOptimizer().minimize(loss) + return loss + + check_point = flow.train.CheckPoint() + check_point.load(FLAGS.model_load_dir) + ret = [PretrainJob().get().mean() for _ in range(num_iters)] + flow.clear_default_session() + return np.array(ret) diff --git a/oneflow/compatible_single_client_python/test/models/test_dcgan.py b/oneflow/compatible_single_client_python/test/models/test_dcgan.py new file mode 100644 index 0000000000000000000000000000000000000000..54c9507cae2ff973369cc390e76048bc0df85764 --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/test_dcgan.py @@ -0,0 +1,353 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
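[Editor's note] The DCGAN test below assembles the standard GAN objectives out of flow.nn.sigmoid_cross_entropy_with_logits: the generator loss scores fake logits against all-ones labels, while the discriminator loss sums a fake-vs-zeros term and a real-vs-ones term. A NumPy sketch of that arithmetic (illustrative only, using the numerically stable form of sigmoid cross entropy):

import numpy as np

def sigmoid_ce_with_logits(labels, logits):
    # stable form: max(x, 0) - x * z + log(1 + exp(-|x|))
    return (np.maximum(logits, 0) - logits * labels
            + np.log1p(np.exp(-np.abs(logits))))

g_logits = np.array([0.3, -1.2])  # discriminator scores of generated images
d_logits = np.array([2.0, 1.5])   # discriminator scores of real images
g_loss = sigmoid_ce_with_logits(np.ones_like(g_logits), g_logits)
d_loss = (sigmoid_ce_with_logits(np.zeros_like(g_logits), g_logits)
          + sigmoid_ce_with_logits(np.ones_like(d_logits), d_logits))
print(g_loss.mean(), d_loss.mean())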
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft +import numpy as np +import os +import unittest + + +@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") +def test_1n1c(test_case): + dcgan = DCGAN() + dcgan.compare_with_tf(1) + + +@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") +def test_1n4c(test_case): + dcgan = DCGAN() + dcgan.compare_with_tf(4) + + +class DCGAN: + def __init__(self): + self.lr = 1e-4 + self.z_dim = 100 + self.batch_size = 32 + + def compare_with_tf(self, gpu_num, result_dir="/dataset/gan_test/dcgan/"): + flow.config.gpu_device_num(gpu_num) + func_config = flow.FunctionConfig() + func_config.default_data_type(flow.float) + func_config.default_logical_view(flow.scope.consistent_view()) + + @flow.global_function(type="train", function_config=func_config) + def test_generator( + z: oft.Numpy.Placeholder((self.batch_size, self.z_dim)), + label1: oft.Numpy.Placeholder((self.batch_size, 1)), + ): + g_out = self.generator(z, trainable=True, const_init=True) + g_logits = self.discriminator(g_out, trainable=False, const_init=True) + g_loss = flow.nn.sigmoid_cross_entropy_with_logits( + flow.ones_like(g_logits), + g_logits, + name="Gloss_sigmoid_cross_entropy_with_logits", + ) + + flow.optimizer.SGD( + flow.optimizer.PiecewiseConstantScheduler([], [self.lr]), momentum=0 + ).minimize(g_loss) + return g_loss + + @flow.global_function(type="train", function_config=func_config) + def test_discriminator( + z: oft.Numpy.Placeholder((self.batch_size, 100)), + images: oft.Numpy.Placeholder((self.batch_size, 1, 28, 28)), + label1: oft.Numpy.Placeholder((self.batch_size, 1)), + label0: oft.Numpy.Placeholder((self.batch_size, 1)), + ): + g_out = self.generator(z, trainable=False, const_init=True) + g_logits = self.discriminator(g_out, trainable=True, const_init=True) + d_loss_fake = flow.nn.sigmoid_cross_entropy_with_logits( + flow.zeros_like(g_logits), + g_logits, + name="Dloss_fake_sigmoid_cross_entropy_with_logits", + ) + + d_logits = self.discriminator( + images, trainable=True, reuse=True, const_init=True + ) + d_loss_real = flow.nn.sigmoid_cross_entropy_with_logits( + flow.ones_like(d_logits), + d_logits, + name="Dloss_real_sigmoid_cross_entropy_with_logits", + ) + d_loss = d_loss_fake + d_loss_real + flow.optimizer.SGD( + flow.optimizer.PiecewiseConstantScheduler([], [self.lr]), momentum=0 + ).minimize(d_loss) + + return d_loss + + check_point = flow.train.CheckPoint() + check_point.init() + + z = np.load(os.path.join(result_dir, "z.npy")) + imgs = np.load(os.path.join(result_dir, "img.npy")).transpose(0, 3, 1, 2) + label1 = np.ones((self.batch_size, 1)).astype(np.float32) + label0 = np.zeros((self.batch_size, 1)).astype(np.float32) + g_loss = test_generator(z, label1).get() + d_loss = test_discriminator(z, imgs, label1, label0).get() + tf_g_loss = np.load(os.path.join(result_dir, "g_loss.npy")) + tf_d_loss = np.load(os.path.join(result_dir, "d_loss.npy")) + + if gpu_num == 1: # multi-gpu result can not pass + assert np.allclose( + g_loss.numpy(), tf_g_loss, rtol=1e-2, atol=1e-1 + ), "{}-{}".format(g_loss.ndarray().mean(), tf_g_loss.mean()) + assert np.allclose( + d_loss.numpy(), tf_d_loss, rtol=1e-2, atol=1e-1 + ), "{}-{}".format(d_loss.ndarray().mean(), tf_d_loss.mean()) + + def generator(self, z, const_init=False, trainable=True): + # (n, 256, 7, 7) + h0 = layers.dense( + z, 7 * 7 * 256, name="g_fc1", const_init=const_init, trainable=trainable + ) + h0 = 
layers.batchnorm(h0, axis=1, name="g_bn1") + h0 = flow.nn.leaky_relu(h0, 0.3) + h0 = flow.reshape(h0, (-1, 256, 7, 7)) + # (n, 128, 7, 7) + h1 = layers.deconv2d( + h0, + 128, + 5, + strides=1, + name="g_deconv1", + const_init=const_init, + trainable=trainable, + ) + h1 = layers.batchnorm(h1, name="g_bn2") + h1 = flow.nn.leaky_relu(h1, 0.3) + # (n, 64, 14, 14) + h2 = layers.deconv2d( + h1, + 64, + 5, + strides=2, + name="g_deconv2", + const_init=const_init, + trainable=trainable, + ) + h2 = layers.batchnorm(h2, name="g_bn3") + h2 = flow.nn.leaky_relu(h2, 0.3) + # (n, 1, 28, 28) + out = layers.deconv2d( + h2, + 1, + 5, + strides=2, + name="g_deconv3", + const_init=const_init, + trainable=trainable, + ) + out = flow.math.tanh(out) + return out + + def discriminator(self, img, const_init=False, trainable=True, reuse=False): + # (n, 1, 28, 28) + h0 = layers.conv2d( + img, + 64, + 5, + name="d_conv1", + const_init=const_init, + trainable=trainable, + reuse=reuse, + ) + h0 = flow.nn.leaky_relu(h0, 0.3) + # h0 = flow.nn.dropout(h0, rate=0.3) + # (n, 64, 14, 14) + h1 = layers.conv2d( + h0, + 128, + 5, + name="d_conv2", + const_init=const_init, + trainable=trainable, + reuse=reuse, + ) + h1 = flow.nn.leaky_relu(h1, 0.3) + # h1 = flow.nn.dropout(h1, rate=0.3) + # (n, 128 * 7 * 7) + out = flow.reshape(h1, (self.batch_size, -1)) + # (n, 1) + out = layers.dense( + out, 1, name="d_fc", const_init=const_init, trainable=trainable, reuse=reuse + ) + return out + + +class layers: + @staticmethod + def deconv2d( + input, + filters, + size, + name, + strides=2, + trainable=True, + reuse=False, + const_init=False, + use_bias=False, + ): + name_ = name if reuse == False else name + "_reuse" + # weight : [in_channels, out_channels, height, width] + weight_shape = (input.shape[1], filters, size, size) + output_shape = ( + input.shape[0], + filters, + input.shape[2] * strides, + input.shape[3] * strides, + ) + + weight = flow.get_variable( + name + "-weight", + shape=weight_shape, + dtype=input.dtype, + initializer=flow.random_normal_initializer(stddev=0.02) + if not const_init + else flow.constant_initializer(0.002), + trainable=trainable, + reuse=reuse, + ) + + output = flow.nn.conv2d_transpose( + input, + weight, + strides=[strides, strides], + output_shape=output_shape, + padding="SAME", + data_format="NCHW", + name=name_, + ) + + if use_bias: + bias = flow.get_variable( + name + "-bias", + shape=(filters,), + dtype=input.dtype, + initializer=flow.constant_initializer(0.0), + trainable=trainable, + reuse=reuse, + ) + + output = flow.nn.bias_add(output, bias, "NCHW") + return output + + @staticmethod + def conv2d( + input, + filters, + size, + name, + strides=2, + padding="same", + trainable=True, + reuse=False, + const_init=False, + use_bias=True, + ): + name_ = name if reuse == False else name + "_reuse" + + # (output_dim, k_h, k_w, input.shape[3]) if NHWC + weight_shape = (filters, input.shape[1], size, size) + weight = flow.get_variable( + name + "-weight", + shape=weight_shape, + dtype=input.dtype, + initializer=flow.random_normal_initializer(stddev=0.02) + if not const_init + else flow.constant_initializer(0.002), + trainable=trainable, + reuse=reuse, + ) + + output = flow.nn.compat_conv2d( + input, + weight, + strides=[strides, strides], + padding=padding, + data_format="NCHW", + name=name_, + ) + + if use_bias: + bias = flow.get_variable( + name + "-bias", + shape=(filters,), + dtype=input.dtype, + initializer=flow.constant_initializer(0.0), + trainable=trainable, + reuse=reuse, + ) + + output = 
flow.nn.bias_add(output, bias, "NCHW") + return output + + @staticmethod + def dense( + input, + units, + name, + use_bias=False, + trainable=True, + reuse=False, + const_init=False, + ): + name_ = name if reuse == False else name + "_reuse" + + in_shape = input.shape + in_num_axes = len(in_shape) + assert in_num_axes >= 2 + + inputs = flow.reshape(input, (-1, in_shape[-1])) if in_num_axes > 2 else input + + weight = flow.get_variable( + name="{}-weight".format(name), + shape=(units, inputs.shape[1]), + dtype=inputs.dtype, + initializer=flow.random_normal_initializer(stddev=0.02) + if not const_init + else flow.constant_initializer(0.002), + trainable=trainable, + model_name="weight", + reuse=reuse, + ) + + out = flow.matmul(a=inputs, b=weight, transpose_b=True, name=name_ + "matmul",) + + if use_bias: + bias = flow.get_variable( + name="{}-bias".format(name), + shape=(units,), + dtype=inputs.dtype, + initializer=flow.random_normal_initializer() + if not const_init + else flow.constant_initializer(0.002), + trainable=trainable, + model_name="bias", + reuse=reuse, + ) + out = flow.nn.bias_add(out, bias, name=name_ + "_bias_add") + + out = flow.reshape(out, in_shape[:-1] + (units,)) if in_num_axes > 2 else out + return out + + @staticmethod + def batchnorm(input, name, axis=1, reuse=False): + name_ = name if reuse == False else name + "_reuse" + return flow.layers.batch_normalization(input, axis=axis, name=name_) diff --git a/oneflow/compatible_single_client_python/test/models/test_dcgan_model.py b/oneflow/compatible_single_client_python/test/models/test_dcgan_model.py new file mode 100644 index 0000000000000000000000000000000000000000..c7c05cea3015de99a54906334a61da677d015a99 --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/test_dcgan_model.py @@ -0,0 +1,388 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
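[Editor's note] test_dcgan_model.py below restates the same DCGAN as a flow.model.Model with two optimizers (optimizer_idx 0 for the generator, 1 for the discriminator). Its deconv2d helper computes output_shape by hand: with padding="SAME" and output_shape set to input * strides, each transposed convolution simply multiplies the spatial dims by the stride, which is how the generator walks 7x7 -> 14x14 -> 28x28. A quick plain-Python check of that bookkeeping:

def deconv_same_out_hw(in_hw, stride):
    # conv2d_transpose with SAME padding and a matching output_shape:
    # each spatial dim is multiplied by the stride
    return tuple(d * stride for d in in_hw)

hw = (7, 7)                # g_fc1 output reshaped to (n, 256, 7, 7)
for stride in (1, 2, 2):   # g_deconv1, g_deconv2, g_deconv3
    hw = deconv_same_out_hw(hw, stride)
    print(hw)              # (7, 7), then (14, 14), then (28, 28)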
+""" +from oneflow.compatible import single_client as flow +import numpy as np +import os +import unittest + + +class DCGAN(flow.model.Model): + def __init__(self, gpu_num, batch_size, *args, **kwargs): + super().__init__(*args, **kwargs) + self.gpu_num = gpu_num + self.lr = 1e-4 + self.z_dim = 100 + self.batch_size = batch_size + + def _generator(self, z, const_init=False, trainable=True): + # (n, 256, 7, 7) + h0 = Layers.dense( + z, 7 * 7 * 256, name="g_fc1", const_init=const_init, trainable=trainable + ) + h0 = Layers.batchnorm(h0, axis=1, name="g_bn1") + h0 = flow.nn.leaky_relu(h0, 0.3) + h0 = flow.reshape(h0, (-1, 256, 7, 7)) + # (n, 128, 7, 7) + h1 = Layers.deconv2d( + h0, + 128, + 5, + strides=1, + name="g_deconv1", + const_init=const_init, + trainable=trainable, + ) + h1 = Layers.batchnorm(h1, name="g_bn2") + h1 = flow.nn.leaky_relu(h1, 0.3) + # (n, 64, 14, 14) + h2 = Layers.deconv2d( + h1, + 64, + 5, + strides=2, + name="g_deconv2", + const_init=const_init, + trainable=trainable, + ) + h2 = Layers.batchnorm(h2, name="g_bn3") + h2 = flow.nn.leaky_relu(h2, 0.3) + # (n, 1, 28, 28) + out = Layers.deconv2d( + h2, + 1, + 5, + strides=2, + name="g_deconv3", + const_init=const_init, + trainable=trainable, + ) + out = flow.math.tanh(out) + return out + + def _discriminator(self, img, const_init=False, trainable=True, reuse=False): + # (n, 1, 28, 28) + h0 = Layers.conv2d( + img, + 64, + 5, + name="d_conv1", + const_init=const_init, + trainable=trainable, + reuse=reuse, + ) + h0 = flow.nn.leaky_relu(h0, 0.3) + # h0 = flow.nn.dropout(h0, rate=0.3) + # (n, 64, 14, 14) + h1 = Layers.conv2d( + h0, + 128, + 5, + name="d_conv2", + const_init=const_init, + trainable=trainable, + reuse=reuse, + ) + h1 = flow.nn.leaky_relu(h1, 0.3) + # h1 = flow.nn.dropout(h1, rate=0.3) + # (n, 128 * 7 * 7) + out = flow.reshape(h1, (self.batch_size, -1)) + # (n, 1) + out = Layers.dense( + out, 1, name="d_fc", const_init=const_init, trainable=trainable, reuse=reuse + ) + return out + + def forward(self, batch, const_init=False, trainable=False): + return self._generator(batch, const_init=const_init, trainable=trainable) + + def training_step(self, batch, optimizer_idx): + if optimizer_idx == 0: + # generator + (z,) = batch + g_out = self._generator(z, trainable=True, const_init=True) + g_logits = self._discriminator(g_out, trainable=False, const_init=True) + g_loss = flow.nn.sigmoid_cross_entropy_with_logits( + flow.ones_like(g_logits), + g_logits, + name="Gloss_sigmoid_cross_entropy_with_logits", + ) + return (g_loss, g_out) + elif optimizer_idx == 1: + # discriminator + z, images = batch + g_out = self._generator(z, trainable=False, const_init=True) + g_logits = self._discriminator(g_out, trainable=True, const_init=True) + d_loss_fake = flow.nn.sigmoid_cross_entropy_with_logits( + flow.zeros_like(g_logits), + g_logits, + name="Dloss_fake_sigmoid_cross_entropy_with_logits", + ) + + d_logits = self._discriminator( + images, trainable=True, reuse=True, const_init=True + ) + d_loss_real = flow.nn.sigmoid_cross_entropy_with_logits( + flow.ones_like(d_logits), + d_logits, + name="Dloss_real_sigmoid_cross_entropy_with_logits", + ) + d_loss = d_loss_fake + d_loss_real + return d_loss + + def configure_optimizers(self): + generator_opt = flow.optimizer.SGD( + flow.optimizer.PiecewiseConstantScheduler([], [self.lr]), momentum=0 + ) + discriminator_opt = flow.optimizer.SGD( + flow.optimizer.PiecewiseConstantScheduler([], [self.lr]), momentum=0 + ) + return [generator_opt, discriminator_opt] + + +class 
LossMoniter(flow.model.Callback): + def __init__(self, result_dir): + self.result_dir = result_dir + + def on_training_step_end(self, step_idx, outputs, optimizer_idx): + if optimizer_idx == 0: + g_loss, g_out = outputs + fmt_str = "{:>12} {:>12} {:>12.6f}" + print(fmt_str.format(step_idx, "train g_loss:", g_loss.numpy().mean())) + print(fmt_str.format(step_idx, "train g_out:", g_out.numpy().mean())) + tf_g_loss = np.load(os.path.join(self.result_dir, "g_loss.npy")) + assert np.allclose( + g_loss.numpy(), tf_g_loss, rtol=1e-2, atol=1e-1 + ), "{}-{}".format(g_loss.numpy().mean(), tf_g_loss.mean()) + elif optimizer_idx == 1: + d_loss = outputs + fmt_str = "{:>12} {:>12} {:>12.6f}" + print(fmt_str.format(step_idx, "train d_loss:", d_loss.numpy().mean())) + tf_d_loss = np.load(os.path.join(self.result_dir, "d_loss.npy")) + assert np.allclose( + d_loss.numpy(), tf_d_loss, rtol=1e-2, atol=1e-1 + ), "{}-{}".format(d_loss.numpy().mean(), tf_d_loss.mean()) + + +class NumpyTrainData(flow.model.NumpyDataModule): + def __init__(self, result_dir, batch_size): + super().__init__() + self.z = np.load(os.path.join(result_dir, "z.npy")) + self.images = np.load(os.path.join(result_dir, "img.npy")).transpose(0, 3, 1, 2) + + def forward(self, step_idx, optimizer_idx): + if optimizer_idx == 0: + return (self.z,) + else: + return (self.z, self.images) + + +class NumpyValData(flow.model.NumpyDataModule): + def __init__(self, result_dir, batch_size): + super().__init__() + self.z = np.load(os.path.join(result_dir, "z.npy")) + + def forward(self, step_idx): + return (self.z,) + + +@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") +def test_1n1c(test_case): + dcgan_compare = DCGANCompare() + dcgan_compare.compare_with_tf(1) + + +class DCGANCompare: + def compare_with_tf(self, gpu_num, result_dir="/dataset/gan_test/dcgan/"): + batch_size = 32 + + flow.config.gpu_device_num(gpu_num) + + train_exe_config = flow.ExecutionConfig() + train_exe_config.default_data_type(flow.float) + train_exe_config.default_logical_view(flow.scope.consistent_view()) + train_config = flow.model.TrainingConfig() + train_config.config_execution(train_exe_config) + train_config.config_data(NumpyTrainData(result_dir, batch_size)) + + loss_monitor_cb = LossMoniter(result_dir) + + dcgan_md = DCGAN(gpu_num, batch_size, is_deprecated_function_style=True,) + + dcgan_md.fit( + training_config=train_config, callbacks=[loss_monitor_cb], max_steps=3, + ) + + +class Layers: + @staticmethod + def deconv2d( + input, + filters, + size, + name, + strides=2, + trainable=True, + reuse=False, + const_init=False, + use_bias=False, + ): + name_ = name if not reuse else name + "_reuse" + # weight : [in_channels, out_channels, height, width] + weight_shape = (input.shape[1], filters, size, size) + output_shape = ( + input.shape[0], + filters, + input.shape[2] * strides, + input.shape[3] * strides, + ) + + weight = flow.get_variable( + name + "-weight", + shape=weight_shape, + dtype=input.dtype, + initializer=flow.random_normal_initializer(stddev=0.02) + if not const_init + else flow.constant_initializer(0.002), + trainable=trainable, + reuse=reuse, + ) + + output = flow.nn.conv2d_transpose( + input, + weight, + strides=[strides, strides], + output_shape=output_shape, + padding="SAME", + data_format="NCHW", + name=name_, + ) + + if use_bias: + bias = flow.get_variable( + name + "-bias", + shape=(filters,), + dtype=input.dtype, + initializer=flow.constant_initializer(0.0), + trainable=trainable, + reuse=reuse, + ) + + output = 
flow.nn.bias_add(output, bias, "NCHW") + return output + + @staticmethod + def conv2d( + input, + filters, + size, + name, + strides=2, + padding="same", + trainable=True, + reuse=False, + const_init=False, + use_bias=True, + ): + name_ = name if not reuse else name + "_reuse" + + # (output_dim, k_h, k_w, input.shape[3]) if NHWC + weight_shape = (filters, input.shape[1], size, size) + weight = flow.get_variable( + name + "-weight", + shape=weight_shape, + dtype=input.dtype, + initializer=flow.random_normal_initializer(stddev=0.02) + if not const_init + else flow.constant_initializer(0.002), + trainable=trainable, + reuse=reuse, + ) + + output = flow.nn.compat_conv2d( + input, + weight, + strides=[strides, strides], + padding=padding, + data_format="NCHW", + name=name_, + ) + + if use_bias: + bias = flow.get_variable( + name + "-bias", + shape=(filters,), + dtype=input.dtype, + initializer=flow.constant_initializer(0.0), + trainable=trainable, + reuse=reuse, + ) + + output = flow.nn.bias_add(output, bias, "NCHW") + return output + + @staticmethod + def dense( + input, + units, + name, + use_bias=False, + trainable=True, + reuse=False, + const_init=False, + ): + name_ = name if not reuse else name + "_reuse" + + in_shape = input.shape + in_num_axes = len(in_shape) + assert in_num_axes >= 2 + + inputs = flow.reshape(input, (-1, in_shape[-1])) if in_num_axes > 2 else input + + weight = flow.get_variable( + name="{}-weight".format(name), + shape=(units, inputs.shape[1]), + dtype=inputs.dtype, + initializer=flow.random_normal_initializer(stddev=0.02) + if not const_init + else flow.constant_initializer(0.002), + trainable=trainable, + model_name="weight", + reuse=reuse, + ) + + out = flow.matmul(a=inputs, b=weight, transpose_b=True, name=name_ + "matmul",) + + if use_bias: + bias = flow.get_variable( + name="{}-bias".format(name), + shape=(units,), + dtype=inputs.dtype, + initializer=flow.random_normal_initializer() + if not const_init + else flow.constant_initializer(0.002), + trainable=trainable, + model_name="bias", + reuse=reuse, + ) + out = flow.nn.bias_add(out, bias, name=name_ + "_bias_add") + + out = flow.reshape(out, in_shape[:-1] + (units,)) if in_num_axes > 2 else out + return out + + @staticmethod + def batchnorm(input, name, axis=1, reuse=False): + name_ = name if not reuse else name + "_reuse" + return flow.layers.batch_normalization(input, axis=axis, name=name_) diff --git a/oneflow/compatible_single_client_python/test/models/test_dqn.py b/oneflow/compatible_single_client_python/test/models/test_dqn.py new file mode 100644 index 0000000000000000000000000000000000000000..3d48d13994a5f2561caf1965c5e30a375fae13b0 --- /dev/null +++ b/oneflow/compatible_single_client_python/test/models/test_dqn.py @@ -0,0 +1,268 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
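[Editor's note] test_dqn.py below keeps two copies of the Q-network parameters, the trainable "QNet" and the frozen target "QNetT", and synchronizes them with a block of flow.assign calls. A framework-free sketch of that hard target-network copy, with NumPy stand-ins for the variables (names illustrative):

import numpy as np

qnet = {"fc2_weight": np.random.randn(2, 512), "fc2_bias": np.zeros(2)}
target = {k: np.zeros_like(v) for k, v in qnet.items()}

def copy_qnet_to_target():
    # the equivalent of each flow.assign(p_param, t_param) call:
    # overwrite the target parameter wholesale, no soft blending
    for k in qnet:
        target[k] = qnet[k].copy()

copy_qnet_to_target()
assert all(np.array_equal(qnet[k], target[k]) for k in qnet)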
+""" +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft +import numpy as np +import os +import unittest + + +@unittest.skipIf(os.getenv("ONEFLOW_TEST_CPU_ONLY"), "only test cpu cases") +def test_1n1c(test_case): + dqn = DQN("gpu") + dqn.test_parameters_copy() + + +# get QNet parameters +def getQNetParams(var_name_prefix: str = "QNet", is_train: bool = True): + weight_init = flow.variance_scaling_initializer( + scale=1.0, mode="fan_in", distribution="truncated_normal", data_format="NCHW" + ) + bias_init = flow.constant_initializer(value=0.0) + + conv_prefix = "_conv1" + conv1_weight = flow.get_variable( + var_name_prefix + conv_prefix + "_weight", + shape=(32, 4, 3, 3), + dtype=flow.float32, + initializer=weight_init, + trainable=is_train, + ) + conv1_bias = flow.get_variable( + var_name_prefix + conv_prefix + "_bias", + shape=(32,), + dtype=flow.float32, + initializer=bias_init, + trainable=is_train, + ) + + conv_prefix = "_conv2" + conv2_weight = flow.get_variable( + var_name_prefix + conv_prefix + "_weight", + shape=(32, 32, 3, 3), + dtype=flow.float32, + initializer=weight_init, + trainable=is_train, + ) + conv2_bias = flow.get_variable( + var_name_prefix + conv_prefix + "_bias", + shape=(32,), + dtype=flow.float32, + initializer=bias_init, + trainable=is_train, + ) + + fc_prefix = "_fc1" + fc1_weight = flow.get_variable( + var_name_prefix + fc_prefix + "_weight", + shape=(512, 32 * 16 * 16), + dtype=flow.float32, + initializer=weight_init, + trainable=is_train, + ) + fc1_bias = flow.get_variable( + var_name_prefix + fc_prefix + "_bias", + shape=(512,), + dtype=flow.float32, + initializer=bias_init, + trainable=is_train, + ) + + fc_prefix = "_fc2" + fc2_weight = flow.get_variable( + var_name_prefix + fc_prefix + "_weight", + shape=(2, 512), + dtype=flow.float32, + initializer=weight_init, + trainable=is_train, + ) + fc2_bias = flow.get_variable( + var_name_prefix + fc_prefix + "_bias", + shape=(2,), + dtype=flow.float32, + initializer=bias_init, + trainable=is_train, + ) + + return ( + conv1_weight, + conv1_bias, + conv2_weight, + conv2_bias, + fc1_weight, + fc1_bias, + fc2_weight, + fc2_bias, + ) + + +BATCH_SIZE = 32 + + +def createOfQNet( + input_image: oft.Numpy.Placeholder((BATCH_SIZE, 4, 64, 64), dtype=flow.float32), + var_name_prefix: str = "QNet", + is_train: bool = True, +) -> oft.Numpy: + + ( + conv1_weight, + conv1_bias, + conv2_weight, + conv2_bias, + fc1_weight, + fc1_bias, + fc2_weight, + fc2_bias, + ) = getQNetParams(var_name_prefix=var_name_prefix, is_train=is_train) + + ( + conv1_weight, + conv1_bias, + conv2_weight, + conv2_bias, + fc1_weight, + fc1_bias, + fc2_weight, + fc2_bias, + ) = getQNetParams(var_name_prefix=var_name_prefix, is_train=is_train) + + conv1 = flow.nn.compat_conv2d( + input_image, conv1_weight, strides=[1, 1], padding="same", data_format="NCHW" + ) + conv1 = flow.nn.bias_add(conv1, conv1_bias, "NCHW") + conv1 = flow.nn.relu(conv1) + + pool1 = flow.nn.max_pool2d(conv1, 2, 2, "VALID", "NCHW", name="pool1") + + conv2 = flow.nn.compat_conv2d( + pool1, conv2_weight, strides=[1, 1], padding="same", data_format="NCHW" + ) + conv2 = flow.nn.bias_add(conv2, conv2_bias, "NCHW") + conv2 = flow.nn.relu(conv2) + + pool2 = flow.nn.max_pool2d(conv2, 2, 2, "VALID", "NCHW", name="pool2") + + # conv3.shape = (32, 32, 16, 16), after reshape become (32, 32 * 16 * 16) + pool2_flatten = flow.reshape(pool2, (BATCH_SIZE, -1)) + fc1 = flow.matmul(a=pool2_flatten, b=fc1_weight, transpose_b=True) + fc1 = 
+    fc1 = flow.nn.bias_add(fc1, fc1_bias)
+    fc1 = flow.nn.relu(fc1)
+
+    fc2 = flow.matmul(a=fc1, b=fc2_weight, transpose_b=True)
+    fc2 = flow.nn.bias_add(fc2, fc2_bias)
+
+    return fc2
+
+
+def get_train_config():
+    func_config = flow.FunctionConfig()
+    func_config.default_data_type(flow.float32)
+    func_config.default_logical_view(flow.scope.consistent_view())
+    return func_config
+
+
+def get_predict_config():
+    func_config = flow.FunctionConfig()
+    func_config.default_data_type(flow.float32)
+    func_config.default_logical_view(flow.scope.consistent_view())
+    return func_config
+
+
+class DQN:
+    def __init__(self, device_tag):
+        self.device_tag_ = device_tag
+
+    def test_parameters_copy(self):
+        @flow.global_function("train", get_train_config())
+        def trainQNet(
+            input_image: oft.Numpy.Placeholder(
+                (BATCH_SIZE, 4, 64, 64), dtype=flow.float32
+            ),
+            y_input: oft.Numpy.Placeholder((BATCH_SIZE,), dtype=flow.float32),
+            action_input: oft.Numpy.Placeholder((BATCH_SIZE, 2), dtype=flow.float32),
+        ) -> oft.Numpy:
+            with flow.scope.placement(self.device_tag_, "0:0-0"):
+                out = createOfQNet(input_image, var_name_prefix="QNet", is_train=True)
+                Q_Action = flow.math.reduce_sum(out * action_input, axis=1)
+                cost = flow.math.reduce_mean(flow.math.square(y_input - Q_Action))
+                learning_rate = 0.0002
+                flow.optimizer.SGD(
+                    flow.optimizer.PiecewiseConstantScheduler([], [learning_rate]),
+                    momentum=0,
+                ).minimize(cost)
+            return out
+
+        @flow.global_function("predict", get_predict_config())
+        def predictQNet(
+            input_image: oft.Numpy.Placeholder(
+                (BATCH_SIZE, 4, 64, 64), dtype=flow.float32
+            )
+        ) -> oft.Numpy:
+            with flow.scope.placement(self.device_tag_, "0:0-0"):
+                out = createOfQNet(input_image, var_name_prefix="QNetT", is_train=False)
+            return out
+
+        # copy QNet parameters to QNetT
+        @flow.global_function("predict", get_predict_config())
+        def copyQNetToQnetT():
+            with flow.scope.placement(self.device_tag_, "0:0-0"):
+                (
+                    t_conv1_weight,
+                    t_conv1_bias,
+                    t_conv2_weight,
+                    t_conv2_bias,
+                    t_fc1_weight,
+                    t_fc1_bias,
+                    t_fc2_weight,
+                    t_fc2_bias,
+                ) = getQNetParams(var_name_prefix="QNet", is_train=True)
+                (
+                    p_conv1_weight,
+                    p_conv1_bias,
+                    p_conv2_weight,
+                    p_conv2_bias,
+                    p_fc1_weight,
+                    p_fc1_bias,
+                    p_fc2_weight,
+                    p_fc2_bias,
+                ) = getQNetParams(var_name_prefix="QNetT", is_train=False)
+
+                flow.assign(p_conv1_weight, t_conv1_weight)
+                flow.assign(p_conv1_bias, t_conv1_bias)
+                flow.assign(p_conv2_weight, t_conv2_weight)
+                flow.assign(p_conv2_bias, t_conv2_bias)
+                flow.assign(p_fc1_weight, t_fc1_weight)
+                flow.assign(p_fc1_bias, t_fc1_bias)
+                flow.assign(p_fc2_weight, t_fc2_weight)
+                flow.assign(p_fc2_bias, t_fc2_bias)
+
+        check_point = flow.train.CheckPoint()
+        check_point.init()
+
+        input_image = np.ones((BATCH_SIZE, 4, 64, 64)).astype(np.float32)
+        y_input = np.random.random_sample((BATCH_SIZE,)).astype(np.float32)
+        action_input = np.random.random_sample((BATCH_SIZE, 2)).astype(np.float32)
+
+        train_out = trainQNet(input_image, y_input, action_input)
+        copyQNetToQnetT()
+
+        train_out = trainQNet(input_image, y_input, action_input)
+        predict_out = predictQNet(input_image)
+
+        assert np.allclose(
+            train_out, predict_out, rtol=1e-2, atol=1e-1
+        ), "{}-{}".format(train_out.mean(), predict_out.mean())
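
Review note: trainQNet runs once more after copyQNetToQnetT(), so by the time
train_out and predict_out are compared, QNet has taken one extra SGD step away
from the freshly synced QNetT; that is why the assertion uses np.allclose with
loose tolerances rather than exact equality. The cost being minimized is easy
to replay in NumPy (a standalone sketch, not part of this patch; one-hot
actions are assumed for clarity, while the test feeds random action vectors):

    import numpy as np

    # Replays the cost inside trainQNet: a one-hot action mask selects
    # Q(s, a), which is regressed onto the target y with MSE.
    out = np.random.rand(32, 2).astype(np.float32)                     # QNet output
    action = np.eye(2, dtype=np.float32)[np.random.randint(0, 2, 32)]  # one-hot actions
    y = np.random.rand(32).astype(np.float32)                          # targets
    q_action = (out * action).sum(axis=1)    # flow.math.reduce_sum(out * action, axis=1)
    cost = np.square(y - q_action).mean()    # flow.math.reduce_mean(flow.math.square(...))
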
diff --git a/oneflow/compatible_single_client_python/test/models/vgg16.py b/oneflow/compatible_single_client_python/test/models/vgg16.py
new file mode 100644
index 0000000000000000000000000000000000000000..30d13b2b13f9a8d885a716061614288885d916db
--- /dev/null
+++ b/oneflow/compatible_single_client_python/test/models/vgg16.py
@@ -0,0 +1,331 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+import argparse
+import os
+from datetime import datetime
+
+import numpy
+from oneflow.compatible import single_client as flow
+from oneflow.core.operator import op_conf_pb2 as op_conf_util
+from oneflow.core.job import initializer_conf_pb2 as initializer_conf_util
+
+_DATA_DIR = "/dataset/PNGS/PNG224/of_record_repeated"
+_SINGLE_DATA_DIR = "/dataset/PNGS/PNG224/of_record"
+_MODEL_LOAD_DIR = "/dataset/PNGS/cnns_model_for_test/vgg16/models/of_model"
+_MODEL_SAVE_DIR = "./model_save-{}".format(
+    str(datetime.now().strftime("%Y-%m-%d-%H:%M:%S"))
+)
+NODE_LIST = "192.168.1.12,192.168.1.14"
+
+
+class DLNetSpec(object):
+    def __init__(self, enable_auto_mixed_precision):
+        self.batch_size = 8
+        self.data_part_num = 32
+        self.eval_dir = _DATA_DIR
+        self.train_dir = _DATA_DIR
+        self.model_save_dir = _MODEL_SAVE_DIR
+        self.model_load_dir = _MODEL_LOAD_DIR
+        self.num_nodes = 1
+        self.gpu_num_per_node = 1
+        self.iter_num = 10
+        self.enable_auto_mixed_precision = enable_auto_mixed_precision
+
+
+parser = argparse.ArgumentParser(description="flags for multi-node and resource")
+parser.add_argument("-g", "--gpu_num_per_node", type=int, default=1, required=False)
+parser.add_argument("-i", "--iter_num", type=int, default=10, required=False)
+parser.add_argument(
+    "-m", "--multinode", default=False, action="store_true", required=False
+)
+parser.add_argument("-n", "--node_list", type=str, default=NODE_LIST, required=False)
+parser.add_argument(
+    "-s", "--skip_scp_binary", default=False, action="store_true", required=False
+)
+parser.add_argument(
+    "-c",
+    "--scp_binary_without_uuid",
+    default=False,
+    action="store_true",
+    required=False,
+)
+parser.add_argument(
+    "-r", "--remote_by_hand", default=False, action="store_true", required=False
+)
+parser.add_argument("-e", "--eval_dir", type=str, default=_DATA_DIR, required=False)
+parser.add_argument("-t", "--train_dir", type=str, default=_DATA_DIR, required=False)
+parser.add_argument(
+    "-load", "--model_load_dir", type=str, default=_MODEL_LOAD_DIR, required=False
+)
+parser.add_argument(
+    "-save", "--model_save_dir", type=str, default=_MODEL_SAVE_DIR, required=False
+)
+parser.add_argument("-dn", "--data_part_num", type=int, default=32, required=False)
+parser.add_argument("-b", "--batch_size", type=int, default=8, required=False)
+# main() also reads num_nodes and enable_auto_mixed_precision from args
+# (DLNetSpec provides them when main() is driven programmatically), so expose
+# them on the CLI path as well to avoid an AttributeError under __main__.
+parser.add_argument("-nn", "--num_nodes", type=int, default=1, required=False)
+parser.add_argument(
+    "-amp",
+    "--enable_auto_mixed_precision",
+    default=False,
+    action="store_true",
+    required=False,
+)
+
+
+def _conv2d_layer(
+    name,
+    input,
+    filters,
+    kernel_size=3,
+    strides=1,
+    padding="VALID",
+    data_format="NCHW",
+    dilation_rate=1,
+    activation=op_conf_util.kRelu,
+    use_bias=True,
+    weight_initializer=flow.random_uniform_initializer(),
+    bias_initializer=flow.constant_initializer(),
+):
+    weight_shape = (filters, input.shape[1], kernel_size, kernel_size)
+    weight = flow.get_variable(
+        name + "-weight",
+        shape=weight_shape,
+        dtype=input.dtype,
+        initializer=weight_initializer,
+    )
+    output = flow.nn.conv2d(
+        input, weight, strides, padding, None, data_format, dilation_rate, name=name
+    )
+
+    if use_bias:
+        bias = flow.get_variable(
+            name + "-bias",
+            shape=(filters,),
+            dtype=input.dtype,
+            initializer=bias_initializer,
+        )
+        output = flow.nn.bias_add(output, bias, "NCHW")
+    if activation is not None:
+        if activation == op_conf_util.kRelu:
+            output = flow.math.relu(output)
+        else:
+            raise NotImplementedError
+
+    return output
+
+
+def _data_load_layer(args, data_dir):
+    node_num = args.num_nodes
+    total_batch_size = args.batch_size * args.gpu_num_per_node * node_num
+    rgb_mean = [123.68, 116.78, 103.94]
+    ofrecord = flow.data.ofrecord_reader(
+        data_dir,
+        batch_size=total_batch_size,
+        data_part_num=args.data_part_num,
+        name="decode",
+    )
+    image = flow.data.ofrecord_image_decoder(ofrecord, "encoded", color_space="RGB")
+    label = flow.data.ofrecord_raw_decoder(
+        ofrecord, "class/label", shape=(), dtype=flow.int32
+    )
+    rsz = flow.image.resize(image, resize_x=224, resize_y=224, color_space="RGB")
+    normal = flow.image.crop_mirror_normalize(
+        rsz,
+        color_space="RGB",
+        output_layout="NCHW",
+        mean=rgb_mean,
+        output_dtype=flow.float,
+    )
+    return label, normal
+
+
+def _conv_block(in_blob, index, filters, conv_times):
+    conv_block = []
+    conv_block.insert(0, in_blob)
+    for i in range(conv_times):
+        conv_i = _conv2d_layer(
+            name="conv{}".format(index),
+            input=conv_block[i],
+            filters=filters,
+            kernel_size=3,
+            strides=1,
+        )
+        conv_block.append(conv_i)
+        index += 1
+
+    return conv_block
+
+
+def vgg(images, labels, trainable=True):
+    to_return = []
+    conv1 = _conv_block(images, 0, 64, 2)
+    pool1 = flow.nn.max_pool2d(conv1[-1], 2, 2, "VALID", "NCHW", name="pool1")
+
+    conv2 = _conv_block(pool1, 2, 128, 2)
+    pool2 = flow.nn.max_pool2d(conv2[-1], 2, 2, "VALID", "NCHW", name="pool2")
+
+    conv3 = _conv_block(pool2, 4, 256, 3)
+    pool3 = flow.nn.max_pool2d(conv3[-1], 2, 2, "VALID", "NCHW", name="pool3")
+
+    conv4 = _conv_block(pool3, 7, 512, 3)
+    pool4 = flow.nn.max_pool2d(conv4[-1], 2, 2, "VALID", "NCHW", name="pool4")
+
+    conv5 = _conv_block(pool4, 10, 512, 3)
+    pool5 = flow.nn.max_pool2d(conv5[-1], 2, 2, "VALID", "NCHW", name="pool5")
+
+    def _get_kernel_initializer():
+        kernel_initializer = initializer_conf_util.InitializerConf()
+        kernel_initializer.truncated_normal_conf.std = 0.816496580927726
+        return kernel_initializer
+
+    def _get_bias_initializer():
+        bias_initializer = initializer_conf_util.InitializerConf()
+        bias_initializer.constant_conf.value = 0.0
+        return bias_initializer
+
+    pool5 = flow.reshape(pool5, [-1, 512])
+
+    fc6 = flow.layers.dense(
+        inputs=pool5,
+        units=4096,
+        activation=flow.math.relu,
+        use_bias=True,
+        kernel_initializer=_get_kernel_initializer(),
+        bias_initializer=_get_bias_initializer(),
+        trainable=trainable,
+        name="fc1",
+    )
+
+    fc7 = flow.layers.dense(
+        inputs=fc6,
+        units=4096,
+        activation=flow.math.relu,
+        use_bias=True,
+        kernel_initializer=_get_kernel_initializer(),
+        bias_initializer=_get_bias_initializer(),
+        trainable=trainable,
+        name="fc2",
+    )
+
+    fc8 = flow.layers.dense(
+        inputs=fc7,
+        units=1001,
+        use_bias=True,
+        kernel_initializer=_get_kernel_initializer(),
+        bias_initializer=_get_bias_initializer(),
+        trainable=trainable,
+        name="fc_final",
+    )
+
+    loss = flow.nn.sparse_softmax_cross_entropy_with_logits(
+        labels, fc8, name="softmax_loss"
+    )
+
+    to_return.append(loss)
+    return tuple(to_return)
+
+
+def main(args):
+    flow.config.machine_num(args.num_nodes)
+    flow.config.gpu_device_num(args.gpu_num_per_node)
+    train_config = flow.FunctionConfig()
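+    # FunctionConfig gathers per-job options; the lines below select the
+    # consistent (single-client) view, float32 defaults, and optional AMP,
+    # which flow.global_function attaches to the compiled train job.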
+    train_config.default_logical_view(flow.scope.consistent_view())
+    train_config.default_data_type(flow.float)
+    train_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision)
+
+    @flow.global_function(type="train", function_config=train_config)
+    def vgg_train_job():
+        (labels, images) = _data_load_layer(args, args.train_dir)
+        to_return = vgg(images, labels)
+        loss = to_return[-1]
+        flow.optimizer.SGD(
+            flow.optimizer.PiecewiseConstantScheduler([], [0.00001]), momentum=0
+        ).minimize(loss)
+        return loss
+
+    eval_config = flow.FunctionConfig()
+    eval_config.default_logical_view(flow.scope.consistent_view())
+    eval_config.default_data_type(flow.float)
+    eval_config.enable_auto_mixed_precision(args.enable_auto_mixed_precision)
+
+    @flow.global_function(function_config=eval_config)
+    def vgg_eval_job():
+        (labels, images) = _data_load_layer(args, args.eval_dir)
+        return vgg(images, labels, False)
+
+    check_point = flow.train.CheckPoint()
+    if not args.model_load_dir:
+        check_point.init()
+    else:
+        check_point.load(args.model_load_dir)
+
+    num_nodes = args.num_nodes
+    print(
+        "Training vgg16: num_gpu_per_node = {}, num_nodes = {}.".format(
+            args.gpu_num_per_node, num_nodes
+        )
+    )
+
+    print("{:>12} {:>12} {:>12}".format("iter", "loss type", "loss value"))
+    loss = []
+    for i in range(args.iter_num):
+        train_loss = vgg_train_job().get().mean()
+        loss.append(train_loss)
+
+        fmt_str = "{:>12} {:>12} {:>12.6f}"
+        print(fmt_str.format(i, "train loss:", train_loss))
+
+        # if (i + 1) % 10 == 0:
+        #     eval_loss = vgg_eval_job().get().mean()
+        #     print(
+        #         fmt_str.format(
+        #             i, "eval loss:", eval_loss
+        #         )
+        #     )
+        if (i + 1) % 100 == 0:
+            check_point.save(_MODEL_SAVE_DIR + str(i))
+
+    # save loss to file
+    loss_file = "{}n{}c.npy".format(
+        str(num_nodes), str(args.gpu_num_per_node * num_nodes)
+    )
+    loss_path = "./of_loss/vgg16"
+    if not os.path.exists(loss_path):
+        os.makedirs(loss_path)
+    numpy.save(os.path.join(loss_path, loss_file), loss)
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    flow.env.log_dir("./log")
+    if args.multinode:
+        flow.env.ctrl_port(12138)
+
+        nodes = []
+        for n in args.node_list.strip().split(","):
+            addr_dict = {}
+            addr_dict["addr"] = n
+            nodes.append(addr_dict)
+
+        flow.env.machine(nodes)
+
+        if args.remote_by_hand is False:
+            if args.scp_binary_without_uuid:
+                flow.deprecated.init_worker(scp_binary=True, use_uuid=False)
+            elif args.skip_scp_binary:
+                flow.deprecated.init_worker(scp_binary=False, use_uuid=False)
+            else:
+                flow.deprecated.init_worker(scp_binary=True, use_uuid=True)
+
+    main(args)
+    if (
+        args.multinode
+        and args.skip_scp_binary is False
+        and args.scp_binary_without_uuid is False
+    ):
+        flow.deprecated.delete_worker()
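
With both jobs in place, the script can be smoke-tested from the command line,
assuming the /dataset paths above exist on the host (flag names come from the
argparse block in this file):

    # single node, 1 GPU, 10 iterations
    python3 vgg16.py -g 1 -i 10 -b 8

    # two nodes; ctrl port 12138 is hard-coded under __main__
    python3 vgg16.py -m -n "192.168.1.12,192.168.1.14" -g 1

diff --git a/oneflow/python/test/ops/image_test_util.py b/oneflow/compatible_single_client_python/test/ops/image_test_util.py
similarity index 99%
rename from oneflow/python/test/ops/image_test_util.py
rename to oneflow/compatible_single_client_python/test/ops/image_test_util.py
index 897f219b30957a6b2b2d606e52b1fbd450b0fb6e..742c255780be2d76e46c6ac22be76e183feeda6d 100644
--- a/oneflow/python/test/ops/image_test_util.py
+++ b/oneflow/compatible_single_client_python/test/ops/image_test_util.py
@@ -15,7 +15,7 @@ limitations under the License.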
""" import numpy as np import cv2 -import oneflow as flow +from oneflow.compatible import single_client as flow import PIL import random import os diff --git a/oneflow/python/test/ops/test_2d_gpu_variable.py b/oneflow/compatible_single_client_python/test/ops/test_2d_gpu_variable.py similarity index 96% rename from oneflow/python/test/ops/test_2d_gpu_variable.py rename to oneflow/compatible_single_client_python/test/ops/test_2d_gpu_variable.py index a424cf7fe39aeef632c16b7e84028f88ac503cdd..064eab795f4e32bda2c173ed3d3e0358bd6d559c 100644 --- a/oneflow/python/test/ops/test_2d_gpu_variable.py +++ b/oneflow/compatible_single_client_python/test/ops/test_2d_gpu_variable.py @@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import os import unittest diff --git a/oneflow/python/test/ops/test_KLDivloss.py b/oneflow/compatible_single_client_python/test/ops/test_KLDivloss.py similarity index 98% rename from oneflow/python/test/ops/test_KLDivloss.py rename to oneflow/compatible_single_client_python/test/ops/test_KLDivloss.py index 9ab32f1e723b5e1552f934f9b4da9fc621b453dd..364d89763f3a272e81d089346109ca33c76deeb1 100644 --- a/oneflow/python/test/ops/test_KLDivloss.py +++ b/oneflow/compatible_single_client_python/test/ops/test_KLDivloss.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_MarginRankingLoss.py b/oneflow/compatible_single_client_python/test/ops/test_MarginRankingLoss.py similarity index 98% rename from oneflow/python/test/ops/test_MarginRankingLoss.py rename to oneflow/compatible_single_client_python/test/ops/test_MarginRankingLoss.py index 76a78e5e85314c2ad932651a20a1bac88d776178..dc79f0215adb24897c3d4ef5dc6d16a4b81d3d83 100644 --- a/oneflow/python/test/ops/test_MarginRankingLoss.py +++ b/oneflow/compatible_single_client_python/test/ops/test_MarginRankingLoss.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_TestDataTypeAttr.py b/oneflow/compatible_single_client_python/test/ops/test_TestDataTypeAttr.py similarity index 95% rename from oneflow/python/test/ops/test_TestDataTypeAttr.py rename to oneflow/compatible_single_client_python/test/ops/test_TestDataTypeAttr.py index 2ab6352d4f9608bbebf820174255fcc97fa0c662..efc21b5c258bf882138c59ca034a4894d75ff5af 100644 --- a/oneflow/python/test/ops/test_TestDataTypeAttr.py +++ b/oneflow/compatible_single_client_python/test/ops/test_TestDataTypeAttr.py @@ -16,8 +16,8 @@ limitations under the License. 
from collections import OrderedDict import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type import unittest import os diff --git a/oneflow/python/test/ops/test_TestDynamicSource.py b/oneflow/compatible_single_client_python/test/ops/test_TestDynamicSource.py similarity index 96% rename from oneflow/python/test/ops/test_TestDynamicSource.py rename to oneflow/compatible_single_client_python/test/ops/test_TestDynamicSource.py index b9ab2d3233395e3f3565891f559d5b3a79a7639b..efce73476f41521757196875293b292902764700 100644 --- a/oneflow/python/test/ops/test_TestDynamicSource.py +++ b/oneflow/compatible_single_client_python/test/ops/test_TestDynamicSource.py @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. """ import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import unittest import os diff --git a/oneflow/python/test/ops/test_TestListDataTypeAndListShapeAndListStringAttr.py b/oneflow/compatible_single_client_python/test/ops/test_TestListDataTypeAndListShapeAndListStringAttr.py similarity index 96% rename from oneflow/python/test/ops/test_TestListDataTypeAndListShapeAndListStringAttr.py rename to oneflow/compatible_single_client_python/test/ops/test_TestListDataTypeAndListShapeAndListStringAttr.py index f194104a93958829a19d6e00cd4aacac27638450..02f7fbf79d63297537d7a0951a184c0bbde47093 100644 --- a/oneflow/python/test/ops/test_TestListDataTypeAndListShapeAndListStringAttr.py +++ b/oneflow/compatible_single_client_python/test/ops/test_TestListDataTypeAndListShapeAndListStringAttr.py @@ -17,8 +17,8 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type diff --git a/oneflow/python/test/ops/test_TestMultiInputGrad.py b/oneflow/compatible_single_client_python/test/ops/test_TestMultiInputGrad.py similarity index 98% rename from oneflow/python/test/ops/test_TestMultiInputGrad.py rename to oneflow/compatible_single_client_python/test/ops/test_TestMultiInputGrad.py index 9d4ea4fbfe2398143644dcbf17b570baef955eaf..8cfcbc22d6989ae42a09cc66afa1de6e8de30783 100644 --- a/oneflow/python/test/ops/test_TestMultiInputGrad.py +++ b/oneflow/compatible_single_client_python/test/ops/test_TestMultiInputGrad.py @@ -17,7 +17,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import test_global_storage from test_util import GenArgList import unittest diff --git a/oneflow/python/test/ops/test_TestMultiOutputOrder.py b/oneflow/compatible_single_client_python/test/ops/test_TestMultiOutputOrder.py similarity index 95% rename from oneflow/python/test/ops/test_TestMultiOutputOrder.py rename to oneflow/compatible_single_client_python/test/ops/test_TestMultiOutputOrder.py index e0baf27ce284044d5596e829db0d45ad1bd1421c..7fa7f0bc69fb3f5e0bb583c5096b524a8907b53e 100644 --- a/oneflow/python/test/ops/test_TestMultiOutputOrder.py +++ b/oneflow/compatible_single_client_python/test/ops/test_TestMultiOutputOrder.py @@ -14,8 +14,8 @@ See the License for the specific 
language governing permissions and limitations under the License. """ import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import unittest import os diff --git a/oneflow/python/test/ops/test_TestRandomSource.py b/oneflow/compatible_single_client_python/test/ops/test_TestRandomSource.py similarity index 97% rename from oneflow/python/test/ops/test_TestRandomSource.py rename to oneflow/compatible_single_client_python/test/ops/test_TestRandomSource.py index 3f0e46a694def09dabb1d5a3368e1871b6e27b33..f17aead85359ced961618e4bc465ee9b12dc3756 100644 --- a/oneflow/python/test/ops/test_TestRandomSource.py +++ b/oneflow/compatible_single_client_python/test/ops/test_TestRandomSource.py @@ -15,7 +15,7 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow def my_test_source(name, seed): diff --git a/oneflow/python/test/ops/test_TestReshape.py b/oneflow/compatible_single_client_python/test/ops/test_TestReshape.py similarity index 95% rename from oneflow/python/test/ops/test_TestReshape.py rename to oneflow/compatible_single_client_python/test/ops/test_TestReshape.py index 221bc3ae94855c2102c575d2de8084df4cebb313..4955231d0ca0e86c3c464477efb3bbc380f8521e 100644 --- a/oneflow/python/test/ops/test_TestReshape.py +++ b/oneflow/compatible_single_client_python/test/ops/test_TestReshape.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. """ import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import unittest import os diff --git a/oneflow/python/test/ops/test_TestSource.py b/oneflow/compatible_single_client_python/test/ops/test_TestSource.py similarity index 97% rename from oneflow/python/test/ops/test_TestSource.py rename to oneflow/compatible_single_client_python/test/ops/test_TestSource.py index 0efeb925c56eb2983955c2670dc46ebc66fc7ee1..55aada43f367a71fb5c56c713236e76c84dc14f3 100644 --- a/oneflow/python/test/ops/test_TestSource.py +++ b/oneflow/compatible_single_client_python/test/ops/test_TestSource.py @@ -15,7 +15,7 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow def my_test_source(name): diff --git a/oneflow/python/test/ops/test_TestSourceMultiGpuFixedOutNum.py b/oneflow/compatible_single_client_python/test/ops/test_TestSourceMultiGpuFixedOutNum.py similarity index 96% rename from oneflow/python/test/ops/test_TestSourceMultiGpuFixedOutNum.py rename to oneflow/compatible_single_client_python/test/ops/test_TestSourceMultiGpuFixedOutNum.py index 8500d6a652ebbfc62282642dfb8f87ee0409e423..b565378cd22c422d89161789027b8720ea85c2ec 100644 --- a/oneflow/python/test/ops/test_TestSourceMultiGpuFixedOutNum.py +++ b/oneflow/compatible_single_client_python/test/ops/test_TestSourceMultiGpuFixedOutNum.py @@ -15,7 +15,7 @@ limitations under the License. 
""" import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow def my_test_source(name, out_num): diff --git a/oneflow/python/test/ops/test_TripletMarginLoss.py b/oneflow/compatible_single_client_python/test/ops/test_TripletMarginLoss.py similarity index 98% rename from oneflow/python/test/ops/test_TripletMarginLoss.py rename to oneflow/compatible_single_client_python/test/ops/test_TripletMarginLoss.py index 22cefad6425d536acb7f6c0dcd7359be0d4819db..080d8bb89f8673f5cf6936981342c78dd60d6648 100644 --- a/oneflow/python/test/ops/test_TripletMarginLoss.py +++ b/oneflow/compatible_single_client_python/test/ops/test_TripletMarginLoss.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_activations.py b/oneflow/compatible_single_client_python/test/ops/test_activations.py similarity index 98% rename from oneflow/python/test/ops/test_activations.py rename to oneflow/compatible_single_client_python/test/ops/test_activations.py index 499b3593c6f14d1dab3e2fb2f34b9a20eebb2222..4e64b99a9f4b7d0f7f08d7a5dad393fc6f836ac5 100644 --- a/oneflow/python/test/ops/test_activations.py +++ b/oneflow/compatible_single_client_python/test/ops/test_activations.py @@ -19,7 +19,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_add.py b/oneflow/compatible_single_client_python/test/ops/test_add.py similarity index 96% rename from oneflow/python/test/ops/test_add.py rename to oneflow/compatible_single_client_python/test/ops/test_add.py index f057b075684b1422cd3d05cd03585649edfd8c42..cdd473cf5c9b70b05b04eecf93b62e80a1adace2 100644 --- a/oneflow/python/test/ops/test_add.py +++ b/oneflow/compatible_single_client_python/test/ops/test_add.py @@ -17,10 +17,10 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import Args, CompareOpWithTensorFlow, GenArgDict -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft func_config = flow.FunctionConfig() func_config.default_data_type(flow.float) diff --git a/oneflow/python/test/ops/test_add_n.py b/oneflow/compatible_single_client_python/test/ops/test_add_n.py similarity index 96% rename from oneflow/python/test/ops/test_add_n.py rename to oneflow/compatible_single_client_python/test/ops/test_add_n.py index 01e0104d34833b1f8e651f2b26f507002f0d17be..fc9001cd7ce8326b40c3239879d89c76644e0893 100644 --- a/oneflow/python/test/ops/test_add_n.py +++ b/oneflow/compatible_single_client_python/test/ops/test_add_n.py @@ -15,8 +15,8 @@ limitations under the License. 
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from typing import Tuple func_config = flow.FunctionConfig() diff --git a/oneflow/python/test/ops/test_all_reduce_group.py b/oneflow/compatible_single_client_python/test/ops/test_all_reduce_group.py similarity index 97% rename from oneflow/python/test/ops/test_all_reduce_group.py rename to oneflow/compatible_single_client_python/test/ops/test_all_reduce_group.py index f7438eb4a05f5cd6ffeccf7d7c99d2b21fcaafc3..f8e26999eda771ed02cd2e7c88c4af8eb19dcba1 100644 --- a/oneflow/python/test/ops/test_all_reduce_group.py +++ b/oneflow/compatible_single_client_python/test/ops/test_all_reduce_group.py @@ -16,7 +16,7 @@ limitations under the License. from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList import unittest import os diff --git a/oneflow/python/test/ops/test_argmax.py b/oneflow/compatible_single_client_python/test/ops/test_argmax.py similarity index 96% rename from oneflow/python/test/ops/test_argmax.py rename to oneflow/compatible_single_client_python/test/ops/test_argmax.py index 430be1d7d9f69441da3dd3722157e4fd67bce85f..38958293cac492e0ecd6aac9b20001e5b73275f4 100644 --- a/oneflow/python/test/ops/test_argmax.py +++ b/oneflow/compatible_single_client_python/test/ops/test_argmax.py @@ -17,10 +17,10 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import os gpus = tf.config.experimental.list_physical_devices("GPU") diff --git a/oneflow/python/test/ops/test_argsort.py b/oneflow/compatible_single_client_python/test/ops/test_argsort.py similarity index 96% rename from oneflow/python/test/ops/test_argsort.py rename to oneflow/compatible_single_client_python/test/ops/test_argsort.py index 518624af381c4c23a366cc36ac80c3ec88e77926..122d0c299bf42bdc70cccc29b541c968d5b4d2af 100644 --- a/oneflow/python/test/ops/test_argsort.py +++ b/oneflow/compatible_single_client_python/test/ops/test_argsort.py @@ -17,10 +17,10 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import os gpus = tf.config.experimental.list_physical_devices("GPU") diff --git a/oneflow/python/test/ops/test_argwhere.py b/oneflow/compatible_single_client_python/test/ops/test_argwhere.py similarity index 99% rename from oneflow/python/test/ops/test_argwhere.py rename to oneflow/compatible_single_client_python/test/ops/test_argwhere.py index 808f4d6d99cf2f1a17643b8a088bc7812524d08c..c8265dc035ba352b92fe2f6338520b5f85cff6b3 100644 --- a/oneflow/python/test/ops/test_argwhere.py +++ b/oneflow/compatible_single_client_python/test/ops/test_argwhere.py @@ -18,7 +18,7 @@ import unittest from collections import OrderedDict import os -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgDict diff --git 
a/oneflow/python/test/ops/test_assign.py b/oneflow/compatible_single_client_python/test/ops/test_assign.py similarity index 98% rename from oneflow/python/test/ops/test_assign.py rename to oneflow/compatible_single_client_python/test/ops/test_assign.py index 31e41000df791797f5a4696a3efba7caf5e62f16..f764fb2502eca92b7067e6d5fdb7cc5a36eed627 100644 --- a/oneflow/python/test/ops/test_assign.py +++ b/oneflow/compatible_single_client_python/test/ops/test_assign.py @@ -17,9 +17,9 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgDict -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import os flow_to_np_dtype_dict = { diff --git a/oneflow/python/test/ops/test_batch_gather.py b/oneflow/compatible_single_client_python/test/ops/test_batch_gather.py similarity index 98% rename from oneflow/python/test/ops/test_batch_gather.py rename to oneflow/compatible_single_client_python/test/ops/test_batch_gather.py index 6a8e8d68afc6e31cfcae23681d0520c5668cbc84..197e7f5aa7e265595c3abbe23dce868517ece57f 100644 --- a/oneflow/python/test/ops/test_batch_gather.py +++ b/oneflow/compatible_single_client_python/test/ops/test_batch_gather.py @@ -17,11 +17,11 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from tensorflow.python.ops import gen_math_ops from test_util import GenArgList -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_batch_normalization.py b/oneflow/compatible_single_client_python/test/ops/test_batch_normalization.py similarity index 99% rename from oneflow/python/test/ops/test_batch_normalization.py rename to oneflow/compatible_single_client_python/test/ops/test_batch_normalization.py index 10615e16756bd8c666c2ea95682e4204ce99f35a..d72a3cf476b50132bf1d23597a8e501a90c55397 100644 --- a/oneflow/python/test/ops/test_batch_normalization.py +++ b/oneflow/compatible_single_client_python/test/ops/test_batch_normalization.py @@ -17,11 +17,11 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import Args, GenArgDict, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import unittest gpus = tf.config.experimental.list_physical_devices("GPU") diff --git a/oneflow/python/test/ops/test_bce_loss.py b/oneflow/compatible_single_client_python/test/ops/test_bce_loss.py similarity index 98% rename from oneflow/python/test/ops/test_bce_loss.py rename to oneflow/compatible_single_client_python/test/ops/test_bce_loss.py index 214a58e3943cf4a638c26bb1a0eb5fd6ae7a5c11..203f41d63573de3996ca864f2dd8e23bcb550579 100644 --- a/oneflow/python/test/ops/test_bce_loss.py +++ b/oneflow/compatible_single_client_python/test/ops/test_bce_loss.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_bce_with_logits_loss.py b/oneflow/compatible_single_client_python/test/ops/test_bce_with_logits_loss.py similarity index 98% rename from oneflow/python/test/ops/test_bce_with_logits_loss.py rename to oneflow/compatible_single_client_python/test/ops/test_bce_with_logits_loss.py index 6796d453df07240fe8684e90f1faf07662eabd67..824593f7750b0898b86c5aeaa2e2040000876fce 100644 --- a/oneflow/python/test/ops/test_bce_with_logits_loss.py +++ b/oneflow/compatible_single_client_python/test/ops/test_bce_with_logits_loss.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_bernoulli.py b/oneflow/compatible_single_client_python/test/ops/test_bernoulli.py similarity index 94% rename from oneflow/python/test/ops/test_bernoulli.py rename to oneflow/compatible_single_client_python/test/ops/test_bernoulli.py index 223c7a4147ee7de8113b0b8a8071d1382c0c0149..10432fd1e6e7b56626674965aa3eaa6a4403b28c 100644 --- a/oneflow/python/test/ops/test_bernoulli.py +++ b/oneflow/compatible_single_client_python/test/ops/test_bernoulli.py @@ -15,8 +15,8 @@ limitations under the License. 
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft @flow.unittest.skip_unless_1n1d() diff --git a/oneflow/python/test/ops/test_bias_add.py b/oneflow/compatible_single_client_python/test/ops/test_bias_add.py similarity index 97% rename from oneflow/python/test/ops/test_bias_add.py rename to oneflow/compatible_single_client_python/test/ops/test_bias_add.py index 88fd9f406349ff8443fb475a12b3d9087e40dec4..35a936ff82ca42ea5b0e9fdbc60d30ff502ff5d5 100644 --- a/oneflow/python/test/ops/test_bias_add.py +++ b/oneflow/compatible_single_client_python/test/ops/test_bias_add.py @@ -18,11 +18,11 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import Args, GenArgDict -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_binary_elementwise_ops.py b/oneflow/compatible_single_client_python/test/ops/test_binary_elementwise_ops.py similarity index 98% rename from oneflow/python/test/ops/test_binary_elementwise_ops.py rename to oneflow/compatible_single_client_python/test/ops/test_binary_elementwise_ops.py index 73a545bf284bada93a9c22257df929bd444101d3..657ae3a690985d31d9b9ea35483aebd5381f3dd7 100644 --- a/oneflow/python/test/ops/test_binary_elementwise_ops.py +++ b/oneflow/compatible_single_client_python/test/ops/test_binary_elementwise_ops.py @@ -16,9 +16,9 @@ limitations under the License. import unittest import numpy as np import tensorflow as tf -import oneflow as flow +from oneflow.compatible import single_client as flow from collections import OrderedDict -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import test_global_storage from test_util import ( diff --git a/oneflow/python/test/ops/test_boxing_v2.py b/oneflow/compatible_single_client_python/test/ops/test_boxing_v2.py similarity index 99% rename from oneflow/python/test/ops/test_boxing_v2.py rename to oneflow/compatible_single_client_python/test/ops/test_boxing_v2.py index caa3c1a1ec705fd65d3e5d9a0275f99f2f9df2be..fbf7e29f0d00ac3f541c7d23d1b6f408ab4e49f8 100644 --- a/oneflow/python/test/ops/test_boxing_v2.py +++ b/oneflow/compatible_single_client_python/test/ops/test_boxing_v2.py @@ -17,9 +17,9 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import os diff --git a/oneflow/python/test/ops/test_broadcast_like.py b/oneflow/compatible_single_client_python/test/ops/test_broadcast_like.py similarity index 97% rename from oneflow/python/test/ops/test_broadcast_like.py rename to oneflow/compatible_single_client_python/test/ops/test_broadcast_like.py index edc93cf46d69da7944d0541d6648729e89868881..649d7b7c1104fc6e5b222f0333016add552491b0 100644 --- a/oneflow/python/test/ops/test_broadcast_like.py +++ b/oneflow/compatible_single_client_python/test/ops/test_broadcast_like.py @@ -18,10 +18,10 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as 
flow import tensorflow as tf from test_util import GenArgList -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_broadcast_logical_ops.py b/oneflow/compatible_single_client_python/test/ops/test_broadcast_logical_ops.py similarity index 97% rename from oneflow/python/test/ops/test_broadcast_logical_ops.py rename to oneflow/compatible_single_client_python/test/ops/test_broadcast_logical_ops.py index 2a4ac2886a629805a387f499b36e52bfb7e40cbc..fa27b5782c2d25599ff08e5da03f49ba5074706b 100644 --- a/oneflow/python/test/ops/test_broadcast_logical_ops.py +++ b/oneflow/compatible_single_client_python/test/ops/test_broadcast_logical_ops.py @@ -17,9 +17,9 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList, type_name_to_flow_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import os diff --git a/oneflow/python/test/ops/test_broadcast_maximum.py b/oneflow/compatible_single_client_python/test/ops/test_broadcast_maximum.py similarity index 94% rename from oneflow/python/test/ops/test_broadcast_maximum.py rename to oneflow/compatible_single_client_python/test/ops/test_broadcast_maximum.py index 156446829c3a74c75d84cdee511a0b16a56c2920..3be594e7b9155492e3c0f9039c57eb576822a96b 100644 --- a/oneflow/python/test/ops/test_broadcast_maximum.py +++ b/oneflow/compatible_single_client_python/test/ops/test_broadcast_maximum.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. """ import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import unittest import os diff --git a/oneflow/python/test/ops/test_broadcast_minimum.py b/oneflow/compatible_single_client_python/test/ops/test_broadcast_minimum.py similarity index 94% rename from oneflow/python/test/ops/test_broadcast_minimum.py rename to oneflow/compatible_single_client_python/test/ops/test_broadcast_minimum.py index ec78dabf8d777af138c5f576fe90a1fb3cc5c507..cc6d2f22a8e29f48cac3cd4d5a2e4902d08edf2f 100644 --- a/oneflow/python/test/ops/test_broadcast_minimum.py +++ b/oneflow/compatible_single_client_python/test/ops/test_broadcast_minimum.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. 
""" import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import unittest import os diff --git a/oneflow/python/test/ops/test_broadcast_normal.py b/oneflow/compatible_single_client_python/test/ops/test_broadcast_normal.py similarity index 98% rename from oneflow/python/test/ops/test_broadcast_normal.py rename to oneflow/compatible_single_client_python/test/ops/test_broadcast_normal.py index f546dfcc839e2b54ee0e3fdd950a9059761fad1a..8fee5c52c800643edc021d0c698eb015a91fff2c 100644 --- a/oneflow/python/test/ops/test_broadcast_normal.py +++ b/oneflow/compatible_single_client_python/test/ops/test_broadcast_normal.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import ( @@ -27,7 +27,7 @@ from test_util import ( type_name_to_flow_type, type_name_to_np_type, ) -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_broadcast_to_compatible_with.py b/oneflow/compatible_single_client_python/test/ops/test_broadcast_to_compatible_with.py similarity index 98% rename from oneflow/python/test/ops/test_broadcast_to_compatible_with.py rename to oneflow/compatible_single_client_python/test/ops/test_broadcast_to_compatible_with.py index 6c0d0701d3f374d0020070d147cfba12877e8ba8..4fa859067703c6d345b69e4b4862e9b249655c53 100644 --- a/oneflow/python/test/ops/test_broadcast_to_compatible_with.py +++ b/oneflow/compatible_single_client_python/test/ops/test_broadcast_to_compatible_with.py @@ -15,8 +15,8 @@ limitations under the License. 
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft def _of_broadcast_to_compatible_with(x, compatible_shape, x_shape=None): diff --git a/oneflow/python/test/ops/test_cast.py b/oneflow/compatible_single_client_python/test/ops/test_cast.py similarity index 97% rename from oneflow/python/test/ops/test_cast.py rename to oneflow/compatible_single_client_python/test/ops/test_cast.py index ade0d5d570dafbe05d7ce5e16c81327dae822f86..854d575aa17c5d598989d304ddde832162b2beee 100644 --- a/oneflow/python/test/ops/test_cast.py +++ b/oneflow/compatible_single_client_python/test/ops/test_cast.py @@ -18,11 +18,11 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_cast_to_static_shape.py b/oneflow/compatible_single_client_python/test/ops/test_cast_to_static_shape.py similarity index 98% rename from oneflow/python/test/ops/test_cast_to_static_shape.py rename to oneflow/compatible_single_client_python/test/ops/test_cast_to_static_shape.py index f954a3bdb7db65e8bab3eb54eaf0abb5ca3731e9..63424b15145e068fb27c8e4ebda85071d5ec639a 100644 --- a/oneflow/python/test/ops/test_cast_to_static_shape.py +++ b/oneflow/compatible_single_client_python/test/ops/test_cast_to_static_shape.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np from collections import OrderedDict -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgDict, type_name_to_flow_type, type_name_to_np_type diff --git a/oneflow/python/test/ops/test_categorical_ordinal_encoder.py b/oneflow/compatible_single_client_python/test/ops/test_categorical_ordinal_encoder.py similarity index 97% rename from oneflow/python/test/ops/test_categorical_ordinal_encoder.py rename to oneflow/compatible_single_client_python/test/ops/test_categorical_ordinal_encoder.py index 1b22d2b57c5b27ba779b21e9628cf3b51e3d5521..080755d7ecd8af777a9a3754f19a98da95a14860 100644 --- a/oneflow/python/test/ops/test_categorical_ordinal_encoder.py +++ b/oneflow/compatible_single_client_python/test/ops/test_categorical_ordinal_encoder.py @@ -15,8 +15,8 @@ limitations under the License. """ import sys import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import typing import unittest import os diff --git a/oneflow/python/test/ops/test_ccrelu.py b/oneflow/compatible_single_client_python/test/ops/test_ccrelu.py similarity index 96% rename from oneflow/python/test/ops/test_ccrelu.py rename to oneflow/compatible_single_client_python/test/ops/test_ccrelu.py index c72bb0c12ba47e17c39a7987b45702e02632e418..75c31e801e2a41304e8c8f94e1df68478f43a11e 100644 --- a/oneflow/python/test/ops/test_ccrelu.py +++ b/oneflow/compatible_single_client_python/test/ops/test_ccrelu.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. 
""" import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import unittest import os diff --git a/oneflow/python/test/ops/test_checkpoint.py b/oneflow/compatible_single_client_python/test/ops/test_checkpoint.py similarity index 99% rename from oneflow/python/test/ops/test_checkpoint.py rename to oneflow/compatible_single_client_python/test/ops/test_checkpoint.py index 12ffe03940f39b97bd599bb5f21795977d763c1c..0399d7aeef8e39ff92fba908e8e3c578fdbb149d 100644 --- a/oneflow/python/test/ops/test_checkpoint.py +++ b/oneflow/compatible_single_client_python/test/ops/test_checkpoint.py @@ -19,8 +19,8 @@ import shutil import tempfile import numpy as np -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp def refresh_session(): diff --git a/oneflow/python/test/ops/test_clip_by_value.py b/oneflow/compatible_single_client_python/test/ops/test_clip_by_value.py similarity index 98% rename from oneflow/python/test/ops/test_clip_by_value.py rename to oneflow/compatible_single_client_python/test/ops/test_clip_by_value.py index e7ce1b9360f6f9abaa9c7ed92df8dd0f04f6e1a2..ab469fe5097b124c86a68d5be339eaa949f8d85f 100644 --- a/oneflow/python/test/ops/test_clip_by_value.py +++ b/oneflow/compatible_single_client_python/test/ops/test_clip_by_value.py @@ -17,10 +17,10 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgList -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_coco_reader.py b/oneflow/compatible_single_client_python/test/ops/test_coco_reader.py similarity index 99% rename from oneflow/python/test/ops/test_coco_reader.py rename to oneflow/compatible_single_client_python/test/ops/test_coco_reader.py index 7c5a85716dd63db34b05c872ab9721de364f6149..988cf28f38ee4515fc4c2c87497e6a696244ed72 100644 --- a/oneflow/python/test/ops/test_coco_reader.py +++ b/oneflow/compatible_single_client_python/test/ops/test_coco_reader.py @@ -19,7 +19,7 @@ import os import cv2 import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow VERBOSE = False coco_dict = dict() diff --git a/oneflow/python/test/ops/test_combined_margin_loss.py b/oneflow/compatible_single_client_python/test/ops/test_combined_margin_loss.py similarity index 97% rename from oneflow/python/test/ops/test_combined_margin_loss.py rename to oneflow/compatible_single_client_python/test/ops/test_combined_margin_loss.py index 3057734e5d0716b36d4215015fcb2a43952e1f5b..4b17c22b4473315b32c58163e0e89a5b4abf2509 100644 --- a/oneflow/python/test/ops/test_combined_margin_loss.py +++ b/oneflow/compatible_single_client_python/test/ops/test_combined_margin_loss.py @@ -17,10 +17,10 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import test_global_storage from test_util import Args, GenArgDict, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import unittest diff --git 
a/oneflow/python/test/ops/test_compat_conv2d.py b/oneflow/compatible_single_client_python/test/ops/test_compat_conv2d.py similarity index 99% rename from oneflow/python/test/ops/test_compat_conv2d.py rename to oneflow/compatible_single_client_python/test/ops/test_compat_conv2d.py index fc90fef82d7cc6b9f23f2476941122f4bdff0885..4b265c1abfe1405e80461918f657152feef32310 100644 --- a/oneflow/python/test/ops/test_compat_conv2d.py +++ b/oneflow/compatible_single_client_python/test/ops/test_compat_conv2d.py @@ -17,7 +17,7 @@ import unittest import os import numpy as np import tensorflow as tf -import oneflow as flow +from oneflow.compatible import single_client as flow from collections import OrderedDict from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_concat.py b/oneflow/compatible_single_client_python/test/ops/test_concat.py similarity index 99% rename from oneflow/python/test/ops/test_concat.py rename to oneflow/compatible_single_client_python/test/ops/test_concat.py index 9363dcd1d5c820fe0ee607954971d1118ecc8965..91c391ceaca6b57210a55b909ee815d624c96415 100644 --- a/oneflow/python/test/ops/test_concat.py +++ b/oneflow/compatible_single_client_python/test/ops/test_concat.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. """ import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import tensorflow as tf import test_global_storage import random diff --git a/oneflow/python/test/ops/test_constant.py b/oneflow/compatible_single_client_python/test/ops/test_constant.py similarity index 97% rename from oneflow/python/test/ops/test_constant.py rename to oneflow/compatible_single_client_python/test/ops/test_constant.py index da0150ab3ad90d12e7f80f253d725f0fdeaf7357..193d9e40fe66af1be873690c1a537b16f33e669e 100644 --- a/oneflow/python/test/ops/test_constant.py +++ b/oneflow/compatible_single_client_python/test/ops/test_constant.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList from test_util import type_name_to_flow_type from test_util import type_name_to_np_type diff --git a/oneflow/python/test/ops/test_constant_like.py b/oneflow/compatible_single_client_python/test/ops/test_constant_like.py similarity index 97% rename from oneflow/python/test/ops/test_constant_like.py rename to oneflow/compatible_single_client_python/test/ops/test_constant_like.py index 0835df5662dd52418395e75ff72231a7b88a33a0..ba1f851f9f29b1349980fa4824edc571483b8e38 100644 --- a/oneflow/python/test/ops/test_constant_like.py +++ b/oneflow/compatible_single_client_python/test/ops/test_constant_like.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. 
""" import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import unittest import os diff --git a/oneflow/python/test/ops/test_constant_pad2d.py b/oneflow/compatible_single_client_python/test/ops/test_constant_pad2d.py similarity index 98% rename from oneflow/python/test/ops/test_constant_pad2d.py rename to oneflow/compatible_single_client_python/test/ops/test_constant_pad2d.py index 777a5f8d4f8b0a00c75f8e8693d321b372cf31db..b0f609480f35057cac52e60c72aaa31516ce3909 100644 --- a/oneflow/python/test/ops/test_constant_pad2d.py +++ b/oneflow/compatible_single_client_python/test/ops/test_constant_pad2d.py @@ -18,8 +18,8 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp from test_util import ( Args, GenArgDict, diff --git a/oneflow/python/test/ops/test_copy_comm_net_pass_empty.py b/oneflow/compatible_single_client_python/test/ops/test_copy_comm_net_pass_empty.py similarity index 97% rename from oneflow/python/test/ops/test_copy_comm_net_pass_empty.py rename to oneflow/compatible_single_client_python/test/ops/test_copy_comm_net_pass_empty.py index fecd50ce67c3068e62360735df62c5dd0c189638..1bebdf041da33ea510ebcf5f6a07532c14433a1f 100644 --- a/oneflow/python/test/ops/test_copy_comm_net_pass_empty.py +++ b/oneflow/compatible_single_client_python/test/ops/test_copy_comm_net_pass_empty.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. """ import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import unittest import os diff --git a/oneflow/python/test/ops/test_count_not_finite.py b/oneflow/compatible_single_client_python/test/ops/test_count_not_finite.py similarity index 96% rename from oneflow/python/test/ops/test_count_not_finite.py rename to oneflow/compatible_single_client_python/test/ops/test_count_not_finite.py index 177aeae836199d72a2d9932a9fb6cc586dbb6dc3..eaf92d0edb760e83dde697a7f9efb8cbdd5da785 100644 --- a/oneflow/python/test/ops/test_count_not_finite.py +++ b/oneflow/compatible_single_client_python/test/ops/test_count_not_finite.py @@ -17,9 +17,9 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft def _run_count_test(test_case, device_type, x_shape, dtype): diff --git a/oneflow/python/test/ops/test_cpu_only_user_op.py b/oneflow/compatible_single_client_python/test/ops/test_cpu_only_user_op.py similarity index 95% rename from oneflow/python/test/ops/test_cpu_only_user_op.py rename to oneflow/compatible_single_client_python/test/ops/test_cpu_only_user_op.py index a4d328a910250e78a3f0f88a0bd0360b5e34b538..3966d4f6db88a6a60a2d9977c32de62e67c359b1 100644 --- a/oneflow/python/test/ops/test_cpu_only_user_op.py +++ b/oneflow/compatible_single_client_python/test/ops/test_cpu_only_user_op.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import unittest import os diff --git a/oneflow/python/test/ops/test_ctc_greedy_decoder.py b/oneflow/compatible_single_client_python/test/ops/test_ctc_greedy_decoder.py similarity index 97% rename from oneflow/python/test/ops/test_ctc_greedy_decoder.py rename to oneflow/compatible_single_client_python/test/ops/test_ctc_greedy_decoder.py index 1f279605cbd921225cba6ea2da65f1fa219dc26c..6fa63161be67d4826194cb37ac17d168a499666b 100644 --- a/oneflow/python/test/ops/test_ctc_greedy_decoder.py +++ b/oneflow/compatible_single_client_python/test/ops/test_ctc_greedy_decoder.py @@ -18,8 +18,8 @@ from collections import OrderedDict from typing import Tuple import numpy as np -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type import os diff --git a/oneflow/python/test/ops/test_ctc_loss.py b/oneflow/compatible_single_client_python/test/ops/test_ctc_loss.py similarity index 99% rename from oneflow/python/test/ops/test_ctc_loss.py rename to oneflow/compatible_single_client_python/test/ops/test_ctc_loss.py index f60ac036d4e684e298f55b0ba7f080c0d351ef8b..301693472a240ff60d3eace5930be6e099ed7323 100644 --- a/oneflow/python/test/ops/test_ctc_loss.py +++ b/oneflow/compatible_single_client_python/test/ops/test_ctc_loss.py @@ -17,9 +17,9 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp import os ninf = -float("inf") diff --git a/oneflow/python/test/ops/test_deconv2d.py b/oneflow/compatible_single_client_python/test/ops/test_deconv2d.py similarity index 99% rename from oneflow/python/test/ops/test_deconv2d.py rename to oneflow/compatible_single_client_python/test/ops/test_deconv2d.py index f9bf3752e7d0c219ddba132e59ecf6238c955bdf..e8d5756902ebaeafed3755f06e44205e2991a24e 100644 --- a/oneflow/python/test/ops/test_deconv2d.py +++ b/oneflow/compatible_single_client_python/test/ops/test_deconv2d.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_demo.py b/oneflow/compatible_single_client_python/test/ops/test_demo.py similarity index 96% rename from oneflow/python/test/ops/test_demo.py rename to oneflow/compatible_single_client_python/test/ops/test_demo.py index 4588c54383efa1066ecfb0b611e076b471644df1..6d7b66f7b685fd7b7793ae677e44e1f30f6b45f1 100644 --- a/oneflow/python/test/ops/test_demo.py +++ b/oneflow/compatible_single_client_python/test/ops/test_demo.py @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
""" import unittest -import oneflow as flow +from oneflow.compatible import single_client as flow # test file names and methods names are starts with `test' diff --git a/oneflow/python/test/ops/test_demo_matmul.py b/oneflow/compatible_single_client_python/test/ops/test_demo_matmul.py similarity index 93% rename from oneflow/python/test/ops/test_demo_matmul.py rename to oneflow/compatible_single_client_python/test/ops/test_demo_matmul.py index 1cc920c1dc7b18846fb608dc360cc4e0ba91ce3d..476688514aa4f02acee14776c19a8ad67d9a2f18 100644 --- a/oneflow/python/test/ops/test_demo_matmul.py +++ b/oneflow/compatible_single_client_python/test/ops/test_demo_matmul.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. """ import unittest -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp import numpy as np diff --git a/oneflow/python/test/ops/test_diag.py b/oneflow/compatible_single_client_python/test/ops/test_diag.py similarity index 97% rename from oneflow/python/test/ops/test_diag.py rename to oneflow/compatible_single_client_python/test/ops/test_diag.py index 3497bd1f616657e90c398d76b725033a24972f8f..f91c1862ef7ba170b56f11478866a39d92b3d0fb 100644 --- a/oneflow/python/test/ops/test_diag.py +++ b/oneflow/compatible_single_client_python/test/ops/test_diag.py @@ -18,8 +18,8 @@ import os from collections import OrderedDict import unittest import numpy as np -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type diff --git a/oneflow/python/test/ops/test_dim_gather.py b/oneflow/compatible_single_client_python/test/ops/test_dim_gather.py similarity index 98% rename from oneflow/python/test/ops/test_dim_gather.py rename to oneflow/compatible_single_client_python/test/ops/test_dim_gather.py index a4e6e97c2acafc413c9a2ad8aef42bd4432040d6..b54a0fe116759f631b350bab0aded7fbeaa60ead 100644 --- a/oneflow/python/test/ops/test_dim_gather.py +++ b/oneflow/compatible_single_client_python/test/ops/test_dim_gather.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_dim_gather_dynamic.py b/oneflow/compatible_single_client_python/test/ops/test_dim_gather_dynamic.py similarity index 97% rename from oneflow/python/test/ops/test_dim_gather_dynamic.py rename to oneflow/compatible_single_client_python/test/ops/test_dim_gather_dynamic.py index c0ac1b1f33f69944a7f0801648f9419710bdb0ba..a77c4b4f64b16edd784563d6b7571e7c2c8ef947 100644 --- a/oneflow/python/test/ops/test_dim_gather_dynamic.py +++ b/oneflow/compatible_single_client_python/test/ops/test_dim_gather_dynamic.py @@ -15,8 +15,8 @@ limitations under the License. 
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import os diff --git a/oneflow/python/test/ops/test_distribute_concat.py b/oneflow/compatible_single_client_python/test/ops/test_distribute_concat.py similarity index 96% rename from oneflow/python/test/ops/test_distribute_concat.py rename to oneflow/compatible_single_client_python/test/ops/test_distribute_concat.py index 248ccfba3aca600f399e0915a1dfcdc21bf512cf..a0d8fb408467cade20e1e2cf208cafc9459316c0 100644 --- a/oneflow/python/test/ops/test_distribute_concat.py +++ b/oneflow/compatible_single_client_python/test/ops/test_distribute_concat.py @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. """ import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import unittest import os diff --git a/oneflow/python/test/ops/test_dropout.py b/oneflow/compatible_single_client_python/test/ops/test_dropout.py similarity index 99% rename from oneflow/python/test/ops/test_dropout.py rename to oneflow/compatible_single_client_python/test/ops/test_dropout.py index 08d0b68c04269bf7ea77e22da395d8a5fe47718c..b685011a0255d56c55becac70e017bddd35b6710 100644 --- a/oneflow/python/test/ops/test_dropout.py +++ b/oneflow/compatible_single_client_python/test/ops/test_dropout.py @@ -19,7 +19,7 @@ import shutil from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import test_global_storage from test_util import GenArgList, type_name_to_flow_type diff --git a/oneflow/python/test/ops/test_dynamic_loss_scale_schedule.py b/oneflow/compatible_single_client_python/test/ops/test_dynamic_loss_scale_schedule.py similarity index 97% rename from oneflow/python/test/ops/test_dynamic_loss_scale_schedule.py rename to oneflow/compatible_single_client_python/test/ops/test_dynamic_loss_scale_schedule.py index d8fe511116687270c5e964d409c6cbcc7e1f9ebc..873da718274f9e69a1953dfc51c33de867c164fd 100644 --- a/oneflow/python/test/ops/test_dynamic_loss_scale_schedule.py +++ b/oneflow/compatible_single_client_python/test/ops/test_dynamic_loss_scale_schedule.py @@ -17,9 +17,9 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft def dynamic_loss_scale_schedule( diff --git a/oneflow/python/test/ops/test_dynamic_reshape.py b/oneflow/compatible_single_client_python/test/ops/test_dynamic_reshape.py similarity index 95% rename from oneflow/python/test/ops/test_dynamic_reshape.py rename to oneflow/compatible_single_client_python/test/ops/test_dynamic_reshape.py index 21a2de303101de9fcec3387f5113413b7d017a0a..6a5c21e92fcaae747a1d73d25996730b55e3193b 100644 --- a/oneflow/python/test/ops/test_dynamic_reshape.py +++ b/oneflow/compatible_single_client_python/test/ops/test_dynamic_reshape.py @@ -15,8 +15,8 @@ limitations under the License. 
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft # @flow.unittest.skip_unless_1n2d() diff --git a/oneflow/python/test/ops/test_elementwise_maximum_minimum.py b/oneflow/compatible_single_client_python/test/ops/test_elementwise_maximum_minimum.py similarity index 98% rename from oneflow/python/test/ops/test_elementwise_maximum_minimum.py rename to oneflow/compatible_single_client_python/test/ops/test_elementwise_maximum_minimum.py index e12b498fd4b6192854e77d778e578c391697d6a3..d988bf9b351e43c63c1f572b72c683e0beff2f1f 100644 --- a/oneflow/python/test/ops/test_elementwise_maximum_minimum.py +++ b/oneflow/compatible_single_client_python/test/ops/test_elementwise_maximum_minimum.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_elementwise_maximum_minimum_dynamic.py b/oneflow/compatible_single_client_python/test/ops/test_elementwise_maximum_minimum_dynamic.py similarity index 97% rename from oneflow/python/test/ops/test_elementwise_maximum_minimum_dynamic.py rename to oneflow/compatible_single_client_python/test/ops/test_elementwise_maximum_minimum_dynamic.py index bb28693d481f39c8a09450767ce853c5a82b490f..66c2f1eb967d55b4fbe44c878c74389cadc10d2b 100644 --- a/oneflow/python/test/ops/test_elementwise_maximum_minimum_dynamic.py +++ b/oneflow/compatible_single_client_python/test/ops/test_elementwise_maximum_minimum_dynamic.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_elu.py b/oneflow/compatible_single_client_python/test/ops/test_elu.py similarity index 98% rename from oneflow/python/test/ops/test_elu.py rename to oneflow/compatible_single_client_python/test/ops/test_elu.py index cf831c11c0dc72d055111abff1174b90447813ae..9f933d858efab1823b25e74245cc6c902569c9d0 100644 --- a/oneflow/python/test/ops/test_elu.py +++ b/oneflow/compatible_single_client_python/test/ops/test_elu.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_expand_dims.py b/oneflow/compatible_single_client_python/test/ops/test_expand_dims.py similarity index 97% rename from oneflow/python/test/ops/test_expand_dims.py rename to oneflow/compatible_single_client_python/test/ops/test_expand_dims.py index e2b36677a1cc464db71e217521846b4bbc7c87d7..bfa2bca305922f1d7401e0fedd3bce02d7df0e62 100644 --- a/oneflow/python/test/ops/test_expand_dims.py +++ b/oneflow/compatible_single_client_python/test/ops/test_expand_dims.py @@ -17,7 +17,7 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_expand_op.py b/oneflow/compatible_single_client_python/test/ops/test_expand_op.py similarity index 98% rename from oneflow/python/test/ops/test_expand_op.py rename to oneflow/compatible_single_client_python/test/ops/test_expand_op.py index 147a5237ab5fe454dac68248eb0885a53c4c29fb..2a3654bd1245a1e87e8be3c7c248ee98e2336930 100644 --- a/oneflow/python/test/ops/test_expand_op.py +++ b/oneflow/compatible_single_client_python/test/ops/test_expand_op.py @@ -13,8 +13,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp from collections import OrderedDict import numpy as np diff --git a/oneflow/python/test/ops/test_flatten.py b/oneflow/compatible_single_client_python/test/ops/test_flatten.py similarity index 98% rename from oneflow/python/test/ops/test_flatten.py rename to oneflow/compatible_single_client_python/test/ops/test_flatten.py index 875ee2affb3c5717877872f59c4e10e03e0ce751..17585b7f603d90e07454c83e4cfecb8b4e3b02ce 100644 --- a/oneflow/python/test/ops/test_flatten.py +++ b/oneflow/compatible_single_client_python/test/ops/test_flatten.py @@ -17,7 +17,7 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList import test_global_storage diff --git a/oneflow/python/test/ops/test_function_config.py b/oneflow/compatible_single_client_python/test/ops/test_function_config.py similarity index 97% rename from oneflow/python/test/ops/test_function_config.py rename to oneflow/compatible_single_client_python/test/ops/test_function_config.py index b8d58ccc580d5db9a6bc952965447ebd8a1c0c1d..2730e4857e4b4e966d8daafb3fc30d4d5ccc4c89 100644 --- a/oneflow/python/test/ops/test_function_config.py +++ b/oneflow/compatible_single_client_python/test/ops/test_function_config.py @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. 
""" import unittest -import oneflow as flow +from oneflow.compatible import single_client as flow @flow.unittest.skip_unless_1n1d() diff --git a/oneflow/python/test/ops/test_function_input_output.py b/oneflow/compatible_single_client_python/test/ops/test_function_input_output.py similarity index 95% rename from oneflow/python/test/ops/test_function_input_output.py rename to oneflow/compatible_single_client_python/test/ops/test_function_input_output.py index f709722cba9996c6b9ab92c49c7def3b7f426a8a..d3e9a5a72abbc70d36aeea313900576d3ca8306c 100644 --- a/oneflow/python/test/ops/test_function_input_output.py +++ b/oneflow/compatible_single_client_python/test/ops/test_function_input_output.py @@ -15,8 +15,8 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import oneflow._oneflow_internal from typing import Tuple diff --git a/oneflow/python/test/ops/test_fuse_cast_scale.py b/oneflow/compatible_single_client_python/test/ops/test_fuse_cast_scale.py similarity index 97% rename from oneflow/python/test/ops/test_fuse_cast_scale.py rename to oneflow/compatible_single_client_python/test/ops/test_fuse_cast_scale.py index 2a927bd25b5890d8d8aba013f666d958c3ea667d..04031b38e6c1ff1a10fd827146d1ec549b8e0960 100644 --- a/oneflow/python/test/ops/test_fuse_cast_scale.py +++ b/oneflow/compatible_single_client_python/test/ops/test_fuse_cast_scale.py @@ -18,11 +18,11 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_fused_bias_add_dropout.py b/oneflow/compatible_single_client_python/test/ops/test_fused_bias_add_dropout.py similarity index 98% rename from oneflow/python/test/ops/test_fused_bias_add_dropout.py rename to oneflow/compatible_single_client_python/test/ops/test_fused_bias_add_dropout.py index 39abe3dd7914ea7e593ea2bfa44f5f0088a4ec02..8ee6ccd34da206f8d417d1cdc07ddb11c4e7e81c 100644 --- a/oneflow/python/test/ops/test_fused_bias_add_dropout.py +++ b/oneflow/compatible_single_client_python/test/ops/test_fused_bias_add_dropout.py @@ -18,13 +18,13 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import test_global_storage from test_util import GenArgList, type_name_to_flow_type import test_global_storage from test_util import Args, GenArgDict -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft def compare_with_not_fused( diff --git a/oneflow/python/test/ops/test_fused_bias_add_gelu.py b/oneflow/compatible_single_client_python/test/ops/test_fused_bias_add_gelu.py similarity index 97% rename from oneflow/python/test/ops/test_fused_bias_add_gelu.py rename to oneflow/compatible_single_client_python/test/ops/test_fused_bias_add_gelu.py index 079d686092e067bd78fde2ba3015b349583f491c..68f28831e1ac53b93d1cb5fc64ecdd9c1981e7ff 100644 --- a/oneflow/python/test/ops/test_fused_bias_add_gelu.py +++ b/oneflow/compatible_single_client_python/test/ops/test_fused_bias_add_gelu.py 
@@ -18,13 +18,13 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import test_global_storage from test_util import GenArgList, type_name_to_flow_type import test_global_storage from test_util import Args, GenArgDict -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft def compare_with_not_fused(test_case, device_type, x_shape, data_type, data_format): diff --git a/oneflow/python/test/ops/test_fused_scale_tril.py b/oneflow/compatible_single_client_python/test/ops/test_fused_scale_tril.py similarity index 97% rename from oneflow/python/test/ops/test_fused_scale_tril.py rename to oneflow/compatible_single_client_python/test/ops/test_fused_scale_tril.py index 752c92e6ee44849c4e8a10d93bb4028c13f33cff..78ad704416ebbd75ef98fc73f0a2c4ff9feb66f7 100644 --- a/oneflow/python/test/ops/test_fused_scale_tril.py +++ b/oneflow/compatible_single_client_python/test/ops/test_fused_scale_tril.py @@ -16,14 +16,14 @@ limitations under the License. import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import ( GenArgDict, test_global_storage, type_name_to_flow_type, type_name_to_np_type, ) -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import os diff --git a/oneflow/python/test/ops/test_fused_scale_tril_softmax_mask_and_scale.py b/oneflow/compatible_single_client_python/test/ops/test_fused_scale_tril_softmax_mask_and_scale.py similarity index 99% rename from oneflow/python/test/ops/test_fused_scale_tril_softmax_mask_and_scale.py rename to oneflow/compatible_single_client_python/test/ops/test_fused_scale_tril_softmax_mask_and_scale.py index 5ef8c69a25b01edb4eba70015d1eafd65d99b04c..42caac686fe48fef70673cc94f3a336528158066 100644 --- a/oneflow/python/test/ops/test_fused_scale_tril_softmax_mask_and_scale.py +++ b/oneflow/compatible_single_client_python/test/ops/test_fused_scale_tril_softmax_mask_and_scale.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import test_global_storage from test_util import GenArgList, type_name_to_flow_type diff --git a/oneflow/python/test/ops/test_fused_self_attention_query_mul_key_and_value.py b/oneflow/compatible_single_client_python/test/ops/test_fused_self_attention_query_mul_key_and_value.py similarity index 99% rename from oneflow/python/test/ops/test_fused_self_attention_query_mul_key_and_value.py rename to oneflow/compatible_single_client_python/test/ops/test_fused_self_attention_query_mul_key_and_value.py index 2788597d4cf600d316f451696db504e6f5ebf390..61de06a259788a775b22c03088597e6e302212f0 100644 --- a/oneflow/python/test/ops/test_fused_self_attention_query_mul_key_and_value.py +++ b/oneflow/compatible_single_client_python/test/ops/test_fused_self_attention_query_mul_key_and_value.py @@ -17,7 +17,7 @@ import os import numpy as np import unittest import typing -import oneflow as flow +from oneflow.compatible import single_client as flow import test_global_storage diff --git a/oneflow/python/test/ops/test_gather.py b/oneflow/compatible_single_client_python/test/ops/test_gather.py similarity index 98% rename from oneflow/python/test/ops/test_gather.py rename to oneflow/compatible_single_client_python/test/ops/test_gather.py index 
f0a3d1ec5994c07ce8b77a5062d9f6c5ab938b12..87632e21cb4ed4a2e0e245604683438de0c2f600 100644 --- a/oneflow/python/test/ops/test_gather.py +++ b/oneflow/compatible_single_client_python/test/ops/test_gather.py @@ -17,10 +17,10 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgList -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_gather_model_parallel.py b/oneflow/compatible_single_client_python/test/ops/test_gather_model_parallel.py similarity index 96% rename from oneflow/python/test/ops/test_gather_model_parallel.py rename to oneflow/compatible_single_client_python/test/ops/test_gather_model_parallel.py index f6176ad650a958af4d1164f0ad5622448f212edf..977b6895b78f3ddf03fc5423d60668845bb03cc4 100644 --- a/oneflow/python/test/ops/test_gather_model_parallel.py +++ b/oneflow/compatible_single_client_python/test/ops/test_gather_model_parallel.py @@ -18,9 +18,9 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft def _gen_test_data(params_shape, indices_shape, axis): diff --git a/oneflow/python/test/ops/test_gather_nd.py b/oneflow/compatible_single_client_python/test/ops/test_gather_nd.py similarity index 99% rename from oneflow/python/test/ops/test_gather_nd.py rename to oneflow/compatible_single_client_python/test/ops/test_gather_nd.py index f795ae59819bb5532c4eaf3b28534a6ffd7640b9..7fb4600b8be38d83174ffcefd3f8df34b07472c7 100644 --- a/oneflow/python/test/ops/test_gather_nd.py +++ b/oneflow/compatible_single_client_python/test/ops/test_gather_nd.py @@ -18,7 +18,7 @@ import os import numpy as np import unittest from collections import OrderedDict -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgDict, type_name_to_flow_type, type_name_to_np_type diff --git a/oneflow/python/test/ops/test_gelu.py b/oneflow/compatible_single_client_python/test/ops/test_gelu.py similarity index 97% rename from oneflow/python/test/ops/test_gelu.py rename to oneflow/compatible_single_client_python/test/ops/test_gelu.py index f7604b7d490d06f0df5fb5cbf0af766fa1db792a..4f6c3a31ef430e37c56a8468df5d337c4ac2b4dc 100644 --- a/oneflow/python/test/ops/test_gelu.py +++ b/oneflow/compatible_single_client_python/test/ops/test_gelu.py @@ -19,7 +19,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgDict, RunOneflowOp diff --git a/oneflow/python/test/ops/test_get_variable.py b/oneflow/compatible_single_client_python/test/ops/test_get_variable.py similarity index 97% rename from oneflow/python/test/ops/test_get_variable.py rename to oneflow/compatible_single_client_python/test/ops/test_get_variable.py index ed311ebbb76ab214ec74fb420dc242e1508f7ef9..1661ef356907ba297f494b41623c9cf2cbbd96e3 100644 --- a/oneflow/python/test/ops/test_get_variable.py +++ b/oneflow/compatible_single_client_python/test/ops/test_get_variable.py @@ -15,8 +15,8 @@ limitations under the License. 
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft @flow.unittest.skip_unless_1n1d() diff --git a/oneflow/python/test/ops/test_global_function_input_output.py b/oneflow/compatible_single_client_python/test/ops/test_global_function_input_output.py similarity index 98% rename from oneflow/python/test/ops/test_global_function_input_output.py rename to oneflow/compatible_single_client_python/test/ops/test_global_function_input_output.py index 6ee78ab75b40c6ea75776b0f2db4a01e7d5c7d40..2b1f488275515d19127aa4bd7ba444aad7a6093d 100644 --- a/oneflow/python/test/ops/test_global_function_input_output.py +++ b/oneflow/compatible_single_client_python/test/ops/test_global_function_input_output.py @@ -14,12 +14,12 @@ See the License for the specific language governing permissions and limitations under the License. """ import unittest -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np import os import random -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft def _test_input_ndarray_not_contiguous(test_case, shape): diff --git a/oneflow/python/test/ops/test_global_function_signature.py b/oneflow/compatible_single_client_python/test/ops/test_global_function_signature.py similarity index 99% rename from oneflow/python/test/ops/test_global_function_signature.py rename to oneflow/compatible_single_client_python/test/ops/test_global_function_signature.py index 6ad0b9110a40bc88faa717dcee7850ee29dbd488..4e5246677a52b967694688f68c2495e135c35955 100644 --- a/oneflow/python/test/ops/test_global_function_signature.py +++ b/oneflow/compatible_single_client_python/test/ops/test_global_function_signature.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. """ import unittest -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import numpy as np from typing import Tuple, Dict, List diff --git a/oneflow/python/test/ops/test_global_storage.py b/oneflow/compatible_single_client_python/test/ops/test_global_storage.py similarity index 100% rename from oneflow/python/test/ops/test_global_storage.py rename to oneflow/compatible_single_client_python/test/ops/test_global_storage.py diff --git a/oneflow/python/test/ops/test_gpt_data_loader.py b/oneflow/compatible_single_client_python/test/ops/test_gpt_data_loader.py similarity index 99% rename from oneflow/python/test/ops/test_gpt_data_loader.py rename to oneflow/compatible_single_client_python/test/ops/test_gpt_data_loader.py index f7d08283e73576cd63ac9e87cc756d1f0bdf5e2a..fb21f6d5b931d92db5d392403a194523278efeb9 100644 --- a/oneflow/python/test/ops/test_gpt_data_loader.py +++ b/oneflow/compatible_single_client_python/test/ops/test_gpt_data_loader.py @@ -15,7 +15,7 @@ limitations under the License. 
""" import numpy as np import unittest -import oneflow as flow +from oneflow.compatible import single_client as flow import os diff --git a/oneflow/python/test/ops/test_group_norm_op.py b/oneflow/compatible_single_client_python/test/ops/test_group_norm_op.py similarity index 98% rename from oneflow/python/test/ops/test_group_norm_op.py rename to oneflow/compatible_single_client_python/test/ops/test_group_norm_op.py index 4c854af2f842387aa099b68120cacb87c6863c57..06d6e0ba03f514bf49e3a12bd21807f405f5f1e5 100644 --- a/oneflow/python/test/ops/test_group_norm_op.py +++ b/oneflow/compatible_single_client_python/test/ops/test_group_norm_op.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp from collections import OrderedDict import numpy as np diff --git a/oneflow/python/test/ops/test_hardsigmoid.py b/oneflow/compatible_single_client_python/test/ops/test_hardsigmoid.py similarity index 98% rename from oneflow/python/test/ops/test_hardsigmoid.py rename to oneflow/compatible_single_client_python/test/ops/test_hardsigmoid.py index 8db7af0d9ce6d902e92ea28460dc96dbeece69c2..42b16ba2b33255b700f61546ec75ee23b10e81f0 100644 --- a/oneflow/python/test/ops/test_hardsigmoid.py +++ b/oneflow/compatible_single_client_python/test/ops/test_hardsigmoid.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_hardswish.py b/oneflow/compatible_single_client_python/test/ops/test_hardswish.py similarity index 98% rename from oneflow/python/test/ops/test_hardswish.py rename to oneflow/compatible_single_client_python/test/ops/test_hardswish.py index 674334645da71a44cd8aa6bd0c26a2fb836e6abd..955c70bab172ae867315902f83dba0a3439966dd 100644 --- a/oneflow/python/test/ops/test_hardswish.py +++ b/oneflow/compatible_single_client_python/test/ops/test_hardswish.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_hardtanh.py b/oneflow/compatible_single_client_python/test/ops/test_hardtanh.py similarity index 98% rename from oneflow/python/test/ops/test_hardtanh.py rename to oneflow/compatible_single_client_python/test/ops/test_hardtanh.py index c8992358c746d3969584fe47900259bf82716797..f990e7869e09baf8d21100139cae225ce7fe9252 100644 --- a/oneflow/python/test/ops/test_hardtanh.py +++ b/oneflow/compatible_single_client_python/test/ops/test_hardtanh.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_hierarchical_parallel_cast.py b/oneflow/compatible_single_client_python/test/ops/test_hierarchical_parallel_cast.py similarity index 99% rename from oneflow/python/test/ops/test_hierarchical_parallel_cast.py rename to oneflow/compatible_single_client_python/test/ops/test_hierarchical_parallel_cast.py index 0cfec01098cb8a9694c1c20c3d71af0c96c70c82..d7034f5c50e4821ed59013153653479c0c2e0724 100644 --- a/oneflow/python/test/ops/test_hierarchical_parallel_cast.py +++ b/oneflow/compatible_single_client_python/test/ops/test_hierarchical_parallel_cast.py @@ -15,8 +15,8 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList from collections import OrderedDict from typing import Dict diff --git a/oneflow/python/test/ops/test_identity_n.py b/oneflow/compatible_single_client_python/test/ops/test_identity_n.py similarity index 92% rename from oneflow/python/test/ops/test_identity_n.py rename to oneflow/compatible_single_client_python/test/ops/test_identity_n.py index cfd384850d1198d39f1af8449fd89e227ced9b2c..7ab8bd628e3ea8978fd481b23356c093fa4d72a3 100644 --- a/oneflow/python/test/ops/test_identity_n.py +++ b/oneflow/compatible_single_client_python/test/ops/test_identity_n.py @@ -15,8 +15,8 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from typing import Tuple func_config = flow.FunctionConfig() diff --git a/oneflow/python/test/ops/test_image_batch_align.py b/oneflow/compatible_single_client_python/test/ops/test_image_batch_align.py similarity index 97% rename from oneflow/python/test/ops/test_image_batch_align.py rename to oneflow/compatible_single_client_python/test/ops/test_image_batch_align.py index 89bc13371a207b2554911b92efd15406f3f53c4c..469cc3719ce7fc8149d8f66040eed77317dd6595 100644 --- a/oneflow/python/test/ops/test_image_batch_align.py +++ b/oneflow/compatible_single_client_python/test/ops/test_image_batch_align.py @@ -16,8 +16,8 @@ limitations under the License. 
import unittest import cv2 import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft def _of_image_batch_align(images, input_shape, output_shape, alignment): diff --git a/oneflow/python/test/ops/test_image_decode.py b/oneflow/compatible_single_client_python/test/ops/test_image_decode.py similarity index 97% rename from oneflow/python/test/ops/test_image_decode.py rename to oneflow/compatible_single_client_python/test/ops/test_image_decode.py index ab888ab2f703836fbdcdc9d02070a7a2ea594f3c..5e8d44351004d4a291b55685746029f7468e950e 100644 --- a/oneflow/python/test/ops/test_image_decode.py +++ b/oneflow/compatible_single_client_python/test/ops/test_image_decode.py @@ -15,9 +15,9 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from PIL import Image -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft def _of_image_decode(images): diff --git a/oneflow/python/test/ops/test_image_flip.py b/oneflow/compatible_single_client_python/test/ops/test_image_flip.py similarity index 96% rename from oneflow/python/test/ops/test_image_flip.py rename to oneflow/compatible_single_client_python/test/ops/test_image_flip.py index f686c58db5657047095a1ebee0a957efc3209933..d10226a31b21779817e50d42190c6a2d8496d286 100644 --- a/oneflow/python/test/ops/test_image_flip.py +++ b/oneflow/compatible_single_client_python/test/ops/test_image_flip.py @@ -16,8 +16,8 @@ limitations under the License. import unittest import cv2 import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft def _of_image_flip(images, image_shape, flip_code): diff --git a/oneflow/python/test/ops/test_image_normalize.py b/oneflow/compatible_single_client_python/test/ops/test_image_normalize.py similarity index 96% rename from oneflow/python/test/ops/test_image_normalize.py rename to oneflow/compatible_single_client_python/test/ops/test_image_normalize.py index c69d759f3de158c6192b43bc43fcf9163417c22b..13f2e99db34b3956f1fd8eeb77d8154106de6fee 100644 --- a/oneflow/python/test/ops/test_image_normalize.py +++ b/oneflow/compatible_single_client_python/test/ops/test_image_normalize.py @@ -16,8 +16,8 @@ limitations under the License. 
import unittest import cv2 import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft def _of_image_normalize(images, image_shape, std, mean): diff --git a/oneflow/python/test/ops/test_image_resize.py b/oneflow/compatible_single_client_python/test/ops/test_image_resize.py similarity index 99% rename from oneflow/python/test/ops/test_image_resize.py rename to oneflow/compatible_single_client_python/test/ops/test_image_resize.py index db492e49574f1189c0c29553747528764feb9853..25f1516352fdfbb3d7f557902dc5ed074e1c377d 100644 --- a/oneflow/python/test/ops/test_image_resize.py +++ b/oneflow/compatible_single_client_python/test/ops/test_image_resize.py @@ -17,8 +17,8 @@ import unittest import cv2 import numpy as np import typing as tp -import oneflow as flow -import oneflow.typing as otp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as otp import image_test_util diff --git a/oneflow/python/test/ops/test_image_target_resize.py b/oneflow/compatible_single_client_python/test/ops/test_image_target_resize.py similarity index 97% rename from oneflow/python/test/ops/test_image_target_resize.py rename to oneflow/compatible_single_client_python/test/ops/test_image_target_resize.py index dcdebc8b998c74dc357dfe3fbe777a416c1f4125..d491af5d5b57912989630e8c30f6f8fb50a6ad5e 100644 --- a/oneflow/python/test/ops/test_image_target_resize.py +++ b/oneflow/compatible_single_client_python/test/ops/test_image_target_resize.py @@ -17,8 +17,8 @@ import unittest import cv2 import numpy as np import typing as tp -import oneflow as flow -import oneflow.typing as otp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as otp import image_test_util diff --git a/oneflow/python/test/ops/test_in_top_k.py b/oneflow/compatible_single_client_python/test/ops/test_in_top_k.py similarity index 96% rename from oneflow/python/test/ops/test_in_top_k.py rename to oneflow/compatible_single_client_python/test/ops/test_in_top_k.py index ac04b6bb651e950a7eca41512ca3c9ddd7807059..663c38afa2587249aeddaea5b2b04219dcddd808 100644 --- a/oneflow/python/test/ops/test_in_top_k.py +++ b/oneflow/compatible_single_client_python/test/ops/test_in_top_k.py @@ -18,8 +18,8 @@ from collections import OrderedDict import os import unittest import numpy as np -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp import tensorflow as tf from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type diff --git a/oneflow/python/test/ops/test_indexed_slices_reduce_sum.py b/oneflow/compatible_single_client_python/test/ops/test_indexed_slices_reduce_sum.py similarity index 96% rename from oneflow/python/test/ops/test_indexed_slices_reduce_sum.py rename to oneflow/compatible_single_client_python/test/ops/test_indexed_slices_reduce_sum.py index c6bfd6471ea722da08515d2698f12eba4f480a6b..dd6c371e392324c1f2399595394b87e737c5eb01 100644 --- a/oneflow/python/test/ops/test_indexed_slices_reduce_sum.py +++ b/oneflow/compatible_single_client_python/test/ops/test_indexed_slices_reduce_sum.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. 
""" import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import unittest import os diff --git a/oneflow/python/test/ops/test_inplace.py b/oneflow/compatible_single_client_python/test/ops/test_inplace.py similarity index 97% rename from oneflow/python/test/ops/test_inplace.py rename to oneflow/compatible_single_client_python/test/ops/test_inplace.py index 42bd49642342a8e44c1d46f57f895062f657c867..6ab3419ae2eaef05182d266040e4e1a7fb9c0a29 100644 --- a/oneflow/python/test/ops/test_inplace.py +++ b/oneflow/compatible_single_client_python/test/ops/test_inplace.py @@ -15,8 +15,8 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft def MakeFuncConfig(enable_inplace): diff --git a/oneflow/python/test/ops/test_instance_norm_op.py b/oneflow/compatible_single_client_python/test/ops/test_instance_norm_op.py similarity index 98% rename from oneflow/python/test/ops/test_instance_norm_op.py rename to oneflow/compatible_single_client_python/test/ops/test_instance_norm_op.py index 864979cc8f3ff5f7749de273fab06cb8d97e3e3a..acd990af3b44d98a1f58c46f2a99d693a998840a 100644 --- a/oneflow/python/test/ops/test_instance_norm_op.py +++ b/oneflow/compatible_single_client_python/test/ops/test_instance_norm_op.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp from collections import OrderedDict import numpy as np diff --git a/oneflow/python/test/ops/test_interface_op_read_and_write.py b/oneflow/compatible_single_client_python/test/ops/test_interface_op_read_and_write.py similarity index 95% rename from oneflow/python/test/ops/test_interface_op_read_and_write.py rename to oneflow/compatible_single_client_python/test/ops/test_interface_op_read_and_write.py index 705b2b0dd044ec0bee2b9580d371669a3a6be081..1e876ed09adc056e937bf3fd72cb063a01ebd0f2 100644 --- a/oneflow/python/test/ops/test_interface_op_read_and_write.py +++ b/oneflow/compatible_single_client_python/test/ops/test_interface_op_read_and_write.py @@ -18,8 +18,8 @@ import unittest import numpy as np -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp @flow.unittest.skip_unless_1n2d() diff --git a/oneflow/python/test/ops/test_l1loss.py b/oneflow/compatible_single_client_python/test/ops/test_l1loss.py similarity index 98% rename from oneflow/python/test/ops/test_l1loss.py rename to oneflow/compatible_single_client_python/test/ops/test_l1loss.py index 861c79331c252b7937573a42f8e033c57c978cd9..63410e163c557695f745743bedb28a4d19f73cd4 100644 --- a/oneflow/python/test/ops/test_l1loss.py +++ b/oneflow/compatible_single_client_python/test/ops/test_l1loss.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_l2_normalize.py b/oneflow/compatible_single_client_python/test/ops/test_l2_normalize.py similarity index 98% rename from oneflow/python/test/ops/test_l2_normalize.py rename to oneflow/compatible_single_client_python/test/ops/test_l2_normalize.py index 77e8bc22476c52b7fe39bf064c809199b86caa91..5738eaba0381c1094f029f95dade633633302918 100644 --- a/oneflow/python/test/ops/test_l2_normalize.py +++ b/oneflow/compatible_single_client_python/test/ops/test_l2_normalize.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList, type_name_to_flow_type diff --git a/oneflow/python/test/ops/test_lamb.py b/oneflow/compatible_single_client_python/test/ops/test_lamb.py similarity index 98% rename from oneflow/python/test/ops/test_lamb.py rename to oneflow/compatible_single_client_python/test/ops/test_lamb.py index 741225e030c4cd9cf886d745122cfc2d623947cb..cf0f20cdca9242ea6f32f29414c9a1db4949d4c5 100644 --- a/oneflow/python/test/ops/test_lamb.py +++ b/oneflow/compatible_single_client_python/test/ops/test_lamb.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import tensorflow_addons as tfa import test_global_storage diff --git a/oneflow/python/test/ops/test_layer_norm.py b/oneflow/compatible_single_client_python/test/ops/test_layer_norm.py similarity index 98% rename from oneflow/python/test/ops/test_layer_norm.py rename to oneflow/compatible_single_client_python/test/ops/test_layer_norm.py index 00662f4b15ec2ac429ed1f158aca5c70afaf1e28..d32513d0a30edc6a39efa91bf98583798a2b346b 100644 --- a/oneflow/python/test/ops/test_layer_norm.py +++ b/oneflow/compatible_single_client_python/test/ops/test_layer_norm.py @@ -19,12 +19,12 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_layers_conv1d.py b/oneflow/compatible_single_client_python/test/ops/test_layers_conv1d.py similarity index 99% rename from oneflow/python/test/ops/test_layers_conv1d.py rename to oneflow/compatible_single_client_python/test/ops/test_layers_conv1d.py index 51e80fa834a958721d4b8b3939812e36a1c4173a..aa1655caafc2bca51ebd9e37f9c2af464e7a8096 100644 --- a/oneflow/python/test/ops/test_layers_conv1d.py +++ b/oneflow/compatible_single_client_python/test/ops/test_layers_conv1d.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_layers_conv2d.py 
b/oneflow/compatible_single_client_python/test/ops/test_layers_conv2d.py similarity index 99% rename from oneflow/python/test/ops/test_layers_conv2d.py rename to oneflow/compatible_single_client_python/test/ops/test_layers_conv2d.py index b89d97bb4188762ece4777758a895cac010a667a..a8c9e11e3ec1df8f226e41491a58ff660a02bc1e 100644 --- a/oneflow/python/test/ops/test_layers_conv2d.py +++ b/oneflow/compatible_single_client_python/test/ops/test_layers_conv2d.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_layers_conv3d.py b/oneflow/compatible_single_client_python/test/ops/test_layers_conv3d.py similarity index 99% rename from oneflow/python/test/ops/test_layers_conv3d.py rename to oneflow/compatible_single_client_python/test/ops/test_layers_conv3d.py index 75b61834cbbce4bed0d12d9984596d14ea9884e8..073bf3d10edb7bfeeb70f579b44958ea2982a3a9 100644 --- a/oneflow/python/test/ops/test_layers_conv3d.py +++ b/oneflow/compatible_single_client_python/test/ops/test_layers_conv3d.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_leaky_relu.py b/oneflow/compatible_single_client_python/test/ops/test_leaky_relu.py similarity index 98% rename from oneflow/python/test/ops/test_leaky_relu.py rename to oneflow/compatible_single_client_python/test/ops/test_leaky_relu.py index 25d97b7b77b538ba259232ccfec2abbf34408e1c..139097ff5e704b2101d38e1d1c8c584f7ec17e15 100644 --- a/oneflow/python/test/ops/test_leaky_relu.py +++ b/oneflow/compatible_single_client_python/test/ops/test_leaky_relu.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList, type_name_to_flow_type diff --git a/oneflow/python/test/ops/test_logical_slice.py b/oneflow/compatible_single_client_python/test/ops/test_logical_slice.py similarity index 96% rename from oneflow/python/test/ops/test_logical_slice.py rename to oneflow/compatible_single_client_python/test/ops/test_logical_slice.py index 4005302e3308de1dccd3795e5da7e9d012795334..03f52326687e9c8dc53515a02f6122f04543d116 100644 --- a/oneflow/python/test/ops/test_logical_slice.py +++ b/oneflow/compatible_single_client_python/test/ops/test_logical_slice.py @@ -15,9 +15,9 @@ limitations under the License. 
""" from collections import OrderedDict -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import test_global_storage from test_util import GenArgDict diff --git a/oneflow/python/test/ops/test_logical_slice_assign.py b/oneflow/compatible_single_client_python/test/ops/test_logical_slice_assign.py similarity index 97% rename from oneflow/python/test/ops/test_logical_slice_assign.py rename to oneflow/compatible_single_client_python/test/ops/test_logical_slice_assign.py index b293564cc78dd67d94515eff71a05a96f757f8d7..bfe09155db2b9328dcac40425a33eeaa0cfc4144 100644 --- a/oneflow/python/test/ops/test_logical_slice_assign.py +++ b/oneflow/compatible_single_client_python/test/ops/test_logical_slice_assign.py @@ -15,9 +15,9 @@ limitations under the License. """ from collections import OrderedDict -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft from test_util import GenArgDict diff --git a/oneflow/python/test/ops/test_logsoftmax.py b/oneflow/compatible_single_client_python/test/ops/test_logsoftmax.py similarity index 97% rename from oneflow/python/test/ops/test_logsoftmax.py rename to oneflow/compatible_single_client_python/test/ops/test_logsoftmax.py index 2279a5619f865cc8e435bd1e6f251bafded39749..7a5d166e7059cb5cfd04a092274935583ed9c906 100644 --- a/oneflow/python/test/ops/test_logsoftmax.py +++ b/oneflow/compatible_single_client_python/test/ops/test_logsoftmax.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_masked_fill.py b/oneflow/compatible_single_client_python/test/ops/test_masked_fill.py similarity index 97% rename from oneflow/python/test/ops/test_masked_fill.py rename to oneflow/compatible_single_client_python/test/ops/test_masked_fill.py index 61eb6c1a945054bba0a61d0b7943c5fda5623908..6504a6088f25bfedf52a162ced656af37500278b 100644 --- a/oneflow/python/test/ops/test_masked_fill.py +++ b/oneflow/compatible_single_client_python/test/ops/test_masked_fill.py @@ -16,7 +16,7 @@ limitations under the License. 
import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import os from test_util import ( @@ -25,7 +25,7 @@ from test_util import ( type_name_to_flow_type, type_name_to_np_type, ) -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft def _masked_fill_np_fw_bw(x, mask, y_diff, type_name, value=0): diff --git a/oneflow/python/test/ops/test_matmul.py b/oneflow/compatible_single_client_python/test/ops/test_matmul.py similarity index 99% rename from oneflow/python/test/ops/test_matmul.py rename to oneflow/compatible_single_client_python/test/ops/test_matmul.py index 37acb19fe897b89fddfe13f9ab81091c3f1ea57f..8cd4d3f0587e31521cb256035c52ee5d8193c3bf 100644 --- a/oneflow/python/test/ops/test_matmul.py +++ b/oneflow/compatible_single_client_python/test/ops/test_matmul.py @@ -18,7 +18,7 @@ from collections import OrderedDict import numpy as np import typing -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList, GenArgDict, type_name_to_flow_type diff --git a/oneflow/python/test/ops/test_memory_zone_out_of_memory.py b/oneflow/compatible_single_client_python/test/ops/test_memory_zone_out_of_memory.py similarity index 97% rename from oneflow/python/test/ops/test_memory_zone_out_of_memory.py rename to oneflow/compatible_single_client_python/test/ops/test_memory_zone_out_of_memory.py index e3837e53dcf697259786b77b5e209449332daa2f..01df8374fdea44eb6d0d52942013145d54e0b9d0 100644 --- a/oneflow/python/test/ops/test_memory_zone_out_of_memory.py +++ b/oneflow/compatible_single_client_python/test/ops/test_memory_zone_out_of_memory.py @@ -17,7 +17,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow class MemoryZoneOutOfMemoryException(Exception): diff --git a/oneflow/python/test/ops/test_mish.py b/oneflow/compatible_single_client_python/test/ops/test_mish.py similarity index 97% rename from oneflow/python/test/ops/test_mish.py rename to oneflow/compatible_single_client_python/test/ops/test_mish.py index df850cc2b1a24694ca274bb48de7e4c33b72ed19..164b3def0f177d16ed8245dcf2a81045a3b0fd67 100644 --- a/oneflow/python/test/ops/test_mish.py +++ b/oneflow/compatible_single_client_python/test/ops/test_mish.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_mod.py b/oneflow/compatible_single_client_python/test/ops/test_mod.py similarity index 95% rename from oneflow/python/test/ops/test_mod.py rename to oneflow/compatible_single_client_python/test/ops/test_mod.py index eb265dbef11d807ed223beca827b7de6f1db9421..906e8da0bbf91e34a9cd7a6fdabfc8a24cfe6679 100644 --- a/oneflow/python/test/ops/test_mod.py +++ b/oneflow/compatible_single_client_python/test/ops/test_mod.py @@ -15,8 +15,8 @@ limitations under the License. 
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft func_config = flow.FunctionConfig() func_config.default_data_type(flow.float) diff --git a/oneflow/python/test/ops/test_mod_int.py b/oneflow/compatible_single_client_python/test/ops/test_mod_int.py similarity index 96% rename from oneflow/python/test/ops/test_mod_int.py rename to oneflow/compatible_single_client_python/test/ops/test_mod_int.py index 20fbc3eb1503053132d5f79364e1a2377e4c3802..a2d5ce0ff4ae479aea5f51f5e54c3efd0d520fe9 100644 --- a/oneflow/python/test/ops/test_mod_int.py +++ b/oneflow/compatible_single_client_python/test/ops/test_mod_int.py @@ -15,8 +15,8 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft func_config = flow.FunctionConfig() func_config.default_data_type(flow.int32) diff --git a/oneflow/python/test/ops/test_model.py b/oneflow/compatible_single_client_python/test/ops/test_model.py similarity index 97% rename from oneflow/python/test/ops/test_model.py rename to oneflow/compatible_single_client_python/test/ops/test_model.py index 51d1b8b8592aedce116502a0be9f3b242acf6844..61c1cb88f6e873e897bd554262bda2b39c1e9e61 100644 --- a/oneflow/python/test/ops/test_model.py +++ b/oneflow/compatible_single_client_python/test/ops/test_model.py @@ -18,8 +18,8 @@ import unittest import numpy as np import tempfile -import oneflow.experimental as flow -from oneflow.python.nn.parameter import Parameter +from oneflow.compatible.single_client import experimental as flow +from oneflow.compatible.single_client.python.nn.parameter import Parameter @unittest.skipIf( diff --git a/oneflow/python/test/ops/test_model_io.py b/oneflow/compatible_single_client_python/test/ops/test_model_io.py similarity index 96% rename from oneflow/python/test/ops/test_model_io.py rename to oneflow/compatible_single_client_python/test/ops/test_model_io.py index 41069b911e554c8fac8810e4d0f6a2de81aab231..77dff95a9ac9ae61f59462c8f9ed0a3d197a654c 100644 --- a/oneflow/python/test/ops/test_model_io.py +++ b/oneflow/compatible_single_client_python/test/ops/test_model_io.py @@ -15,8 +15,8 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp import time import os diff --git a/oneflow/python/test/ops/test_module_container.py b/oneflow/compatible_single_client_python/test/ops/test_module_container.py similarity index 90% rename from oneflow/python/test/ops/test_module_container.py rename to oneflow/compatible_single_client_python/test/ops/test_module_container.py index 1ea302ea1ef301fd881affe673d642f309b2d901..c826d5c49ca5be26328dc904d08388804ac35b3f 100644 --- a/oneflow/python/test/ops/test_module_container.py +++ b/oneflow/compatible_single_client_python/test/ops/test_module_container.py @@ -16,8 +16,8 @@ limitations under the License. 
 import unittest
 from typing import Tuple

-import oneflow.experimental as flow
-import oneflow.typing as tp
+from oneflow.compatible.single_client import experimental as flow
+from oneflow.compatible.single_client import typing as tp


 @unittest.skipIf(
diff --git a/oneflow/python/test/ops/test_moments.py b/oneflow/compatible_single_client_python/test/ops/test_moments.py
similarity index 98%
rename from oneflow/python/test/ops/test_moments.py
rename to oneflow/compatible_single_client_python/test/ops/test_moments.py
index 9deaf02637e123e017426f99c176f43a89b93a08..78a49ee2f5d26058f9c28e9698b739f50f9f4967 100644
--- a/oneflow/python/test/ops/test_moments.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_moments.py
@@ -18,7 +18,7 @@ import os
 from collections import OrderedDict

 import numpy as np
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import tensorflow as tf
 import test_global_storage
 from test_util import GenArgList, type_name_to_flow_type
diff --git a/oneflow/python/test/ops/test_mseloss.py b/oneflow/compatible_single_client_python/test/ops/test_mseloss.py
similarity index 98%
rename from oneflow/python/test/ops/test_mseloss.py
rename to oneflow/compatible_single_client_python/test/ops/test_mseloss.py
index 90c8db222d131999800c45598ccb1d8de5b4f643..fe0c277d51daa9aab87b380fd04e7cbc0f32e419 100644
--- a/oneflow/python/test/ops/test_mseloss.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_mseloss.py
@@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import numpy as np
-import oneflow.typing as tp
+from oneflow.compatible.single_client import typing as tp
 from test_util import GenArgList
 import unittest
 from collections import OrderedDict
diff --git a/oneflow/python/test/ops/test_multi_optimizer.py b/oneflow/compatible_single_client_python/test/ops/test_multi_optimizer.py
similarity index 99%
rename from oneflow/python/test/ops/test_multi_optimizer.py
rename to oneflow/compatible_single_client_python/test/ops/test_multi_optimizer.py
index 23431bbb728f2f5ea5de01c6599c70c81fb88679..84cf0d696c4a2ec1ac4c7309861543b647282638 100644
--- a/oneflow/python/test/ops/test_multi_optimizer.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_multi_optimizer.py
@@ -15,7 +15,7 @@ limitations under the License.
 """
 import unittest
 import numpy as np
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import tensorflow as tf

 from collections import OrderedDict
diff --git a/oneflow/python/test/ops/test_multi_process.py b/oneflow/compatible_single_client_python/test/ops/test_multi_process.py
similarity index 98%
rename from oneflow/python/test/ops/test_multi_process.py
rename to oneflow/compatible_single_client_python/test/ops/test_multi_process.py
index 86b830ec2c8e81dae71966dc43b55c759f11904f..9c227cd512425595198ec5deed375458ba889db6 100644
--- a/oneflow/python/test/ops/test_multi_process.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_multi_process.py
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
""" import unittest -import oneflow as flow +from oneflow.compatible import single_client as flow import os diff --git a/oneflow/python/test/ops/test_multi_square_sum.py b/oneflow/compatible_single_client_python/test/ops/test_multi_square_sum.py similarity index 95% rename from oneflow/python/test/ops/test_multi_square_sum.py rename to oneflow/compatible_single_client_python/test/ops/test_multi_square_sum.py index 7c807b1dfab62ffdcefe06eceb9de29e317debdc..1b931f55dbf853a5e3251872b2ca150e53dbe88a 100644 --- a/oneflow/python/test/ops/test_multi_square_sum.py +++ b/oneflow/compatible_single_client_python/test/ops/test_multi_square_sum.py @@ -19,8 +19,8 @@ import os import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft func_config = flow.FunctionConfig() func_config.default_data_type(flow.float) diff --git a/oneflow/python/test/ops/test_multiply.py b/oneflow/compatible_single_client_python/test/ops/test_multiply.py similarity index 97% rename from oneflow/python/test/ops/test_multiply.py rename to oneflow/compatible_single_client_python/test/ops/test_multiply.py index 585f2e6f0a0f1a42b18fde574c8b97466c31a4a8..ed70f8739ca9519f9963ecdf26677f52e7c944c3 100644 --- a/oneflow/python/test/ops/test_multiply.py +++ b/oneflow/compatible_single_client_python/test/ops/test_multiply.py @@ -17,7 +17,7 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import ( Args, @@ -27,7 +27,7 @@ from test_util import ( type_name_to_flow_type, type_name_to_np_type, ) -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_name_scope.py b/oneflow/compatible_single_client_python/test/ops/test_name_scope.py similarity index 97% rename from oneflow/python/test/ops/test_name_scope.py rename to oneflow/compatible_single_client_python/test/ops/test_name_scope.py index d347ca3402bfbc8eb8e27b668ae7af9436f9ea19..0006c72c1774a33969fcc1f22d52a3bb4fd9aa68 100644 --- a/oneflow/python/test/ops/test_name_scope.py +++ b/oneflow/compatible_single_client_python/test/ops/test_name_scope.py @@ -15,7 +15,7 @@ limitations under the License. 
""" import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow @flow.unittest.skip_unless_1n1d() diff --git a/oneflow/python/test/ops/test_nccl_use_compute_stream.py b/oneflow/compatible_single_client_python/test/ops/test_nccl_use_compute_stream.py similarity index 98% rename from oneflow/python/test/ops/test_nccl_use_compute_stream.py rename to oneflow/compatible_single_client_python/test/ops/test_nccl_use_compute_stream.py index 65a534734817042058037b9ae913028d95a86041..8010b55bfc04fa7c756410d8604e7aca8032745d 100644 --- a/oneflow/python/test/ops/test_nccl_use_compute_stream.py +++ b/oneflow/compatible_single_client_python/test/ops/test_nccl_use_compute_stream.py @@ -17,9 +17,9 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft def _test_split_to_split_enable_all_to_all( diff --git a/oneflow/python/test/ops/test_nn_conv1d.py b/oneflow/compatible_single_client_python/test/ops/test_nn_conv1d.py similarity index 99% rename from oneflow/python/test/ops/test_nn_conv1d.py rename to oneflow/compatible_single_client_python/test/ops/test_nn_conv1d.py index db46f2fe1b2c1fbafa4a98adeedfd4d28fb59e15..74f2741101c0d2b557909472e8fd77cfaf1715f7 100644 --- a/oneflow/python/test/ops/test_nn_conv1d.py +++ b/oneflow/compatible_single_client_python/test/ops/test_nn_conv1d.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_nn_conv2d.py b/oneflow/compatible_single_client_python/test/ops/test_nn_conv2d.py similarity index 99% rename from oneflow/python/test/ops/test_nn_conv2d.py rename to oneflow/compatible_single_client_python/test/ops/test_nn_conv2d.py index 7d3916b2024f3caf5749ecbb0eb9911397d6e269..d6966bbd83574ec37c9e34fa2c630240454645fa 100644 --- a/oneflow/python/test/ops/test_nn_conv2d.py +++ b/oneflow/compatible_single_client_python/test/ops/test_nn_conv2d.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_nn_conv2d_bias.py b/oneflow/compatible_single_client_python/test/ops/test_nn_conv2d_bias.py similarity index 98% rename from oneflow/python/test/ops/test_nn_conv2d_bias.py rename to oneflow/compatible_single_client_python/test/ops/test_nn_conv2d_bias.py index 175c03e87dd969665b7eb6355c3ff92a5c34ed20..96be3603d2eaaa40cf6029223a53d4ca6e2fcc93 100644 --- a/oneflow/python/test/ops/test_nn_conv2d_bias.py +++ b/oneflow/compatible_single_client_python/test/ops/test_nn_conv2d_bias.py @@ -18,11 +18,11 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_nn_conv2d_padding.py 
similarity index 99%
rename from oneflow/python/test/ops/test_nn_conv2d_padding.py
rename to oneflow/compatible_single_client_python/test/ops/test_nn_conv2d_padding.py
index 761e445516dd2e7f086e65f481c401b6b5f42049..5f7953ccaa83c245c6e1a7ebab2e6bbb7bbf621d 100644
--- a/oneflow/python/test/ops/test_nn_conv2d_padding.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_nn_conv2d_padding.py
@@ -18,7 +18,7 @@ import os
 from collections import OrderedDict

 import numpy as np
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import tensorflow as tf
 import test_global_storage
 from test_util import GenArgList
diff --git a/oneflow/python/test/ops/test_nn_conv2d_padding_dynamic.py b/oneflow/compatible_single_client_python/test/ops/test_nn_conv2d_padding_dynamic.py
similarity index 98%
rename from oneflow/python/test/ops/test_nn_conv2d_padding_dynamic.py
rename to oneflow/compatible_single_client_python/test/ops/test_nn_conv2d_padding_dynamic.py
index 7942d558e88fc199a45bf86984a08b4e8641bb48..0477e3254cf45d777306130f275c678840bd9551 100644
--- a/oneflow/python/test/ops/test_nn_conv2d_padding_dynamic.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_nn_conv2d_padding_dynamic.py
@@ -18,10 +18,10 @@ import os
 from collections import OrderedDict

 import numpy as np
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import tensorflow as tf
 from test_util import GenArgList
-import oneflow.typing as oft
+from oneflow.compatible.single_client import typing as oft

 gpus = tf.config.experimental.list_physical_devices("GPU")
 for gpu in gpus:
diff --git a/oneflow/python/test/ops/test_nn_conv3d.py b/oneflow/compatible_single_client_python/test/ops/test_nn_conv3d.py
similarity index 99%
rename from oneflow/python/test/ops/test_nn_conv3d.py
rename to oneflow/compatible_single_client_python/test/ops/test_nn_conv3d.py
index 87231d955c935995de63da94f75c585d924ed25d..3fb337ac3ad094f948b139b05f2cd35d390583d1 100644
--- a/oneflow/python/test/ops/test_nn_conv3d.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_nn_conv3d.py
@@ -18,7 +18,7 @@ import os
 from collections import OrderedDict

 import numpy as np
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import tensorflow as tf
 import test_global_storage
 from test_util import GenArgList
diff --git a/oneflow/python/test/ops/test_nvtx_range.py b/oneflow/compatible_single_client_python/test/ops/test_nvtx_range.py
similarity index 95%
rename from oneflow/python/test/ops/test_nvtx_range.py
rename to oneflow/compatible_single_client_python/test/ops/test_nvtx_range.py
index 61fcd279942c517edd0bb8642767685ee47ec27d..b9d3101495e93807e85b383387f30b1c638c35c0 100644
--- a/oneflow/python/test/ops/test_nvtx_range.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_nvtx_range.py
@@ -15,8 +15,8 @@ limitations under the License.
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import os func_config = flow.FunctionConfig() diff --git a/oneflow/python/test/ops/test_object_bbox_flip.py b/oneflow/compatible_single_client_python/test/ops/test_object_bbox_flip.py similarity index 97% rename from oneflow/python/test/ops/test_object_bbox_flip.py rename to oneflow/compatible_single_client_python/test/ops/test_object_bbox_flip.py index 17019f1c9c10029f9aa4c8d8b82bd217494ba5e3..a83c115754b9eea4b3e762873f501f31f6727a39 100644 --- a/oneflow/python/test/ops/test_object_bbox_flip.py +++ b/oneflow/compatible_single_client_python/test/ops/test_object_bbox_flip.py @@ -17,8 +17,8 @@ import unittest import random import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft def _of_object_bbox_flip(bbox_list, image_size, flip_code): diff --git a/oneflow/python/test/ops/test_object_bbox_scale.py b/oneflow/compatible_single_client_python/test/ops/test_object_bbox_scale.py similarity index 98% rename from oneflow/python/test/ops/test_object_bbox_scale.py rename to oneflow/compatible_single_client_python/test/ops/test_object_bbox_scale.py index e445f7bf5392c20dcce49b11c0aa7d13c4e960dc..61d2521dc1e1330f23db1b3e09615420fea8e92b 100644 --- a/oneflow/python/test/ops/test_object_bbox_scale.py +++ b/oneflow/compatible_single_client_python/test/ops/test_object_bbox_scale.py @@ -19,8 +19,8 @@ import random import cv2 import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft def _random_sample_images(anno_file, image_dir, batch_size): diff --git a/oneflow/python/test/ops/test_object_segm_poly_flip.py b/oneflow/compatible_single_client_python/test/ops/test_object_segm_poly_flip.py similarity index 97% rename from oneflow/python/test/ops/test_object_segm_poly_flip.py rename to oneflow/compatible_single_client_python/test/ops/test_object_segm_poly_flip.py index 406d69488b408bca4e01265bdd37eb11128f2de2..d1fb57d8bfaf76b8e76bf587df23a4eabc73cb3a 100644 --- a/oneflow/python/test/ops/test_object_segm_poly_flip.py +++ b/oneflow/compatible_single_client_python/test/ops/test_object_segm_poly_flip.py @@ -17,8 +17,8 @@ import unittest import random import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft def _of_object_segm_poly_flip(poly_list, image_size, flip_code): diff --git a/oneflow/python/test/ops/test_object_segm_poly_to_mask.py b/oneflow/compatible_single_client_python/test/ops/test_object_segm_poly_to_mask.py similarity index 99% rename from oneflow/python/test/ops/test_object_segm_poly_to_mask.py rename to oneflow/compatible_single_client_python/test/ops/test_object_segm_poly_to_mask.py index 45b591f71dd43bc88cd6dfeddc829add9e855521..eb490cc2ab326f93dbe74ed2f33ee36001853506 100644 --- a/oneflow/python/test/ops/test_object_segm_poly_to_mask.py +++ b/oneflow/compatible_single_client_python/test/ops/test_object_segm_poly_to_mask.py @@ -19,8 +19,8 @@ import random import cv2 import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from 


 coco_dict = dict()
diff --git a/oneflow/python/test/ops/test_one_hot.py b/oneflow/compatible_single_client_python/test/ops/test_one_hot.py
similarity index 95%
rename from oneflow/python/test/ops/test_one_hot.py
rename to oneflow/compatible_single_client_python/test/ops/test_one_hot.py
index 78e027ecb3c02475abeca4f82fb1e14094763b93..dab118a6f637fd77aec2982c790c3104529eb974 100644
--- a/oneflow/python/test/ops/test_one_hot.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_one_hot.py
@@ -17,10 +17,10 @@ import unittest
 from collections import OrderedDict

 import numpy as np
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import tensorflow as tf
 from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type
-import oneflow.typing as oft
+from oneflow.compatible.single_client import typing as oft

 gpus = tf.config.experimental.list_physical_devices("GPU")
 for gpu in gpus:
diff --git a/oneflow/python/test/ops/test_oneflow_export.py b/oneflow/compatible_single_client_python/test/ops/test_oneflow_export.py
similarity index 94%
rename from oneflow/python/test/ops/test_oneflow_export.py
rename to oneflow/compatible_single_client_python/test/ops/test_oneflow_export.py
index 9dc63db0fa317564940b7398187a477763217579..6d24a3f00b8965c6f85a9456d986afe41857ecaa 100644
--- a/oneflow/python/test/ops/test_oneflow_export.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_oneflow_export.py
@@ -14,7 +14,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 """
 import unittest
-import oneflow as flow
+from oneflow.compatible import single_client as flow


 def AddLossUnderNormalMode():
diff --git a/oneflow/python/test/ops/test_ones.py b/oneflow/compatible_single_client_python/test/ops/test_ones.py
similarity index 96%
rename from oneflow/python/test/ops/test_ones.py
rename to oneflow/compatible_single_client_python/test/ops/test_ones.py
index bf5a4fdbc6ef230aa136230e41913437ca2fdd5b..a7b68a676029ea63e7b13d32ef3a4243d3fff644 100644
--- a/oneflow/python/test/ops/test_ones.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_ones.py
@@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import numpy as np
-import oneflow.typing as tp
+from oneflow.compatible.single_client import typing as tp
 from test_util import GenArgList
 import unittest
 from collections import OrderedDict
diff --git a/oneflow/python/test/ops/test_optimizer_placement_optimization.py b/oneflow/compatible_single_client_python/test/ops/test_optimizer_placement_optimization.py
similarity index 93%
rename from oneflow/python/test/ops/test_optimizer_placement_optimization.py
rename to oneflow/compatible_single_client_python/test/ops/test_optimizer_placement_optimization.py
index ef231fe2a9518a568d198fae61a76a091846abbd..1ce881c0a2b58030ac142fd8622e1fd369466b5a 100644
--- a/oneflow/python/test/ops/test_optimizer_placement_optimization.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_optimizer_placement_optimization.py
@@ -15,8 +15,8 @@ limitations under the License.
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft def _test(test_case, mode): diff --git a/oneflow/python/test/ops/test_optimizers.py b/oneflow/compatible_single_client_python/test/ops/test_optimizers.py similarity index 99% rename from oneflow/python/test/ops/test_optimizers.py rename to oneflow/compatible_single_client_python/test/ops/test_optimizers.py index d60d0cac07b4b57bffac10389526e759678865cc..19defd0314d96e19aab1691e4156e7ceb1b446e7 100644 --- a/oneflow/python/test/ops/test_optimizers.py +++ b/oneflow/compatible_single_client_python/test/ops/test_optimizers.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_pad.py b/oneflow/compatible_single_client_python/test/ops/test_pad.py similarity index 98% rename from oneflow/python/test/ops/test_pad.py rename to oneflow/compatible_single_client_python/test/ops/test_pad.py index 13d59c83ea3f2f0d6f708491203577356f62d427..df3414d7e90da615c4436fc425cd4b1e6dd76c59 100644 --- a/oneflow/python/test/ops/test_pad.py +++ b/oneflow/compatible_single_client_python/test/ops/test_pad.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import Args, CompareOpWithTensorFlow, GenArgDict diff --git a/oneflow/python/test/ops/test_parallel.py b/oneflow/compatible_single_client_python/test/ops/test_parallel.py similarity index 93% rename from oneflow/python/test/ops/test_parallel.py rename to oneflow/compatible_single_client_python/test/ops/test_parallel.py index d886954131e2b22fff9e0c9c91304880b096e864..b13c512f238d389f10a21f73298f922c241662ac 100644 --- a/oneflow/python/test/ops/test_parallel.py +++ b/oneflow/compatible_single_client_python/test/ops/test_parallel.py @@ -15,8 +15,8 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft def NaiveTest(test_case): diff --git a/oneflow/python/test/ops/test_parallel_cast.py b/oneflow/compatible_single_client_python/test/ops/test_parallel_cast.py similarity index 98% rename from oneflow/python/test/ops/test_parallel_cast.py rename to oneflow/compatible_single_client_python/test/ops/test_parallel_cast.py index 1899f3ca60d98dcafde3142b216b769eec8432fc..70a961ad09cd99e3fe76ed84b84198dad6c147f5 100644 --- a/oneflow/python/test/ops/test_parallel_cast.py +++ b/oneflow/compatible_single_client_python/test/ops/test_parallel_cast.py @@ -16,7 +16,7 @@ limitations under the License. 
 import os
 import unittest
 import numpy as np
-import oneflow as flow
+from oneflow.compatible import single_client as flow


 def _test(test_case, device_num):
diff --git a/oneflow/python/test/ops/test_partial_fc.py b/oneflow/compatible_single_client_python/test/ops/test_partial_fc.py
similarity index 98%
rename from oneflow/python/test/ops/test_partial_fc.py
rename to oneflow/compatible_single_client_python/test/ops/test_partial_fc.py
index 03dd99559a4d19be48f06e1922b5c0c27687470b..3fc560bb2fd6f0a9efe0b13354f2819782aedbcc 100644
--- a/oneflow/python/test/ops/test_partial_fc.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_partial_fc.py
@@ -16,8 +16,8 @@ limitations under the License.
 import unittest
 import os
 import numpy as np
-import oneflow as flow
-import oneflow.typing as oft
+from oneflow.compatible import single_client as flow
+from oneflow.compatible.single_client import typing as oft
 from collections import OrderedDict
 from test_util import GenArgList

diff --git a/oneflow/python/test/ops/test_pixel_shuffle.py b/oneflow/compatible_single_client_python/test/ops/test_pixel_shuffle.py
similarity index 98%
rename from oneflow/python/test/ops/test_pixel_shuffle.py
rename to oneflow/compatible_single_client_python/test/ops/test_pixel_shuffle.py
index c603633d91b5156a8fba40e5112d7d45d3f4025e..644d7b59481b38d6fc122b119f08350365a46c36 100644
--- a/oneflow/python/test/ops/test_pixel_shuffle.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_pixel_shuffle.py
@@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import numpy as np
-import oneflow.typing as tp
+from oneflow.compatible.single_client import typing as tp
 from test_util import GenArgList
 import unittest
 from collections import OrderedDict
diff --git a/oneflow/python/test/ops/test_pixel_shufflev2.py b/oneflow/compatible_single_client_python/test/ops/test_pixel_shufflev2.py
similarity index 97%
rename from oneflow/python/test/ops/test_pixel_shufflev2.py
rename to oneflow/compatible_single_client_python/test/ops/test_pixel_shufflev2.py
index af8ac1b8d5048995220c6678b707e328e0d39ede..868569165c71c7a3b2ca26c170968fa92e7efb34 100644
--- a/oneflow/python/test/ops/test_pixel_shufflev2.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_pixel_shufflev2.py
@@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import numpy as np
-import oneflow.typing as tp
+from oneflow.compatible.single_client import typing as tp
 from test_util import GenArgList
 import unittest
 from collections import OrderedDict
diff --git a/oneflow/python/test/ops/test_polyval.py b/oneflow/compatible_single_client_python/test/ops/test_polyval.py
similarity index 97%
rename from oneflow/python/test/ops/test_polyval.py
rename to oneflow/compatible_single_client_python/test/ops/test_polyval.py
index 4e39d7f632214af54b1514aed450f4a1aff9cce3..bff6313bc16cedd9e3fc515616f0e774831af748 100644
--- a/oneflow/python/test/ops/test_polyval.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_polyval.py
@@ -15,8 +15,8 @@ limitations under the License.
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp from collections import OrderedDict from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type diff --git a/oneflow/python/test/ops/test_pool.py b/oneflow/compatible_single_client_python/test/ops/test_pool.py similarity index 98% rename from oneflow/python/test/ops/test_pool.py rename to oneflow/compatible_single_client_python/test/ops/test_pool.py index 20b9a1683bff2829f5eaf23ae8903cbb38f7aa36..87aa5440e01502f1b03824b22527a6579d31e5e6 100644 --- a/oneflow/python/test/ops/test_pool.py +++ b/oneflow/compatible_single_client_python/test/ops/test_pool.py @@ -19,10 +19,10 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_pool_padding.py b/oneflow/compatible_single_client_python/test/ops/test_pool_padding.py similarity index 98% rename from oneflow/python/test/ops/test_pool_padding.py rename to oneflow/compatible_single_client_python/test/ops/test_pool_padding.py index e70290979531026f2c36f7a0f9a801a52f8290fe..a335aa3ae3e9b48b1fd1a4159ab83ebda44feebf 100644 --- a/oneflow/python/test/ops/test_pool_padding.py +++ b/oneflow/compatible_single_client_python/test/ops/test_pool_padding.py @@ -19,10 +19,10 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_prelu.py b/oneflow/compatible_single_client_python/test/ops/test_prelu.py similarity index 97% rename from oneflow/python/test/ops/test_prelu.py rename to oneflow/compatible_single_client_python/test/ops/test_prelu.py index 81beec393129cd74993aee05a514ee451ac646ef..5aff042386d2782ace152df328927feb157f1a52 100644 --- a/oneflow/python/test/ops/test_prelu.py +++ b/oneflow/compatible_single_client_python/test/ops/test_prelu.py @@ -18,10 +18,10 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import test_global_storage from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft def _check(test_case, x, y, shared_axes): diff --git a/oneflow/python/test/ops/test_quantization_aware_training.py b/oneflow/compatible_single_client_python/test/ops/test_quantization_aware_training.py similarity index 96% rename from oneflow/python/test/ops/test_quantization_aware_training.py rename to oneflow/compatible_single_client_python/test/ops/test_quantization_aware_training.py index ccd2b841f9e00919c9c14e58fc4bb97c48d41e13..373fbb1084962dabf6f3396c1ae478029a325001 100644 --- a/oneflow/python/test/ops/test_quantization_aware_training.py +++ 
@@ -18,8 +18,8 @@
 from collections import OrderedDict

 import numpy as np
-import oneflow as flow
-import oneflow.typing as tp
+from oneflow.compatible import single_client as flow
+from oneflow.compatible.single_client import typing as tp

 from test_util import GenArgList
diff --git a/oneflow/python/test/ops/test_quantize_op.py b/oneflow/compatible_single_client_python/test/ops/test_quantize_op.py
similarity index 99%
rename from oneflow/python/test/ops/test_quantize_op.py
rename to oneflow/compatible_single_client_python/test/ops/test_quantize_op.py
index 0316d7444058e9975f926aa242af59e10a99b629..6dc1022f493048bd28f47f1eacf930ef65c9104d 100644
--- a/oneflow/python/test/ops/test_quantize_op.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_quantize_op.py
@@ -18,8 +18,8 @@ import math
 import numpy as np
 import unittest

-import oneflow as flow
-import oneflow.typing as oft
+from oneflow.compatible import single_client as flow
+from oneflow.compatible.single_client import typing as oft

 import test_global_storage
 from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type
diff --git a/oneflow/python/test/ops/test_random_mask_like.py b/oneflow/compatible_single_client_python/test/ops/test_random_mask_like.py
similarity index 94%
rename from oneflow/python/test/ops/test_random_mask_like.py
rename to oneflow/compatible_single_client_python/test/ops/test_random_mask_like.py
index 3fa168477765461fc1764c3984c9bdc19bdf091c..abb1cdd46ef167be0b9b03ca686b1d00551e800e 100644
--- a/oneflow/python/test/ops/test_random_mask_like.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_random_mask_like.py
@@ -20,9 +20,9 @@ import tempfile
 from collections import OrderedDict

 import numpy as np
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 from test_util import GenArgList, type_name_to_flow_type
-import oneflow.typing as oft
+from oneflow.compatible.single_client import typing as oft


 def of_run(device_type, x_shape, rate, seed):
diff --git a/oneflow/python/test/ops/test_range.py b/oneflow/compatible_single_client_python/test/ops/test_range.py
similarity index 98%
rename from oneflow/python/test/ops/test_range.py
rename to oneflow/compatible_single_client_python/test/ops/test_range.py
index 87d110f5dca75df3f7f9d25866da700f6f8d0830..6f1e7bc15aa82c85603e56eab88ad09e1f6137fd 100644
--- a/oneflow/python/test/ops/test_range.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_range.py
@@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
""" -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_reduce_mean.py b/oneflow/compatible_single_client_python/test/ops/test_reduce_mean.py similarity index 98% rename from oneflow/python/test/ops/test_reduce_mean.py rename to oneflow/compatible_single_client_python/test/ops/test_reduce_mean.py index 6281ab04461a8c7d1404d31886d42fe357db2a89..210d5d4ca3168f43706ec81a51c42a2f469b87ca 100644 --- a/oneflow/python/test/ops/test_reduce_mean.py +++ b/oneflow/compatible_single_client_python/test/ops/test_reduce_mean.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_reduce_ops.py b/oneflow/compatible_single_client_python/test/ops/test_reduce_ops.py similarity index 99% rename from oneflow/python/test/ops/test_reduce_ops.py rename to oneflow/compatible_single_client_python/test/ops/test_reduce_ops.py index 9bfe0cea53b1196413ae4bec896bb16c23e8d20f..8e66c069dbb49e8593431e9ff284724af87bc884 100644 --- a/oneflow/python/test/ops/test_reduce_ops.py +++ b/oneflow/compatible_single_client_python/test/ops/test_reduce_ops.py @@ -18,11 +18,11 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import oneflow._oneflow_internal import tensorflow as tf from test_util import GenArgList -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import test_global_storage gpus = tf.config.experimental.list_physical_devices("GPU") diff --git a/oneflow/python/test/ops/test_reduce_opsV2.py b/oneflow/compatible_single_client_python/test/ops/test_reduce_opsV2.py similarity index 97% rename from oneflow/python/test/ops/test_reduce_opsV2.py rename to oneflow/compatible_single_client_python/test/ops/test_reduce_opsV2.py index 3238e297a78be7cd535c85a0d52dcf4f7f7d6353..afa3c3ce8bfbd1f8b90362896ad94535b96d9f32 100644 --- a/oneflow/python/test/ops/test_reduce_opsV2.py +++ b/oneflow/compatible_single_client_python/test/ops/test_reduce_opsV2.py @@ -18,12 +18,12 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import oneflow._oneflow_internal import tensorflow as tf import test_global_storage from test_util import GenArgList -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_reduce_sum.py b/oneflow/compatible_single_client_python/test/ops/test_reduce_sum.py similarity index 97% rename from oneflow/python/test/ops/test_reduce_sum.py rename to oneflow/compatible_single_client_python/test/ops/test_reduce_sum.py index 4c102f25f89334e32905ffad067ee2cf4d3ddc26..3d30bd2f1288765e99c675e49f41380e2193c3aa 100644 --- a/oneflow/python/test/ops/test_reduce_sum.py +++ b/oneflow/compatible_single_client_python/test/ops/test_reduce_sum.py @@ -18,11 +18,11 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import 
 import oneflow._oneflow_internal
 import tensorflow as tf
 from test_util import GenArgList
-import oneflow.typing as oft
+from oneflow.compatible.single_client import typing as oft

 gpus = tf.config.experimental.list_physical_devices("GPU")
 for gpu in gpus:
diff --git a/oneflow/python/test/ops/test_reduce_sum_like.py b/oneflow/compatible_single_client_python/test/ops/test_reduce_sum_like.py
similarity index 97%
rename from oneflow/python/test/ops/test_reduce_sum_like.py
rename to oneflow/compatible_single_client_python/test/ops/test_reduce_sum_like.py
index eb76cd747e075ed4cf6f2babf46c6cc0519007a7..58865b275b8d92a379215535cbc107fadb1d6aea 100644
--- a/oneflow/python/test/ops/test_reduce_sum_like.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_reduce_sum_like.py
@@ -18,11 +18,11 @@ import os
 from collections import OrderedDict

 import numpy as np
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import oneflow._oneflow_internal
 import tensorflow as tf
 from test_util import GenArgList
-import oneflow.typing as oft
+from oneflow.compatible.single_client import typing as oft

 gpus = tf.config.experimental.list_physical_devices("GPU")
 for gpu in gpus:
diff --git a/oneflow/python/test/ops/test_reflection_pad2d.py b/oneflow/compatible_single_client_python/test/ops/test_reflection_pad2d.py
similarity index 98%
rename from oneflow/python/test/ops/test_reflection_pad2d.py
rename to oneflow/compatible_single_client_python/test/ops/test_reflection_pad2d.py
index 2bb68ead6124be569c69ea1ba03decb9a74d4c4c..6d724b77d10f26df3ef7b91ec49428b858b8d890 100644
--- a/oneflow/python/test/ops/test_reflection_pad2d.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_reflection_pad2d.py
@@ -18,8 +18,8 @@ import os
 from collections import OrderedDict

 import numpy as np
-import oneflow as flow
-import oneflow.typing as tp
+from oneflow.compatible import single_client as flow
+from oneflow.compatible.single_client import typing as tp
 from test_util import (
     Args,
     GenArgDict,
diff --git a/oneflow/python/test/ops/test_relu6.py b/oneflow/compatible_single_client_python/test/ops/test_relu6.py
similarity index 98%
rename from oneflow/python/test/ops/test_relu6.py
rename to oneflow/compatible_single_client_python/test/ops/test_relu6.py
index 3e6f65345ed34304ccc4fe8447038a4334a31adb..0b25285f7caa6e8aa5856fabf96f303a24598c21 100644
--- a/oneflow/python/test/ops/test_relu6.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_relu6.py
@@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 """
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import numpy as np
-import oneflow.typing as tp
+from oneflow.compatible.single_client import typing as tp
 from test_util import GenArgList
 import unittest
 from collections import OrderedDict
diff --git a/oneflow/python/test/ops/test_repeat_acc.py b/oneflow/compatible_single_client_python/test/ops/test_repeat_acc.py
similarity index 95%
rename from oneflow/python/test/ops/test_repeat_acc.py
rename to oneflow/compatible_single_client_python/test/ops/test_repeat_acc.py
index de2420797b22fb533b730d4d5266ae54b396c040..2f64aaf29d1b95d19f1524369db1bbcfc6ccf9e3 100644
--- a/oneflow/python/test/ops/test_repeat_acc.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_repeat_acc.py
@@ -15,8 +15,8 @@ limitations under the License.
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from collections import OrderedDict from test_util import Args, GenArgDict diff --git a/oneflow/python/test/ops/test_replication_pad2d.py b/oneflow/compatible_single_client_python/test/ops/test_replication_pad2d.py similarity index 98% rename from oneflow/python/test/ops/test_replication_pad2d.py rename to oneflow/compatible_single_client_python/test/ops/test_replication_pad2d.py index 8726b23379e5f5888aa7fdd760bcfb0ec207f99c..5093b3c6c8f37cb4fc8d52cd5f086e59786e43b3 100644 --- a/oneflow/python/test/ops/test_replication_pad2d.py +++ b/oneflow/compatible_single_client_python/test/ops/test_replication_pad2d.py @@ -18,8 +18,8 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp from test_util import ( Args, GenArgDict, diff --git a/oneflow/python/test/ops/test_reshape.py b/oneflow/compatible_single_client_python/test/ops/test_reshape.py similarity index 96% rename from oneflow/python/test/ops/test_reshape.py rename to oneflow/compatible_single_client_python/test/ops/test_reshape.py index 3a0775b215bd5754ce20852c4d5ffa086092bd01..24d21c6bf40c0bf4370e71a2cb52f2b0a8001485 100644 --- a/oneflow/python/test/ops/test_reshape.py +++ b/oneflow/compatible_single_client_python/test/ops/test_reshape.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import Args, CompareOpWithTensorFlow, GenArgDict diff --git a/oneflow/python/test/ops/test_reshapeV2.py b/oneflow/compatible_single_client_python/test/ops/test_reshapeV2.py similarity index 98% rename from oneflow/python/test/ops/test_reshapeV2.py rename to oneflow/compatible_single_client_python/test/ops/test_reshapeV2.py index 106cdde421036a3ed602fc13472a4b5442c55df3..831efa4791fd95c4e2702139dad9918c5f4e1d1e 100644 --- a/oneflow/python/test/ops/test_reshapeV2.py +++ b/oneflow/compatible_single_client_python/test/ops/test_reshapeV2.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_reshapeV3.py b/oneflow/compatible_single_client_python/test/ops/test_reshapeV3.py similarity index 97% rename from oneflow/python/test/ops/test_reshapeV3.py rename to oneflow/compatible_single_client_python/test/ops/test_reshapeV3.py index 140b2b34b91ae760b5110a9fe13c5bed14ff63de..b79a0f9229e27988fe4d827ae16f081a2b8bed4c 100644 --- a/oneflow/python/test/ops/test_reshapeV3.py +++ b/oneflow/compatible_single_client_python/test/ops/test_reshapeV3.py @@ -17,7 +17,7 @@ import unittest import os from collections import OrderedDict -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_reverse.py b/oneflow/compatible_single_client_python/test/ops/test_reverse.py similarity index 95% rename from oneflow/python/test/ops/test_reverse.py rename to oneflow/compatible_single_client_python/test/ops/test_reverse.py index 
--- a/oneflow/python/test/ops/test_reverse.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_reverse.py
@@ -15,8 +15,8 @@ limitations under the License.
 """
 import unittest
 import numpy as np
-import oneflow as flow
-import oneflow.typing as tp
+from oneflow.compatible import single_client as flow
+from oneflow.compatible.single_client import typing as tp


 def _of_reverse(input, axis, dtype):
diff --git a/oneflow/python/test/ops/test_rsqrt.py b/oneflow/compatible_single_client_python/test/ops/test_rsqrt.py
similarity index 93%
rename from oneflow/python/test/ops/test_rsqrt.py
rename to oneflow/compatible_single_client_python/test/ops/test_rsqrt.py
index 6dac51016932dfc2361f9b62c3a5a9de46a0f884..2a3930277791232634edcc8c432d539f02a8ffbc 100644
--- a/oneflow/python/test/ops/test_rsqrt.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_rsqrt.py
@@ -15,8 +15,8 @@ limitations under the License.
 """
 import unittest
 import numpy as np
-import oneflow as flow
-import oneflow.typing as oft
+from oneflow.compatible import single_client as flow
+from oneflow.compatible.single_client import typing as oft


 def _check(test_case, x, y):
diff --git a/oneflow/python/test/ops/test_scalar_by_tensor_int.py b/oneflow/compatible_single_client_python/test/ops/test_scalar_by_tensor_int.py
similarity index 99%
rename from oneflow/python/test/ops/test_scalar_by_tensor_int.py
rename to oneflow/compatible_single_client_python/test/ops/test_scalar_by_tensor_int.py
index dde7039366a030ddf1deb52be65937a4d0bbcdee..bdfb9ca15e8f900f65c03c7a8ebeeddc675a3180 100644
--- a/oneflow/python/test/ops/test_scalar_by_tensor_int.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_scalar_by_tensor_int.py
@@ -15,8 +15,8 @@ limitations under the License.
 """
 import unittest
 import numpy as np
-import oneflow as flow
-import oneflow.typing as oft
+from oneflow.compatible import single_client as flow
+from oneflow.compatible.single_client import typing as oft


 def _check(test_case, x, y, out, case):
diff --git a/oneflow/python/test/ops/test_scalar_by_tensor_ops.py b/oneflow/compatible_single_client_python/test/ops/test_scalar_by_tensor_ops.py
similarity index 98%
rename from oneflow/python/test/ops/test_scalar_by_tensor_ops.py
rename to oneflow/compatible_single_client_python/test/ops/test_scalar_by_tensor_ops.py
index f0e97cc358451e3de1e97ad3b4f93329bc8a3aa4..d54993f4bb49cac68569c0fcf6e34aea352fa8cb 100644
--- a/oneflow/python/test/ops/test_scalar_by_tensor_ops.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_scalar_by_tensor_ops.py
@@ -18,7 +18,7 @@ import os
 from collections import OrderedDict

 import numpy as np
-import oneflow as flow
+from oneflow.compatible import single_client as flow
 import tensorflow as tf
 import test_global_storage
 from test_util import GenArgList
diff --git a/oneflow/python/test/ops/test_scalar_divide.py b/oneflow/compatible_single_client_python/test/ops/test_scalar_divide.py
similarity index 96%
rename from oneflow/python/test/ops/test_scalar_divide.py
rename to oneflow/compatible_single_client_python/test/ops/test_scalar_divide.py
index 1b0c34e9bd98796594a3ccb234a1c9d4c7336e1e..e665da54c6d3dea2373e4a9272b8989908fa5f03 100644
--- a/oneflow/python/test/ops/test_scalar_divide.py
+++ b/oneflow/compatible_single_client_python/test/ops/test_scalar_divide.py
@@ -15,8 +15,8 @@ limitations under the License.
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft @flow.unittest.skip_unless_1n1d() diff --git a/oneflow/python/test/ops/test_scalar_pow.py b/oneflow/compatible_single_client_python/test/ops/test_scalar_pow.py similarity index 97% rename from oneflow/python/test/ops/test_scalar_pow.py rename to oneflow/compatible_single_client_python/test/ops/test_scalar_pow.py index bff96c29d9d2c64114e5c780070a403d3f49dfd5..6760ad9561ed22a3d3275ead748dfc6a4bd1028c 100644 --- a/oneflow/python/test/ops/test_scalar_pow.py +++ b/oneflow/compatible_single_client_python/test/ops/test_scalar_pow.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_scatter_nd.py b/oneflow/compatible_single_client_python/test/ops/test_scatter_nd.py similarity index 99% rename from oneflow/python/test/ops/test_scatter_nd.py rename to oneflow/compatible_single_client_python/test/ops/test_scatter_nd.py index 0544377d74de6cad8e9a1c9ebf757b307970ca75..7a9f13104006de1eb70431de24c7eb176e628483 100644 --- a/oneflow/python/test/ops/test_scatter_nd.py +++ b/oneflow/compatible_single_client_python/test/ops/test_scatter_nd.py @@ -16,10 +16,10 @@ limitations under the License. from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgList -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import unittest import os diff --git a/oneflow/python/test/ops/test_shape.py b/oneflow/compatible_single_client_python/test/ops/test_shape.py similarity index 92% rename from oneflow/python/test/ops/test_shape.py rename to oneflow/compatible_single_client_python/test/ops/test_shape.py index 214df6c3739de518bce408c6b645d882aa7b607b..1cf909843511863101e8725f2c5a59731dc60f88 100644 --- a/oneflow/python/test/ops/test_shape.py +++ b/oneflow/compatible_single_client_python/test/ops/test_shape.py @@ -14,12 +14,12 @@ See the License for the specific language governing permissions and limitations under the License. 
""" import unittest -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np import os import random -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft @flow.unittest.skip_unless_1n2d() diff --git a/oneflow/python/test/ops/test_shuffle.py b/oneflow/compatible_single_client_python/test/ops/test_shuffle.py similarity index 96% rename from oneflow/python/test/ops/test_shuffle.py rename to oneflow/compatible_single_client_python/test/ops/test_shuffle.py index c4a31bca5b9a9a00cd0eaae167c065aa35ba0446..911f0ae3fcb5f8251ab6a4c2f538f8c1c64a2b8e 100644 --- a/oneflow/python/test/ops/test_shuffle.py +++ b/oneflow/compatible_single_client_python/test/ops/test_shuffle.py @@ -18,10 +18,10 @@ import uuid from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft @flow.unittest.skip_unless_1n1d() diff --git a/oneflow/python/test/ops/test_sigmoid_cross_entropy.py b/oneflow/compatible_single_client_python/test/ops/test_sigmoid_cross_entropy.py similarity index 96% rename from oneflow/python/test/ops/test_sigmoid_cross_entropy.py rename to oneflow/compatible_single_client_python/test/ops/test_sigmoid_cross_entropy.py index bcc04e16560ac57292f88fc5ca487213af4b02d8..cee48fa168cec9e2a82320e91ebb84b5837fa05f 100644 --- a/oneflow/python/test/ops/test_sigmoid_cross_entropy.py +++ b/oneflow/compatible_single_client_python/test/ops/test_sigmoid_cross_entropy.py @@ -17,8 +17,8 @@ import unittest import os import numpy as np import tensorflow as tf -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from collections import OrderedDict from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_size.py b/oneflow/compatible_single_client_python/test/ops/test_size.py similarity index 97% rename from oneflow/python/test/ops/test_size.py rename to oneflow/compatible_single_client_python/test/ops/test_size.py index 8235f20379c462e998a63fe771a64c26342fc2eb..4eaac6280cf627c45067cbbf9c733f28695209ad 100644 --- a/oneflow/python/test/ops/test_size.py +++ b/oneflow/compatible_single_client_python/test/ops/test_size.py @@ -14,11 +14,11 @@ See the License for the specific language governing permissions and limitations under the License. """ import unittest -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np import os import random -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft from collections import OrderedDict from test_util import ( GenArgList, diff --git a/oneflow/python/test/ops/test_slice_v2.py b/oneflow/compatible_single_client_python/test/ops/test_slice_v2.py similarity index 99% rename from oneflow/python/test/ops/test_slice_v2.py rename to oneflow/compatible_single_client_python/test/ops/test_slice_v2.py index 72e4a14ae1934ee7f23b715f8630aee10b89e2cf..9d93f52a43cf4989103e628c8596192082e9ee14 100644 --- a/oneflow/python/test/ops/test_slice_v2.py +++ b/oneflow/compatible_single_client_python/test/ops/test_slice_v2.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. 
""" import numpy as np -import oneflow as flow -import oneflow.typing as otp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as otp import test_util import typing as tp import collections diff --git a/oneflow/python/test/ops/test_smooth_l1_loss.py b/oneflow/compatible_single_client_python/test/ops/test_smooth_l1_loss.py similarity index 97% rename from oneflow/python/test/ops/test_smooth_l1_loss.py rename to oneflow/compatible_single_client_python/test/ops/test_smooth_l1_loss.py index aa2013208ce856dff72ffae31ebd33e126acd599..6aba55cfeb5a773eae43f14ddb5b1a5c0659051d 100644 --- a/oneflow/python/test/ops/test_smooth_l1_loss.py +++ b/oneflow/compatible_single_client_python/test/ops/test_smooth_l1_loss.py @@ -19,8 +19,8 @@ from collections import OrderedDict import os import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type diff --git a/oneflow/python/test/ops/test_softmax.py b/oneflow/compatible_single_client_python/test/ops/test_softmax.py similarity index 98% rename from oneflow/python/test/ops/test_softmax.py rename to oneflow/compatible_single_client_python/test/ops/test_softmax.py index 798327dcf54fa4513eb95892f752fc04d7e1eb28..9c04dbf47d3a19634b9cc1226110f211f6e8a371 100644 --- a/oneflow/python/test/ops/test_softmax.py +++ b/oneflow/compatible_single_client_python/test/ops/test_softmax.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList, type_name_to_flow_type diff --git a/oneflow/python/test/ops/test_softmax_cross_entropy.py b/oneflow/compatible_single_client_python/test/ops/test_softmax_cross_entropy.py similarity index 97% rename from oneflow/python/test/ops/test_softmax_cross_entropy.py rename to oneflow/compatible_single_client_python/test/ops/test_softmax_cross_entropy.py index 2c7d27923bb29fc28e6d5993d9b5ff3f5c29c403..02d7237ff8044378ac06bda6cc0d2c0dba89e0b6 100644 --- a/oneflow/python/test/ops/test_softmax_cross_entropy.py +++ b/oneflow/compatible_single_client_python/test/ops/test_softmax_cross_entropy.py @@ -17,8 +17,8 @@ import unittest import os import numpy as np import tensorflow as tf -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from collections import OrderedDict from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_sort.py b/oneflow/compatible_single_client_python/test/ops/test_sort.py similarity index 96% rename from oneflow/python/test/ops/test_sort.py rename to oneflow/compatible_single_client_python/test/ops/test_sort.py index 0386febb72811d5df72ed5ef516e6579b5f4e165..c906404491dcae29e8f30a21c35e2c0fa0cf8756 100644 --- a/oneflow/python/test/ops/test_sort.py +++ b/oneflow/compatible_single_client_python/test/ops/test_sort.py @@ -18,10 +18,10 @@ from collections import OrderedDict import os import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as 
oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_sparse_cross_entropy.py b/oneflow/compatible_single_client_python/test/ops/test_sparse_cross_entropy.py similarity index 97% rename from oneflow/python/test/ops/test_sparse_cross_entropy.py rename to oneflow/compatible_single_client_python/test/ops/test_sparse_cross_entropy.py index 83650fa6eaf9b158a20c94e748a25ed63bc7d72a..0b4903bb63fba90ddff14dc8eab028542a039591 100644 --- a/oneflow/python/test/ops/test_sparse_cross_entropy.py +++ b/oneflow/compatible_single_client_python/test/ops/test_sparse_cross_entropy.py @@ -18,8 +18,8 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import tensorflow as tf import test_global_storage from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type diff --git a/oneflow/python/test/ops/test_sparse_cross_entropy_ms.py b/oneflow/compatible_single_client_python/test/ops/test_sparse_cross_entropy_ms.py similarity index 97% rename from oneflow/python/test/ops/test_sparse_cross_entropy_ms.py rename to oneflow/compatible_single_client_python/test/ops/test_sparse_cross_entropy_ms.py index 6578eeee1fd1214087a6433f420ff75cc6fe0497..bdcdf07e86ea51c53e5c2aca0f04ba96f07c07b8 100644 --- a/oneflow/python/test/ops/test_sparse_cross_entropy_ms.py +++ b/oneflow/compatible_single_client_python/test/ops/test_sparse_cross_entropy_ms.py @@ -17,8 +17,8 @@ import unittest import os import numpy as np import tensorflow as tf -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from collections import OrderedDict from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_sparse_softmax_cross_entropy.py b/oneflow/compatible_single_client_python/test/ops/test_sparse_softmax_cross_entropy.py similarity index 97% rename from oneflow/python/test/ops/test_sparse_softmax_cross_entropy.py rename to oneflow/compatible_single_client_python/test/ops/test_sparse_softmax_cross_entropy.py index 86b33c93550f6f1409a4d5c51e83fdb13cf028aa..314511f14190390904a34360e1451fff09185702 100644 --- a/oneflow/python/test/ops/test_sparse_softmax_cross_entropy.py +++ b/oneflow/compatible_single_client_python/test/ops/test_sparse_softmax_cross_entropy.py @@ -18,8 +18,8 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import tensorflow as tf import test_global_storage from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type diff --git a/oneflow/python/test/ops/test_sparse_softmax_cross_entropy_ms.py b/oneflow/compatible_single_client_python/test/ops/test_sparse_softmax_cross_entropy_ms.py similarity index 97% rename from oneflow/python/test/ops/test_sparse_softmax_cross_entropy_ms.py rename to oneflow/compatible_single_client_python/test/ops/test_sparse_softmax_cross_entropy_ms.py index b7f28c7aa16b12ceaf7951205b0a5f2a66f0bd4c..95af611a1441bf53d897266a30861c435f1a0b9d 100644 --- a/oneflow/python/test/ops/test_sparse_softmax_cross_entropy_ms.py +++ b/oneflow/compatible_single_client_python/test/ops/test_sparse_softmax_cross_entropy_ms.py @@ -17,8 +17,8 @@ 
import unittest import os import numpy as np import tensorflow as tf -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from collections import OrderedDict from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_split_like.py b/oneflow/compatible_single_client_python/test/ops/test_split_like.py similarity index 96% rename from oneflow/python/test/ops/test_split_like.py rename to oneflow/compatible_single_client_python/test/ops/test_split_like.py index b94eee5acc7669a3b8af0c5ad9237d2618c97e21..e5518894f553a8b4e4603377a343153e231dbeac 100644 --- a/oneflow/python/test/ops/test_split_like.py +++ b/oneflow/compatible_single_client_python/test/ops/test_split_like.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. """ import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import test_global_storage import random import math diff --git a/oneflow/python/test/ops/test_sqrt.py b/oneflow/compatible_single_client_python/test/ops/test_sqrt.py similarity index 96% rename from oneflow/python/test/ops/test_sqrt.py rename to oneflow/compatible_single_client_python/test/ops/test_sqrt.py index 085b7cf4de89cafc525f3329d1226efdb1764b0b..fcb22b5bed7e9dffb2961aa9d4255ac23ce1f615 100644 --- a/oneflow/python/test/ops/test_sqrt.py +++ b/oneflow/compatible_single_client_python/test/ops/test_sqrt.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import CompareOpWithTensorFlow, GenArgDict diff --git a/oneflow/python/test/ops/test_square.py b/oneflow/compatible_single_client_python/test/ops/test_square.py similarity index 98% rename from oneflow/python/test/ops/test_square.py rename to oneflow/compatible_single_client_python/test/ops/test_square.py index fe840f301fa451faab3d749d733afdf603976fe6..9d458679faeb713d3bb3995a80479d4d98d96778 100644 --- a/oneflow/python/test/ops/test_square.py +++ b/oneflow/compatible_single_client_python/test/ops/test_square.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_square_sum.py b/oneflow/compatible_single_client_python/test/ops/test_square_sum.py similarity index 94% rename from oneflow/python/test/ops/test_square_sum.py rename to oneflow/compatible_single_client_python/test/ops/test_square_sum.py index 20ce2cec12ea6fc4663a7f0e539f9cea19868b6d..ca316c641293c4906535dd063fcab83ca8d91834 100644 --- a/oneflow/python/test/ops/test_square_sum.py +++ b/oneflow/compatible_single_client_python/test/ops/test_square_sum.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. 
""" import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import unittest import os diff --git a/oneflow/python/test/ops/test_squared_difference.py b/oneflow/compatible_single_client_python/test/ops/test_squared_difference.py similarity index 96% rename from oneflow/python/test/ops/test_squared_difference.py rename to oneflow/compatible_single_client_python/test/ops/test_squared_difference.py index 257f61a954ca9a3606e4be495d24f128bb60c835..254436c04f38d9cba3af2515f764c7a75bbe5702 100644 --- a/oneflow/python/test/ops/test_squared_difference.py +++ b/oneflow/compatible_single_client_python/test/ops/test_squared_difference.py @@ -17,8 +17,8 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import tensorflow as tf from test_util import Args, CompareOpWithTensorFlow, GenArgDict diff --git a/oneflow/python/test/ops/test_squeeze.py b/oneflow/compatible_single_client_python/test/ops/test_squeeze.py similarity index 98% rename from oneflow/python/test/ops/test_squeeze.py rename to oneflow/compatible_single_client_python/test/ops/test_squeeze.py index 3c06b63f18510734ddba9dfcbbd798a41cddfceb..01017a569d681c81f11ef7591b63fd6d09b84044 100644 --- a/oneflow/python/test/ops/test_squeeze.py +++ b/oneflow/compatible_single_client_python/test/ops/test_squeeze.py @@ -18,7 +18,7 @@ from collections import OrderedDict import os import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_ssp_variable_proxy.py b/oneflow/compatible_single_client_python/test/ops/test_ssp_variable_proxy.py similarity index 98% rename from oneflow/python/test/ops/test_ssp_variable_proxy.py rename to oneflow/compatible_single_client_python/test/ops/test_ssp_variable_proxy.py index 9722bf9d718e231c5cfdf4df39123355e9c633bb..98445b4840545a95dda22135ffb9ff44fc818410 100644 --- a/oneflow/python/test/ops/test_ssp_variable_proxy.py +++ b/oneflow/compatible_single_client_python/test/ops/test_ssp_variable_proxy.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp import os import unittest diff --git a/oneflow/python/test/ops/test_stack.py b/oneflow/compatible_single_client_python/test/ops/test_stack.py similarity index 97% rename from oneflow/python/test/ops/test_stack.py rename to oneflow/compatible_single_client_python/test/ops/test_stack.py index d76a9d63329cfc2e56bcd3d5f8db0cbf5724cdb3..31da3c8c68bdd5ef97fe8f29b0af9f4d208fbc03 100644 --- a/oneflow/python/test/ops/test_stack.py +++ b/oneflow/compatible_single_client_python/test/ops/test_stack.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_stateful_local_kernel.py b/oneflow/compatible_single_client_python/test/ops/test_stateful_local_kernel.py similarity index 97% rename from oneflow/python/test/ops/test_stateful_local_kernel.py rename to oneflow/compatible_single_client_python/test/ops/test_stateful_local_kernel.py index c0f9448c7eddef07cc4f93f5dbbf0a432d406de9..9a79c181ede3693aa675b09540e65cc56cc499c2 100644 --- a/oneflow/python/test/ops/test_stateful_local_kernel.py +++ b/oneflow/compatible_single_client_python/test/ops/test_stateful_local_kernel.py @@ -15,7 +15,7 @@ limitations under the License. """ import unittest import os -import oneflow as flow +from oneflow.compatible import single_client as flow @unittest.skipIf( diff --git a/oneflow/python/test/ops/test_summary.py b/oneflow/compatible_single_client_python/test/ops/test_summary.py similarity index 99% rename from oneflow/python/test/ops/test_summary.py rename to oneflow/compatible_single_client_python/test/ops/test_summary.py index 753ea5b3090e49bb98b36341e6ffbccc60e96f54..97fc21c184e5c3742999799df084cadc11e741fd 100644 --- a/oneflow/python/test/ops/test_summary.py +++ b/oneflow/compatible_single_client_python/test/ops/test_summary.py @@ -15,7 +15,7 @@ limitations under the License. """ import os import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from collections import OrderedDict import cv2 import time diff --git a/oneflow/python/test/ops/test_swish.py b/oneflow/compatible_single_client_python/test/ops/test_swish.py similarity index 97% rename from oneflow/python/test/ops/test_swish.py rename to oneflow/compatible_single_client_python/test/ops/test_swish.py index 0d2bcccbc91c68fa7864090b0eabeab986cc36c8..3803ffaaf93114d1465f69ad38e5691d33b9b4ab 100644 --- a/oneflow/python/test/ops/test_swish.py +++ b/oneflow/compatible_single_client_python/test/ops/test_swish.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
""" -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_sync_dynamic_resize.py b/oneflow/compatible_single_client_python/test/ops/test_sync_dynamic_resize.py similarity index 95% rename from oneflow/python/test/ops/test_sync_dynamic_resize.py rename to oneflow/compatible_single_client_python/test/ops/test_sync_dynamic_resize.py index ced4bc56cd186f7237d772b8db20e40208f9221d..77afa95c400959f230602c347adb0784e0ab2eb2 100644 --- a/oneflow/python/test/ops/test_sync_dynamic_resize.py +++ b/oneflow/compatible_single_client_python/test/ops/test_sync_dynamic_resize.py @@ -18,8 +18,8 @@ from collections import OrderedDict import os import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type diff --git a/oneflow/python/test/ops/test_tensor_buffer_ops.py b/oneflow/compatible_single_client_python/test/ops/test_tensor_buffer_ops.py similarity index 93% rename from oneflow/python/test/ops/test_tensor_buffer_ops.py rename to oneflow/compatible_single_client_python/test/ops/test_tensor_buffer_ops.py index 3405581f2217f4ef21fe30712f11cf9434ca8667..f4021026f155ee7265eff6f107ffade2f8f5999d 100644 --- a/oneflow/python/test/ops/test_tensor_buffer_ops.py +++ b/oneflow/compatible_single_client_python/test/ops/test_tensor_buffer_ops.py @@ -15,8 +15,8 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft def _test_tensor_buffer_convert(test_case): diff --git a/oneflow/python/test/ops/test_tensor_buffer_to_list_of_tensors.py b/oneflow/compatible_single_client_python/test/ops/test_tensor_buffer_to_list_of_tensors.py similarity index 97% rename from oneflow/python/test/ops/test_tensor_buffer_to_list_of_tensors.py rename to oneflow/compatible_single_client_python/test/ops/test_tensor_buffer_to_list_of_tensors.py index 7d7d6ebfd44c54007205655844abff3d6c9a767f..a87fd52fdde52e48966a69c6a23daa8b9703e947 100644 --- a/oneflow/python/test/ops/test_tensor_buffer_to_list_of_tensors.py +++ b/oneflow/compatible_single_client_python/test/ops/test_tensor_buffer_to_list_of_tensors.py @@ -17,7 +17,7 @@ import unittest import numpy as np from collections import OrderedDict -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import GenArgList, type_name_to_flow_type diff --git a/oneflow/python/test/ops/test_top_k.py b/oneflow/compatible_single_client_python/test/ops/test_top_k.py similarity index 91% rename from oneflow/python/test/ops/test_top_k.py rename to oneflow/compatible_single_client_python/test/ops/test_top_k.py index 0051b031b34e297a57e4db2f05ea45614d001669..1ab8b55c9aaca27968961e93c9b93f155ceb3cf6 100644 --- a/oneflow/python/test/ops/test_top_k.py +++ b/oneflow/compatible_single_client_python/test/ops/test_top_k.py @@ -17,13 +17,15 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgList, 
type_name_to_flow_type, type_name_to_np_type -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import os -from oneflow.python.ops.transpose_util import get_perm_when_transpose_axis_to_last_dim -from oneflow.python.ops.transpose_util import get_inversed_perm +from oneflow.compatible.single_client.python.ops.transpose_util import ( + get_perm_when_transpose_axis_to_last_dim, +) +from oneflow.compatible.single_client.python.ops.transpose_util import get_inversed_perm gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_transpose.py b/oneflow/compatible_single_client_python/test/ops/test_transpose.py similarity index 97% rename from oneflow/python/test/ops/test_transpose.py rename to oneflow/compatible_single_client_python/test/ops/test_transpose.py index 8b221f90891f99b9f29d24a1f0f3ed15b03907be..0403db47d07865b5708850b0f798197e5c088dee 100644 --- a/oneflow/python/test/ops/test_transpose.py +++ b/oneflow/compatible_single_client_python/test/ops/test_transpose.py @@ -18,8 +18,8 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp import tensorflow as tf import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_tril.py b/oneflow/compatible_single_client_python/test/ops/test_tril.py similarity index 97% rename from oneflow/python/test/ops/test_tril.py rename to oneflow/compatible_single_client_python/test/ops/test_tril.py index 6c96c1d32f0e003d8d48b53595d59b607e4a9341..ee52cc14c56922d548651e1e1cfe8fdd6ca06f6d 100644 --- a/oneflow/python/test/ops/test_tril.py +++ b/oneflow/compatible_single_client_python/test/ops/test_tril.py @@ -18,14 +18,14 @@ from collections import OrderedDict import os import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from test_util import ( GenArgDict, test_global_storage, type_name_to_flow_type, type_name_to_np_type, ) -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft def _test_tril_fw_bw(test_case, device, shape, type_name, diagonal, fill_value): diff --git a/oneflow/python/test/ops/test_two_node_boxing.py b/oneflow/compatible_single_client_python/test/ops/test_two_node_boxing.py similarity index 94% rename from oneflow/python/test/ops/test_two_node_boxing.py rename to oneflow/compatible_single_client_python/test/ops/test_two_node_boxing.py index 84a92c28d6dd16d87842764eeb3a4dc26fa0738a..60a692f9583d305f134df8c3b2862bdc8fd2cf94 100644 --- a/oneflow/python/test/ops/test_two_node_boxing.py +++ b/oneflow/compatible_single_client_python/test/ops/test_two_node_boxing.py @@ -15,8 +15,8 @@ limitations under the License. 
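
The hunks above and below all repeat one mechanical substitution: the single-client API moves from the top-level oneflow package to oneflow.compatible.single_client, while every local alias (flow, oft, otp, tp) is preserved so the test bodies themselves need no edits. A minimal sketch of the pattern (the add_job function below is hypothetical, shown only to illustrate that call sites keep compiling):

    import numpy as np

    # Old single-client imports:
    #   import oneflow as flow
    #   import oneflow.typing as oft
    # New imports after the move under oneflow.compatible:
    from oneflow.compatible import single_client as flow
    from oneflow.compatible.single_client import typing as oft

    # Call sites are untouched because the aliases are unchanged.
    @flow.global_function(type="predict")
    def add_job(x: oft.Numpy.Placeholder((2, 3))) -> oft.Numpy:
        return flow.math.add(x, x)

Deeper module paths are rewritten the same way, e.g. oneflow.python.ops.transpose_util becomes oneflow.compatible.single_client.python.ops.transpose_util in the test_top_k.py hunk above.
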
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from typing import Tuple import time diff --git a/oneflow/python/test/ops/test_two_stage_reduce.py b/oneflow/compatible_single_client_python/test/ops/test_two_stage_reduce.py similarity index 97% rename from oneflow/python/test/ops/test_two_stage_reduce.py rename to oneflow/compatible_single_client_python/test/ops/test_two_stage_reduce.py index 766887e590264d85bf52cf02126093877155dd4f..dbd83569a905e5edd25d64089bff418beabddd51 100644 --- a/oneflow/python/test/ops/test_two_stage_reduce.py +++ b/oneflow/compatible_single_client_python/test/ops/test_two_stage_reduce.py @@ -15,8 +15,8 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import os from collections import OrderedDict diff --git a/oneflow/python/test/ops/test_unary_elementwise_ops.py b/oneflow/compatible_single_client_python/test/ops/test_unary_elementwise_ops.py similarity index 99% rename from oneflow/python/test/ops/test_unary_elementwise_ops.py rename to oneflow/compatible_single_client_python/test/ops/test_unary_elementwise_ops.py index c6d65df234c7f4356a24f7211b7f6dc4742bbeee..084c6650bd5347a9c228d842aa0df353db271a94 100644 --- a/oneflow/python/test/ops/test_unary_elementwise_ops.py +++ b/oneflow/compatible_single_client_python/test/ops/test_unary_elementwise_ops.py @@ -15,9 +15,9 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow from scipy.special import erf, erfc, gammaln -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft import os diff --git a/oneflow/python/test/ops/test_unique.py b/oneflow/compatible_single_client_python/test/ops/test_unique.py similarity index 96% rename from oneflow/python/test/ops/test_unique.py rename to oneflow/compatible_single_client_python/test/ops/test_unique.py index d514c64b80653c7078ce2c6a63bda9e04a67fa7c..dc0b9e8d375ffd76328c9f73fe2b6ef2d1a2d422 100644 --- a/oneflow/python/test/ops/test_unique.py +++ b/oneflow/compatible_single_client_python/test/ops/test_unique.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. """ import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import unittest import os diff --git a/oneflow/python/test/ops/test_unpack_pack.py b/oneflow/compatible_single_client_python/test/ops/test_unpack_pack.py similarity index 92% rename from oneflow/python/test/ops/test_unpack_pack.py rename to oneflow/compatible_single_client_python/test/ops/test_unpack_pack.py index 8de6a2880e8a4040ba48e3ffb445873dc25a8a13..1ed17cb788836a2991d86a310e8b2d4577bd2f02 100644 --- a/oneflow/python/test/ops/test_unpack_pack.py +++ b/oneflow/compatible_single_client_python/test/ops/test_unpack_pack.py @@ -15,8 +15,8 @@ limitations under the License. 
""" import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft func_config = flow.FunctionConfig() diff --git a/oneflow/python/test/ops/test_unsorted_batch_segment_sum.py b/oneflow/compatible_single_client_python/test/ops/test_unsorted_batch_segment_sum.py similarity index 97% rename from oneflow/python/test/ops/test_unsorted_batch_segment_sum.py rename to oneflow/compatible_single_client_python/test/ops/test_unsorted_batch_segment_sum.py index dfe4e2eb027498b97bd9f04766687302b494a474..2e2436b7ed74258804905b35178768951ec7e3b5 100644 --- a/oneflow/python/test/ops/test_unsorted_batch_segment_sum.py +++ b/oneflow/compatible_single_client_python/test/ops/test_unsorted_batch_segment_sum.py @@ -17,8 +17,8 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import test_global_storage from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_unsorted_segment_sum.py b/oneflow/compatible_single_client_python/test/ops/test_unsorted_segment_sum.py similarity index 98% rename from oneflow/python/test/ops/test_unsorted_segment_sum.py rename to oneflow/compatible_single_client_python/test/ops/test_unsorted_segment_sum.py index 95cb7f25f6a4f226c45baa8e4085d16e75243df8..722a06f37f285fd6284187db81f1ba1a10bab0fa 100644 --- a/oneflow/python/test/ops/test_unsorted_segment_sum.py +++ b/oneflow/compatible_single_client_python/test/ops/test_unsorted_segment_sum.py @@ -17,8 +17,8 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type diff --git a/oneflow/python/test/ops/test_unsorted_segment_sum_fw_bw.py b/oneflow/compatible_single_client_python/test/ops/test_unsorted_segment_sum_fw_bw.py similarity index 97% rename from oneflow/python/test/ops/test_unsorted_segment_sum_fw_bw.py rename to oneflow/compatible_single_client_python/test/ops/test_unsorted_segment_sum_fw_bw.py index 4d556fede50c47a2732a85ae1dd05fcea4d6f66c..76761da594c8505317df9e8f86ab9ed8ac9950d8 100644 --- a/oneflow/python/test/ops/test_unsorted_segment_sum_fw_bw.py +++ b/oneflow/compatible_single_client_python/test/ops/test_unsorted_segment_sum_fw_bw.py @@ -17,10 +17,10 @@ import unittest from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf from test_util import GenArgList -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_unsorted_segment_sum_model_parallel.py b/oneflow/compatible_single_client_python/test/ops/test_unsorted_segment_sum_model_parallel.py similarity index 97% rename from oneflow/python/test/ops/test_unsorted_segment_sum_model_parallel.py rename to oneflow/compatible_single_client_python/test/ops/test_unsorted_segment_sum_model_parallel.py index ed84dc20dccb7b041bfa714526c78bd40c310361..cad4907905f8d4e76a3e2743d046860a183d5bee 100644 --- 
a/oneflow/python/test/ops/test_unsorted_segment_sum_model_parallel.py +++ b/oneflow/compatible_single_client_python/test/ops/test_unsorted_segment_sum_model_parallel.py @@ -18,8 +18,8 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft from test_util import GenArgList diff --git a/oneflow/python/test/ops/test_upsample.py b/oneflow/compatible_single_client_python/test/ops/test_upsample.py similarity index 99% rename from oneflow/python/test/ops/test_upsample.py rename to oneflow/compatible_single_client_python/test/ops/test_upsample.py index da2d5ddb60fda88561d742038fd50853fc12b659..c506414cd7a3af7b406dce82e74914754354f684 100644 --- a/oneflow/python/test/ops/test_upsample.py +++ b/oneflow/compatible_single_client_python/test/ops/test_upsample.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import test_global_storage from test_util import GenArgList, type_name_to_flow_type diff --git a/oneflow/python/test/ops/test_user_op_attr_auto_type.py b/oneflow/compatible_single_client_python/test/ops/test_user_op_attr_auto_type.py similarity index 93% rename from oneflow/python/test/ops/test_user_op_attr_auto_type.py rename to oneflow/compatible_single_client_python/test/ops/test_user_op_attr_auto_type.py index 826265a49f227c508abc6ecfa1323663223a45f5..86800057327208cb4080e4718031907f0ec1bc9d 100644 --- a/oneflow/python/test/ops/test_user_op_attr_auto_type.py +++ b/oneflow/compatible_single_client_python/test/ops/test_user_op_attr_auto_type.py @@ -15,8 +15,8 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft def _test_user_op_attr_auto_type(input, attr1, attr2): diff --git a/oneflow/python/test/ops/test_user_op_module.py b/oneflow/compatible_single_client_python/test/ops/test_user_op_module.py similarity index 96% rename from oneflow/python/test/ops/test_user_op_module.py rename to oneflow/compatible_single_client_python/test/ops/test_user_op_module.py index 2ba91c6ab2d9ba51a5bce961f7d46b998a739274..5d664b409a497ba60bab81d12eeba6a844a33262 100644 --- a/oneflow/python/test/ops/test_user_op_module.py +++ b/oneflow/compatible_single_client_python/test/ops/test_user_op_module.py @@ -14,8 +14,8 @@ See the License for the specific language governing permissions and limitations under the License. 
""" import unittest -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import numpy as np from typing import Tuple diff --git a/oneflow/python/test/ops/test_util.py b/oneflow/compatible_single_client_python/test/ops/test_util.py similarity index 97% rename from oneflow/python/test/ops/test_util.py rename to oneflow/compatible_single_client_python/test/ops/test_util.py index c0ec121f6eb840c6a750046347c6a2f8e30fe240..f916c4167c0ae326ad740585782b4699f9793c44 100644 --- a/oneflow/python/test/ops/test_util.py +++ b/oneflow/compatible_single_client_python/test/ops/test_util.py @@ -19,8 +19,8 @@ from collections import OrderedDict from collections.abc import Iterable import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft import test_global_storage diff --git a/oneflow/python/test/ops/test_watch.py b/oneflow/compatible_single_client_python/test/ops/test_watch.py similarity index 93% rename from oneflow/python/test/ops/test_watch.py rename to oneflow/compatible_single_client_python/test/ops/test_watch.py index d44111d41bc0ecb9e17370cf954f41c633c7e5c3..8c0a027ef144696035180e76d698e6caa7fcd0cb 100644 --- a/oneflow/python/test/ops/test_watch.py +++ b/oneflow/compatible_single_client_python/test/ops/test_watch.py @@ -15,8 +15,8 @@ limitations under the License. """ import unittest import numpy as np -import oneflow as flow -import oneflow.typing as oft +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as oft @flow.unittest.skip_unless_1n2d() diff --git a/oneflow/python/test/ops/test_watch_diff.py b/oneflow/compatible_single_client_python/test/ops/test_watch_diff.py similarity index 97% rename from oneflow/python/test/ops/test_watch_diff.py rename to oneflow/compatible_single_client_python/test/ops/test_watch_diff.py index 7efe95115b4349472e17755cbfb36b4a2112a6a1..1e3bcdae9cc4cdbe528a4ae80afd30abaea247b7 100644 --- a/oneflow/python/test/ops/test_watch_diff.py +++ b/oneflow/compatible_single_client_python/test/ops/test_watch_diff.py @@ -18,7 +18,7 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import test_global_storage from test_util import GenArgList, type_name_to_flow_type, type_name_to_np_type diff --git a/oneflow/python/test/ops/test_where.py b/oneflow/compatible_single_client_python/test/ops/test_where.py similarity index 99% rename from oneflow/python/test/ops/test_where.py rename to oneflow/compatible_single_client_python/test/ops/test_where.py index 5df206bcd84df4ad3ca3737fbffdfbde3ea352d3..a395c5279b7aa2a7287053b671ad8a27d996c1b4 100644 --- a/oneflow/python/test/ops/test_where.py +++ b/oneflow/compatible_single_client_python/test/ops/test_where.py @@ -15,13 +15,13 @@ limitations under the License. 
""" import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow import tensorflow as tf import os from collections import OrderedDict from test_util import GenArgDict -import oneflow.typing as oft +from oneflow.compatible.single_client import typing as oft gpus = tf.config.experimental.list_physical_devices("GPU") for gpu in gpus: diff --git a/oneflow/python/test/ops/test_zero_pad2d.py b/oneflow/compatible_single_client_python/test/ops/test_zero_pad2d.py similarity index 98% rename from oneflow/python/test/ops/test_zero_pad2d.py rename to oneflow/compatible_single_client_python/test/ops/test_zero_pad2d.py index 269484258ecc9ebcd580f077c656301966bd8244..f639759847cddb9a499a8faa8eacb6a989dfab06 100644 --- a/oneflow/python/test/ops/test_zero_pad2d.py +++ b/oneflow/compatible_single_client_python/test/ops/test_zero_pad2d.py @@ -18,8 +18,8 @@ import os from collections import OrderedDict import numpy as np -import oneflow as flow -import oneflow.typing as tp +from oneflow.compatible import single_client as flow +from oneflow.compatible.single_client import typing as tp from test_util import ( Args, GenArgDict, diff --git a/oneflow/python/test/ops/test_zeros.py b/oneflow/compatible_single_client_python/test/ops/test_zeros.py similarity index 96% rename from oneflow/python/test/ops/test_zeros.py rename to oneflow/compatible_single_client_python/test/ops/test_zeros.py index 2855445b53072b8bc5955351d70a0967f071622d..17d3088ffe278323755ba04546913d9a640574dc 100644 --- a/oneflow/python/test/ops/test_zeros.py +++ b/oneflow/compatible_single_client_python/test/ops/test_zeros.py @@ -13,9 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow import numpy as np -import oneflow.typing as tp +from oneflow.compatible.single_client import typing as tp from test_util import GenArgList import unittest from collections import OrderedDict diff --git a/oneflow/python/test/serving/alexnet.py b/oneflow/compatible_single_client_python/test/serving/alexnet.py similarity index 98% rename from oneflow/python/test/serving/alexnet.py rename to oneflow/compatible_single_client_python/test/serving/alexnet.py index b9a202cca57659d0b3b01188aee4a0b699a7810d..7e7c4bbebd10ae026d25234c227ddd56d6c4fe60 100644 --- a/oneflow/python/test/serving/alexnet.py +++ b/oneflow/compatible_single_client_python/test/serving/alexnet.py @@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow def _conv2d_layer( diff --git a/oneflow/python/test/serving/insightface_resnet100.py b/oneflow/compatible_single_client_python/test/serving/insightface_resnet100.py similarity index 98% rename from oneflow/python/test/serving/insightface_resnet100.py rename to oneflow/compatible_single_client_python/test/serving/insightface_resnet100.py index 9ffb8ee96bad77e2c6c2ef3df8b5a85b81ec33df..8b75bc6ae3ce144c6233ce1bcf5d508e86fdf002 100644 --- a/oneflow/python/test/serving/insightface_resnet100.py +++ b/oneflow/compatible_single_client_python/test/serving/insightface_resnet100.py @@ -13,8 +13,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow -import oneflow.core.operator.op_conf_pb2 as op_conf_util +from oneflow.compatible import single_client as flow +from oneflow.core.operator import op_conf_pb2 as op_conf_util def _get_initializer(): diff --git a/oneflow/python/test/serving/ofrecord_dataset.py b/oneflow/compatible_single_client_python/test/serving/ofrecord_dataset.py similarity index 99% rename from oneflow/python/test/serving/ofrecord_dataset.py rename to oneflow/compatible_single_client_python/test/serving/ofrecord_dataset.py index 437a7ec67e1682f2d70eca090c95fab435eb6bd2..ebf6f84a6e09793d54f3e0cc76f2cae417a43d43 100644 --- a/oneflow/python/test/serving/ofrecord_dataset.py +++ b/oneflow/compatible_single_client_python/test/serving/ofrecord_dataset.py @@ -18,7 +18,7 @@ import os import struct import cv2 import numpy as np -import oneflow.core.record.record_pb2 as record_pb +from oneflow.core.record import record_pb2 as record_pb class OFRecordDataset(object): diff --git a/oneflow/python/test/serving/resnet_model.py b/oneflow/compatible_single_client_python/test/serving/resnet_model.py similarity index 99% rename from oneflow/python/test/serving/resnet_model.py rename to oneflow/compatible_single_client_python/test/serving/resnet_model.py index 54848bc53b3d4f69fcf5c0353e29f5bcbb1e8196..edd91cf9060f4afb7bc95e949a312f923adf0fa0 100644 --- a/oneflow/python/test/serving/resnet_model.py +++ b/oneflow/compatible_single_client_python/test/serving/resnet_model.py @@ -14,7 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. """ -import oneflow as flow +from oneflow.compatible import single_client as flow BLOCK_COUNTS = [3, 4, 6, 3] BLOCK_FILTERS = [256, 512, 1024, 2048] diff --git a/oneflow/python/test/serving/style_model.py b/oneflow/compatible_single_client_python/test/serving/style_model.py similarity index 99% rename from oneflow/python/test/serving/style_model.py rename to oneflow/compatible_single_client_python/test/serving/style_model.py index dc544508d13713cbb82874479a6ae1186a8ec536..3c1f6253b51ecdf01f1f02a97fa037ca6bca0307 100644 --- a/oneflow/python/test/serving/style_model.py +++ b/oneflow/compatible_single_client_python/test/serving/style_model.py @@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
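
The serving tests above and below also normalize protobuf-related imports from the import a.b.c as x form to from a.b import c as x. The two spellings bind the same module object, so the rewrite is behavior-preserving; a small self-contained check (it assumes only google.protobuf, which these tests already depend on):

    # Both spellings resolve to the same entry in sys.modules.
    import google.protobuf.text_format as fmt_a
    from google.protobuf import text_format as fmt_b

    assert fmt_a is fmt_b  # identical module objects

The uniform from ... import ... as ... style is what an automated import rewriter typically emits; that this rewrite was tool-generated is an inference, not something the diff states.
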
""" -import oneflow as flow +from oneflow.compatible import single_client as flow def instance_norm(input, name_prefix, trainable=True): diff --git a/oneflow/python/test/serving/test_alexnet_save_and_load.py b/oneflow/compatible_single_client_python/test/serving/test_alexnet_save_and_load.py similarity index 97% rename from oneflow/python/test/serving/test_alexnet_save_and_load.py rename to oneflow/compatible_single_client_python/test/serving/test_alexnet_save_and_load.py index d538f9b49767734e6429acfcb1e3a1442ef2a605..4f5288e9b34baff210a6294129f9996b914f415d 100644 --- a/oneflow/python/test/serving/test_alexnet_save_and_load.py +++ b/oneflow/compatible_single_client_python/test/serving/test_alexnet_save_and_load.py @@ -17,10 +17,10 @@ import os import unittest import shutil import numpy as np -import google.protobuf.text_format as text_format +from google.protobuf import text_format as text_format -import oneflow as flow -import oneflow.core.serving.saved_model_pb2 as saved_model_pb +from oneflow.compatible import single_client as flow +from oneflow.core.serving import saved_model_pb2 as saved_model_pb from alexnet import load_data, alexnet from ofrecord_dataset import ImageNetRecordDataset diff --git a/oneflow/python/test/serving/test_insightface_save_and_load.py b/oneflow/compatible_single_client_python/test/serving/test_insightface_save_and_load.py similarity index 99% rename from oneflow/python/test/serving/test_insightface_save_and_load.py rename to oneflow/compatible_single_client_python/test/serving/test_insightface_save_and_load.py index 19572085b8f0802d74d8d7b2ac9d94cb91602947..5b34d3092edeea18bf38ffa6b91527bfb505c2b4 100644 --- a/oneflow/python/test/serving/test_insightface_save_and_load.py +++ b/oneflow/compatible_single_client_python/test/serving/test_insightface_save_and_load.py @@ -20,7 +20,7 @@ import numpy as np import shutil import unittest import argparse -import oneflow as flow +from oneflow.compatible import single_client as flow from insightface_resnet100 import Resnet100 from ofrecord_dataset import FaceEmoreRecordDataset diff --git a/oneflow/python/test/serving/test_resnet_save_and_load.py b/oneflow/compatible_single_client_python/test/serving/test_resnet_save_and_load.py similarity index 96% rename from oneflow/python/test/serving/test_resnet_save_and_load.py rename to oneflow/compatible_single_client_python/test/serving/test_resnet_save_and_load.py index c8ed522e1a26aeac4e0b857be34ebc4907d5816a..281a09d5dc563cad6493ff37968ba0e8527e1276 100644 --- a/oneflow/python/test/serving/test_resnet_save_and_load.py +++ b/oneflow/compatible_single_client_python/test/serving/test_resnet_save_and_load.py @@ -18,9 +18,9 @@ import os import numpy as np import shutil import unittest -import google.protobuf.text_format as text_format -import oneflow as flow -import oneflow.core.serving.saved_model_pb2 as saved_model_pb +from google.protobuf import text_format as text_format +from oneflow.compatible import single_client as flow +from oneflow.core.serving import saved_model_pb2 as saved_model_pb from resnet_model import resnet50 from ofrecord_dataset import ImageNetRecordDataset diff --git a/oneflow/python/test/serving/test_style_transfer_save_and_load.py b/oneflow/compatible_single_client_python/test/serving/test_style_transfer_save_and_load.py similarity index 98% rename from oneflow/python/test/serving/test_style_transfer_save_and_load.py rename to oneflow/compatible_single_client_python/test/serving/test_style_transfer_save_and_load.py index 
3b7b7b078fd18b2c99b10a5c748afffb09ea54b0..296a1b4b49deaaf452ca6d58e4ad2cb5641f09c9 100644 --- a/oneflow/python/test/serving/test_style_transfer_save_and_load.py +++ b/oneflow/compatible_single_client_python/test/serving/test_style_transfer_save_and_load.py @@ -20,7 +20,7 @@ import sys import os import argparse -import oneflow as flow +from oneflow.compatible import single_client as flow import style_model diff --git a/oneflow/python/test/xrt/test_add.py b/oneflow/compatible_single_client_python/test/xrt/test_add.py similarity index 98% rename from oneflow/python/test/xrt/test_add.py rename to oneflow/compatible_single_client_python/test/xrt/test_add.py index d9a7066fae3042752ecefae6fa46225e1c7df984..d474c6e4c1d5bd0b9c6eb239eb38fbd40acde77b 100644 --- a/oneflow/python/test/xrt/test_add.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_add.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_batch_norm.py b/oneflow/compatible_single_client_python/test/xrt/test_batch_norm.py similarity index 98% rename from oneflow/python/test/xrt/test_batch_norm.py rename to oneflow/compatible_single_client_python/test/xrt/test_batch_norm.py index 9513443e661b704ef7df3455fdc484a7038a56cc..c16bfad19416a43fef081a8fccf43d8f28cb1ca1 100644 --- a/oneflow/python/test/xrt/test_batch_norm.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_batch_norm.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_batch_norm_add.py b/oneflow/compatible_single_client_python/test/xrt/test_batch_norm_add.py similarity index 98% rename from oneflow/python/test/xrt/test_batch_norm_add.py rename to oneflow/compatible_single_client_python/test/xrt/test_batch_norm_add.py index e007fa4766653687f27a761e48847badc4f4b516..b6bbc2ec9f17fc7d37bbf32a3fb1e1cd34fbffca 100644 --- a/oneflow/python/test/xrt/test_batch_norm_add.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_batch_norm_add.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_bias_add.py b/oneflow/compatible_single_client_python/test/xrt/test_bias_add.py similarity index 98% rename from oneflow/python/test/xrt/test_bias_add.py rename to oneflow/compatible_single_client_python/test/xrt/test_bias_add.py index 6c6fb1c0aa1a23c581be6c4eb97de432d6f372b8..5184b54106be864070f3966afa358a13a63b22bf 100644 --- a/oneflow/python/test/xrt/test_bias_add.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_bias_add.py @@ -16,7 +16,7 @@ limitations under the License. 
import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_broadcast_op.py b/oneflow/compatible_single_client_python/test/xrt/test_broadcast_op.py similarity index 99% rename from oneflow/python/test/xrt/test_broadcast_op.py rename to oneflow/compatible_single_client_python/test/xrt/test_broadcast_op.py index fd807a6978e37e28c128a9fa1a9bf37840a619c8..8ed15157c4733dc5f83aed4885422e0f2a501c26 100644 --- a/oneflow/python/test/xrt/test_broadcast_op.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_broadcast_op.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_cast.py b/oneflow/compatible_single_client_python/test/xrt/test_cast.py similarity index 98% rename from oneflow/python/test/xrt/test_cast.py rename to oneflow/compatible_single_client_python/test/xrt/test_cast.py index 6b6372d51e699ac29a7bab03b0ab90299d662755..843f7126c3e857e9a73f5d18fc61e763743b8e5e 100644 --- a/oneflow/python/test/xrt/test_cast.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_cast.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_concat.py b/oneflow/compatible_single_client_python/test/xrt/test_concat.py similarity index 98% rename from oneflow/python/test/xrt/test_concat.py rename to oneflow/compatible_single_client_python/test/xrt/test_concat.py index 902f326118f837e22a8f00b2becf20a76d2a4646..742b2bea0e87a82427e6e983b299f473b1c74405 100644 --- a/oneflow/python/test/xrt/test_concat.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_concat.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_conv2d.py b/oneflow/compatible_single_client_python/test/xrt/test_conv2d.py similarity index 99% rename from oneflow/python/test/xrt/test_conv2d.py rename to oneflow/compatible_single_client_python/test/xrt/test_conv2d.py index 49ef70d9f4e3389a5e0ee2426c5fafce613ac672..ca18b7bbd02c53b2643de6bccae2b0ea47c51eae 100644 --- a/oneflow/python/test/xrt/test_conv2d.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_conv2d.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_gather.py b/oneflow/compatible_single_client_python/test/xrt/test_gather.py similarity index 98% rename from oneflow/python/test/xrt/test_gather.py rename to oneflow/compatible_single_client_python/test/xrt/test_gather.py index ddaef47ed60061019ef7627bf547e2301d18b100..4a64fd9726ed948c3dcbf780becfd134fe91e06a 100644 --- a/oneflow/python/test/xrt/test_gather.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_gather.py @@ -16,7 +16,7 @@ limitations under the License. 
import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_gelu.py b/oneflow/compatible_single_client_python/test/xrt/test_gelu.py similarity index 97% rename from oneflow/python/test/xrt/test_gelu.py rename to oneflow/compatible_single_client_python/test/xrt/test_gelu.py index f2647067f656171c0957ff8ad23a368317eb02c6..c71a0ae86d9515eb41413cef515f909ac80ac5b5 100644 --- a/oneflow/python/test/xrt/test_gelu.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_gelu.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_gelu_grad.py b/oneflow/compatible_single_client_python/test/xrt/test_gelu_grad.py similarity index 98% rename from oneflow/python/test/xrt/test_gelu_grad.py rename to oneflow/compatible_single_client_python/test/xrt/test_gelu_grad.py index a7d7458f661f55dc38a003a70677ee366bf842fc..e7d1710c4367f53d9ea251e0eeb37bc3ca962f83 100644 --- a/oneflow/python/test/xrt/test_gelu_grad.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_gelu_grad.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_identity.py b/oneflow/compatible_single_client_python/test/xrt/test_identity.py similarity index 98% rename from oneflow/python/test/xrt/test_identity.py rename to oneflow/compatible_single_client_python/test/xrt/test_identity.py index 57e74dfa40d49b9223c7312ff5cc7da0ecb29d5a..005948f5def94f70b347b44aeeda9b2215d73f9a 100644 --- a/oneflow/python/test/xrt/test_identity.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_identity.py @@ -16,7 +16,7 @@ limitations under the License. 
import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_layer_norm.py b/oneflow/compatible_single_client_python/test/xrt/test_layer_norm.py similarity index 98% rename from oneflow/python/test/xrt/test_layer_norm.py rename to oneflow/compatible_single_client_python/test/xrt/test_layer_norm.py index 48c7e2cddb61149953d8bb8dcc8f9eb1584bd298..069871ec459350bb035e802187b5b7bd7d5a468d 100644 --- a/oneflow/python/test/xrt/test_layer_norm.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_layer_norm.py @@ -17,7 +17,7 @@ import os import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_layer_norm_grad.py b/oneflow/compatible_single_client_python/test/xrt/test_layer_norm_grad.py similarity index 98% rename from oneflow/python/test/xrt/test_layer_norm_grad.py rename to oneflow/compatible_single_client_python/test/xrt/test_layer_norm_grad.py index 34ec391f61bfb692aca2eeef97253d2f00e8c3b3..fb4337f0792e3a6282186d7a9e59449cd6c0f704 100644 --- a/oneflow/python/test/xrt/test_layer_norm_grad.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_layer_norm_grad.py @@ -17,7 +17,7 @@ import os import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_layer_norm_param_grad.py b/oneflow/compatible_single_client_python/test/xrt/test_layer_norm_param_grad.py similarity index 98% rename from oneflow/python/test/xrt/test_layer_norm_param_grad.py rename to oneflow/compatible_single_client_python/test/xrt/test_layer_norm_param_grad.py index f5db34a5457cb4f72e68f82fb24d24aa1db4fa30..99ba87656f6614e406f0c57af53cdb912d51fc5f 100644 --- a/oneflow/python/test/xrt/test_layer_norm_param_grad.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_layer_norm_param_grad.py @@ -17,7 +17,7 @@ import os import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_leaky_relu.py b/oneflow/compatible_single_client_python/test/xrt/test_leaky_relu.py similarity index 98% rename from oneflow/python/test/xrt/test_leaky_relu.py rename to oneflow/compatible_single_client_python/test/xrt/test_leaky_relu.py index 4bb01e14d653fb9169784f3b4e5cd43be6ff9940..1f54f41ef2d7b86039afcc6963e1cbab629b6717 100644 --- a/oneflow/python/test/xrt/test_leaky_relu.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_leaky_relu.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_matmul.py b/oneflow/compatible_single_client_python/test/xrt/test_matmul.py similarity index 98% rename from oneflow/python/test/xrt/test_matmul.py rename to oneflow/compatible_single_client_python/test/xrt/test_matmul.py index ec69467662b99e3ddf837dba991dd7ce927cbc71..1a83c3d86f3c3add376eeb490053ca19b5fdc626 100644 --- a/oneflow/python/test/xrt/test_matmul.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_matmul.py @@ -16,7 +16,7 @@ limitations under the License. 
import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_matmul_add.py b/oneflow/compatible_single_client_python/test/xrt/test_matmul_add.py similarity index 99% rename from oneflow/python/test/xrt/test_matmul_add.py rename to oneflow/compatible_single_client_python/test/xrt/test_matmul_add.py index a34edd67364af7fae8bb3d921455e9e5c7924474..d8c084d37af61a1a775de3043cf8f965e4d544a3 100644 --- a/oneflow/python/test/xrt/test_matmul_add.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_matmul_add.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_multiply.py b/oneflow/compatible_single_client_python/test/xrt/test_multiply.py similarity index 98% rename from oneflow/python/test/xrt/test_multiply.py rename to oneflow/compatible_single_client_python/test/xrt/test_multiply.py index 919041b52d361f4cae88eaf9e6fe397902e89950..75e5bfdd99ef61c3ce3ccdb5c1132523c38b23a6 100644 --- a/oneflow/python/test/xrt/test_multiply.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_multiply.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_online_int8.py b/oneflow/compatible_single_client_python/test/xrt/test_online_int8.py similarity index 98% rename from oneflow/python/test/xrt/test_online_int8.py rename to oneflow/compatible_single_client_python/test/xrt/test_online_int8.py index ff02d1f2ed384dc88c623e246126c442ba9d2ca2..8d4475daa93bc80212fd70fb17994c9225e3b257 100644 --- a/oneflow/python/test/xrt/test_online_int8.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_online_int8.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_pooling.py b/oneflow/compatible_single_client_python/test/xrt/test_pooling.py similarity index 99% rename from oneflow/python/test/xrt/test_pooling.py rename to oneflow/compatible_single_client_python/test/xrt/test_pooling.py index d9de50ec4bdc7ba36f7aa309b17227f6df6cb629..91aca8283748a5b6cec68ec12d13d7624e1c4fe3 100644 --- a/oneflow/python/test/xrt/test_pooling.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_pooling.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_reduce_op.py b/oneflow/compatible_single_client_python/test/xrt/test_reduce_op.py similarity index 98% rename from oneflow/python/test/xrt/test_reduce_op.py rename to oneflow/compatible_single_client_python/test/xrt/test_reduce_op.py index 4f8e7e12e09bc1765d23aa578bc203ddd79ec401..f537552b135c6ed6a87cd6d909ee74a43d0f7ef2 100644 --- a/oneflow/python/test/xrt/test_reduce_op.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_reduce_op.py @@ -16,7 +16,7 @@ limitations under the License. 
import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_relu.py b/oneflow/compatible_single_client_python/test/xrt/test_relu.py similarity index 98% rename from oneflow/python/test/xrt/test_relu.py rename to oneflow/compatible_single_client_python/test/xrt/test_relu.py index 3d6f7098eea617d2c509460bfb390a690d0e2db4..909d220420b749d35e4eb5bc8d5abdc8fee8bad1 100644 --- a/oneflow/python/test/xrt/test_relu.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_relu.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_reshape.py b/oneflow/compatible_single_client_python/test/xrt/test_reshape.py similarity index 98% rename from oneflow/python/test/xrt/test_reshape.py rename to oneflow/compatible_single_client_python/test/xrt/test_reshape.py index 35e7bd99ed8e5df7bd5335b24554bbc6e053a8f5..ca0e4b9f9dc2fafe0168bf211c56dfa1864d6c0d 100644 --- a/oneflow/python/test/xrt/test_reshape.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_reshape.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_reshape_like.py b/oneflow/compatible_single_client_python/test/xrt/test_reshape_like.py similarity index 98% rename from oneflow/python/test/xrt/test_reshape_like.py rename to oneflow/compatible_single_client_python/test/xrt/test_reshape_like.py index 69ab7073d5973e638ed093ff2f8ba1a52ccd8225..28f9052bdd1263edcd4a520fe6958649cf1a16b6 100644 --- a/oneflow/python/test/xrt/test_reshape_like.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_reshape_like.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_rsqrt.py b/oneflow/compatible_single_client_python/test/xrt/test_rsqrt.py similarity index 97% rename from oneflow/python/test/xrt/test_rsqrt.py rename to oneflow/compatible_single_client_python/test/xrt/test_rsqrt.py index ecb460fc54b6115b4d44b61e373df6b1a8d64ca2..e45c5eb9625356ccd1f26c23141fb6626686e418 100644 --- a/oneflow/python/test/xrt/test_rsqrt.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_rsqrt.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_scalar_op.py b/oneflow/compatible_single_client_python/test/xrt/test_scalar_op.py similarity index 98% rename from oneflow/python/test/xrt/test_scalar_op.py rename to oneflow/compatible_single_client_python/test/xrt/test_scalar_op.py index d7ec09ac86a9ae1b01258870bf5399cd606fad77..4ad1e33e68be17241fa3e6a9853ca8ddd297dcc8 100644 --- a/oneflow/python/test/xrt/test_scalar_op.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_scalar_op.py @@ -16,7 +16,7 @@ limitations under the License. 
import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_sigmoid.py b/oneflow/compatible_single_client_python/test/xrt/test_sigmoid.py similarity index 98% rename from oneflow/python/test/xrt/test_sigmoid.py rename to oneflow/compatible_single_client_python/test/xrt/test_sigmoid.py index 2f23bc0c4b5fca354964d4aa826b733ba9914317..d168a0c4ddb351f6cc6386045f19d09b07019f80 100644 --- a/oneflow/python/test/xrt/test_sigmoid.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_sigmoid.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_softmax.py b/oneflow/compatible_single_client_python/test/xrt/test_softmax.py similarity index 98% rename from oneflow/python/test/xrt/test_softmax.py rename to oneflow/compatible_single_client_python/test/xrt/test_softmax.py index a2279a92fe62dc827f319032b82ee1e164bf5563..f384618f6ab810bcd0c92559b40c63f016044649 100644 --- a/oneflow/python/test/xrt/test_softmax.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_softmax.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_softmax_grad.py b/oneflow/compatible_single_client_python/test/xrt/test_softmax_grad.py similarity index 98% rename from oneflow/python/test/xrt/test_softmax_grad.py rename to oneflow/compatible_single_client_python/test/xrt/test_softmax_grad.py index 604ec9b2b5b21f6a3c69548bbe41aef1c422e7e6..f7340855246935b040a51ded058fc0215f55b050 100644 --- a/oneflow/python/test/xrt/test_softmax_grad.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_softmax_grad.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_square_sum.py b/oneflow/compatible_single_client_python/test/xrt/test_square_sum.py similarity index 97% rename from oneflow/python/test/xrt/test_square_sum.py rename to oneflow/compatible_single_client_python/test/xrt/test_square_sum.py index b9ef3cf8cccec2ebe41626b949f3418b9c26eba4..0ff2ba110782ef7bb27276760986fd78e5e4f7fb 100644 --- a/oneflow/python/test/xrt/test_square_sum.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_square_sum.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_tanh.py b/oneflow/compatible_single_client_python/test/xrt/test_tanh.py similarity index 98% rename from oneflow/python/test/xrt/test_tanh.py rename to oneflow/compatible_single_client_python/test/xrt/test_tanh.py index 8331b995ab53378eafba624d08398fe02f890567..3a45a891e3c36d3d7762639ef1d24e50ed4d0859 100644 --- a/oneflow/python/test/xrt/test_tanh.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_tanh.py @@ -16,7 +16,7 @@ limitations under the License. 
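A port this uniform is scriptable, which is worth keeping in mind when reviewing the remaining renames. A hypothetical helper, not part of this PR, that reproduces what these hunks record (move each file, rewrite the one import) might look like:

    # Illustrative bulk-migration helper; the paths and the single rewrite
    # rule mirror the hunks in this patch, but the script itself does not
    # appear anywhere in the PR.
    import re
    from pathlib import Path

    SRC = Path("oneflow/python/test/xrt")
    DST = Path("oneflow/compatible_single_client_python/test/xrt")
    OLD_IMPORT = re.compile(r"^import oneflow as flow$", re.MULTILINE)
    NEW_IMPORT = "from oneflow.compatible import single_client as flow"

    def migrate():
        DST.mkdir(parents=True, exist_ok=True)
        for src in SRC.glob("*.py"):
            text = src.read_text()
            (DST / src.name).write_text(OLD_IMPORT.sub(NEW_IMPORT, text))
            src.unlink()  # completes the rename

    if __name__ == "__main__":
        migrate()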
import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/python/test/xrt/test_transpose.py b/oneflow/compatible_single_client_python/test/xrt/test_transpose.py similarity index 98% rename from oneflow/python/test/xrt/test_transpose.py rename to oneflow/compatible_single_client_python/test/xrt/test_transpose.py index b1cc825aad70fbcf2bdf51d4e38f7497b3d28ac9..1a0cdb0badb0779d319e1066c0bea3e766f87d56 100644 --- a/oneflow/python/test/xrt/test_transpose.py +++ b/oneflow/compatible_single_client_python/test/xrt/test_transpose.py @@ -16,7 +16,7 @@ limitations under the License. import unittest import numpy as np -import oneflow as flow +from oneflow.compatible import single_client as flow config = flow.function_config() diff --git a/oneflow/compatible_single_client_python/vm/__init__.py b/oneflow/compatible_single_client_python/vm/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/oneflow/core/framework/session_util.cpp b/oneflow/core/framework/session_util.cpp index 05ca35f1bf73d8f535d82c83f59a35d3b466b7da..cde057cec630dc28c9a156150a33b2410a474c30 100644 --- a/oneflow/core/framework/session_util.cpp +++ b/oneflow/core/framework/session_util.cpp @@ -31,14 +31,14 @@ HashMap<int64_t, std::shared_ptr<Session>>* GlobalId2SessionMap() { return &id2session_map; } -int64_t* DefaultSessionId() { - static int64_t default_sess_id; +std::vector<int64_t>* RegisteredSessionIds() { + static std::vector<int64_t> default_sess_id; return &default_sess_id; } Maybe<void> SetDefaultSessionId(int64_t val) { - int64_t* id = DefaultSessionId(); - *id = val; + std::vector<int64_t>* ids = RegisteredSessionIds(); + ids->push_back(val); return Maybe<void>::Ok(); } @@ -78,7 +78,12 @@ Maybe<bool> Session::IsConsistentStrategyEnabled() const { && !is_mirrored_strategy_enabled_stack_->back(); } -Maybe<int64_t> GetDefaultSessionId() { return *(DefaultSessionId()); } +Maybe<int64_t> GetDefaultSessionId() { + std::unique_lock<std::mutex> lock(*GlobalSessionUtilMutex()); + const auto& registered_ids = *(RegisteredSessionIds()); + CHECK_GT_OR_RETURN(registered_ids.size(), 0); + return registered_ids.back(); +} Maybe<Session> RegsiterSession(int64_t id) { std::shared_ptr<Session> sess = std::make_shared<Session>(id); @@ -91,8 +96,10 @@ Maybe<Session> RegsiterSession(int64_t id) { } Maybe<Session> GetDefaultSession() { - int64_t default_sess_id = JUST(GetDefaultSessionId()); std::unique_lock<std::mutex> lock(*GlobalSessionUtilMutex()); + const auto& registered_ids = *(RegisteredSessionIds()); + CHECK_GT_OR_RETURN(registered_ids.size(), 0); + int64_t default_sess_id = registered_ids.back(); auto* id2session_map = GlobalId2SessionMap(); CHECK_OR_RETURN(id2session_map->find(default_sess_id) != id2session_map->end()); return id2session_map->at(default_sess_id); @@ -103,6 +110,12 @@ Maybe<void> ClearSessionById(int64_t id) { auto* id2session_map = GlobalId2SessionMap(); CHECK_OR_RETURN(id2session_map->find(id) != id2session_map->end()); id2session_map->erase(id); + auto* sess_ids = RegisteredSessionIds(); + size_t i = 0; + for (; i < sess_ids->size(); ++i) { + if (sess_ids->at(i) == id) { break; } + } + sess_ids->erase(sess_ids->begin() + i); return Maybe<void>::Ok(); } diff --git a/oneflow/core/framework/symbol_id_cache.h b/oneflow/core/framework/symbol_id_cache.h index
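The session_util.cpp hunk above replaces the single mutable default-session id with an ordered registry: SetDefaultSessionId now pushes onto a vector, the default session is the most recently registered id still present, and ClearSessionById erases the id from both the map and the vector, so closing a session presumably falls back to the one registered before it. A Python sketch of the same bookkeeping (asserts stand in for the CHECK_*_OR_RETURN macros, and the GlobalSessionUtilMutex locking is omitted):

    # Sketch of the C++ session bookkeeping above, in Python for brevity.
    _registered_session_ids = []  # ordered by registration time
    _id2session = {}

    def register_session(sess_id, session):
        _id2session[sess_id] = session
        _registered_session_ids.append(sess_id)  # SetDefaultSessionId

    def get_default_session():
        assert _registered_session_ids, "no session registered"
        return _id2session[_registered_session_ids[-1]]  # back()

    def clear_session_by_id(sess_id):
        assert sess_id in _id2session
        del _id2session[sess_id]
        _registered_session_ids.remove(sess_id)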
10ba0d51ac6453e7e1413db1c13e00afebaf07d9..6dfb3d251707fdc211189e19af9bcb1f954b3f45 100644 --- a/oneflow/core/framework/symbol_id_cache.h +++ b/oneflow/core/framework/symbol_id_cache.h @@ -63,6 +63,11 @@ class IdCache final { return symbol_id; } + void ClearAll() { + std::unique_lock<std::mutex> lock(mutex_); + symbol_data2id_.clear(); + } + private: mutable std::mutex mutex_; std::map<T, int64_t> symbol_data2id_; diff --git a/oneflow/core/job/env_global_objects_scope.cpp b/oneflow/core/job/env_global_objects_scope.cpp index c6f11608ac90b9ee176e3c0e4fa453374f9d7b2b..42e79b945b56b842f45f93ae699def667986d5f2 100644 --- a/oneflow/core/job/env_global_objects_scope.cpp +++ b/oneflow/core/job/env_global_objects_scope.cpp @@ -35,6 +35,10 @@ limitations under the License. #include "oneflow/core/rpc/include/manager.h" #include "oneflow/core/transport/transport.h" #include "oneflow/core/device/node_device_descriptor_manager.h" +#include "oneflow/core/vm/symbol_storage.h" +#include "oneflow/core/framework/symbol_id_cache.h" +#include "oneflow/core/operator/op_node_signature.cfg.h" +#include "oneflow/core/operator/op_conf.cfg.h" namespace oneflow { @@ -85,6 +89,25 @@ Resource GetDefaultResource(const EnvProto& env_proto) { return resource; } +void ClearAllSymbolAndIdCache() { + Global<symbol::Storage<StringSymbol>>::Get()->ClearAll(); + Global<symbol::IdCache<std::string>>::Get()->ClearAll(); + + Global<symbol::Storage<Scope>>::Get()->ClearAll(); + Global<symbol::IdCache<cfg::ScopeProto>>::Get()->ClearAll(); + + Global<symbol::Storage<JobDesc>>::Get()->ClearAll(); + Global<symbol::IdCache<cfg::JobConfigProto>>::Get()->ClearAll(); + + Global<symbol::Storage<ParallelDesc>>::Get()->ClearAll(); + Global<symbol::IdCache<cfg::ParallelConf>>::Get()->ClearAll(); + + Global<symbol::Storage<OperatorConfSymbol>>::Get()->ClearAll(); + Global<symbol::IdCache<cfg::OperatorConf>>::Get()->ClearAll(); + Global<symbol::Storage<OpNodeSignatureDesc>>::Get()->ClearAll(); + Global<symbol::IdCache<cfg::OpNodeSignature>>::Get()->ClearAll(); +} + } // namespace Maybe<void> EnvGlobalObjectsScope::Init(const EnvProto& env_proto) { @@ -177,6 +200,7 @@ EnvGlobalObjectsScope::~EnvGlobalObjectsScope() { #ifdef WITH_CUDA Global<cudaDeviceProp>::Delete(); #endif + ClearAllSymbolAndIdCache(); google::ShutdownGoogleLogging(); } diff --git a/oneflow/init.py b/oneflow/init.py index d67b6b7301eb40b95d849b0d279ac303bf67736b..58303f5f03b78b0d29271291e3c9c57d88619f4d 100644 --- a/oneflow/init.py +++ b/oneflow/init.py @@ -70,28 +70,27 @@ from oneflow.python.framework.session_util import Session from oneflow.python.framework.multi_client_session import MultiClientSession -if env_util.HasAllMultiClientEnvVars(): - oneflow._oneflow_internal.SetIsMultiClient(True) - env_util.api_env_init() - session_ctx.OpenDefaultSession( - MultiClientSession(oneflow._oneflow_internal.NewSessionId()) - ) - scope_util.InitScopeStack() -else: - oneflow._oneflow_internal.SetIsMultiClient(False) - env_util.init_default_physical_env() - session_ctx.OpenDefaultSession(Session(oneflow._oneflow_internal.NewSessionId())) +if not env_util.HasAllMultiClientEnvVars(): + env_util.SetDefaultMultiClientEnvVars() +oneflow._oneflow_internal.SetIsMultiClient(True) +env_util.api_env_init() +session_ctx.OpenDefaultSession( + MultiClientSession(oneflow._oneflow_internal.NewSessionId()) +) +scope_util.InitScopeStack() +oneflow._oneflow_internal.EnableEagerEnvironment(True) del env_util # capture oneflow methods so that they can be still accessed after `del oneflow` -def 
_SyncOnMasterFn(is_multi_client, get_rank, sync): - def SyncOnMaster(): - if is_multi_client or get_rank() == 0: - sync() +def _SyncOnMasterFn(): + import oneflow - return SyncOnMaster + if oneflow.python.framework.distribute.is_multi_client(): + oneflow._oneflow_internal.eager.multi_client.Sync() + elif oneflow.python.framework.distribute.get_rank() == 0: + oneflow._oneflow_internal.eager.single_client.Sync() atexit.register(oneflow._oneflow_internal.SetShuttingDown) @@ -100,33 +99,9 @@ atexit.register(oneflow.python.framework.session_context.TryCloseDefaultSession) # Global<ResourceDesc, ForSession>::Get(), used by vm in background thread, # will be set to nullptr by TryCloseDefaultSession, # so sync vm in advance to avoid data race -atexit.register( - _SyncOnMasterFn( - oneflow.python.framework.distribute.is_multi_client(), - oneflow.python.framework.distribute.get_rank, - oneflow._oneflow_internal.eager.multi_client.Sync - if oneflow.python.framework.distribute.is_multi_client() - else oneflow._oneflow_internal.eager.single_client.Sync, - ) -) -del atexit - -if not oneflow._oneflow_internal.IsMultiClient(): - import sys - - __original_exit__ = sys.exit +atexit.register(_SyncOnMasterFn) - def custom_exit(returncode): - if returncode != 0: - import oneflow - - oneflow._oneflow_internal.MasterSendAbort() - __original_exit__(returncode) - - sys.exit = custom_exit - - del custom_exit - del sys +del atexit del absolute_import del oneflow diff --git a/oneflow/python/eager/boxing_util.py b/oneflow/python/eager/boxing_util.py index 928cb7d2b20a5f338d9db9b74312c0c89dd36fb9..185a97cb5b9aae605f0cecb74e3f6a1159f272dc 100644 --- a/oneflow/python/eager/boxing_util.py +++ b/oneflow/python/eager/boxing_util.py @@ -1006,4 +1006,3 @@ class BoxingUtil(oneflow._oneflow_internal.deprecated.ForeignBoxingUtil): _global_boxing_util = BoxingUtil() -oneflow._oneflow_internal.deprecated.RegisterBoxingUtilOnlyOnce(_global_boxing_util) diff --git a/oneflow/python/framework/env_util.py b/oneflow/python/framework/env_util.py index 254757257823f01a35b26e72dd5e40b026dc1762..870755c3022a301c3a621ecca176651cb6dfb980 100644 --- a/oneflow/python/framework/env_util.py +++ b/oneflow/python/framework/env_util.py @@ -400,6 +400,14 @@ def HasAllMultiClientEnvVars(): return has_all_env_vars +def SetDefaultMultiClientEnvVars(): + os.environ["MASTER_ADDR"] = "127.0.0.1" + os.environ["MASTER_PORT"] = str(_FindFreePort()) + os.environ["WORLD_SIZE"] = "1" + os.environ["RANK"] = "0" + os.environ["LOCAL_RANK"] = "0" + + def _UpdateDefaultEnvProtoByMultiClientEnvVars(env_proto): assert HasAllMultiClientEnvVars() diff --git a/oneflow/python/framework/register_python_callback.py b/oneflow/python/framework/register_python_callback.py index a897204ed4f70cc26b597a3ed5a77ca07778843a..1b77b17054f7b38c5e40760c20196ba689b6cf6e 100644 --- a/oneflow/python/framework/register_python_callback.py +++ b/oneflow/python/framework/register_python_callback.py @@ -20,6 +20,3 @@ import oneflow.python.eager.interpreter_callback as interpreter_callback import oneflow._oneflow_internal python_callback.interpreter_callback = interpreter_callback -oneflow._oneflow_internal.RegisterForeignCallbackOnlyOnce( - python_callback.global_python_callback -) diff --git a/oneflow/python/framework/session_util.py b/oneflow/python/framework/session_util.py index 93e30e1acf70b44591a11c4a91d69db86e6148dd..55a4b96e7831b00bcdd41e09c1c24afee40c214b 100644 --- a/oneflow/python/framework/session_util.py +++ b/oneflow/python/framework/session_util.py @@ -230,11 +230,9 @@ class 
Session(object): self.ForceReleaseEagerBlobs() oneflow._oneflow_internal.StopLazyGlobalSession() oneflow._oneflow_internal.DestroyLazyGlobalSession() - self.status_ = SessionStatus.CLOSED self.resource_ = None if self.eager_config_proto_ctx_: del self.eager_config_proto_ctx_ - oneflow._oneflow_internal.ClearSessionById(self.id) def AddJob(self, function_desc): assert self.status_ is SessionStatus.OPEN @@ -428,6 +426,9 @@ class Session(object): self.cond_var_.notify() self.cond_var_.release() + def __del__(self): + oneflow._oneflow_internal.ClearSessionById(self.id) + @oneflow_export("find_or_create_module") def api_find_or_create_module( diff --git a/oneflow/python/framework/watcher.py b/oneflow/python/framework/watcher.py index f1a7a67d53f435dcbc71d61058a61f0f1792a14d..fc766a2979a74cc648684f073eb5599b5dbb31b3 100644 --- a/oneflow/python/framework/watcher.py +++ b/oneflow/python/framework/watcher.py @@ -56,4 +56,3 @@ def _WatcherHandler(handler_uuid, of_blob_ptr): # static lifetime _global_watcher = _Watcher() -oneflow._oneflow_internal.RegisterWatcherOnlyOnce(_global_watcher) diff --git a/oneflow/python/ops/util/custom_op_module.py b/oneflow/python/ops/util/custom_op_module.py index 46c7da15a25651b0a0902f1c089bb68265d5811b..a079c384decb9ed20e983f7ee295a7ae53933f09 100644 --- a/oneflow/python/ops/util/custom_op_module.py +++ b/oneflow/python/ops/util/custom_op_module.py @@ -81,7 +81,6 @@ class PythonKernelRegistry(object): _python_kernel_reg = PythonKernelRegistry() -oneflow._oneflow_internal.RegisterPyKernels(_python_kernel_reg.kernels_) @oneflow_export("experimental.custom_op_module") diff --git a/oneflow/python/test/customized/ccrelu.py b/oneflow/python/test/customized/ccrelu.py deleted file mode 100644 index 6b6ff8c4d1e20e86d3fc3ae010acc8400999f0cf..0000000000000000000000000000000000000000 --- a/oneflow/python/test/customized/ccrelu.py +++ /dev/null @@ -1,48 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import numpy as np -import oneflow as flow - -flow.config.gpu_device_num(1) - -func_config = flow.FunctionConfig() -func_config.default_distribute_strategy(flow.scope.consistent_view()) -func_config.default_data_type(flow.float) - - -def ccrelu(x, name): - return ( - flow.user_op_builder(name) - .Op("ccrelu") - .Input("in", [x]) - .Output("out") - .Build() - .InferAndTryRun() - .RemoteBlobList()[0] - ) - - -@flow.global_function(func_config) -def ReluJob(x=flow.FixedTensorDef((10, 2))): - return ccrelu(x, "my_cc_relu_op") - - -index = [-2, -1, 0, 1, 2] -data = [] -for i in index: - data.append(np.ones((10, 2,), dtype=np.float32) * i) -for x in data: - print(ReluJob(x).get()) diff --git a/oneflow/python/test/customized/new_data_pre.py b/oneflow/python/test/customized/new_data_pre.py deleted file mode 100644 index 44d806a4eddc2bb1c84c1c201cc7a9f006ac998b..0000000000000000000000000000000000000000 --- a/oneflow/python/test/customized/new_data_pre.py +++ /dev/null @@ -1,125 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. 
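Stepping back to the init.py rewrite above: OneFlow now always boots as a multi-client process, and when the launcher did not export the usual distributed-environment variables, SetDefaultMultiClientEnvVars from the env_util.py hunk fills in a single-process default. The variable names and values below are taken from that hunk; the socket-based port probe is an illustrative stand-in for the private _FindFreePort helper, whose body is not shown in this patch:

    # Sketch of the single-process fallback this patch configures.
    import os
    import socket

    def find_free_port():  # stand-in for env_util._FindFreePort
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.bind(("127.0.0.1", 0))
            return s.getsockname()[1]

    def set_default_multi_client_env_vars():
        os.environ["MASTER_ADDR"] = "127.0.0.1"
        os.environ["MASTER_PORT"] = str(find_free_port())
        os.environ["WORLD_SIZE"] = "1"
        os.environ["RANK"] = "0"
        os.environ["LOCAL_RANK"] = "0"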
All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import numpy as np -import oneflow as flow - -# flow.enable_eager_execution(True) -flow.config.gpu_device_num(1) - -func_config = flow.FunctionConfig() -func_config.default_logical_view(flow.scope.consistent_view()) -func_config.default_data_type(flow.float) - -data_dir = "/dataset/imagenet_16_same_pics/ofrecord" - - -@flow.global_function(func_config) -def DataLoaderJob(): - batch_size = 8 - seed = 0 - rgb_mean = [123.68, 116.779, 103.939] - rgb_std = [58.393, 57.12, 57.375] - - # ofrecord = flow.data.ofrecord_loader(data_dir, batch_size=batch_size) - ofrecord = flow.data.ofrecord_reader( - data_dir, batch_size=batch_size, random_shuffle=True - ) - image = flow.data.OFRecordImageDecoderRandomCrop( - ofrecord, - "encoded", - seed=seed, - color_space="RGB", - name="of_record_iamge_decoder_random_crop", - ) - label = flow.data.OFRecordRawDecoder( - ofrecord, "class/label", shape=(), dtype=flow.int32 - ) - rsz = flow.image.Resize(image, resize_x=224, resize_y=224, color_space="RGB") - print(rsz.shape) - print(label.shape) - - rng = flow.random.CoinFlip(batch_size=batch_size, seed=seed, name="coin_flip") - normal = flow.image.CropMirrorNormalize( - rsz, - mirror_blob=rng, - color_space="RGB", - mean=rgb_mean, - std=rgb_std, - output_dtype=flow.float, - ) - print(normal.shape) - return rsz, normal, label, rng - - -@flow.global_function(func_config) -def DataLoaderEvalJob(): - batch_size = 8 - rgb_mean = [123.68, 116.779, 103.939] - rgb_std = [58.393, 57.12, 57.375] - - # ofrecord = flow.data.ofrecord_loader( - # data_dir, - # batch_size=batch_size, - # part_name_suffix_length=5, - # data_part_num=1, - # shuffle=False, - # ) - ofrecord = flow.data.ofrecord_reader( - data_dir, - batch_size=batch_size, - part_name_suffix_length=5, - data_part_num=1, - random_shuffle=False, - ) - image = flow.data.OFRecordImageDecoder(ofrecord, "encoded", color_space="RGB") - label = flow.data.OFRecordRawDecoder( - ofrecord, "class/label", shape=(), dtype=flow.int32 - ) - rsz = flow.image.Resize(image, resize_shorter=256, color_space="RGB") - - normal = flow.image.CropMirrorNormalize( - rsz, - color_space="RGB", - crop_h=224, - crop_w=224, - crop_pos_y=0.5, - crop_pos_x=0.5, - mean=rgb_mean, - std=rgb_std, - output_dtype=flow.float, - ) - return normal, label - - -if __name__ == "__main__": - rsz, normal, label, rng = DataLoaderJob().get() - print("resized image: ", rsz) - print("normalized image output: ", normal) - print("label: ", label) - print("mirror:", rng) - np.save("output/oneflow_train_data_0.npy", normal.numpy()) - - rsz, normal, label, rng = DataLoaderJob().get() - print("resized image: ", rsz) - print("normalized image output: ", normal) - print("label: ", label) - print("mirror:", rng) - np.save("output/oneflow_train_data_1.npy", normal.numpy()) - - normal, label = DataLoaderEvalJob().get() - print("normalized image output: ", normal) - print("label: ", label) - np.save("output/oneflow_eval_data_0.npy", normal.numpy()) diff --git 
a/oneflow/python/test/customized/test_reshape.py b/oneflow/python/test/customized/test_reshape.py deleted file mode 100644 index 21544d93f01d195e7ca579f7d237271099e0ad6b..0000000000000000000000000000000000000000 --- a/oneflow/python/test/customized/test_reshape.py +++ /dev/null @@ -1,49 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -import numpy as np -import oneflow as flow - -flow.config.gpu_device_num(1) - -func_config = flow.FunctionConfig() -func_config.default_distribute_strategy(flow.scope.consistent_view()) -func_config.default_data_type(flow.float) - - -def test_reshape(x, shape, name): - return ( - flow.user_op_builder(name) - .Op("TestReshape") - .Input("in", [x]) - .Output("out") - .Attr("shape", shape) - .Build() - .InferAndTryRun() - .RemoteBlobList()[0] - ) - - -@flow.global_function(func_config) -def ReshapeJob(x=flow.FixedTensorDef((10, 2))): - return test_reshape(x, [5, 4], "xx_test_reshape") - - -index = [2.22, -1, 0, 1.1, 2] -data = [] -for i in index: - data.append(np.ones((10, 2,), dtype=np.float32) * i) -for x in data: - print(ReshapeJob(x).get()) diff --git a/oneflow/python/test/customized/test_source.py b/oneflow/python/test/customized/test_source.py deleted file mode 100644 index c8275ebbebeb3f03f4e2442dcd04b73ef0618273..0000000000000000000000000000000000000000 --- a/oneflow/python/test/customized/test_source.py +++ /dev/null @@ -1,44 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
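The oneflow/python/test/customized scripts deleted in this stretch were ad-hoc, print-based smoke tests rather than unittest cases, which is presumably why they are dropped outright instead of being ported. For reference, the user_op_builder pattern they exercised is distilled below from the deleted test_reshape.py, adjusted to the compatibility import this patch introduces; it only runs where the TestReshape custom op is actually registered, so treat it as a pattern sketch:

    from oneflow.compatible import single_client as flow

    def test_reshape(x, shape, name):
        # Builds and runs a registered custom op ("TestReshape") by name.
        return (
            flow.user_op_builder(name)
            .Op("TestReshape")
            .Input("in", [x])
            .Output("out")
            .Attr("shape", shape)
            .Build()
            .InferAndTryRun()
            .RemoteBlobList()[0]
        )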
-""" -import numpy as np -import oneflow as flow - -flow.config.gpu_device_num(1) - -func_config = flow.FunctionConfig() -func_config.default_distribute_strategy(flow.scope.consistent_view()) -func_config.default_data_type(flow.float) - - -def my_test_source(name): - with flow.scope.placement("cpu", "0:0"): - return ( - flow.user_op_builder(name) - .Op("TestSource") - .Output("out") - .Build() - .InferAndTryRun() - .RemoteBlobList()[0] - ) - - -@flow.global_function(func_config) -def TestSourceJob(): - return my_test_source("my_test_source") - - -print(TestSourceJob().get()) -# 0, 1, 2, 3, 4 diff --git a/oneflow/python/test/modules/test_abs.py b/oneflow/python/test/modules/test_abs.py index 580dca8a1ebb72c832d13bc6827e9c40466634bd..990022fd3f46686270b116e01780e9cb8ef14201 100644 --- a/oneflow/python/test/modules/test_abs.py +++ b/oneflow/python/test/modules/test_abs.py @@ -59,10 +59,6 @@ def _test_abs_tensor_function_backward(test_case, device): test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestAbs(flow.unittest.TestCase): def test_cosh(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_acos.py b/oneflow/python/test/modules/test_acos.py index c1668f2a4eee30f343d60fc0b07ed6a8551ac2cf..64a1298a98def25fa64636d55d5286a0f3fd0aef 100644 --- a/oneflow/python/test/modules/test_acos.py +++ b/oneflow/python/test/modules/test_acos.py @@ -39,10 +39,6 @@ def _test_acos_impl(test_case, shape, device): ) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestAcos(flow.unittest.TestCase): def test_acos(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_acosh.py b/oneflow/python/test/modules/test_acosh.py new file mode 100644 index 0000000000000000000000000000000000000000..54f8e02ce7c55b32817f42e5d28034d83cadeacc --- /dev/null +++ b/oneflow/python/test/modules/test_acosh.py @@ -0,0 +1,54 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+""" +import unittest +from collections import OrderedDict + +import numpy as np + +import oneflow.experimental as flow +from test_util import GenArgList + + +def _test_acosh_impl(test_case, shape, device): + np_input = np.random.rand(*shape) + 2.0 + of_input = flow.Tensor( + np_input, dtype=flow.float32, device=flow.device(device), requires_grad=True + ) + of_out = flow.acosh(of_input) + np_out = np.arccosh(np_input) + test_case.assertTrue( + np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4, equal_nan=True) + ) + + of_out = of_out.sum() + of_out.backward() + np_grad = 1.0 / np.sqrt(np.square(np_input) - 1) + test_case.assertTrue( + np.allclose(of_input.grad.numpy(), np_grad, 1e-4, 1e-4, equal_nan=True) + ) + + +class TestAcosh(flow.unittest.TestCase): + def test_acosh(test_case): + arg_dict = OrderedDict() + arg_dict["shape"] = [(2, 3), (2, 3, 4), (2, 4, 5, 6)] + arg_dict["device"] = ["cpu", "cuda"] + for arg in GenArgList(arg_dict): + _test_acosh_impl(test_case, *arg) + + +if __name__ == "__main__": + unittest.main() diff --git a/oneflow/python/test/modules/test_activation.py b/oneflow/python/test/modules/test_activation.py index 2b9b1c6107dc6a1f129c931be8f5e8f7c7a23f67..d62a8278312cd91ed03a67861c4e351d37253371 100644 --- a/oneflow/python/test/modules/test_activation.py +++ b/oneflow/python/test/modules/test_activation.py @@ -53,10 +53,6 @@ def _test_relu_impl(test_case, shape, device): test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out > 0, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestReLUModule(flow.unittest.TestCase): def test_relu(test_case): arg_dict = OrderedDict() @@ -95,10 +91,6 @@ def _test_relu6_impl(test_case, shape, device): ) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestReLU6Module(flow.unittest.TestCase): def test_relu6(test_case): arg_dict = OrderedDict() @@ -149,10 +141,6 @@ def _test_tanh_function_impl(test_case, shape, device): ) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestTanh(flow.unittest.TestCase): def test_tanh(test_case): arg_dict = OrderedDict() @@ -202,10 +190,6 @@ def _test_elu_function_impl(test_case, shape, device): test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestELUModule(flow.unittest.TestCase): def test_elu(test_case): arg_dict = OrderedDict() @@ -246,10 +230,6 @@ def _test_gelu_impl(test_case, device): test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestGelu(flow.unittest.TestCase): def test_gelu(test_case): arg_dict = OrderedDict() @@ -322,10 +302,6 @@ def _test_sigmoid_backward(test_case, device): test_case.assertTrue(np.allclose(x.grad.numpy(), x_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestSigmoid(flow.unittest.TestCase): def test_sigmoid(test_case): arg_dict = OrderedDict() @@ -429,10 +405,6 @@ def _test_softmax_backward_1_dim(test_case, device): test_case.assertTrue(np.allclose(a.grad.numpy(), a_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in 
lazy mode", -) class TestSoftmax(flow.unittest.TestCase): def test_softmax(test_case): arg_dict = OrderedDict() @@ -467,10 +439,6 @@ def _test_hardsigmoid_impl(test_case, shape, device): ) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestHardsigmoidModule(flow.unittest.TestCase): def test_hardsigmoid(test_case): arg_dict = OrderedDict() @@ -613,10 +581,6 @@ def _test_logsoftmax_backward(test_case, device): test_case.assertTrue(np.allclose(x.grad.numpy(), x_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestLogSoftmax(flow.unittest.TestCase): def test_log_softmax(test_case): arg_dict = OrderedDict() @@ -650,10 +614,6 @@ def _test_logsigmoid(test_case, device): test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestLogSigmoidModule(flow.unittest.TestCase): def test_logsigmoid(test_case): arg_dict = OrderedDict() @@ -710,10 +670,6 @@ def _test_softplus_backward(test_case, device): test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestSoftplusModule(flow.unittest.TestCase): def test_softplus(test_case): arg_dict = OrderedDict() @@ -755,10 +711,6 @@ def _test_hardswish_impl(test_case, shape, device): test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestHardswishModule(flow.unittest.TestCase): def test_hardswish(test_case): arg_dict = OrderedDict() @@ -798,10 +750,6 @@ def _test_hardtanh_impl(test_case, shape, device): ) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestHardtanhModule(flow.unittest.TestCase): def test_hardtanh(test_case): arg_dict = OrderedDict() @@ -827,10 +775,6 @@ def _test_leakyrelu_impl(test_case, shape, device): test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestLeakyReLUModule(flow.unittest.TestCase): def test_leaky_relu(test_case): arg_dict = OrderedDict() @@ -872,10 +816,6 @@ def _test_mish_backward(test_case, shape, device): test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestMishModule(flow.unittest.TestCase): def test_mish(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_adaptive_pool.py b/oneflow/python/test/modules/test_adaptive_pool.py index 61ba768331bf886812116e1bf4a9c41df9f822c7..1447ef1a2475047589dafa3dcf4121180496d1a2 100644 --- a/oneflow/python/test/modules/test_adaptive_pool.py +++ b/oneflow/python/test/modules/test_adaptive_pool.py @@ -188,10 +188,6 @@ def _test_adaptive_avgpool2d_hw_backward(test_case, device): test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestAdaptiveAvgPool2d(flow.unittest.TestCase): def test_adaptive_avgpool2d(test_case): arg_dict = 
OrderedDict() diff --git a/oneflow/python/test/modules/test_add.py b/oneflow/python/test/modules/test_add.py index 953e0e24f88b690398348faacab6d2c0c8636369..b37fd4cc7f1375ed7515137c40153f5327c39cb3 100644 --- a/oneflow/python/test/modules/test_add.py +++ b/oneflow/python/test/modules/test_add.py @@ -152,10 +152,6 @@ def _test_inplace_add(test_case, shape, device): test_case.assertTrue(np.allclose(of_x.grad.numpy(), np.ones(shape), 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestAddModule(flow.unittest.TestCase): def test_add(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_addmm.py b/oneflow/python/test/modules/test_addmm.py index 80bd57ed89f2871b21f6550cdb841719ddf9850b..28278adf295f97dad91e0647ff70c86efb2b6435 100644 --- a/oneflow/python/test/modules/test_addmm.py +++ b/oneflow/python/test/modules/test_addmm.py @@ -51,10 +51,6 @@ def _test_addmm_backward(test_case, shape, alpha, beta, device): ) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestAddmm(flow.unittest.TestCase): def test_addmm(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_arange.py b/oneflow/python/test/modules/test_arange.py index 1bd28edc8144be445f49ea7f6c3d31b75c087f0a..441805c6db5c3f39b6f12fd3c3020ac93650c120 100644 --- a/oneflow/python/test/modules/test_arange.py +++ b/oneflow/python/test/modules/test_arange.py @@ -49,10 +49,6 @@ def _test_arange_backward(test_case, device): test_case.assertTrue(np.allclose(x.grad.numpy(), np.ones(13), 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestArange(flow.unittest.TestCase): def test_transpose(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_argmax.py b/oneflow/python/test/modules/test_argmax.py index 029b24b43d92fe9bbd4f29cfdf06f38722318fcd..d266ad0f4d36ac5ba15112488018bb45ea9b5171 100644 --- a/oneflow/python/test/modules/test_argmax.py +++ b/oneflow/python/test/modules/test_argmax.py @@ -75,10 +75,6 @@ def _test_argmax_dim_equal_none(test_case, device): test_case.assertTrue(np.array_equal(of_out.numpy().flatten(), np_out.flatten())) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestArgmax(flow.unittest.TestCase): def test_argmax(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_argsort.py b/oneflow/python/test/modules/test_argsort.py index 333f77dffd8839fb75e463adc130e1958b26757b..2e8c51b46ca730ba080275045385e327db2a1009 100644 --- a/oneflow/python/test/modules/test_argsort.py +++ b/oneflow/python/test/modules/test_argsort.py @@ -47,10 +47,6 @@ def _test_tensor_argsort(test_case, data_shape, axis, descending, data_type, dev test_case.assertTrue(np.array_equal(of_out.numpy().flatten(), np_out.flatten())) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestArgsort(flow.unittest.TestCase): def test_argsort(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_argwhere.py b/oneflow/python/test/modules/test_argwhere.py index d192138be21ba967d14de868ffff290d29974f01..1dcd4dd43b86003a4d0d4fd67153d82160f02602 100644 --- a/oneflow/python/test/modules/test_argwhere.py +++ b/oneflow/python/test/modules/test_argwhere.py @@ -31,10 +31,6 @@ 
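Every module test in this run of hunks funnels a _test_*_impl function through GenArgList, OneFlow's test_util helper that expands an OrderedDict of parameter lists into the cartesian product of argument tuples. If you are reading these hunks without test_util at hand, an equivalent sketch (the real helper's exact return type may differ):

    # Illustrative stand-in for test_util.GenArgList.
    import itertools
    from collections import OrderedDict

    def gen_arg_list(arg_dict):
        # Cartesian product of the value lists, in key order.
        yield from itertools.product(*arg_dict.values())

    arg_dict = OrderedDict()
    arg_dict["shape"] = [(2, 3), (2, 3, 4)]
    arg_dict["device"] = ["cpu", "cuda"]
    assert list(gen_arg_list(arg_dict)) == [
        ((2, 3), "cpu"), ((2, 3), "cuda"),
        ((2, 3, 4), "cpu"), ((2, 3, 4), "cuda"),
    ]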
def _test_argwhere(test_case, shape, device): test_case.assertTrue(np.array_equal(of_out.numpy().shape, np_out.shape)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestArgwhere(flow.unittest.TestCase): def test_argwhere(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_atan.py b/oneflow/python/test/modules/test_atan.py index f1ea5c791ee7eb7f302a4e1773732b669eebec2f..1f824a0593b80e30208e6d3812aa6bd165c36828 100644 --- a/oneflow/python/test/modules/test_atan.py +++ b/oneflow/python/test/modules/test_atan.py @@ -64,10 +64,6 @@ def _test_arctan(test_case, shape, device): ) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestAtan(flow.unittest.TestCase): def test_atan(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_atan2.py b/oneflow/python/test/modules/test_atan2.py new file mode 100644 index 0000000000000000000000000000000000000000..466989031718067e06108c6fc8a4bf6132e57712 --- /dev/null +++ b/oneflow/python/test/modules/test_atan2.py @@ -0,0 +1,135 @@ +""" +Copyright 2020 The OneFlow Authors. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +""" +import unittest +from collections import OrderedDict + +import numpy as np + +from test_util import GenArgList +import oneflow.experimental as flow +from automated_test_util import * + + +def _test_atan2_forward(test_case, shape, scalar, device): + np_input_x = 10 * np.random.rand(*shape) + np_input_y = 10 * np.random.randn(*shape) + of_input_x = flow.Tensor(np_input_x, dtype=flow.float32, device=flow.device(device)) + of_input_y = flow.Tensor(np_input_y, dtype=flow.float32, device=flow.device(device)) + of_out = flow.atan2(of_input_x, of_input_y) + np_out = np.arctan2(np_input_x, np_input_y) + test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5)) + + +def _test_atan2_backward(test_case, device): + np_input_x = np.random.rand(2, 3) + np_input_y = np.random.rand(2, 3) + + np_y_grad = -1 * np_input_x / (np_input_x * np_input_x + np_input_y * np_input_y) + np_x_grad = np_input_y / (np_input_x * np_input_x + np_input_y * np_input_y) + + def test_x_y_grad(): + of_input_x = flow.Tensor( + np_input_x, + dtype=flow.float32, + device=flow.device(device), + requires_grad=True, + ) + of_input_y = flow.Tensor( + np_input_y, + dtype=flow.float32, + device=flow.device(device), + requires_grad=True, + ) + of_out = flow.atan2(of_input_x, of_input_y) + of_out_sum = of_out.sum() + of_out_sum.backward() + test_case.assertTrue( + np.allclose(of_input_x.grad.numpy(), np_x_grad, 1e-4, 1e-4) + ) + test_case.assertTrue( + np.allclose(of_input_y.grad.numpy(), np_y_grad, 1e-4, 1e-4) + ) + + def test_x_grad(): + of_input_x = flow.Tensor( + np_input_x, + dtype=flow.float32, + device=flow.device(device), + requires_grad=True, + ) + of_input_y = flow.Tensor( + np_input_y, dtype=flow.float32, device=flow.device(device) + ) + of_out = flow.atan2(of_input_x, 
of_input_y) + of_out_sum = of_out.sum() + of_out_sum.backward() + test_case.assertTrue( + np.allclose(of_input_x.grad.numpy(), np_x_grad, 1e-4, 1e-4) + ) + + def test_y_grad(): + of_input_x = flow.Tensor( + np_input_x, dtype=flow.float32, device=flow.device(device) + ) + of_input_y = flow.Tensor( + np_input_y, + dtype=flow.float32, + device=flow.device(device), + requires_grad=True, + ) + of_out = flow.atan2(of_input_x, of_input_y) + of_out_sum = of_out.sum() + of_out_sum.backward() + test_case.assertTrue( + np.allclose(of_input_y.grad.numpy(), np_y_grad, 1e-4, 1e-4) + ) + + test_x_y_grad() + test_x_grad() + test_y_grad() + + +class TestAtan2(flow.unittest.TestCase): + def test_atan2_forward(test_case): + arg_dict = OrderedDict() + arg_dict["shape"] = [(2,), (2, 3), (2, 3, 4), (2, 3, 4, 5)] + arg_dict["scalar"] = [2.1, 0.8] + arg_dict["device"] = ["cpu", "cuda"] + for arg in GenArgList(arg_dict): + _test_atan2_forward(test_case, *arg) + + def test_atan2_backward(test_case): + arg_dict = OrderedDict() + arg_dict["device"] = ["cpu", "cuda"] + for arg in GenArgList(arg_dict): + _test_atan2_backward(test_case, *arg) + + def test_flow_atan2_with_random_data(test_case): + for device in ["cpu", "cuda"]: + test_flow_against_pytorch( + test_case, + "atan2", + extra_annotations={"other": flow.Tensor}, + extra_generators={ + "input": random_tensor(ndim=1, dim1=1), + "other": random_tensor(ndim=1, dim1=1), + }, + device=device, + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/oneflow/python/test/modules/test_atanh.py b/oneflow/python/test/modules/test_atanh.py index 1ab9ac15282bb3329fb985e3d6d430f2e1695928..21a669e39487a4858889158917c8c01c165d7499 100644 --- a/oneflow/python/test/modules/test_atanh.py +++ b/oneflow/python/test/modules/test_atanh.py @@ -62,10 +62,6 @@ def _test_arctanh_impl(test_case, shape, device): ) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestAtanh(flow.unittest.TestCase): def test_atanh(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_autograd.py b/oneflow/python/test/modules/test_autograd.py index b7d7f1757046e548985d2d23f4e411ae389958d8..8812b9fdea5c4c9cbe68ac63374f761d51e2ff2a 100644 --- a/oneflow/python/test/modules/test_autograd.py +++ b/oneflow/python/test/modules/test_autograd.py @@ -80,10 +80,6 @@ def _test_autograd_grad(test_case, shape, device): # TODO(wyg): create_graph -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestAutograd(flow.unittest.TestCase): def test_autograd_interface(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_avgpool.py b/oneflow/python/test/modules/test_avgpool.py index 0d7acdf097057fffbf08e03e407a9d135303498f..275c891a476ceb2b71edff2396946db1d693f386 100644 --- a/oneflow/python/test/modules/test_avgpool.py +++ b/oneflow/python/test/modules/test_avgpool.py @@ -592,10 +592,6 @@ def _test_avgpool3d_special_kernel_size_backward(test_case, device): test_case.assertTrue(np.allclose(x.grad.numpy(), numpy_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestPoolingModule(flow.unittest.TestCase): def test_avgpool3d(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_avgpool2d.py b/oneflow/python/test/modules/test_avgpool2d.py index 
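The gradients hard-coded in test_atan2.py above follow from the partial derivatives d/dx atan2(x, y) = y/(x^2 + y^2) and d/dy atan2(x, y) = -x/(x^2 + y^2). A finite-difference spot check with plain numpy, shifting the inputs away from the origin where those partials blow up:

    # Numerical check of the atan2 partials asserted in the test above.
    import numpy as np

    x = np.random.rand(3) + 0.5
    y = np.random.rand(3) + 0.5
    eps = 1e-6
    dx = (np.arctan2(x + eps, y) - np.arctan2(x - eps, y)) / (2 * eps)
    dy = (np.arctan2(x, y + eps) - np.arctan2(x, y - eps)) / (2 * eps)
    assert np.allclose(dx, y / (x * x + y * y), atol=1e-4)
    assert np.allclose(dy, -x / (x * x + y * y), atol=1e-4)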
a63527499a03a20b6366dd0c645f3a299ef0cd60..bf160e50f800e5a68ea299879482eeb85e4b26a7 100644 --- a/oneflow/python/test/modules/test_avgpool2d.py +++ b/oneflow/python/test/modules/test_avgpool2d.py @@ -412,10 +412,6 @@ g_samples = [ ] -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestModule(flow.unittest.TestCase): def test_AvgPool2d(test_case): global g_samples diff --git a/oneflow/python/test/modules/test_batchnorm.py b/oneflow/python/test/modules/test_batchnorm.py index 4ca86422579ff8b429d04475aca786645e292b6a..a58650fdf33a36fa5563e0e64678f67ab9e550e4 100644 --- a/oneflow/python/test/modules/test_batchnorm.py +++ b/oneflow/python/test/modules/test_batchnorm.py @@ -483,10 +483,6 @@ def _test_batchnorm2d_backward(test_case, device): ) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestBatchNorm(flow.unittest.TestCase): def test_batchnorm(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_bce_loss.py b/oneflow/python/test/modules/test_bce_loss.py index fdca59bf0a2246d320d5d5f1cf79db201a47798b..e741bf8c37d639cfe04900f785abe34e4dcbd701 100644 --- a/oneflow/python/test/modules/test_bce_loss.py +++ b/oneflow/python/test/modules/test_bce_loss.py @@ -145,10 +145,6 @@ def _test_bceloss_impl(test_case, device, reduction): test_case.assertTrue(np.allclose(input_none.grad.numpy(), np_grad_none, 1e-4, 1e-4)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestBCELossModule(flow.unittest.TestCase): def test_bceloss(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_bcewithlogitsloss.py b/oneflow/python/test/modules/test_bcewithlogitsloss.py index 9166b7a0917927eec251ee5cf67cde0c2fbcf6b7..6bc78457dd68f675293bc5b0c1fe925f2772d9aa 100644 --- a/oneflow/python/test/modules/test_bcewithlogitsloss.py +++ b/oneflow/python/test/modules/test_bcewithlogitsloss.py @@ -103,10 +103,6 @@ def _test_bcewithlogitsloss_impl(test_case, device, shape, reduction): test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-4, 1e-4)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestBCEWithLogitsLossModule(flow.unittest.TestCase): def test_bcewithlogitsloss(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_bernoulli.py b/oneflow/python/test/modules/test_bernoulli.py index 30eed8270d2e1dd75ee34ad6455633894a729580..39e531a06493ae9750cf02efb61b6931864302c3 100644 --- a/oneflow/python/test/modules/test_bernoulli.py +++ b/oneflow/python/test/modules/test_bernoulli.py @@ -40,10 +40,6 @@ def _test_bernoulli_with_generator(test_case, shape): test_case.assertTrue(np.allclose(y_1.numpy(), y_2.numpy())) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestBernoulli(flow.unittest.TestCase): def test_bernoulli(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_bmm.py b/oneflow/python/test/modules/test_bmm.py index 222afd4e2e33b3587cf6621205e123a1ed251f29..1afcf9dab8e44c4fd03592355f7b3a76f5c608f2 100644 --- a/oneflow/python/test/modules/test_bmm.py +++ b/oneflow/python/test/modules/test_bmm.py @@ -85,10 +85,6 @@ def _test_bmm_backward(test_case, device): ) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy 
mode", -) class TestModule(flow.unittest.TestCase): def test_bmm(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_broadcast_like.py b/oneflow/python/test/modules/test_broadcast_like.py index 2d46e7b1f92503faec014c3809308a144143b070..9e3d539f59f35e7f42f9fd7e472943c00d67ce41 100644 --- a/oneflow/python/test/modules/test_broadcast_like.py +++ b/oneflow/python/test/modules/test_broadcast_like.py @@ -90,10 +90,6 @@ def _test_broadcast_like_backward(test_case, device): test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestBroadCastLike(flow.unittest.TestCase): def test_broadcast_like(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_cast.py b/oneflow/python/test/modules/test_cast.py index 9081b2b30f57d84eaaab5c710851824208843e02..9c5e4208f8b654efd2ed94cf12816e86b3a37ab2 100644 --- a/oneflow/python/test/modules/test_cast.py +++ b/oneflow/python/test/modules/test_cast.py @@ -50,10 +50,6 @@ def _test_cast_backward(test_case, device, shape): test_case.assertTrue(np.array_equal(x.grad.numpy(), np.ones(shape=shape))) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestCast(flow.unittest.TestCase): def test_cast(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_ceil.py b/oneflow/python/test/modules/test_ceil.py index fb57066a5924db58b5fa1bccf4af17408cd9ce8f..765e2d63a61991996b92c9b4d47c9512588cd2d5 100644 --- a/oneflow/python/test/modules/test_ceil.py +++ b/oneflow/python/test/modules/test_ceil.py @@ -34,10 +34,6 @@ def _test_ceil_impl(test_case, device, shape): test_case.assertTrue(np.allclose(x.grad.numpy(), np.zeros(shape), 1e-4, 1e-4)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestCeilModule(flow.unittest.TestCase): def test_ceil(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_chunk.py b/oneflow/python/test/modules/test_chunk.py index dad7758db2bbbd06c03af2fa3d492104b46fdc45..1c7d11bf3370a26e6db9d3303b97cac014a412c2 100644 --- a/oneflow/python/test/modules/test_chunk.py +++ b/oneflow/python/test/modules/test_chunk.py @@ -185,10 +185,6 @@ def _test_chunk_backward(test_case, device): test_case.assertTrue(np.array_equal(input.grad.numpy(), np_grad)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestChunk(flow.unittest.TestCase): def test_chunk(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_concat.py b/oneflow/python/test/modules/test_concat.py index 65e6f9c7a06f57c0ed511e4c9b4d78edec0484e1..78cc61c547bd93d1ecad4f42816d871773738b6e 100644 --- a/oneflow/python/test/modules/test_concat.py +++ b/oneflow/python/test/modules/test_concat.py @@ -120,10 +120,6 @@ def _test_concat_grad_and_no_grad(test_case, device): ) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestModule(flow.unittest.TestCase): def test_concat(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_constant.py b/oneflow/python/test/modules/test_constant.py index 1359f9f08cd177ce6dd7bde0380a449435a2ca08..88de42cd763bdfe84aa0602990de4bb3f8d537c5 100644 --- a/oneflow/python/test/modules/test_constant.py +++ 
b/oneflow/python/test/modules/test_constant.py @@ -109,10 +109,6 @@ def _test_new_ones(test_case, device, shape): test_case.assertTrue(np.array_equal(np.ones_like(x.numpy()), x.grad.numpy())) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestConstantModule(flow.unittest.TestCase): def test_cast(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_constantpad2d.py b/oneflow/python/test/modules/test_constantpad2d.py index 4f351ab8a92a2925b06b9ecb4541459558ff578c..573cdc42a6e69d130442454e3dda3f9f722f58f7 100644 --- a/oneflow/python/test/modules/test_constantpad2d.py +++ b/oneflow/python/test/modules/test_constantpad2d.py @@ -97,10 +97,6 @@ def _test_ConstantPad2d(test_case, shape, padding, value, device): test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestConstantPad2dModule(flow.unittest.TestCase): def test_ConstantPad2d(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_conv.py b/oneflow/python/test/modules/test_conv.py index f341d6eeb6ba9cfd3aab0557f1199ee37bb2f371..c65613b37564f52706cb9510ae8fc367b88760bb 100644 --- a/oneflow/python/test/modules/test_conv.py +++ b/oneflow/python/test/modules/test_conv.py @@ -1591,10 +1591,6 @@ def _test_conv2d_large_out_channel(test_case, device): test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-6, 1e-6)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestConv2d(flow.unittest.TestCase): def test_conv2d_default_init(test_case): for device in ["cuda", "cpu"]: diff --git a/oneflow/python/test/modules/test_conv1d.py b/oneflow/python/test/modules/test_conv1d.py index 6eeed068ce121671e1abf6dc562f2b7a8ad78e80..33954f86027b7fbdb6acb488cb31da92aaea4819 100644 --- a/oneflow/python/test/modules/test_conv1d.py +++ b/oneflow/python/test/modules/test_conv1d.py @@ -416,10 +416,6 @@ def _test_conv1d_compilcate(test_case, device): test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-6, 1e-6)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestConv1d(flow.unittest.TestCase): def test_conv1d(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_crossentropyloss.py b/oneflow/python/test/modules/test_crossentropyloss.py index 1847b5f94ade00c1a14424ec05a66a32a7b850e4..fddc608e0f78d2f79a9b46511521397d45604445 100644 --- a/oneflow/python/test/modules/test_crossentropyloss.py +++ b/oneflow/python/test/modules/test_crossentropyloss.py @@ -96,10 +96,6 @@ g_test_samples = [ ] -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestCrossEntropyLossModule(flow.unittest.TestCase): def test_CrossEntropyLoss(test_case): global g_test_samples diff --git a/oneflow/python/test/modules/test_crossentropyloss_grad.py b/oneflow/python/test/modules/test_crossentropyloss_grad.py index f8de2af123faea7ec36d05ba9e822ce941c48226..b1c2d8aff1ec6519e0b131f0a76900422df21294 100644 --- a/oneflow/python/test/modules/test_crossentropyloss_grad.py +++ b/oneflow/python/test/modules/test_crossentropyloss_grad.py @@ -212,10 +212,6 @@ def _test_CrossEntropyLoss_mean_with_random_input_with_ignore_index(test_case, d ) -@unittest.skipIf( - not 
flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestCrossEntropyLossModuleGrad(flow.unittest.TestCase):
     def test_crossentropyloss_grad(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_ctc_loss.py b/oneflow/python/test/modules/test_ctc_loss.py
index d1a16923a8659ebcff3fc7925d495c6759c22227..1eb8163d80a8f9e4d52a0a93a81ce5cdee6aeb4b 100644
--- a/oneflow/python/test/modules/test_ctc_loss.py
+++ b/oneflow/python/test/modules/test_ctc_loss.py
@@ -301,10 +301,6 @@ def gen_arg_list():
     return GenArgList(arg_dict)


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestCTCLoss1n1d(flow.unittest.TestCase):
     def test_ctc_loss(test_case):
         for arg in gen_arg_list():
diff --git a/oneflow/python/test/modules/test_dataset.py b/oneflow/python/test/modules/test_dataset.py
index 541cc1c65185eb4ca1075545b9c3f50413afeef6..5d693e95bce80329d4cdd5a3954e9c931969be82 100644
--- a/oneflow/python/test/modules/test_dataset.py
+++ b/oneflow/python/test/modules/test_dataset.py
@@ -23,10 +23,6 @@ import numpy as np
 import oneflow.experimental as flow


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestOFRecordModule(flow.unittest.TestCase):
     def test_record(test_case):
         batch_size = 1
@@ -259,10 +255,6 @@ def _segm_poly_list_to_tensor(img_segm_poly_list):


 @flow.unittest.skip_unless_1n1d()
-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestCocoReader(flow.unittest.TestCase):
     def test_coco_reader(test_case):
         anno_file = "/dataset/mscoco_2017/annotations/instances_val2017.json"
diff --git a/oneflow/python/test/modules/test_deconv.py b/oneflow/python/test/modules/test_deconv.py
index b4b927309c62a507d5ac2fc4953ba82cab527e54..b7a48df44d3cb13ac2ac89356b60c93fcfe95830 100644
--- a/oneflow/python/test/modules/test_deconv.py
+++ b/oneflow/python/test/modules/test_deconv.py
@@ -853,10 +853,6 @@ def _test_deconv_group_large_in_channel(test_case, device):
     test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-6, 1e-6))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestDeconv2d(flow.unittest.TestCase):
     def test_deconv2d(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_diag.py b/oneflow/python/test/modules/test_diag.py
index c9288b40a6bfc1bbf276a7cdc971b44b2d5172b9..c91b334827f1d53bdd59417381f2f6d48f47b49d 100644
--- a/oneflow/python/test/modules/test_diag.py
+++ b/oneflow/python/test/modules/test_diag.py
@@ -132,10 +132,6 @@ def _test_diag_other_dim_non_square_backward(test_case, diagonal, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestDiag(flow.unittest.TestCase):
     def test_diag_forward(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_div.py b/oneflow/python/test/modules/test_div.py
index 078ddc9a780018d76f3f0af36cc6833bc7e4e7cd..59dab7707c1b783e3609aa9e3108558a09ef6145 100644
--- a/oneflow/python/test/modules/test_div.py
+++ b/oneflow/python/test/modules/test_div.py
@@ -67,10 +67,6 @@ def _test_div_impl(test_case, shape, device):
     test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad_x, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestDiv(flow.unittest.TestCase):
     def test_div(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_dropout.py b/oneflow/python/test/modules/test_dropout.py
index 2029ffb6a0a081cd5732c41e658304beb4f47065..c28a2563f4075f4ea53d12d916ce3fb0bff3f8bf 100644
--- a/oneflow/python/test/modules/test_dropout.py
+++ b/oneflow/python/test/modules/test_dropout.py
@@ -89,10 +89,6 @@ def _test_dropout_with_generator(test_case, shape, device):
     test_case.assertTrue(np.allclose(y_1.numpy(), y_2.numpy()))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestDropout(flow.unittest.TestCase):
     def test_transpose(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_eq.py b/oneflow/python/test/modules/test_eq.py
index bdb268299bbca040b828dec7881a5f9c4d38edef..34c3a31e65ba21c9d659bcf3214c7d4931336af3 100644
--- a/oneflow/python/test/modules/test_eq.py
+++ b/oneflow/python/test/modules/test_eq.py
@@ -82,10 +82,6 @@ def _test_tensor_eq_operator_float(test_case, shape, device):
     test_case.assertTrue(np.array_equal(of_out.numpy(), np_out))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestEq(flow.unittest.TestCase):
     def test_eq(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_erf.py b/oneflow/python/test/modules/test_erf.py
index 3f75f7634687eb7c0bdefb07f161251ec46ded88..8908b069f7489f0dd0b8bf89d4d58039a36a2d35 100644
--- a/oneflow/python/test/modules/test_erf.py
+++ b/oneflow/python/test/modules/test_erf.py
@@ -66,10 +66,6 @@ def _test_tensor_erf_impl(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestErfModule(flow.unittest.TestCase):
     def test_erf(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_erfc.py b/oneflow/python/test/modules/test_erfc.py
index 4cd3a44d3e79fed42fc01b9bdc9d3bbcfb3a6f81..29b819753bed27e21fe45ce72c4b5efe55c18548 100644
--- a/oneflow/python/test/modules/test_erfc.py
+++ b/oneflow/python/test/modules/test_erfc.py
@@ -66,10 +66,6 @@ def _test_tensor_erfc_impl(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestErfcModule(flow.unittest.TestCase):
     def test_erfc(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_exp.py b/oneflow/python/test/modules/test_exp.py
index db500de070a8bcbeb4e23a5d199cbd4a2c08d571..4e2dc64599aea7bdc211a05497e839f8c6626b5b 100644
--- a/oneflow/python/test/modules/test_exp.py
+++ b/oneflow/python/test/modules/test_exp.py
@@ -37,10 +37,6 @@ def _test_exp_impl(test_case, shape, device):
     test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestExp(flow.unittest.TestCase):
     def test_exp(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_expand.py b/oneflow/python/test/modules/test_expand.py
index a1e39ec1684366b312d35e34e243d3d836cc1ed1..c8bcd9d70a340ab6ae770a53ff4ab1af448c5066 100644
--- a/oneflow/python/test/modules/test_expand.py
+++ b/oneflow/python/test/modules/test_expand.py
@@ -184,10 +184,6 @@ def _test_expand_backward(test_case, device):
     test_case.assertTrue(np.array_equal(of_input.grad.numpy(), np_grad))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestModule(flow.unittest.TestCase):
     def test_expand(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_expm1.py b/oneflow/python/test/modules/test_expm1.py
index bb90c07a9e16f725f4f18b42936781efd5bd6dae..598cbab42e2f9802fbd25a548bef90c17437658c 100644
--- a/oneflow/python/test/modules/test_expm1.py
+++ b/oneflow/python/test/modules/test_expm1.py
@@ -38,10 +38,6 @@ def _test_expm1_impl(test_case, device, shape):
     test_case.assertTrue(np.allclose(x.grad.numpy(), np.exp(x.numpy()), 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestExpm1Module(flow.unittest.TestCase):
     def test_expm1(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_flatten.py b/oneflow/python/test/modules/test_flatten.py
index c667829845e7a17afc177dc282d8d10c8401f2f8..0bff48562bbed08892386f43203b198b5e55b934 100644
--- a/oneflow/python/test/modules/test_flatten.py
+++ b/oneflow/python/test/modules/test_flatten.py
@@ -58,10 +58,6 @@ def _test_flatten_backward(test_case, device):
     test_case.assertTrue(np.array_equal(np.ones(shape=(2, 3, 4, 5)), x.grad.numpy()))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestFlattenModule(flow.unittest.TestCase):
     def test_cast(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_floor.py b/oneflow/python/test/modules/test_floor.py
index 940b372ebd0981d9b2984a2fa25afed6c65a8c10..58658a8aca5879e51ed35ee25a373a6a582a968d 100644
--- a/oneflow/python/test/modules/test_floor.py
+++ b/oneflow/python/test/modules/test_floor.py
@@ -43,10 +43,6 @@ def _test_floor(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestFloor(flow.unittest.TestCase):
     def test_floor(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_functional_docstr.py b/oneflow/python/test/modules/test_functional_docstr.py
index 2fc9023b44f7d398fb24a6d0975885bbe097c23b..9af1847e58fb4f182db36d66547fb5cfe7359514 100644
--- a/oneflow/python/test/modules/test_functional_docstr.py
+++ b/oneflow/python/test/modules/test_functional_docstr.py
@@ -52,10 +52,6 @@ def _run_functional_doctest(
     runner.run(test)


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestFunctionalDocstrModule(flow.unittest.TestCase):
     def test_functional_docstr(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_gather.py b/oneflow/python/test/modules/test_gather.py
index 6a8f78af5a627757986a966129ef5e2fff41ea1e..29baf1615d232ce680f470f7af23983ecaedac4e 100644
--- a/oneflow/python/test/modules/test_gather.py
+++ b/oneflow/python/test/modules/test_gather.py
@@ -102,10 +102,6 @@ def _test_gather_backward(test_case, device):
     test_case.assertTrue(np.array_equal(of_input.grad.numpy(), np_grad))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestGather(flow.unittest.TestCase):
     def test_gather(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_gather_nd.py b/oneflow/python/test/modules/test_gather_nd.py
index 0636882f7996be706902ab5b30aaefa6b7138005..80a1b5945c062e71f1a79a44d77b7ec9a9f19288 100644
--- a/oneflow/python/test/modules/test_gather_nd.py
+++ b/oneflow/python/test/modules/test_gather_nd.py
@@ -82,10 +82,6 @@ def _test_gather_nd_backward_t(test_case, device):
     test_case.assertTrue(np.array_equal(of_input.grad.numpy(), np_grad))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestGather_nd(flow.unittest.TestCase):
     def test_gather_nd(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_greater.py b/oneflow/python/test/modules/test_greater.py
index 2fef0daad0c9353861eca1063e5567b5e07668bf..995d7bb46851d849cb52aca743a32ff3ed747b38 100644
--- a/oneflow/python/test/modules/test_greater.py
+++ b/oneflow/python/test/modules/test_greater.py
@@ -81,10 +81,6 @@ def _test_greater_float_scalar(test_case, device):
     test_case.assertTrue(np.array_equal(of_out.numpy(), np_out))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestGreater(flow.unittest.TestCase):
     def test_greter(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_greater_equal.py b/oneflow/python/test/modules/test_greater_equal.py
index 5482679a366305cd6161f5e2bcdde816d835cf05..b0225d508b29a05be1f82a1029004673388a510b 100644
--- a/oneflow/python/test/modules/test_greater_equal.py
+++ b/oneflow/python/test/modules/test_greater_equal.py
@@ -81,10 +81,6 @@ def _test_greater_equal_float_scalar(test_case, device):
     test_case.assertTrue(np.array_equal(of_out.numpy(), np_out))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestGreaterEqual(flow.unittest.TestCase):
     def test_greter_equal(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_groupnorm.py b/oneflow/python/test/modules/test_groupnorm.py
index f8fd903f631a1201227c7529c40d9ab35e083278..2a6e987d01f3669258bd6e10753cd85f987249da 100644
--- a/oneflow/python/test/modules/test_groupnorm.py
+++ b/oneflow/python/test/modules/test_groupnorm.py
@@ -323,10 +323,6 @@ def _test_groupnorm_backward_3d(test_case, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestGroupNorm(flow.unittest.TestCase):
     def test_groupnorm(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_image_batch_align.py b/oneflow/python/test/modules/test_image_batch_align.py
index 27ac535201e279774f364b795bd399f60d8e9d1b..5dedd701faf7a3d523eb5453d4f49b04cda0e8f7 100644
--- a/oneflow/python/test/modules/test_image_batch_align.py
+++ b/oneflow/python/test/modules/test_image_batch_align.py
@@ -46,10 +46,6 @@ def _roundup(x, n):


 @flow.unittest.skip_unless_1n1d()
-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestImageBatchAlign(flow.unittest.TestCase):
     def test_image_batch_align(test_case):
         image_files = [
diff --git a/oneflow/python/test/modules/test_image_decode.py b/oneflow/python/test/modules/test_image_decode.py
index cfcb453111524c3c4af41354a7209c0e468105e0..41438cf1919b66bcd1fb4239320972d1b8e27448 100644
--- a/oneflow/python/test/modules/test_image_decode.py
+++ b/oneflow/python/test/modules/test_image_decode.py
@@ -22,10 +22,6 @@ import oneflow.experimental as flow


 @flow.unittest.skip_unless_1n1d()
-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestImageDecode(flow.unittest.TestCase):
     def test_image_decode(test_case):
         images = [
diff --git a/oneflow/python/test/modules/test_image_flip.py b/oneflow/python/test/modules/test_image_flip.py
index 008efac15faee2c8dfdaa1f35a3b5d48fbf43262..05185b9677797d3c669d456ba1e34c4242788323 100644
--- a/oneflow/python/test/modules/test_image_flip.py
+++ b/oneflow/python/test/modules/test_image_flip.py
@@ -65,10 +65,6 @@ def _compare_image_flip_with_cv(test_case, image_files):


 @flow.unittest.skip_unless_1n1d()
-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestImageFlip(flow.unittest.TestCase):
     def test_image_flip(test_case):
         _compare_image_flip_with_cv(
diff --git a/oneflow/python/test/modules/test_image_normalize.py b/oneflow/python/test/modules/test_image_normalize.py
index d164fbf1cc37a828679bc82cb662e22ff0a2548a..84e23cdf4dfa28fed31dd9203adbe96d8edd4ef5 100644
--- a/oneflow/python/test/modules/test_image_normalize.py
+++ b/oneflow/python/test/modules/test_image_normalize.py
@@ -74,10 +74,6 @@ def _compare_image_normalize(test_case, image_files, std, mean):


 @flow.unittest.skip_unless_1n1d()
-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestImageNormalize(flow.unittest.TestCase):
     def test_image_normalize(test_case):
         _compare_image_normalize(
diff --git a/oneflow/python/test/modules/test_in_top_k.py b/oneflow/python/test/modules/test_in_top_k.py
index 429484858375255244418156e2e7a104adcfd71a..5c76c05be2f154e9df062e313d21eb4f0c46ebc3 100644
--- a/oneflow/python/test/modules/test_in_top_k.py
+++ b/oneflow/python/test/modules/test_in_top_k.py
@@ -99,10 +99,6 @@ def _test_in_top_k_impl(test_case, shape, k, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestInTopK(flow.unittest.TestCase):
     def test_in_top_k(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_instancenorm.py b/oneflow/python/test/modules/test_instancenorm.py
index 3d684864933cc2dec7a83f927236196808432b5a..c369b68025bfa266f8088dced28320729a87f7e1 100644
--- a/oneflow/python/test/modules/test_instancenorm.py
+++ b/oneflow/python/test/modules/test_instancenorm.py
@@ -409,10 +409,6 @@ def _test_instancenorm3d_backward(test_case, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestInstanceNorm(flow.unittest.TestCase):
     def test_instancenorm(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_instruction_replay.py b/oneflow/python/test/modules/test_instruction_replay.py
index 63ec1a99cd31f729600515d36b37d7c494551a24..fdf8643a21e02ce2c49537f923a53a01c60462e6 100644
--- a/oneflow/python/test/modules/test_instruction_replay.py
+++ b/oneflow/python/test/modules/test_instruction_replay.py
@@ -44,10 +44,6 @@ def _test_instruction_replay_impl(test_case, device, shape):
     oneflow._oneflow_internal.debug.clear_recorded_instructions()


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestIntructionReplay(flow.unittest.TestCase):
     def test_instruction_replay(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_kldivloss.py b/oneflow/python/test/modules/test_kldivloss.py
index 2c63827069ca2befbde9ee8480b28ae8ff3a5784..5a87621fddae4d5d2a1ce55c7b0633696a2726e9 100644
--- a/oneflow/python/test/modules/test_kldivloss.py
+++ b/oneflow/python/test/modules/test_kldivloss.py
@@ -89,10 +89,6 @@ def _test_kldivloss_backward(test_case, device, shape, reduction, log_target):
     test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestKLDivLossModule(flow.unittest.TestCase):
     def test_kldivloss(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_l1loss.py b/oneflow/python/test/modules/test_l1loss.py
index 24d8ae89d70a39d6fe2df7a7bfe7d6a56672a227..f400fa811e6d9478df8b06514c640d26b9c1e650 100644
--- a/oneflow/python/test/modules/test_l1loss.py
+++ b/oneflow/python/test/modules/test_l1loss.py
@@ -68,10 +68,6 @@ def _test_l1loss_impl(test_case, device, shape, reduction):
     test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestL1LossModule(flow.unittest.TestCase):
     def test_l1loss(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_less.py b/oneflow/python/test/modules/test_less.py
index ab208df76241555907651b3f9eb5b974f5e32676..fa880abaddf2662cba83e23d9edbc8879ccfe660 100644
--- a/oneflow/python/test/modules/test_less.py
+++ b/oneflow/python/test/modules/test_less.py
@@ -77,10 +77,6 @@ def _test_less_float_scalar(test_case, device):
     test_case.assertTrue(np.array_equal(of_out.numpy(), np_out))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestLess(flow.unittest.TestCase):
     def test_less(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_less_equal.py b/oneflow/python/test/modules/test_less_equal.py
index 9f1f6cf25f7ac03e63c8f4410c6990aae2317e04..90dd2cd5421e743ef4476b595a19918ff095eb4f 100644
--- a/oneflow/python/test/modules/test_less_equal.py
+++ b/oneflow/python/test/modules/test_less_equal.py
@@ -77,10 +77,6 @@ def _test_less_equal_float_scalar(test_case, device):
     test_case.assertTrue(np.array_equal(of_out.numpy(), np_out))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestLessEqual(flow.unittest.TestCase):
     def test_less_equal(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_linear.py b/oneflow/python/test/modules/test_linear.py
index 9bca71cf7c91c40d6c36d9a37d35c6eaa75083e9..3141f089c1e1b4c8fe8222b03ddbf0ad9698a50f 100644
--- a/oneflow/python/test/modules/test_linear.py
+++ b/oneflow/python/test/modules/test_linear.py
@@ -155,10 +155,6 @@ def _test_linear_backward_with_bias(test_case, device):
     test_case.assertTrue(np.allclose(np_grad, x.grad.numpy(), 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestLinear(flow.unittest.TestCase):
     def test_linear_forward(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_log1p.py b/oneflow/python/test/modules/test_log1p.py
index 98c22125fdf69e84cae93e727f6f7681f0a6a703..0c9a4b3ff388665b25bd27ed5a4817fbc1581920 100644
--- a/oneflow/python/test/modules/test_log1p.py
+++ b/oneflow/python/test/modules/test_log1p.py
@@ -61,10 +61,6 @@ def _test_log1p_tensor_function(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestLog1p(flow.unittest.TestCase):
     def test_log1p(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_lr_scheduler.py b/oneflow/python/test/modules/test_lr_scheduler.py
index e1f9301b48664fc1fb2166136b29999d910a65b0..a93b002fbf35e2a7ea1a9145b6d8e6e431509b78 100644
--- a/oneflow/python/test/modules/test_lr_scheduler.py
+++ b/oneflow/python/test/modules/test_lr_scheduler.py
@@ -21,10 +21,6 @@ import oneflow.experimental as flow
 from oneflow.python.nn.parameter import Parameter


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestLrScheduler(flow.unittest.TestCase):
     base_lr = 1.0
diff --git a/oneflow/python/test/modules/test_marginrankingloss.py b/oneflow/python/test/modules/test_marginrankingloss.py
index 0172543ca1c0ffa5b96cb7f83611cb27e2fbfb71..c18cc69802ab8d6107231cc2b64f1b88692f934b 100644
--- a/oneflow/python/test/modules/test_marginrankingloss.py
+++ b/oneflow/python/test/modules/test_marginrankingloss.py
@@ -151,10 +151,6 @@ def _test_marginrankingloss_grad(test_case, shape, margin, device):
     test_case.assertTrue(np.allclose(input2.grad.numpy(), np_out_grad2, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestMarginRankingLossModule(flow.unittest.TestCase):
     def test_margin_ranking_loss(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_masked_fill.py b/oneflow/python/test/modules/test_masked_fill.py
index 82206fd8765ef7db473f6f2048dc1ce2aea89aca..75e20f41f654e1ab914d5d08340b198bfb40529d 100644
--- a/oneflow/python/test/modules/test_masked_fill.py
+++ b/oneflow/python/test/modules/test_masked_fill.py
@@ -21,10 +21,6 @@ import oneflow.experimental as flow
 from automated_test_util import *


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestMaskedFill(flow.unittest.TestCase):
     def test_masked_fill_aginst_pytorch(test_case):
         import numpy as np
diff --git a/oneflow/python/test/modules/test_masked_select.py b/oneflow/python/test/modules/test_masked_select.py
index 804210802ec21b99ef524c23b35ce0f5295bc5bb..f28a6e1ac2634de11a17fa46880a2453412fd156 100644
--- a/oneflow/python/test/modules/test_masked_select.py
+++ b/oneflow/python/test/modules/test_masked_select.py
@@ -80,10 +80,6 @@ def _test_masked_select_broadcast(test_case, device):
     test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestAbs(flow.unittest.TestCase):
     def test_cosh(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_math_ops.py b/oneflow/python/test/modules/test_math_ops.py
index d8546a0a1c3547b600cc790573277b6893f56a03..2afe078bc7ec5f8b8ba261120fd9cfccec38bd1f 100644
--- a/oneflow/python/test/modules/test_math_ops.py
+++ b/oneflow/python/test/modules/test_math_ops.py
@@ -61,10 +61,6 @@ def _test_variance_backward(test_case, shape, device):
     test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestVariance(flow.unittest.TestCase):
     def test_variance(test_case):
         arg_dict = OrderedDict()
@@ -95,10 +91,6 @@ def _test_sinh_impl(test_case, shape, device):
     test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_x_grad, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class Testsinh(flow.unittest.TestCase):
     def test_sinh(test_case):
         arg_dict = OrderedDict()
@@ -157,10 +149,6 @@ def _test_inplace_sin(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestSin(flow.unittest.TestCase):
     def test_sin(test_case):
         arg_dict = OrderedDict()
@@ -198,10 +186,6 @@ def _test_cos_backward(test_case, shape, device):
     test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestCos(flow.unittest.TestCase):
     def test_cos(test_case):
         arg_dict = OrderedDict()
@@ -251,10 +235,6 @@ def _test_log_backward(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestLog(flow.unittest.TestCase):
     def test_log(test_case):
         arg_dict = OrderedDict()
@@ -289,10 +269,6 @@ def _test_std_negative_dim(test_case, shape, device):
     test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestStd(flow.unittest.TestCase):
     def test_std(test_case):
         arg_dict = OrderedDict()
@@ -332,10 +308,6 @@ def _test_sqrt_backward(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestSqrt(flow.unittest.TestCase):
     def test_sqrt(test_case):
         arg_dict = OrderedDict()
@@ -370,10 +342,6 @@ def _test_rsqrt_backward(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestRsqrt(flow.unittest.TestCase):
     def test_rsqrt(test_case):
         arg_dict = OrderedDict()
@@ -407,10 +375,6 @@ def _test_square_backward(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestSquare(flow.unittest.TestCase):
     def test_square(test_case):
         arg_dict = OrderedDict()
@@ -448,10 +412,6 @@ def _test_pow_backward(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestPow(flow.unittest.TestCase):
     def test_pow(test_case):
         arg_dict = OrderedDict()
@@ -500,10 +460,6 @@ def _test_arcsin(test_case, shape, device):
     test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out_grad, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestAsin(flow.unittest.TestCase):
     def test_asin(test_case):
         arg_dict = OrderedDict()
@@ -572,10 +528,6 @@ def _test_arcsinh(test_case, shape, device):
     test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out_grad, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestAsinh(flow.unittest.TestCase):
     def test_asinh(test_case):
         arg_dict = OrderedDict()
@@ -749,10 +701,6 @@ def _test_topk_original(test_case, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestPow(flow.unittest.TestCase):
     def test_pow(test_case):
         input = flow.Tensor(np.array([1, 2, 3, 4, 5, 6]), dtype=flow.float32)
@@ -767,10 +715,6 @@ class TestPow(flow.unittest.TestCase):
     test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestTopk(flow.unittest.TestCase):
     def test_topk(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_matmul.py b/oneflow/python/test/modules/test_matmul.py
index 3ba1fa9f2a7777f578e26556895b76a4a5bc4d0b..18d628973f8ad6837bb07a58d7297ebce0cb7ba8 100644
--- a/oneflow/python/test/modules/test_matmul.py
+++ b/oneflow/python/test/modules/test_matmul.py
@@ -315,10 +315,6 @@ def _test_batch_matmul_backward(test_case, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestModule(flow.unittest.TestCase):
     def test_matmul(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_mean.py b/oneflow/python/test/modules/test_mean.py
index 7b8fe7e05122383644be2acdbc61838208532996..b43dede0c025060c1270d54d1d908a4fec5832b4 100644
--- a/oneflow/python/test/modules/test_mean.py
+++ b/oneflow/python/test/modules/test_mean.py
@@ -63,10 +63,6 @@ def _test_mean_backward(test_case, shape, device):
     test_case.assertTrue(np.allclose(x.grad.numpy(), np_grad, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestMean(flow.unittest.TestCase):
     def test_mean(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_meshgrid.py b/oneflow/python/test/modules/test_meshgrid.py
index 093a4e8829b29c5c61ee532ba1d0b3dcbddf2c90..2b8b30f3e454a7b6c212536bb3e73aea377ba0fd 100644
--- a/oneflow/python/test/modules/test_meshgrid.py
+++ b/oneflow/python/test/modules/test_meshgrid.py
@@ -66,10 +66,6 @@ def _test_meshgrid_forawd_3tensor(test_case, device):
     test_case.assertTrue(np.allclose(of_z.numpy(), np_z, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestMeshGrid(flow.unittest.TestCase):
     def test_meshgrid(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_module_to.py b/oneflow/python/test/modules/test_module_to.py
index c36096c17613db575a90417d87bb682b571bc646..8c445720a69e3b4cc9109bdf732d57671321d9ea 100644
--- a/oneflow/python/test/modules/test_module_to.py
+++ b/oneflow/python/test/modules/test_module_to.py
@@ -87,10 +87,6 @@ def _test_dummy_module_to(test_case):
     test_case.assertEqual(m.dummy_para.grad.device, gpu0_device)


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestModuleTo(flow.unittest.TestCase):
     def test_module_to(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_mseloss.py b/oneflow/python/test/modules/test_mseloss.py
index 3f21e55b411397ebf86c6db00f21d0d2aee30ded..d6746d5f676e0d1df00b66cfa9a64fea2f534372 100644
--- a/oneflow/python/test/modules/test_mseloss.py
+++ b/oneflow/python/test/modules/test_mseloss.py
@@ -66,10 +66,6 @@ def _test_mseloss_impl(test_case, device, shape, reduction):
     test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestMSELossModule(flow.unittest.TestCase):
     def test_mseloss(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_mul.py b/oneflow/python/test/modules/test_mul.py
index 8fd3372b49f6cc0fd32bb9a1e4f992440ddcfe1e..44c1efdf63508c1ad668cf64779c3ddc6edb7699 100644
--- a/oneflow/python/test/modules/test_mul.py
+++ b/oneflow/python/test/modules/test_mul.py
@@ -98,10 +98,6 @@ def _test_mul_impl(test_case, device):
     test_case.assertTrue(np.allclose(y.grad.numpy(), x.numpy(), 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestMulModule(flow.unittest.TestCase):
     def test_mul(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_ne.py b/oneflow/python/test/modules/test_ne.py
index 3cf33e8f9451e0ee719cc73141772aacd1800a21..98409707a417ee8939b5ce01b34adefca5b2db33 100644
--- a/oneflow/python/test/modules/test_ne.py
+++ b/oneflow/python/test/modules/test_ne.py
@@ -82,10 +82,6 @@ def _test_tensor_ne_operator_float(test_case, shape, device):
     test_case.assertTrue(np.array_equal(of_out.numpy(), np_out))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestNe(flow.unittest.TestCase):
     def test_ne(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_negative.py b/oneflow/python/test/modules/test_negative.py
index 54ca33c1233626b5c9f3e1bdb6c2e14e2355f194..d2162377ca6b14d33746409d29884b3c9fba4bf4 100644
--- a/oneflow/python/test/modules/test_negative.py
+++ b/oneflow/python/test/modules/test_negative.py
@@ -60,10 +60,6 @@ def _test_negative_backward(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestNegativeModule(flow.unittest.TestCase):
     def test_negative(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_nllloss.py b/oneflow/python/test/modules/test_nllloss.py
index 41420284133336bfca778dd6929f9e1c29280e36..c273ac37018bbbd83cfe7f0cd6a0aa56b95621be 100644
--- a/oneflow/python/test/modules/test_nllloss.py
+++ b/oneflow/python/test/modules/test_nllloss.py
@@ -316,10 +316,6 @@ def _test_nllloss_bert_sum(test_case, device):
     test_case.assertTrue(np.allclose(of_out.numpy(), np_out))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestNLLLossModule(flow.unittest.TestCase):
     def test_nllloss(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_nllloss_grad.py b/oneflow/python/test/modules/test_nllloss_grad.py
index ea517033d70d2d962530cf1df13b367c1b1a01cb..224cfd1e7040b8efc2f96f852786afa27aea33df 100644
--- a/oneflow/python/test/modules/test_nllloss_grad.py
+++ b/oneflow/python/test/modules/test_nllloss_grad.py
@@ -436,10 +436,6 @@ def _test_nllloss_bert_sum_backward_with_ignore_index(test_case, device):
     test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, atol=1e-5, rtol=1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestNLLLossModule(flow.unittest.TestCase):
     def test_nllloss(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_normalization.py b/oneflow/python/test/modules/test_normalization.py
index 14a502f0675fa81de099bba5ae7d037243145b4b..a0c59b59d8d54aa68b2923993f6a252f44cbd892 100644
--- a/oneflow/python/test/modules/test_normalization.py
+++ b/oneflow/python/test/modules/test_normalization.py
@@ -123,10 +123,6 @@ def _test_layernorm_backward(test_case, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestLayerNorm(flow.unittest.TestCase):
     def test_layernorm(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_ones_like.py b/oneflow/python/test/modules/test_ones_like.py
index c46a2222c7dcaff9ff75171ba126426bc03c0035..de93cb8c2180219627beb8f0626b370ab9c9cf18 100644
--- a/oneflow/python/test/modules/test_ones_like.py
+++ b/oneflow/python/test/modules/test_ones_like.py
@@ -44,10 +44,6 @@ def _test_ones_like_int(test_case, shape, device):
     test_case.assertTrue(np.array_equal(y.numpy(), y_numpy))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestModule(flow.unittest.TestCase):
     def test_ones_like(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_optim_adam.py b/oneflow/python/test/modules/test_optim_adam.py
index 3b3b43be6300416aea218b32208f66c0215ea38e..147a2a79a05ea68b5cb9c6cc20b6611d73d7d2e2 100644
--- a/oneflow/python/test/modules/test_optim_adam.py
+++ b/oneflow/python/test/modules/test_optim_adam.py
@@ -95,10 +95,6 @@ def compare_with_numpy_adam(
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestAdam(flow.unittest.TestCase):
     def test_adam(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_optim_adamw.py b/oneflow/python/test/modules/test_optim_adamw.py
index 3553b7e3db26b542123ca9f84992c014251c9f1d..722cb1c34dc27e83408f44d852f9aaf2690e8275 100644
--- a/oneflow/python/test/modules/test_optim_adamw.py
+++ b/oneflow/python/test/modules/test_optim_adamw.py
@@ -88,10 +88,6 @@ def compare_with_numpy_adamw(
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestAdamW(flow.unittest.TestCase):
     def test_adamw(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_optim_rmsprop.py b/oneflow/python/test/modules/test_optim_rmsprop.py
index 3d23fc56870384aa40315ee38848bdb0a7b956e4..a5aa676d2f85f314c56a6dc00131fe6993565389 100644
--- a/oneflow/python/test/modules/test_optim_rmsprop.py
+++ b/oneflow/python/test/modules/test_optim_rmsprop.py
@@ -108,10 +108,6 @@ def compare_with_numpy_rmsprop(
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestRMSProp(flow.unittest.TestCase):
     def test_rmsprop(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_optim_sgd.py b/oneflow/python/test/modules/test_optim_sgd.py
index 2487049ebec194abd0d9b0f420d17a486150e0c4..afb4c329c05a79de3d736070af9135edcbc456cc 100644
--- a/oneflow/python/test/modules/test_optim_sgd.py
+++ b/oneflow/python/test/modules/test_optim_sgd.py
@@ -72,10 +72,6 @@ def compare_with_numpy_sgd(
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestOptimizers(flow.unittest.TestCase):
     def test_sgd(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_permute.py b/oneflow/python/test/modules/test_permute.py
index bfe8023bedd4dc7b3bea59407bb151e947cdde78..8baf8aaacc7c3b8df2975297a7453e05ca3decee 100644
--- a/oneflow/python/test/modules/test_permute.py
+++ b/oneflow/python/test/modules/test_permute.py
@@ -38,10 +38,6 @@ def _test_permute_impl(test_case, device):
     test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestPermute(flow.unittest.TestCase):
     def test_permute(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_pixel_shuffle.py b/oneflow/python/test/modules/test_pixel_shuffle.py
index a2e586abe561526c0b05d1e4f80cd5993ebe9743..d1936bbf47173eb1a67306a2fc61f12db58850df 100644
--- a/oneflow/python/test/modules/test_pixel_shuffle.py
+++ b/oneflow/python/test/modules/test_pixel_shuffle.py
@@ -68,10 +68,6 @@ def _test_pixel_shuffle_impl(
     test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestPixelShuffleModule(flow.unittest.TestCase):
     def test_pixel_shuffle(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_pooling.py b/oneflow/python/test/modules/test_pooling.py
index 15d798fb749a3343c90ada854ebb795d467cf7d0..83b3d01c65f51f0e4436ba0541c96611172ca861 100644
--- a/oneflow/python/test/modules/test_pooling.py
+++ b/oneflow/python/test/modules/test_pooling.py
@@ -612,10 +612,6 @@ def _test_maxpool3d_negative_input_backward(test_case, device):
     test_case.assertTrue(np.allclose(x.grad.numpy(), numpy_grad, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestPooling(flow.unittest.TestCase):
     def test_maxpool1d(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_pow.py b/oneflow/python/test/modules/test_pow.py
index be0a1a794e4119405fe60026a609885e0be892ea..7e68d4baeb9c962f0976fe0b63867d0aa5cf9124 100644
--- a/oneflow/python/test/modules/test_pow.py
+++ b/oneflow/python/test/modules/test_pow.py
@@ -90,10 +90,6 @@ def _test_pow_backward_impl(test_case, device):
     test_x_grad_scalar()


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestPow(flow.unittest.TestCase):
     def test_pow_forward(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_prelu.py b/oneflow/python/test/modules/test_prelu.py
index 9aa69ee4570f65773a8dac8f99da8bbc1df6b03a..249823381ab93679b9c09f8aa2167bc973eb8a5d 100644
--- a/oneflow/python/test/modules/test_prelu.py
+++ b/oneflow/python/test/modules/test_prelu.py
@@ -74,10 +74,6 @@ def _test_prelu_grad(test_case, shape, device):
     test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestPReLU(flow.unittest.TestCase):
     def test_prelu(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_reciprocal.py b/oneflow/python/test/modules/test_reciprocal.py
index 1064780db304f55fa3cfc22316a42c4a289d575b..1f9e202acd01bba41e141f2c4ca11c3b85aa6523 100644
--- a/oneflow/python/test/modules/test_reciprocal.py
+++ b/oneflow/python/test/modules/test_reciprocal.py
@@ -31,10 +31,6 @@ def _test_reciprocal_impl(test_case, shape, device):
     test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestReciprocalModule(flow.unittest.TestCase):
     def test_reciprocal(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_reduce_ops.py b/oneflow/python/test/modules/test_reduce_ops.py
index 6bf965d212697623011449243b9cd7d823403fff..c626888a06a7f3b3a48f6d22793623775f1e7efd 100644
--- a/oneflow/python/test/modules/test_reduce_ops.py
+++ b/oneflow/python/test/modules/test_reduce_ops.py
@@ -67,10 +67,6 @@ def _test_min_tensor_function(test_case, device, shape, dim, keepdims):
     test_case.assertTrue(np.allclose(x.grad.numpy(), np_out_grad, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestMinModule(flow.unittest.TestCase):
     def test_min(test_case):
         arg_dict = OrderedDict()
@@ -134,10 +130,6 @@ def _test_max_tensor_function(test_case, device, shape, dim, keepdims):
     test_case.assertTrue(np.allclose(x.grad.numpy(), np_out_grad, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestMaxModule(flow.unittest.TestCase):
     def test_max(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_reflection_pad2d.py b/oneflow/python/test/modules/test_reflection_pad2d.py
index e2fc9e26df1bd661ff7b7e5b690edb6fab3e9620..28223b874daa9aeb8b060d1134f9d5c03fac24f1 100644
--- a/oneflow/python/test/modules/test_reflection_pad2d.py
+++ b/oneflow/python/test/modules/test_reflection_pad2d.py
@@ -109,10 +109,6 @@ def _test_reflection_pad2d(test_case, shape, padding, device):
     test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_grad, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestReflectionPad2dModule(flow.unittest.TestCase):
     def test_reflection_pad2d(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_repeat.py b/oneflow/python/test/modules/test_repeat.py
index da5ea315460695cf1c9680a9dca7b87611612c52..f34fc62667cec14498713267539c9effaca2bba3 100644
--- a/oneflow/python/test/modules/test_repeat.py
+++ b/oneflow/python/test/modules/test_repeat.py
@@ -128,10 +128,6 @@ def _test_repeat_same_dim_backward(test_case, device):
     test_case.assertTrue(np.array_equal(input.grad.numpy(), np_grad))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestRepeat(flow.unittest.TestCase):
     def test_repeat(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_replicationpad2d.py b/oneflow/python/test/modules/test_replicationpad2d.py
index fc2b5cb292a4d2813d9ced0754d9be2069d47dfc..1d6914d48c111a989987ccb364f98b13b14dad4b 100644
--- a/oneflow/python/test/modules/test_replicationpad2d.py
+++ b/oneflow/python/test/modules/test_replicationpad2d.py
@@ -105,10 +105,6 @@ def _test_ReplicationPad2d(test_case, shape, padding, device):
     test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out_grad, 1e-3, 1e-3))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestReplicationPad2dModule(flow.unittest.TestCase):
     def test_ReplicationPad2d(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_reshape.py b/oneflow/python/test/modules/test_reshape.py
index c1c1c376b9f6b8ef7029a1cfe5e12ca57a4b2494..f26e67f27d15453bc5ccf32e8366f281f033a09c 100644
--- a/oneflow/python/test/modules/test_reshape.py
+++ b/oneflow/python/test/modules/test_reshape.py
@@ -70,10 +70,6 @@ def _test_reshape_backward(test_case, device):
     test_case.assertTrue(np.allclose(np_grad, input.grad.numpy(), 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestModule(flow.unittest.TestCase):
     def test_reshape(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_resnet50_with_bn.py b/oneflow/python/test/modules/test_resnet50_with_bn.py
index 41daebabd43532d2ab45a504939872718a3fcce8..46fafc4dac14cc4ca6edec755a4a5f8ee88dc832 100644
--- a/oneflow/python/test/modules/test_resnet50_with_bn.py
+++ b/oneflow/python/test/modules/test_resnet50_with_bn.py
@@ -20,10 +20,6 @@ from resnet50_model import resnet50


 @flow.unittest.skip_unless_1n1d()
-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestResNet50(flow.unittest.TestCase):
     def test_resnet50_with_batchnorm(test_case):
         batch_size = 32
diff --git a/oneflow/python/test/modules/test_resnet50_without_bn.py b/oneflow/python/test/modules/test_resnet50_without_bn.py
index 0e6c3a595a55cb59c19dcdbd80efc778a3cb2ed0..d0cae3cfafb37668b3fc6faf60f6ea55102bb0a3 100644
--- a/oneflow/python/test/modules/test_resnet50_without_bn.py
+++ b/oneflow/python/test/modules/test_resnet50_without_bn.py
@@ -23,10 +23,6 @@ from resnet50_model import resnet50, FakeBN


 @flow.unittest.skip_unless_1n1d()
-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestResNet50(flow.unittest.TestCase):
     def test_resnet50_without_batchnorm(test_case):
         batch_size = 32
diff --git a/oneflow/python/test/modules/test_round.py b/oneflow/python/test/modules/test_round.py
index a0498115d9955e1d369a06039884b894bdcd6acd..216356d55343c348b67ebadca4293d1123fe2f15 100644
--- a/oneflow/python/test/modules/test_round.py
+++ b/oneflow/python/test/modules/test_round.py
@@ -40,10 +40,6 @@ def _test_round_impl(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestRound(flow.unittest.TestCase):
     def test_round(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_scatter_nd.py b/oneflow/python/test/modules/test_scatter_nd.py
index eb3a76e0e3e3ae1c5f1c9787089a42b89c5cdb44..cf4dc1700311e0d52dd4578db735f0a9e36357f1 100644
--- a/oneflow/python/test/modules/test_scatter_nd.py
+++ b/oneflow/python/test/modules/test_scatter_nd.py
@@ -84,10 +84,6 @@ def _test_scatter_nd_backward(test_case, device):
     test_case.assertTrue(np.array_equal(of_update.grad.numpy(), np_grad))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestScatter_nd(flow.unittest.TestCase):
     def test_scatter_nd(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_sign.py b/oneflow/python/test/modules/test_sign.py
index 79da44abb1d74cf6072a011c2832082f05fcef83..0a64e4035211eb1281e12f94aabf1c62ff205cfc 100644
--- a/oneflow/python/test/modules/test_sign.py
+++ b/oneflow/python/test/modules/test_sign.py
@@ -38,10 +38,6 @@ def _test_sign_impl(test_case, shape, device):
     test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_grad, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestSign(flow.unittest.TestCase):
     def test_sign(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_slice.py b/oneflow/python/test/modules/test_slice.py
index 168b27256acc9080a04f2c15bc8a06fbced0fc22..869c9d4a92751fbcf02bf71fcf616a339634c964 100644
--- a/oneflow/python/test/modules/test_slice.py
+++ b/oneflow/python/test/modules/test_slice.py
@@ -125,10 +125,6 @@ def _test_slice_backward(test_case, device):
     test_case.assertTrue(np.array_equal(x.grad.numpy(), np_grad))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestSlice(flow.unittest.TestCase):
     def test_slice(test_case):
         arg_dict = OrderedDict()
@@ -147,10 +143,6 @@ class TestSlice(flow.unittest.TestCase):
         arg[0](test_case, *arg[1:])


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestSliceUpdate(flow.unittest.TestCase):
     def test_slice_update(test_case):
         x = np.array([1, 1, 1, 1, 1]).astype(np.float32)
@@ -161,10 +153,6 @@ class TestSliceUpdate(flow.unittest.TestCase):
         test_case.assertTrue(np.array_equal(y.numpy(), output))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestLogicalSliceAssign(flow.unittest.TestCase):
     # this is an in-place operation, so requires_grad should be False(no grad in backward)
     def test_logical_slice_assign(test_case):
diff --git a/oneflow/python/test/modules/test_smoothl1loss.py b/oneflow/python/test/modules/test_smoothl1loss.py
index 88996a801454bd004a8aa096bddb8add3cb0fb7b..c740241a19dabce3ddf9d8e515c109f4fcb1f38e 100644
--- a/oneflow/python/test/modules/test_smoothl1loss.py
+++ b/oneflow/python/test/modules/test_smoothl1loss.py
@@ -95,10 +95,6 @@ def _test_smoothl1loss_impl(test_case, device, shape, data_type, reduction, beta
     test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestSmoothL1LossModule(flow.unittest.TestCase):
     def test_smoothl1loss(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_softplus.py b/oneflow/python/test/modules/test_softplus.py
index 2a9ca66ebbc40c079a3bf6c1c22e0c73e05d9e51..28383e98ac479aa014e7685a1f969f417a8995c2 100644
--- a/oneflow/python/test/modules/test_softplus.py
+++ b/oneflow/python/test/modules/test_softplus.py
@@ -38,10 +38,6 @@ def _test_softplus_impl(test_case, shape, device):
     test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_x_grad, 1e-4, 1e-4))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class Testsoftplus(flow.unittest.TestCase):
     def test_softplus(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_sort.py b/oneflow/python/test/modules/test_sort.py
index c363fc227156b50861bdd982cc63da9e41da75bb..483a813bffd0ccf3239d5d55b9d26f2dbc11e134 100644
--- a/oneflow/python/test/modules/test_sort.py
+++ b/oneflow/python/test/modules/test_sort.py
@@ -60,10 +60,6 @@ def _test_tensor_sort(test_case, data_shape, axis, descending, data_type, device
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestSort(flow.unittest.TestCase):
     def test_sort(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_sparse.py b/oneflow/python/test/modules/test_sparse.py
index 176dac9bb19bb3c288478c7dfb10aa72c11d6306..0443b825803ca11451aff3823793fd632af3ca9d 100644
--- a/oneflow/python/test/modules/test_sparse.py
+++ b/oneflow/python/test/modules/test_sparse.py
@@ -84,10 +84,6 @@ def _test_embedding_impl(test_case, device):
     test_case.assertTrue(np.allclose(m.weight.grad.numpy(), weight_grad_np, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestEmbedding(flow.unittest.TestCase):
     def test_embedding(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_squeeze.py b/oneflow/python/test/modules/test_squeeze.py
index 81b6ffad36b3636664da192546927f9dda5cbb11..15445168dd6a2778ba9658b6b2631add4daa0de5 100644
--- a/oneflow/python/test/modules/test_squeeze.py
+++ b/oneflow/python/test/modules/test_squeeze.py
@@ -84,10 +84,6 @@ def _test_squeeze_backward(test_case, device):
     test_case.assertTrue(np.array_equal(input.grad.numpy(), np_grad))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestSqueeze(flow.unittest.TestCase):
     def test_squeeze(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_stack.py b/oneflow/python/test/modules/test_stack.py
index e6c1c9e826a194445a78f0c315fdabb73576d717..4ab27dac8b8e51a77920e2b5c498376989da0e5e 100644
--- a/oneflow/python/test/modules/test_stack.py
+++ b/oneflow/python/test/modules/test_stack.py
@@ -82,10 +82,6 @@ def _test_stack_multi_input(test_case, device, shape):
     test_case.assertTrue(np.allclose(out_np, out_of.numpy(), 1e-05, 1e-05))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestStack(flow.unittest.TestCase):
     def test_stack(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_sub.py b/oneflow/python/test/modules/test_sub.py
index f2c3ecd6445413affb8e25218502485261d9f26c..c5f6338d2f8d95d579a50c8aaca4303e63df83ea 100644
--- a/oneflow/python/test/modules/test_sub.py
+++ b/oneflow/python/test/modules/test_sub.py
@@ -77,10 +77,6 @@ def _test_sub_impl(test_case, shape, device):
     test_case.assertTrue(np.allclose(y.grad.numpy(), np_grad_y, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestSubModule(flow.unittest.TestCase):
     def test_sub(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_sum.py b/oneflow/python/test/modules/test_sum.py
index a82efe65331f52268dcdbc28a08b822943098616..bccf8f63191a7bac4b1194a354f36a504d88c30a 100644
--- a/oneflow/python/test/modules/test_sum.py
+++ b/oneflow/python/test/modules/test_sum.py
@@ -62,10 +62,6 @@ def _test_sum_impl(test_case, device):
     test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestSumModule(flow.unittest.TestCase):
     def test_sum(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_tan.py b/oneflow/python/test/modules/test_tan.py
index 57fc240a1ce824742b629b43a62c3de88d240f7d..896b0df5ec84769f4536e75b70a2228c0b5efed0 100644
--- a/oneflow/python/test/modules/test_tan.py
+++ b/oneflow/python/test/modules/test_tan.py
@@ -42,10 +42,6 @@ def _test_tan_impl(test_case, shape, device):
     )


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestTan(flow.unittest.TestCase):
     def test_tan(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_tensor_buffer.py b/oneflow/python/test/modules/test_tensor_buffer.py
index fbe746512b3bb9e93fcd20949f3eb1da46c2c21c..5f5df53df6b1d14871096b88cc5b011d3fc37f31 100644
--- a/oneflow/python/test/modules/test_tensor_buffer.py
+++ b/oneflow/python/test/modules/test_tensor_buffer.py
@@ -34,10 +34,6 @@ def _test_tensor_buffer_convert(test_case, device):
     test_case.assertTrue(np.array_equal(input.numpy(), orig_tensor.numpy()))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestTensorBufferOps(flow.unittest.TestCase):
     def test_tensor_buffer_convert(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_tensor_ops.py b/oneflow/python/test/modules/test_tensor_ops.py
index 9b31549d1d06069f314558ee0dcc02af5027ea2b..b1d55f2aafecceba4f882c9e63ac1207b30a2b3b 100644
--- a/oneflow/python/test/modules/test_tensor_ops.py
+++ b/oneflow/python/test/modules/test_tensor_ops.py
@@ -37,10 +37,6 @@ def _test_long(test_case, shape, device, dtype):
     test_case.assertEqual(input.dtype, flow.int64)


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestTensorOps(flow.unittest.TestCase):
     def test_type_as(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_tensor_to.py b/oneflow/python/test/modules/test_tensor_to.py
index 816c734d1deb9f95bd1e68c74f1384768b84cced..4c91b91a496b5bcf0b5a998007e2660b1b08ec9f 100644
--- a/oneflow/python/test/modules/test_tensor_to.py
+++ b/oneflow/python/test/modules/test_tensor_to.py
@@ -20,10 +20,6 @@ import numpy as np
 import oneflow.experimental as flow


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestTo(flow.unittest.TestCase):
     def test_tensor_to_h2d(test_case):
         input = flow.Tensor(np.random.randn(2, 3, 4, 5))
diff --git a/oneflow/python/test/modules/test_tile.py b/oneflow/python/test/modules/test_tile.py
index 12ff77fbdf23bec2391fd56282f5f0f962d6cc7c..2690397b078c5fa8d13fb599a2a0262756a97547 100644
--- a/oneflow/python/test/modules/test_tile.py
+++ b/oneflow/python/test/modules/test_tile.py
@@ -153,10 +153,6 @@ def _test_tile_same_dim_backward(test_case, device):
     test_case.assertTrue(np.array_equal(input.grad.numpy(), np_grad))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestTile(flow.unittest.TestCase):
     def test_tile(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_transpose.py b/oneflow/python/test/modules/test_transpose.py
index bd4885d40c40c4d9d6635429f7bdc5e2480259e9..127d938dbab4f6eedf5bcb66f7c4298f5a900694 100644
--- a/oneflow/python/test/modules/test_transpose.py
+++ b/oneflow/python/test/modules/test_transpose.py
@@ -73,10 +73,6 @@ def _test_transpose_backward_v2(test_case, device):
     test_case.assertTrue(np.allclose(x.grad.numpy(), np.ones((2, 3, 4, 5)), 1e-5, 1e-5))


-@unittest.skipIf(
-    not flow.unittest.env.eager_execution_enabled(),
-    ".numpy() doesn't work in lazy mode",
-)
 class TestTranspose(flow.unittest.TestCase):
     def test_transpose(test_case):
         arg_dict = OrderedDict()
diff --git a/oneflow/python/test/modules/test_triu.py b/oneflow/python/test/modules/test_triu.py
index e03c43d3d057fac7a9e477ffa208367ac7cff8ff..253157b6e03712a2479fd86531abf4f264f988a8 100644
--- a/oneflow/python/test/modules/test_triu.py
+++ b/oneflow/python/test/modules/test_triu.py
@@ def _test_triu(test_case, diagonal, device): test_case.assertTrue(np.allclose(input_tensor.grad.numpy(), np_grad, 1e-6, 1e-6)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestTriu(flow.unittest.TestCase): def test_triu(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_unsqueeze.py b/oneflow/python/test/modules/test_unsqueeze.py index b07a416d28d2e3bc55e6177ad6537e57e8c00f5d..6eb023c8e3f68355acc814e5c82850a22ed3ea0c 100644 --- a/oneflow/python/test/modules/test_unsqueeze.py +++ b/oneflow/python/test/modules/test_unsqueeze.py @@ -55,10 +55,6 @@ def _test_unsqueeze_backward(test_case, device): test_case.assertTrue(np.allclose(x.grad.numpy(), np.ones((2, 3, 4, 5)), 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestUnsqueeze(flow.unittest.TestCase): def test_unsqueeze(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_upsample2d.py b/oneflow/python/test/modules/test_upsample2d.py index 690c94ed4426b45362043a1f5eb97b09287724cc..525daff6f8635895d3b42a8fa69f265c348841be 100644 --- a/oneflow/python/test/modules/test_upsample2d.py +++ b/oneflow/python/test/modules/test_upsample2d.py @@ -374,10 +374,6 @@ def _test_upsample_bilinear_align_corners(test_case, device): test_case.assertTrue(np.allclose(input.grad.numpy(), np_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestUpsample2d(flow.unittest.TestCase): def test_upsample2d(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_view.py b/oneflow/python/test/modules/test_view.py index 6503768f639441b72b30e9672909317826be869f..0b1a5c6b280765f4762de2f5c14a7b4b6994cdbb 100644 --- a/oneflow/python/test/modules/test_view.py +++ b/oneflow/python/test/modules/test_view.py @@ -71,10 +71,6 @@ def _test_view_backward(test_case, device): test_case.assertTrue(np.allclose(np_grad, input.grad.numpy(), 1e-4, 1e-4)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestModule(flow.unittest.TestCase): def test_view(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_where.py b/oneflow/python/test/modules/test_where.py index 3b77dac6aac976349f1126a7dad7dda9a488d876..b875d24ccc37f17994b818e99564e2399e38d721 100644 --- a/oneflow/python/test/modules/test_where.py +++ b/oneflow/python/test/modules/test_where.py @@ -172,10 +172,6 @@ def _test_where_broadcast_x_backward(test_case, device): test_case.assertTrue(np.allclose(x.grad.numpy(), x_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) class TestWhere(flow.unittest.TestCase): def test_where(test_case): arg_dict = OrderedDict() diff --git a/oneflow/python/test/modules/test_zeropad2d.py b/oneflow/python/test/modules/test_zeropad2d.py index 6067430a62479a2e02c2efae5dec18f53258323c..334bc631249ee53f4b6da1459b9317893ae9feb8 100644 --- a/oneflow/python/test/modules/test_zeropad2d.py +++ b/oneflow/python/test/modules/test_zeropad2d.py @@ -96,10 +96,6 @@ def _test_ZeroPad2d(test_case, shape, padding, value, device): test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out_grad, 1e-5, 1e-5)) -@unittest.skipIf( - not flow.unittest.env.eager_execution_enabled(), - ".numpy() doesn't work in lazy mode", -) 
diff --git a/oneflow/python/test/tensor/test_tensor.py b/oneflow/python/test/tensor/test_tensor.py
index fabf56045191584691a3adb7d2cc465938a51098..a7b666bec198fd8fa81ddfa71cb8d00e5de254b9 100644
--- a/oneflow/python/test/tensor/test_tensor.py
+++ b/oneflow/python/test/tensor/test_tensor.py
@@ -25,10 +25,6 @@ import oneflow.typing as oft
 
 @flow.unittest.skip_unless_1n1d()
 class TestTensor(flow.unittest.TestCase):
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_numpy_and_default_dtype(test_case):
         shape = (2, 3, 4, 5)
         tensor = flow.Tensor(*shape)
@@ -38,10 +34,6 @@ class TestTensor(flow.unittest.TestCase):
             np.array_equal(tensor.numpy(), np.ones(shape, dtype=np.float32))
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_copy_to_and_from_numpy(test_case):
         np_arr = np.array([4, 6], dtype=np.float32)
         tensor = flow.Tensor(np_arr, dtype=flow.float32)
@@ -53,10 +45,6 @@ class TestTensor(flow.unittest.TestCase):
         test_case.assertTrue(np.array_equal(tensor.numpy(), np_arr))
         test_case.assertEqual(np.int32, tensor.numpy().dtype)
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_construct_from_numpy_or_list(test_case):
         shape = (2, 3, 4, 5)
         np_arr = np.random.rand(*shape).astype(np.float32)
@@ -77,10 +65,6 @@ class TestTensor(flow.unittest.TestCase):
         test_case.assertFalse(np_arr.flags["C_CONTIGUOUS"])
         test_case.assertTrue(np.array_equal(tensor.numpy(), np_arr))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_construct_from_another_tensor(test_case):
         shape = (2, 3, 4, 5)
         np_arr = np.random.rand(*shape).astype(np.float32)
@@ -89,10 +73,6 @@ class TestTensor(flow.unittest.TestCase):
         test_case.assertEqual(output.dtype, flow.float32)
         test_case.assertTrue(np.array_equal(output.numpy(), np_arr))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_init_methods(test_case):
         # test float dtype init
         shape = (2, 3, 4, 5)
@@ -152,10 +132,6 @@ class TestTensor(flow.unittest.TestCase):
         test_case.assertTrue(not x.is_cuda)
         x.determine()
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_device(test_case):
         shape = (2, 3, 4, 5)
         x = flow.Tensor(*shape)
@@ -165,10 +141,6 @@ class TestTensor(flow.unittest.TestCase):
         x = flow.Tensor(*shape, device=flow.device("cpu"))
         test_case.assertTrue(not x.is_cuda)
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_autograd_related_methods(test_case):
         shape = (2, 3, 4, 5)
         x = flow.Tensor(*shape)
@@ -204,10 +176,6 @@ class TestTensor(flow.unittest.TestCase):
         test_case.assertIsNone(x.grad)
         w.backward(gradient=grad, retain_graph=True)
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_register_hook(test_case):
         shape = (2, 3)
         x = flow.Tensor(*shape, requires_grad=True)
@@ -236,10 +204,6 @@ class TestTensor(flow.unittest.TestCase):
         y.backward()
         test_case.assertTrue(np.array_equal(grad_nonlocal.numpy(), np.ones(shape) * 3))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_user_defined_data(test_case):
         list_data = [5, 5]
         tuple_data = (5, 5)
@@ -252,10 +216,6 @@ class TestTensor(flow.unittest.TestCase):
         test_case.assertTrue(np.array_equal(y.numpy(), 5 * np.ones(y.shape)))
         test_case.assertTrue(np.array_equal(z.numpy(), 5 * np.ones(z.shape)))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_mirrored_tensor_and_op(test_case):
         x1 = flow.Tensor([[1.0, 2.0]])
         test_case.assertEqual(x1.dtype, flow.float32)
@@ -277,37 +237,21 @@ class TestTensor(flow.unittest.TestCase):
             np.array_equal(y.numpy(), np.array([[5.0]], dtype=np.float32))
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_to_list(test_case):
         list_data = [[1.0, 3.0], [5.0, 6.0]]
         input = flow.Tensor(list_data)
         test_case.assertEqual(list_data, input.tolist())
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_nelement(test_case):
         shape = (2, 3, 4)
         input = flow.Tensor(*shape)
         test_case.assertEqual(input.nelement(), 24)
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_numel(test_case):
         shape = (2, 3, 4, 5)
         input = flow.Tensor(*shape)
         test_case.assertEqual(input.numel(), 120)
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_print(test_case):
         shape = (2, 3, 4, 5)
         input = flow.Tensor(*shape)
@@ -324,10 +268,6 @@ class TestTensor(flow.unittest.TestCase):
         requires_grad_input_str = str(requires_grad_input)
         test_case.assertTrue("requires_grad=" in requires_grad_input_str)
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_indexing(test_case):
         class SliceExtracter:
             def __getitem__(self, key):
@@ -361,10 +301,6 @@ class TestTensor(flow.unittest.TestCase):
         x = flow.Tensor(2, 3, 4)
         compare_setitem_with_numpy(x, se[1, :, 2], v)
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_div(test_case):
         x = flow.Tensor(np.random.randn(1, 1))
         y = flow.Tensor(np.random.randn(2, 3))
@@ -387,10 +323,6 @@ class TestTensor(flow.unittest.TestCase):
         np_out = np.divide(3, x.numpy())
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_mul(test_case):
         x = flow.Tensor(np.random.randn(1, 1))
         y = flow.Tensor(np.random.randn(2, 3))
@@ -408,10 +340,6 @@ class TestTensor(flow.unittest.TestCase):
         np_out = np.multiply(3, x.numpy())
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_add_tensor_method(test_case):
         x = flow.Tensor(np.random.randn(1, 1))
         y = flow.Tensor(np.random.randn(2, 3))
@@ -429,10 +357,6 @@ class TestTensor(flow.unittest.TestCase):
         np_out = np.add(3, x.numpy())
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_sub_tensor_method(test_case):
         x = flow.Tensor(np.random.randn(1, 1))
         y = flow.Tensor(np.random.randn(2, 3))
@@ -450,40 +374,24 @@ class TestTensor(flow.unittest.TestCase):
         np_out = np.subtract(3, x.numpy())
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_sum(test_case):
         input = flow.Tensor(np.random.randn(4, 5, 6), dtype=flow.float32)
         of_out = input.sum(dim=(2, 1))
         np_out = np.sum(input.numpy(), axis=(2, 1))
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_asinh(test_case):
         input = flow.Tensor(np.random.randn(4, 5, 6), dtype=flow.float32)
         of_out = input.asinh()
         np_out = np.arcsinh(input.numpy())
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_arcsinh(test_case):
         input = flow.Tensor(np.random.randn(4, 5, 6), dtype=flow.float32)
         of_out = input.arcsinh()
         np_out = np.arcsinh(input.numpy())
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_asin(test_case):
         input = flow.Tensor(np.random.random((4, 5, 6)) - 0.5, dtype=flow.float32)
         of_out = input.asin()
@@ -492,10 +400,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5, equal_nan=True)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_arcsin(test_case):
         input = flow.Tensor(np.random.random((4, 5, 6)) - 0.5, dtype=flow.float32)
         of_out = input.arcsin()
@@ -504,40 +408,24 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5, equal_nan=True)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_mean(test_case):
         input = flow.Tensor(np.random.randn(2, 3), dtype=flow.float32)
         of_out = input.mean(dim=0)
         np_out = np.mean(input.numpy(), axis=0)
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_neg(test_case):
         input = flow.Tensor(np.random.randn(2, 3), dtype=flow.float32)
         of_out = -input
         np_out = -input.numpy()
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_negative(test_case):
         input = flow.Tensor(np.random.randn(2, 3), dtype=flow.float32)
         of_out = input.negative()
         np_out = -input.numpy()
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_greater(test_case):
         input1 = flow.Tensor(
             np.array([1, 1, 4]).astype(np.float32), dtype=flow.float32,
@@ -549,10 +437,6 @@ class TestTensor(flow.unittest.TestCase):
         np_out = np.greater(input1.numpy(), input2.numpy())
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_less(test_case):
         input1 = flow.Tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32)
         input2 = flow.Tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32)
@@ -560,10 +444,6 @@ class TestTensor(flow.unittest.TestCase):
         np_out = np.less(input1.numpy(), input2.numpy())
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_slice(test_case):
         x = np.random.randn(2, 3, 4, 5).astype(np.float32)
         input = flow.Tensor(x)
@@ -574,10 +454,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(input[0, :, 0:2].numpy(), x[0, :, 0:2], 1e-5, 1e-5)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_logical_slice_assign(test_case):
         x = np.random.randn(2, 3, 4, 5).astype(np.float32)
         input = flow.Tensor(x)
@@ -597,20 +473,12 @@ class TestTensor(flow.unittest.TestCase):
         x[0] = 0
         test_case.assertTrue(np.allclose(input.numpy(), x, 1e-5, 1e-5))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_zeros_(test_case):
         shape = (2, 3)
         x = flow.Tensor(np.random.randn(*shape), dtype=flow.float32)
         x.zeros_()
         test_case.assertTrue(np.array_equal(x.numpy(), np.zeros(shape)))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_construct_small_tensor(test_case):
         shape = (2, 3, 4, 5)
         np_arr = np.random.rand(*shape).astype(np.float32)
@@ -639,10 +507,6 @@ class TestTensor(flow.unittest.TestCase):
         test_case.assertEqual(tensor.dtype, flow.float32)
         test_case.assertTrue(np.allclose(tensor.numpy(), np.array(scalar), 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_floor(test_case):
         input = flow.Tensor(np.random.randn(4, 5, 6), dtype=flow.float32)
         of_out = input.floor()
@@ -651,10 +515,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5, equal_nan=True)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_round(test_case):
         shape = (2, 3)
         np_input = np.random.randn(*shape)
@@ -670,10 +530,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(of_input.grad.numpy(), np.zeros(shape), 1e-4, 1e-4)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_where(test_case):
         x = flow.Tensor(
             np.array([[-0.4620, 0.3139], [0.3898, -0.7197], [0.0478, -0.1657]]),
@@ -685,10 +541,6 @@ class TestTensor(flow.unittest.TestCase):
         np_out = np.array([[1.0000, 0.3139], [0.3898, 1.0000], [0.0478, 1.0000]])
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_equal(test_case):
         arr1 = np.random.randint(1, 10, size=(2, 3, 4, 5))
         arr2 = np.random.randint(1, 10, size=(2, 3, 4, 5))
@@ -699,10 +551,6 @@ class TestTensor(flow.unittest.TestCase):
         np_out = np.equal(arr1, arr2)
         test_case.assertTrue(np.array_equal(of_out.numpy(), np_out))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def _test_tensor_atan(test_case, shape, device):
         np_input = np.random.randn(*shape)
         of_input = flow.Tensor(
@@ -723,10 +571,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(of_input.grad.numpy(), np_out_grad, 1e-5, 1e-5, equal_nan=True)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def _test_tensor_arctan(test_case, shape, device):
         np_input = np.random.randn(*shape)
         of_input = flow.Tensor(
@@ -747,10 +591,6 @@ class TestTensor(flow.unittest.TestCase):
            np.allclose(of_input.grad.numpy(), np_out_grad, 1e-5, 1e-5, equal_nan=True)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_detach(test_case):
         shape = (2, 3, 4, 5)
         x = flow.Tensor(
@@ -763,30 +603,18 @@ class TestTensor(flow.unittest.TestCase):
         test_case.assertEqual(z.is_leaf, True)
         test_case.assertEqual(z.grad_fn, None)
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_clamp_(test_case):
         input = flow.Tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32)
         of_out = input.clamp(0.1, 0.5)
         np_out = np.clip(input.numpy(), 0.1, 0.5)
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_clip_(test_case):
         input = flow.Tensor(np.random.randn(2, 6, 5, 3), dtype=flow.float32)
         of_out = input.clip(0.1, 0.5)
         np_out = np.clip(input.numpy(), 0.1, 0.5)
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def _test_cast_tensor_function(test_case):
         shape = (2, 3, 4, 5)
         np_arr = np.random.randn(*shape).astype(np.float32)
@@ -795,20 +623,12 @@ class TestTensor(flow.unittest.TestCase):
         np_out = np_arr.astype(np.int8)
         test_case.assertTrue(np.array_equal(output.numpy(), np_out))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def _test_sin_tensor_function(test_case, shape, device):
         input = flow.Tensor(np.random.randn(2, 3, 4, 5))
         of_out = input.sin()
         np_out = np.sin(input.numpy())
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_cos_tensor_function(test_case):
         arr = np.random.randn(2, 3, 4, 5)
         input = flow.Tensor(arr, dtype=flow.float32)
@@ -816,10 +636,6 @@ class TestTensor(flow.unittest.TestCase):
         of_out = input.cos()
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_std_tensor_function(test_case):
         np_arr = np.random.randn(9, 8, 7, 6)
         input = flow.Tensor(np_arr)
@@ -827,10 +643,6 @@ class TestTensor(flow.unittest.TestCase):
         np_out = np.std(np_arr, axis=1)
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_sqrt_tensor_function(test_case):
         input_arr = np.random.rand(1, 6, 3, 8)
         np_out = np.sqrt(input_arr)
@@ -840,10 +652,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5, equal_nan=True)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_rsqrt_tensor_function(test_case):
         np_arr = np.random.rand(3, 2, 5, 7)
         np_out = 1 / np.sqrt(np_arr)
@@ -853,10 +661,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5, equal_nan=True)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_square_tensor_function(test_case):
         np_arr = np.random.randn(2, 7, 7, 3)
         np_out = np.square(np_arr)
@@ -866,10 +670,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5, equal_nan=True)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_addmm_(test_case):
         input = flow.Tensor(np.random.randn(2, 6), dtype=flow.float32)
         mat1 = flow.Tensor(np.random.randn(2, 3), dtype=flow.float32)
@@ -878,10 +678,6 @@ class TestTensor(flow.unittest.TestCase):
         np_out = np.add(2 * input.numpy(), 1 * np.matmul(mat1.numpy(), mat2.numpy()))
         test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-5, 1e-5))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_norm_tensor_function(test_case):
         input = flow.Tensor(
             np.array([[-4.0, -3.0, -2.0], [-1.0, 0.0, 1.0], [2.0, 3.0, 4.0]]),
@@ -899,10 +695,6 @@ class TestTensor(flow.unittest.TestCase):
         test_case.assertTrue(np.allclose(of_out_2.numpy(), np_out_2, 1e-5, 1e-5))
         test_case.assertTrue(np.allclose(of_out_3.numpy(), np_out_3, 1e-5, 1e-5))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_pow_tensor_function(test_case):
         input = flow.Tensor(np.array([1, 2, 3, 4, 5, 6]), dtype=flow.float32)
         of_out = input.pow(2.1)
@@ -912,10 +704,6 @@ class TestTensor(flow.unittest.TestCase):
         of_out_magic = input ** 2.1
         test_case.assertTrue(np.allclose(of_out_magic.numpy(), np_out, 1e-5, 1e-5))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_atanh(test_case):
         np_input = np.random.random((2, 3)) - 0.5
         of_input = flow.Tensor(np_input, dtype=flow.float32, requires_grad=True)
@@ -934,10 +722,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(of_input.grad.numpy(), np_out_grad, 1e-4, 1e-4, equal_nan=True)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_arctanh(test_case):
         np_input = np.random.random((2, 3)) - 0.5
         of_input = flow.Tensor(np_input, dtype=flow.float32, requires_grad=True)
@@ -955,10 +739,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(of_input.grad.numpy(), np_out_grad, 1e-4, 1e-4, equal_nan=True)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_tan(test_case):
         np_input = np.random.random((2, 3)) - 0.5
         of_input = flow.Tensor(np_input, dtype=flow.float32, requires_grad=True)
@@ -976,10 +756,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(of_input.grad.numpy(), np_out_grad, 1e-4, 1e-4, equal_nan=True)
        )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_acos(test_case):
         input = flow.Tensor(np.random.rand(8, 11, 9, 7) - 0.5, requires_grad=True,)
         of_out = input.acos()
@@ -994,10 +770,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(input.grad.numpy(), np_grad, 1e-4, 1e-4, equal_nan=True)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_ceil(test_case):
         x = flow.Tensor(np.random.randn(2, 3), requires_grad=True)
         of_out = x.ceil()
@@ -1008,10 +780,6 @@ class TestTensor(flow.unittest.TestCase):
         of_out.backward()
         test_case.assertTrue(np.allclose(x.grad.numpy(), np.zeros((2, 3)), 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_expm1(test_case):
         x = flow.Tensor(np.random.randn(2, 3), requires_grad=True)
         of_out = x.expm1()
@@ -1022,10 +790,6 @@ class TestTensor(flow.unittest.TestCase):
         of_out.backward()
         test_case.assertTrue(np.allclose(x.grad.numpy(), np.exp(x.numpy()), 1e-4, 1e-4))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_mish(test_case):
         def np_mish(x):
             f = 1 + np.exp(x)
@@ -1046,10 +810,6 @@ class TestTensor(flow.unittest.TestCase):
         of_out.backward()
         test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_grad, 1e-5, 1e-5))
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_triu(test_case):
         def np_triu(x, diagonal):
             y = np.triu(x, diagonal)
@@ -1071,10 +831,6 @@ class TestTensor(flow.unittest.TestCase):
             np.allclose(of_input.grad.numpy(), np_grad, 1e-5, 1e-5)
         )
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_grad_assignment(test_case):
         np_input = np.random.randn(2, 4, 5, 6)
         of_input = flow.Tensor(np_input, dtype=flow.float32, requires_grad=True)
@@ -1091,10 +847,6 @@ class TestTensor(flow.unittest.TestCase):
         of_input.grad = None
         test_case.assertTrue(of_input.grad is None)
 
-    @unittest.skipIf(
-        not flow.unittest.env.eager_execution_enabled(),
-        "numpy doesn't work in lazy mode",
-    )
     def test_tensor_grad_assignment_sum(test_case):
         np_input = np.random.randn(1, 5, 7, 3)
         of_input = flow.Tensor(np_input, dtype=flow.float32, requires_grad=True)
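The test_tensor.py hunks strip the same guard from every method of TestTensor, since the eager-only `.numpy()` restriction no longer applies. Each of those methods follows one pattern: build a tensor from a NumPy array, run the op eagerly, and compare against the NumPy reference. A minimal sketch of that round-trip, assuming an eager-enabled build (`oneflow.experimental` is the module these test files import as `flow`):

import numpy as np
import oneflow.experimental as flow

# The round-trip every un-skipped test relies on: an eager tensor exposes
# .numpy() immediately, so any op result can be checked against NumPy.
np_arr = np.random.rand(2, 3).astype(np.float32)
tensor = flow.Tensor(np_arr)
assert np.array_equal(tensor.numpy(), np_arr)

With the guard gone, a lazy-mode regression now shows up as a hard failure in these suites instead of a silent skip.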
+""" +from __future__ import absolute_import + +import oneflow._oneflow_internal + + +Size = oneflow._oneflow_internal.Size +device = oneflow._oneflow_internal.device +placement = oneflow._oneflow_internal.placement +no_grad = oneflow._oneflow_internal.autograd.no_grad + +# define dtype at the begining of oneflow init + +locals()["dtype"] = oneflow._oneflow_internal.dtype +locals()["char"] = oneflow._oneflow_internal.char +locals()["float16"] = oneflow._oneflow_internal.float16 +locals()["half"] = oneflow._oneflow_internal.float16 +locals()["float32"] = oneflow._oneflow_internal.float32 +locals()["float"] = oneflow._oneflow_internal.float +locals()["double"] = oneflow._oneflow_internal.double +locals()["float64"] = oneflow._oneflow_internal.float64 +locals()["int8"] = oneflow._oneflow_internal.int8 +locals()["int"] = oneflow._oneflow_internal.int32 +locals()["int32"] = oneflow._oneflow_internal.int32 +locals()["int64"] = oneflow._oneflow_internal.int64 +locals()["long"] = oneflow._oneflow_internal.int64 +locals()["uint8"] = oneflow._oneflow_internal.uint8 +locals()["record"] = oneflow._oneflow_internal.record +locals()["tensor_buffer"] = oneflow._oneflow_internal.tensor_buffer + +from oneflow.core.job.job_set_pb2 import ConfigProto +from oneflow.core.job.job_conf_pb2 import JobConfigProto + +from oneflow.compatible.single_client.python.framework import session_util +from oneflow.compatible.single_client.python.framework import session_context +from oneflow.compatible.single_client.python.framework import env_util + + +oneflow._oneflow_internal.DestroyEnv() +oneflow._oneflow_internal.SetIsMultiClient(False) +env_util.init_default_physical_env() +session_context.OpenDefaultSession( + session_util.Session(oneflow._oneflow_internal.NewSessionId()) +) +oneflow._oneflow_internal.EnableEagerEnvironment(False) + +del env_util +del session_util +del session_context + + +import oneflow.compatible.single_client.python_gen.__export_symbols__ + +import oneflow.compatible.single_client.python.framework.c_api_util + +# register ForeignCallback +from oneflow.compatible.single_client.python.framework import register_python_callback +from oneflow.compatible.single_client.python.framework import python_callback + +oneflow._oneflow_internal.RegisterForeignCallbackOnlyOnce( + python_callback.global_python_callback +) +del python_callback +del register_python_callback + +# register Watcher +from oneflow.compatible.single_client.python.framework import watcher + +oneflow._oneflow_internal.RegisterWatcherOnlyOnce(watcher._global_watcher) +del watcher + +# register BoxingUtil +from oneflow.compatible.single_client.python.eager import boxing_util + +oneflow._oneflow_internal.deprecated.RegisterBoxingUtilOnlyOnce( + boxing_util._global_boxing_util +) +del boxing_util + +# register RegisterPyKernels +from oneflow.compatible.single_client.python.ops.util import custom_op_module + +oneflow._oneflow_internal.RegisterPyKernels( + custom_op_module._python_kernel_reg.kernels_ +) +del custom_op_module + +from oneflow.compatible.single_client.python.framework import register_class_method_util + +register_class_method_util.RegisterMethod4Class() +del register_class_method_util + +INVALID_SPLIT_AXIS = oneflow._oneflow_internal.INVALID_SPLIT_AXIS + +import atexit +from oneflow.compatible.single_client.python.framework.session_context import ( + TryCloseDefaultSession, +) + +atexit.register(TryCloseDefaultSession) + +del TryCloseDefaultSession +del atexit + +import sys + +__original_exit__ = sys.exit + + +def 
diff --git a/oneflow/single_client_main.py b/oneflow/single_client_main.py
new file mode 100644
index 0000000000000000000000000000000000000000..c6785e2e41d3382e3c535fe61f65c2726286def2
--- /dev/null
+++ b/oneflow/single_client_main.py
@@ -0,0 +1,47 @@
+"""
+Copyright 2020 The OneFlow Authors. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+from __future__ import absolute_import
+
+import os
+import argparse
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "--start_worker", default=False, action="store_true", required=False
+)
+parser.add_argument("--env_proto", type=str, required=False)
+args = parser.parse_args()
+
+
+def StartWorker(env_proto):
+    import oneflow._oneflow_internal
+
+    oneflow._oneflow_internal.InitEnv(env_proto, False)
+
+
+def main():
+    start_worker = args.start_worker
+    if start_worker:
+        env_proto = args.env_proto
+        assert os.path.isfile(
+            env_proto
+        ), "env_proto not found, please check your env_proto path: {}".format(env_proto)
+        with open(env_proto, "rb") as f:
+            StartWorker(f.read())
+
+
+if __name__ == "__main__":
+    main()
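single_client_main.py is the worker-side entry point: launched with --start_worker, it reads a serialized EnvProto from disk and hands it to InitEnv. A sketch of how a launcher might spawn such a worker; the script path and the env.proto location are illustrative assumptions, not taken from this diff:

import subprocess
import sys

# Spawn a single-client worker using the two flags the script defines;
# /tmp/env.proto is an assumed path to a serialized EnvProto file.
subprocess.check_call(
    [
        sys.executable,
        "oneflow/single_client_main.py",
        "--start_worker",
        "--env_proto",
        "/tmp/env.proto",
    ]
)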
diff --git a/tools/check_src.py b/tools/check_src.py
index 82bf5bc8200679fe865c63eea965729c2e989dce..b7a9b396118914493c1dea3e8d738079dee2d277 100644
--- a/tools/check_src.py
+++ b/tools/check_src.py
@@ -39,16 +39,12 @@ def check_unwanted_test_scripts(python_test_dir=None, allowed=None):
 
 check_unwanted_test_scripts(
     python_test_dir=os.path.join(src_root, "oneflow/python/test"),
-    allowed=[
-        "customized",
-        "custom_ops",
-        "dataloader",
-        "graph",
-        "models",
-        "modules",
-        "ops",
-        "serving",
-        "tensor",
-        "xrt",
-    ],
+    allowed=["custom_ops", "dataloader", "graph", "models", "modules", "tensor",],
+)
+
+check_unwanted_test_scripts(
+    python_test_dir=os.path.join(
+        src_root, "oneflow/compatible_single_client_python/test"
+    ),
+    allowed=["models", "ops", "serving", "xrt",],
 )
diff --git a/tools/conver_single_client_name_space.py b/tools/conver_single_client_name_space.py
new file mode 100644
index 0000000000000000000000000000000000000000..8770a7bd1012476b16001ce2e3c27cd127895f9d
--- /dev/null
+++ b/tools/conver_single_client_name_space.py
@@ -0,0 +1,29 @@
+import sys
+import os
+import multiprocessing
+
+project_source_dir = sys.argv[1]
+single_client_python_dir = project_source_dir + "/compatible"
+
+
+single_client_python_files = []
+for root, dirs, files in os.walk(project_source_dir):
+    for file in files:
+        file_path = os.path.join(root, file)
+        if file_path.endswith(".py"):
+            single_client_python_files.append(file_path)
+
+assert len(single_client_python_files) > 0
+
+
+def convert_name_sapce(python_file):
+    os.system(
+        "sed 's/compatible_single_client_python/compatible\.single_client\.python/g' -i "
+        + python_file
+    )
+
+
+with multiprocessing.Pool(processes=multiprocessing.cpu_count()) as pool:
+    pool.map(convert_name_sapce, single_client_python_files)
+    pool.close()
+    pool.join()
diff --git a/tools/generate_oneflow_api.py b/tools/generate_oneflow_api.py
index c4e392848f582ad3296c1b609dfa3fa0e0704195..1218decf4e7ca7f347b4f81fb85ac864b52587f1 100644
--- a/tools/generate_oneflow_api.py
+++ b/tools/generate_oneflow_api.py
@@ -4,6 +4,7 @@ import argparse
 import inspect
 
 import oneflow
+import oneflow.compatible.single_client
 
 parser = argparse.ArgumentParser()
 parser.add_argument("-root", "--root_path", type=str, required=True)
@@ -118,7 +119,6 @@ def include_submodule(modname):
 
 
 def include_export(api_name_base, symbol):
-    # print(symbol._IS_VALUE)
     if symbol.__name__ == api_name_base:
         output = ["from {} import {}".format(symbol.__module__, api_name_base)]
     else:
@@ -138,9 +138,9 @@ def include_export(api_name_base, symbol):
     return output
 
 
-def exported_symbols():
+def exported_symbols(module_name):
     for mod in sys.modules.values():
-        if mod.__name__.startswith("oneflow.python"):
+        if mod.__name__.startswith(module_name):
             for attr in dir(mod):
                 symbol = getattr(mod, attr)
                 if hasattr(symbol, "__dict__") and "_ONEFLOW_API" in vars(symbol):
@@ -148,10 +148,10 @@ def exported_symbols():
                     yield api_name, symbol, mod
 
 
-def collect_exports():
+def collect_exports(module_name):
     exports = {}
     api_name2module = {}
-    for api_name, symbol, module in exported_symbols():
+    for api_name, symbol, module in exported_symbols(module_name):
         has_another_symbol_exported = (
             api_name in exports and exports[api_name] != symbol
         )
@@ -180,8 +180,10 @@ def collect_exports():
 
 
 def main():
-    mod = collect_exports()
+    mod = collect_exports("oneflow.python")
     mod.dump(args.root_path, is_root=True)
+    mod = collect_exports("oneflow.compatible.single_client.python")
+    mod.dump(args.root_path + "/compatible/single_client", is_root=True)
 
 
 if __name__ == "__main__":
diff --git a/tools/generate_oneflow_symbols_export_file.py b/tools/generate_oneflow_symbols_export_file.py
index c6e4180a2ef75ed1f732ba5f48c780a396793f6a..ca6de5082073d9fb0671bf0af4f5d9501aec95cb 100644
--- a/tools/generate_oneflow_symbols_export_file.py
+++ b/tools/generate_oneflow_symbols_export_file.py
@@ -6,10 +6,11 @@ import importlib
 import platform
 
 
-project_source_dir = sys.argv[1]
-python_dir = project_source_dir + "/oneflow/python"
+python_dir = sys.argv[1]
 output_filepath = sys.argv[2]
 
+filemode = sys.argv[3]
+
 
 def GetImportPath(filepath):
     assert filepath.startswith(python_dir)
@@ -44,5 +45,8 @@ for py_script in RecursiveFindPythonFile(python_dir):
 
 python_scripts = "from __future__ import absolute_import\n"
 for filepath in import_filepaths:
-    python_scripts += "import oneflow.python.%s\n" % GetImportPath(filepath)
+    if filemode != "compatible":
+        python_scripts += "import oneflow.python.%s\n" % GetImportPath(filepath)
+    else:
+        python_scripts += "import oneflow.compatible.%s\n" % GetImportPath(filepath)
 open(output_filepath, "w").write(python_scripts)
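tools/conver_single_client_name_space.py shells out to sed to rewrite the flat compatible_single_client_python namespace into the dotted oneflow.compatible.single_client.python one, one subprocess per file. The same transformation in pure Python, shown as a sketch for clarity; since the pattern contains no regex metacharacters, plain str.replace suffices:

# Pure-Python equivalent of the `sed -i` call above, for a single file.
def convert_namespace(python_file):
    with open(python_file) as f:
        src = f.read()
    src = src.replace(
        "compatible_single_client_python", "compatible.single_client.python"
    )
    with open(python_file, "w") as f:
        f.write(src)

Fanning the per-file rewrites out over a multiprocessing.Pool, as the tool does, keeps the one-off conversion fast across the thousands of files in the tree.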