From 2e75393e58349143a14c1e35cc28f4acb1c2411a Mon Sep 17 00:00:00 2001
From: poohRui <32978179+poohRui@users.noreply.github.com>
Date: Thu, 8 Jul 2021 01:01:27 +0800
Subject: [PATCH] Support inplace operations (#5204)

* support inplace forward

* support inplace backward

* add test case

* add test case for clone

* inplace is not supported for leaf nodes

* refine clone

* add checks

* refine

* forbid clone with no grad

* Separate autograd meta to tensor (#5267)

* separate autograd meta

* minor fix

* fix acc_grad interface

* fix acc_grad with null

* minor fix

* inplace without clone

* refine

* minor fix

* remove maybe from constructor

* change from create to set

* fix merge bugs

* fix merge bug

* remove inplace flag in local_call_opkernel_phy_instr_operand

* remove outdated code

* refine code

* add JUST

* fix bug from merging master

* revert autograd engine input_grad check

* fix bug in tensor_hook

Co-authored-by: wyg1997 <wyg19970408@gmail.com>
Co-authored-by: Houjiang Chen <chenhoujiangcug@gmail.com>
Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com>
---
 oneflow/api/python/framework/tensor.cpp       |  4 +-
 oneflow/core/autograd/autograd_engine.cpp     | 39 ++++++----
 .../core/eager/opkernel_instruction_type.cpp  |  2 +-
 .../eager_mirrored_op_interpreter.cpp         | 15 ++--
 oneflow/core/framework/tensor.cpp             | 21 ++++-
 oneflow/core/framework/tensor.h               | 76 ++++++++++++++-----
 oneflow/core/framework/tensor_impl.cpp        | 62 ++++++++++-----
 oneflow/core/framework/tensor_impl.h          | 51 +++++++------
 oneflow/core/functional/functional_api.yaml   |  2 +-
 .../functional/impl/activation_functor.cpp    | 20 ++++-
 oneflow/python/framework/tensor.py            |  7 ++
 oneflow/python/nn/modules/activation.py       |  7 ++
 .../python/test/modules/test_activation.py    | 13 ++++
 oneflow/python/test/tensor/test_tensor.py     | 16 ++++
 14 files changed, 245 insertions(+), 90 deletions(-)

diff --git a/oneflow/api/python/framework/tensor.cpp b/oneflow/api/python/framework/tensor.cpp
index 332e21a73..f7249a663 100644
--- a/oneflow/api/python/framework/tensor.cpp
+++ b/oneflow/api/python/framework/tensor.cpp
@@ -215,6 +215,7 @@ void SpecializedDef(py::class_<MirroredTensor, Tensor, std::shared_ptr<MirroredT
   api->def("zeros_", &ApiEagerMirroredTensorZeros);
   api->def("_register_hook",
            [](const std::shared_ptr<MirroredTensor>& self, const AutogradMeta::Hook& hook) -> void {
+             if (!self->grad_fn_node()) { CHECK_JUST(AddAccumulateFunctionNode(self)); }
              self->mut_autograd_meta()->add_hook(hook);
            });
 }
@@ -256,9 +257,10 @@ void ExportTensor(py::module& m, const char* name) {
       // Methods of pytorch
       .def("retain_grad",
            [](T& t) {
-             if (!t.is_leaf()) { t.set_retain_grad(true); }
+             if (!t.is_leaf()) { t.set_retain_grad(true).GetOrThrow(); }
            })
       .def("detach", [](const T& t) { return t.api_detach().GetPtrOrThrow(); })
+      .def("clone", [](const T& t) { return t.api_clone().GetPtrOrThrow(); })
       // OneFlow tensor properties other than pytorch tensor
       .def_property_readonly("is_lazy", &T::is_lazy)
       .def_property_readonly("is_consistent", &T::is_consistent);
diff --git a/oneflow/core/autograd/autograd_engine.cpp b/oneflow/core/autograd/autograd_engine.cpp
index 122a776e6..a7b38fcff 100644
--- a/oneflow/core/autograd/autograd_engine.cpp
+++ b/oneflow/core/autograd/autograd_engine.cpp
@@ -64,8 +64,8 @@ StackFunctionNode::StackFunctionNode(
   input_meta_datas_.resize(inputs.size());
   next_functions_->reserve(inputs.size());
   for (int i = 0; i < inputs.size(); ++i) {
-    input_meta_datas_.at(i) = inputs.at(i)->mut_autograd_meta();
-    if (input_meta_datas_.at(i)->requires_grad()) {
+    if (inputs.at(i)->requires_grad()) {
+      input_meta_datas_.at(i) = inputs.at(i)->mut_autograd_meta();
       next_functions_->emplace_back(inputs.at(i)->mut_grad_fn_node());
     }
   }
@@ -73,6 +73,9 @@ StackFunctionNode::StackFunctionNode(
   output_meta_datas_.resize(outputs.size());
   output_tensor_infos_.reserve(outputs.size());
   for (int i = 0; i < outputs.size(); ++i) {
+    const auto& autograd_meta =
+        NewAutogradMeta(outputs.at(i)->requires_grad(), outputs.at(i)->is_leaf());
+    outputs.at(i)->set_autograd_meta(autograd_meta);
     output_meta_datas_.at(i) = outputs.at(i)->mut_autograd_meta();
     output_tensor_infos_.emplace_back(TensorInfo(*outputs.at(i)));
   }
@@ -126,6 +129,7 @@ Maybe<bool> FunctionNode::Apply(bool create_graph) {
   JUST((*backward_fn_)(output_grads, &input_grads, create_graph));
   for (int i = 0; i < input_meta_datas_.size(); ++i) {
     if (input_grads.at(i)) {
+      CHECK_NOTNULL_OR_RETURN(input_meta_datas_.at(i));
       JUST(input_meta_datas_.at(i)->now_grad_arg()->PushPartialTensor(input_grads.at(i)));
     }
   }
@@ -148,7 +152,7 @@ Maybe<void> StackAutogradEngine::RunBackwardAndSaveGrads4LeafTensor(const Tensor
                                                                     bool create_graph) {
   ClearReleasedFunctionNodes();
   for (int i = 0; i < outputs.size(); ++i) {
-    JUST(outputs.at(i)->now_grad_arg()->PushPartialTensor(out_grads.at(i)));
+    JUST(JUST(outputs.at(i)->now_grad_arg())->PushPartialTensor(out_grads.at(i)));
   }
   // Runs each FunctionNode
   for (const auto& weak_func_node : node_list_) {
@@ -173,10 +177,10 @@ Maybe<TensorTuple> StackAutogradEngine::RunBackwardAndReturnInputsTensorGrad(
   std::vector<bool> ori_retain_grad(inputs.size());
   for (int i = 0; i < inputs.size(); ++i) {
     ori_retain_grad.at(i) = inputs.at(i)->retain_grad();
-    inputs.at(i)->set_retain_grad(true);
+    JUST(inputs.at(i)->set_retain_grad(true));
   }
   for (int i = 0; i < outputs.size(); ++i) {
-    JUST(outputs.at(i)->now_grad_arg()->PushPartialTensor(out_grads.at(i)));
+    JUST(JUST(outputs.at(i)->now_grad_arg())->PushPartialTensor(out_grads.at(i)));
   }
   // Runs each FunctionNode
   for (const auto& weak_func_node : node_list_) {
@@ -190,10 +194,10 @@ Maybe<TensorTuple> StackAutogradEngine::RunBackwardAndReturnInputsTensorGrad(
   }
   // Gets input grads and resume retain_grad
   for (int i = 0; i < inputs.size(); ++i) {
-    input_now_grads->at(i) = inputs.at(i)->acc_grad();
+    input_now_grads->at(i) = JUST(inputs.at(i)->acc_grad());
     if (!ori_retain_grad.at(i)) {
-      inputs.at(i)->set_acc_grad(nullptr);
-      inputs.at(i)->set_retain_grad(false);
+      JUST(inputs.at(i)->set_acc_grad(nullptr));
+      JUST(inputs.at(i)->set_retain_grad(false));
     }
   }
   if (!retain_graph) { ClearEngine(); }
@@ -241,8 +245,8 @@ GraphFunctionNode::GraphFunctionNode(
   input_meta_datas_.resize(inputs.size());
   next_functions_->reserve(inputs.size());
   for (int i = 0; i < inputs.size(); ++i) {
-    input_meta_datas_.at(i) = inputs.at(i)->mut_autograd_meta();
-    if (input_meta_datas_.at(i)->requires_grad()) {
+    if (inputs.at(i)->requires_grad()) {
+      input_meta_datas_.at(i) = inputs.at(i)->mut_autograd_meta();
       next_functions_->emplace_back(inputs.at(i)->mut_grad_fn_node());
     }
   }
@@ -250,6 +254,9 @@ GraphFunctionNode::GraphFunctionNode(
   output_meta_datas_.resize(outputs.size());
   output_tensor_infos_.reserve(outputs.size());
   for (int i = 0; i < outputs.size(); ++i) {
+    const auto& autograd_meta =
+        NewAutogradMeta(outputs.at(i)->requires_grad(), outputs.at(i)->is_leaf());
+    outputs.at(i)->set_autograd_meta(autograd_meta);
     output_meta_datas_.at(i) = outputs.at(i)->mut_autograd_meta();
     output_tensor_infos_.emplace_back(TensorInfo(*outputs.at(i)));
   }
@@ -373,7 +380,7 @@ Maybe<void> GraphAutogradEngine::RunBackwardAndSaveGrads4LeafTensor(const Tensor
                                                                     bool retain_graph,
                                                                     bool create_graph) {
   for (int i = 0; i < outputs.size(); ++i) {
-    JUST(outputs.at(i)->now_grad_arg()->PushPartialTensor(out_grads.at(i)));
+    JUST(JUST(outputs.at(i)->now_grad_arg())->PushPartialTensor(out_grads.at(i)));
   }
   GraphTask graph_task(outputs, retain_graph, create_graph);
   JUST(graph_task.ComputeDependencies());
@@ -389,10 +396,10 @@ Maybe<TensorTuple> GraphAutogradEngine::RunBackwardAndReturnInputsTensorGrad(
   std::vector<bool> ori_retain_grad(inputs.size());
   for (int i = 0; i < inputs.size(); ++i) {
     ori_retain_grad.at(i) = inputs.at(i)->retain_grad();
-    inputs.at(i)->set_retain_grad(true);
+    JUST(inputs.at(i)->set_retain_grad(true));
   }
   for (int i = 0; i < outputs.size(); ++i) {
-    JUST(outputs.at(i)->now_grad_arg()->PushPartialTensor(out_grads.at(i)));
+    JUST(JUST(outputs.at(i)->now_grad_arg())->PushPartialTensor(out_grads.at(i)));
   }
 
   JUST(graph_task.ComputeDependenciesAndPruneNode(inputs));
@@ -400,10 +407,10 @@ Maybe<TensorTuple> GraphAutogradEngine::RunBackwardAndReturnInputsTensorGrad(
 
   // Gets input grads and resume retain_grad
   for (int i = 0; i < inputs.size(); ++i) {
-    input_now_grads->at(i) = inputs.at(i)->acc_grad();
+    input_now_grads->at(i) = JUST(inputs.at(i)->acc_grad());
     if (!ori_retain_grad.at(i)) {
-      inputs.at(i)->set_acc_grad(nullptr);
-      inputs.at(i)->set_retain_grad(false);
+      JUST(inputs.at(i)->set_acc_grad(nullptr));
+      JUST(inputs.at(i)->set_retain_grad(false));
     }
   }
   return input_now_grads;
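
A toy, self-contained sketch (not OneFlow code; all names here are illustrative) of the pattern the hunks above move to: a function node captures autograd metadata only for inputs that require grad, and the backward pass null-checks that slot before accumulating into it.

    class ToyAutogradMeta:
        def __init__(self):
            self.partial_grads = []

        def accumulate(self, grad):
            self.partial_grads.append(grad)


    class ToyTensor:
        def __init__(self, requires_grad):
            self.requires_grad = requires_grad
            # meta exists only for grad-requiring tensors in this toy model
            self.autograd_meta = ToyAutogradMeta() if requires_grad else None


    class ToyFunctionNode:
        def __init__(self, inputs):
            # capture metadata only for inputs that require grad, as the hunks above do
            self.input_metas = [t.autograd_meta if t.requires_grad else None for t in inputs]

        def apply(self, input_grads):
            for meta, grad in zip(self.input_metas, input_grads):
                if grad is not None:
                    assert meta is not None  # counterpart of CHECK_NOTNULL_OR_RETURN
                    meta.accumulate(grad)


    a, b = ToyTensor(True), ToyTensor(False)
    ToyFunctionNode([a, b]).apply([1.0, None])
    assert a.autograd_meta.partial_grads == [1.0]
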
diff --git a/oneflow/core/eager/opkernel_instruction_type.cpp b/oneflow/core/eager/opkernel_instruction_type.cpp
index be3d853ea..58b3b676b 100644
--- a/oneflow/core/eager/opkernel_instruction_type.cpp
+++ b/oneflow/core/eager/opkernel_instruction_type.cpp
@@ -512,7 +512,7 @@ struct LocalCallOpKernelUtil final {
   static inline Maybe<void> InitOutputBlobs(LocalCallOpKernelPhyInstrOperand* operand) {
     JUST(operand->ForEachOutputTensor([&](vm::EagerBlobObject* blob_object) -> Maybe<void> {
       CHECK_OR_RETURN(static_cast<bool>(blob_object));
-      JUST(blob_object->InitBlob());
+      JUST(blob_object->TryInitBlob());
       return Maybe<void>::Ok();
     }));
     return Maybe<void>::Ok();
diff --git a/oneflow/core/framework/op_interpreter/eager_mirrored_op_interpreter.cpp b/oneflow/core/framework/op_interpreter/eager_mirrored_op_interpreter.cpp
index e045100b7..cccc060c7 100644
--- a/oneflow/core/framework/op_interpreter/eager_mirrored_op_interpreter.cpp
+++ b/oneflow/core/framework/op_interpreter/eager_mirrored_op_interpreter.cpp
@@ -59,14 +59,17 @@ Maybe<void> NaiveInterpret(const UserOpExpr& user_op_expr, const TensorTuple& in
     }
     input_eager_blob_objects->at(i) = JUST(inputs.at(i)->eager_blob_object());
   }
+  std::shared_ptr<EagerBlobObjectList> output_eager_blob_objects =
+      std::make_shared<EagerBlobObjectList>(outputs->size());
   for (int i = 0; i < outputs->size(); i++) {
     if (!outputs->at(i)) {
       outputs->at(i) =
           std::make_shared<MirroredTensor>(std::make_shared<EagerMirroredTensorImpl>());
     }
+    if (JUST(outputs->at(i)->has_eager_blob_object())) {
+      output_eager_blob_objects->at(i) = JUST(outputs->at(i)->eager_blob_object());
+    }
   }
-  std::shared_ptr<EagerBlobObjectList> output_eager_blob_objects =
-      std::make_shared<EagerBlobObjectList>(outputs->size());
   Symbol<Device> op_device;
   std::shared_ptr<const ParallelDesc> op_parallel_desc;
   bool need_check_mem_case = true;
@@ -102,9 +105,11 @@ Maybe<void> NaiveInterpret(const UserOpExpr& user_op_expr, const TensorTuple& in
       }));
 
   for (int i = 0; i < output_eager_blob_objects->size(); i++) {
-    auto* tensor_impl = JUST(TensorImpl4Tensor(outputs->at(i)));
-    JUST(tensor_impl->InitEagerBlobObject(JUST(outputs->at(i)->device())->mem_case()));
-    output_eager_blob_objects->at(i) = JUST(tensor_impl->eager_blob_object());
+    if (!output_eager_blob_objects->at(i)) {
+      auto* tensor_impl = JUST(TensorImpl4Tensor(outputs->at(i)));
+      JUST(tensor_impl->InitEagerBlobObject(JUST(outputs->at(i)->device())->mem_case()));
+      output_eager_blob_objects->at(i) = JUST(tensor_impl->eager_blob_object());
+    }
   }
 
   const auto& kernel = JUST(user_op_expr.MutKernel4Device(*op_device));
diff --git a/oneflow/core/framework/tensor.cpp b/oneflow/core/framework/tensor.cpp
index 8762d3671..4fccd92de 100644
--- a/oneflow/core/framework/tensor.cpp
+++ b/oneflow/core/framework/tensor.cpp
@@ -20,6 +20,9 @@ limitations under the License.
 #include "oneflow/core/framework/tensor_tuple.h"
 #include "oneflow/core/autograd/autograd_engine.h"
 #include "oneflow/core/framework/op_interpreter/eager_mirrored_op_interpreter.h"
+#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h"
+#include "oneflow/core/framework/op_builder.h"
+#include "oneflow/core/framework/op_expr.h"
 
 namespace oneflow {
 
@@ -51,8 +54,7 @@ namespace one {
   const auto& blob_desc = eager_blob_object->blob_desc();
   const auto& tensor_meta =
       std::make_shared<MirroredTensorMeta>(blob_desc.shape_ptr(), blob_desc.data_type(), device);
-  const auto& autograd_meta = std::make_shared<AutogradMeta>(requires_grad, is_leaf);
-  auto* tensor_impl = new EagerMirroredTensorImpl(tensor_meta, autograd_meta);
+  auto* tensor_impl = new EagerMirroredTensorImpl(tensor_meta, requires_grad, is_leaf);
   JUST(tensor_impl->InitEagerBlobObjectAndTensorStorage(eager_blob_object, tensor_storage));
   return std::make_shared<MirroredTensor>(std::shared_ptr<MirroredTensorImpl>(tensor_impl));
 }
@@ -74,6 +76,21 @@ Maybe<MirroredTensor> MirroredTensor::api_detach() const {
   return std::make_shared<MirroredTensor>(JUST(impl_->detach()));
 }
 
+Maybe<Tensor> MirroredTensor::clone() const {
+  const auto& device_type = JUST(this->device())->type();
+  int64_t device_id = JUST(this->device())->device_id();
+  std::shared_ptr<OpExpr> copy_op_ = JUST(one::OpBuilder("copy")
+                                              .Input("in", 1)
+                                              .Attr("device_type", device_type)
+                                              .Attr("device_id", device_id)
+                                              .Output("out", 1)
+                                              .Build());
+  std::shared_ptr<MirroredTensor> input =
+      std::const_pointer_cast<MirroredTensor>(shared_from_this());
+  const auto& output = JUST(OpInterpUtil::Dispatch<Tensor>(*copy_op_, {input}));
+  return output;
+}
+
 Maybe<ConsistentTensor> ConsistentTensor::MakeTensor(
     const std::shared_ptr<const Shape>& shape, DataType dtype,
     Symbol<cfg::ParallelDistribution> parallel_distribution, Symbol<ParallelDesc> parallel_desc,
diff --git a/oneflow/core/framework/tensor.h b/oneflow/core/framework/tensor.h
index f011fa758..fa3f744ee 100644
--- a/oneflow/core/framework/tensor.h
+++ b/oneflow/core/framework/tensor.h
@@ -58,6 +58,7 @@ class Tensor {
   virtual Maybe<EagerMirroredTensorImpl*> mut_eager_mirrored_tensor_impl() { OF_UNIMPLEMENTED(); }
   virtual Maybe<vm::EagerBlobObject> eager_blob_object() const = 0;
   virtual Maybe<VmLocalDepObject> compute_local_dep_object() const = 0;
+  virtual Maybe<bool> has_eager_blob_object() const = 0;
   virtual Maybe<TensorStorage> tensor_storage() const { OF_UNIMPLEMENTED(); }
 
   // Getters/Setters valid only for EagerConsistentTensor
@@ -76,19 +77,22 @@ class Tensor {
   virtual bool is_leaf() const = 0;
   virtual bool retain_grad() const = 0;
   virtual std::shared_ptr<const FunctionNode> grad_fn_node() const = 0;
-  virtual const std::shared_ptr<Tensor>& acc_grad() const = 0;
-  virtual const std::shared_ptr<TensorArg>& now_grad_arg() const = 0;
+  virtual Maybe<Tensor> acc_grad() const = 0;
+  virtual Maybe<TensorArg> now_grad_arg() const = 0;
   virtual Maybe<Tensor> detach() const = 0;
+  virtual Maybe<Tensor> clone() const = 0;
 
   // Setters for autograd
   virtual void set_requires_grad(bool requires_grad) = 0;
-  virtual void set_retain_grad(bool retain_grad) = 0;
+  virtual Maybe<void> set_retain_grad(bool retain_grad) = 0;
   virtual void set_grad_fn_node(const std::shared_ptr<FunctionNode>& grad_fn_node) = 0;
   virtual const std::shared_ptr<FunctionNode>& mut_grad_fn_node() = 0;
-  virtual void set_acc_grad(const std::shared_ptr<Tensor>& grad) = 0;
-  virtual std::shared_ptr<Tensor> mut_acc_grad() = 0;
+  virtual Maybe<void> set_acc_grad(const std::shared_ptr<Tensor>& grad) = 0;
+  virtual Maybe<Tensor> mut_acc_grad() = 0;
   virtual void set_is_leaf(bool is_leaf) = 0;
   virtual std::shared_ptr<AutogradMeta> mut_autograd_meta() = 0;
+  virtual bool has_autograd_meta() const = 0;
+  virtual void set_autograd_meta(const std::shared_ptr<AutogradMeta>& autograd_meta) = 0;
 
   virtual user_op::TensorDesc* mut_tensor_meta() = 0;
 
@@ -97,7 +101,7 @@ class Tensor {
 };
 
 template<typename DerivedT>
-class TensorIf : public Tensor, public std::enable_shared_from_this<TensorIf<DerivedT>> {
+class TensorIf : public Tensor {
  public:
   virtual ~TensorIf() = default;
 
@@ -113,8 +117,12 @@ class TensorIf : public Tensor, public std::enable_shared_from_this<TensorIf<Der
   std::shared_ptr<const FunctionNode> grad_fn_node() const override { return grad_fn_node_; }
   // used by pybind11 only
   Maybe<DerivedT> api_acc_grad() const {
-    const std::shared_ptr<Tensor>& tensor = acc_grad();
-    return cast_for_api(tensor);
+    if (has_autograd_meta()) {
+      const std::shared_ptr<Tensor>& tensor = JUST(acc_grad());
+      return cast_for_api(tensor);
+    } else {
+      return std::shared_ptr<DerivedT>();
+    }
   }
 
   // Setters for autograd
@@ -130,6 +138,10 @@ class TensorIf : public Tensor, public std::enable_shared_from_this<TensorIf<Der
   // Operators for tensor
   // used by pybind11 only
   virtual Maybe<DerivedT> api_detach() const = 0;
+  Maybe<DerivedT> api_clone() const {
+    const std::shared_ptr<Tensor>& tensor = JUST(clone());
+    return cast_for_api(tensor);
+  }
 
  protected:
   TensorIf() = default;
@@ -144,7 +156,8 @@ class TensorIf : public Tensor, public std::enable_shared_from_this<TensorIf<Der
   }
 };
 
-class MirroredTensor final : public TensorIf<MirroredTensor> {
+class MirroredTensor final : public TensorIf<MirroredTensor>,
+                             public std::enable_shared_from_this<MirroredTensor> {
  public:
   OF_DISALLOW_COPY_AND_MOVE(MirroredTensor);
   MirroredTensor() = default;
@@ -177,24 +190,34 @@ class MirroredTensor final : public TensorIf<MirroredTensor> {
     return impl_->compute_local_dep_object();
   }
   Maybe<TensorStorage> tensor_storage() const override { return impl_->tensor_storage(); }
+  Maybe<bool> has_eager_blob_object() const override { return impl_->has_eager_blob_object(); }
 
   // Getters for autograd
-  const std::shared_ptr<Tensor>& acc_grad() const override { return impl_->acc_grad(); }
-  const std::shared_ptr<TensorArg>& now_grad_arg() const override { return impl_->now_grad_arg(); }
+  Maybe<Tensor> acc_grad() const override { return impl_->acc_grad(); }
+  Maybe<TensorArg> now_grad_arg() const override { return impl_->now_grad_arg(); }
   bool requires_grad() const override { return impl_->requires_grad(); }
   bool is_leaf() const override { return impl_->is_leaf(); }
   bool retain_grad() const override { return impl_->retain_grad(); }
+  bool has_autograd_meta() const override { return impl_->has_autograd_meta(); }
 
   // Setters for autograd
-  void set_acc_grad(const std::shared_ptr<Tensor>& grad) override { impl_->set_acc_grad(grad); }
+  Maybe<void> set_acc_grad(const std::shared_ptr<Tensor>& grad) override {
+    return impl_->set_acc_grad(grad);
+  }
   void set_requires_grad(bool requires_grad) override { impl_->set_requires_grad(requires_grad); }
-  void set_retain_grad(bool retain_grad) override { impl_->set_retain_grad(retain_grad); }
-  std::shared_ptr<Tensor> mut_acc_grad() override { return impl_->mut_acc_grad(); }
+  Maybe<void> set_retain_grad(bool retain_grad) override {
+    return impl_->set_retain_grad(retain_grad);
+  }
+  Maybe<Tensor> mut_acc_grad() override { return impl_->mut_acc_grad(); }
   void set_is_leaf(bool is_leaf) override { impl_->set_is_leaf(is_leaf); }
   std::shared_ptr<AutogradMeta> mut_autograd_meta() override { return impl_->mut_autograd_meta(); }
+  void set_autograd_meta(const std::shared_ptr<AutogradMeta>& autograd_meta) override {
+    impl_->set_autograd_meta(autograd_meta);
+  }
 
   // Operators for tensor
   Maybe<MirroredTensor> api_detach() const override;
+  Maybe<Tensor> clone() const override;
 
   static Maybe<MirroredTensor> MakeTensor(const std::shared_ptr<const Shape>& shape, DataType dtype,
                                           const Symbol<Device>& device, bool is_lazy,
@@ -234,7 +257,9 @@ class ConsistentTensor final : public TensorIf<ConsistentTensor> {
       const override {
     return impl_->consumer_parallel_distribution_constraint();
   }
-  Maybe<MirroredTensor> cur_rank_phy_tensor() const { return impl_->cur_rank_phy_tensor(); }
+  Maybe<MirroredTensor> cur_rank_phy_tensor() const override {
+    return impl_->cur_rank_phy_tensor();
+  }
   int64_t ndim() const override;
   bool is_cuda() const override;
   int64_t dim(int64_t index) const override;
@@ -249,6 +274,8 @@ class ConsistentTensor final : public TensorIf<ConsistentTensor> {
     return impl_->compute_local_dep_object();
   }
   const TensorMeta& tensor_meta() const override { return *impl_->tensor_meta(); }
+  Maybe<TensorStorage> tensor_storage() const override { return impl_->tensor_storage(); }
+  Maybe<bool> has_eager_blob_object() const override { return impl_->has_eager_blob_object(); }
 
   // Setters
   Maybe<void> set_consumer_parallel_distribution_constraint(
@@ -258,22 +285,31 @@ class ConsistentTensor final : public TensorIf<ConsistentTensor> {
   }
 
   // Getters for autograd
-  const std::shared_ptr<Tensor>& acc_grad() const override { return impl_->acc_grad(); }
-  const std::shared_ptr<TensorArg>& now_grad_arg() const override { return impl_->now_grad_arg(); }
+  Maybe<Tensor> acc_grad() const override { return impl_->acc_grad(); }
+  Maybe<TensorArg> now_grad_arg() const override { return impl_->now_grad_arg(); }
   bool requires_grad() const override { return impl_->requires_grad(); }
   bool is_leaf() const override { return impl_->is_leaf(); }
   bool retain_grad() const override { return impl_->retain_grad(); }
+  bool has_autograd_meta() const override { return impl_->has_autograd_meta(); }
 
   // Setters for autograd
-  void set_acc_grad(const std::shared_ptr<Tensor>& grad) override { impl_->set_acc_grad(grad); }
-  std::shared_ptr<Tensor> mut_acc_grad() override { return impl_->mut_acc_grad(); }
+  Maybe<void> set_acc_grad(const std::shared_ptr<Tensor>& grad) override {
+    return impl_->set_acc_grad(grad);
+  }
+  Maybe<Tensor> mut_acc_grad() override { return impl_->mut_acc_grad(); }
   void set_requires_grad(bool requires_grad) override { impl_->set_requires_grad(requires_grad); }
-  void set_retain_grad(bool retain_grad) override { impl_->set_retain_grad(retain_grad); }
+  Maybe<void> set_retain_grad(bool retain_grad) override {
+    return impl_->set_retain_grad(retain_grad);
+  }
   void set_is_leaf(bool is_leaf) override { impl_->set_is_leaf(is_leaf); }
   std::shared_ptr<AutogradMeta> mut_autograd_meta() override { return impl_->mut_autograd_meta(); }
+  void set_autograd_meta(const std::shared_ptr<AutogradMeta>& autograd_meta) override {
+    impl_->set_autograd_meta(autograd_meta);
+  }
 
   // Operators for tensor
   virtual Maybe<ConsistentTensor> api_detach() const override;
+  Maybe<Tensor> clone() const override { return Error::Unimplemented(); }
 
   static Maybe<ConsistentTensor> MakeTensor(const std::shared_ptr<const Shape>& shape,
                                             DataType dtype,
diff --git a/oneflow/core/framework/tensor_impl.cpp b/oneflow/core/framework/tensor_impl.cpp
index b2828cda8..bfd9069b1 100644
--- a/oneflow/core/framework/tensor_impl.cpp
+++ b/oneflow/core/framework/tensor_impl.cpp
@@ -32,6 +32,33 @@ limitations under the License.
 namespace oneflow {
 namespace one {
 
+Maybe<Tensor> TensorImpl::acc_grad() const {
+  CHECK_NOTNULL_OR_RETURN(autograd_meta_);
+  return autograd_meta_->acc_grad();
+}
+
+Maybe<TensorArg> TensorImpl::now_grad_arg() const {
+  CHECK_NOTNULL_OR_RETURN(autograd_meta_);
+  return autograd_meta_->now_grad_arg();
+}
+
+Maybe<void> TensorImpl::set_acc_grad(const std::shared_ptr<Tensor>& grad) {
+  CHECK_NOTNULL_OR_RETURN(autograd_meta_);
+  autograd_meta_->set_acc_grad(grad);
+  return Maybe<void>::Ok();
+}
+
+Maybe<Tensor> TensorImpl::mut_acc_grad() {
+  CHECK_NOTNULL_OR_RETURN(autograd_meta_);
+  return autograd_meta_->mut_acc_grad();
+}
+
+Maybe<void> TensorImpl::set_retain_grad(bool retain_grad) {
+  CHECK_NOTNULL_OR_RETURN(autograd_meta_);
+  autograd_meta_->set_retain_grad(retain_grad);
+  return Maybe<void>::Ok();
+}
+
 namespace {
 
 std::shared_ptr<const MirroredTensorMeta> NewDefaultMirroredTensorMeta() {
@@ -48,24 +75,19 @@ Maybe<MirroredTensorImpl> LazyMirroredTensorImpl::detach() const {
 }
 
 EagerMirroredTensorImpl::EagerMirroredTensorImpl()
-    : MirroredTensorImpl(NewDefaultMirroredTensorMeta(), NewAutogradMeta(false, false)) {}
-
-EagerMirroredTensorImpl::EagerMirroredTensorImpl(
-    const std::shared_ptr<const MirroredTensorMeta>& tensor_meta,
-    const std::shared_ptr<AutogradMeta>& autograd_meta)
-    : MirroredTensorImpl(tensor_meta, autograd_meta) {}
+    : MirroredTensorImpl(NewDefaultMirroredTensorMeta(), false, false) {}
 
 EagerMirroredTensorImpl::EagerMirroredTensorImpl(
     const std::shared_ptr<const MirroredTensorMeta>& tensor_meta, bool requires_grad, bool is_leaf)
-    : MirroredTensorImpl(tensor_meta, NewAutogradMeta(requires_grad, is_leaf)) {}
+    : MirroredTensorImpl(tensor_meta, requires_grad, is_leaf) {}
 
 EagerMirroredTensorImpl::~EagerMirroredTensorImpl() {}
 
 EagerMirroredTensorImpl::EagerMirroredTensorImpl(
     const std::shared_ptr<const MirroredTensorMeta>& tensor_meta,
     std::shared_ptr<TensorStorage> tensor_storage, bool requires_grad, bool is_leaf)
-    : MirroredTensorImpl(tensor_meta, NewAutogradMeta(requires_grad, is_leaf)),
-      tensor_storage_(tensor_storage) {}
+    : MirroredTensorImpl(tensor_meta, requires_grad, is_leaf), tensor_storage_(tensor_storage) {}
+
 Maybe<void> EagerMirroredTensorImpl::UpdateTensorStorage() {
   const auto& eager_blob_object = eager_blob_object_;
   tensor_storage_ = std::make_shared<TensorStorage>(eager_blob_object->tensor_buffer());
@@ -169,10 +191,10 @@ size_t ConsistentTensorMeta::CalcHashValue() const {
 }
 
 EagerConsistentTensorImpl::EagerConsistentTensorImpl(
-    Symbol<ConsistentTensorMeta> consistent_tensor_meta,
-    const std::shared_ptr<AutogradMeta>& autograd_meta,
+    Symbol<ConsistentTensorMeta> consistent_tensor_meta, bool requires_grad, bool is_leaf,
     const std::shared_ptr<MirroredTensor>& cur_rank_phy_tensor)
-    : ConsistentTensorImpl(consistent_tensor_meta, autograd_meta),
+    : ConsistentTensorImpl(consistent_tensor_meta, cur_rank_phy_tensor->requires_grad(),
+                           cur_rank_phy_tensor->is_leaf()),
       cur_rank_phy_tensor_(cur_rank_phy_tensor) {}
 
 /*static*/ Maybe<EagerConsistentTensorImpl> EagerConsistentTensorImpl::New(
@@ -193,8 +215,9 @@ EagerConsistentTensorImpl::EagerConsistentTensorImpl(
   const auto& dtype = cur_rank_phy_tensor->dtype();
   Symbol<ConsistentTensorMeta> consistent_tensor_meta(
       ConsistentTensorMeta(shape, dtype, parallel_distribution, parallel_desc));
-  return std::shared_ptr<EagerConsistentTensorImpl>(new EagerConsistentTensorImpl(
-      consistent_tensor_meta, cur_rank_phy_tensor->mut_autograd_meta(), cur_rank_phy_tensor));
+  return std::shared_ptr<EagerConsistentTensorImpl>(
+      new EagerConsistentTensorImpl(consistent_tensor_meta, cur_rank_phy_tensor->requires_grad(),
+                                    cur_rank_phy_tensor->is_leaf(), cur_rank_phy_tensor));
 }
 
 /*static*/ Maybe<EagerConsistentTensorImpl> EagerConsistentTensorImpl::New(
@@ -219,21 +242,20 @@ EagerConsistentTensorImpl::EagerConsistentTensorImpl(
       JUST(GetPhysicalShape(*shape, *parallel_distribution, *parallel_desc, parallel_id));
   const auto& cur_rank_phy_tensor_meta =
       std::make_shared<MirroredTensorMeta>(cur_rank_phy_shape, dtype, device);
-  const auto& autograd_meta = NewAutogradMeta(requires_grad, is_leaf);
   auto cur_rank_phy_tensor_impl =
-      std::make_shared<EagerMirroredTensorImpl>(cur_rank_phy_tensor_meta, autograd_meta);
+      std::make_shared<EagerMirroredTensorImpl>(cur_rank_phy_tensor_meta, requires_grad, is_leaf);
   JUST(cur_rank_phy_tensor_impl->InitEagerBlobObject(device->mem_case()));
   const auto& cur_rank_phy_tensor = std::make_shared<MirroredTensor>(cur_rank_phy_tensor_impl);
-  auto* tensor_impl = new EagerConsistentTensorImpl(
-      consistent_tensor_meta, cur_rank_phy_tensor->mut_autograd_meta(), cur_rank_phy_tensor);
+  auto* tensor_impl =
+      new EagerConsistentTensorImpl(consistent_tensor_meta, cur_rank_phy_tensor->requires_grad(),
+                                    cur_rank_phy_tensor->is_leaf(), cur_rank_phy_tensor);
   return std::shared_ptr<EagerConsistentTensorImpl>(tensor_impl);
 }
 
 /*static*/ Maybe<EagerConsistentTensorImpl> EagerConsistentTensorImpl::NewWithoutPhyTensor(
     Symbol<ConsistentTensorMeta> consistent_tensor_meta, Symbol<Device> device, int64_t parallel_id,
     bool requires_grad, bool is_leaf) {
-  const auto& autograd_meta = NewAutogradMeta(requires_grad, is_leaf);
-  auto* tensor_impl = new EagerConsistentTensorImpl(consistent_tensor_meta, autograd_meta,
+  auto* tensor_impl = new EagerConsistentTensorImpl(consistent_tensor_meta, requires_grad, is_leaf,
                                                     std::shared_ptr<MirroredTensor>());
   return std::shared_ptr<EagerConsistentTensorImpl>(tensor_impl);
 }
diff --git a/oneflow/core/framework/tensor_impl.h b/oneflow/core/framework/tensor_impl.h
index 707bc01b1..213e0fe09 100644
--- a/oneflow/core/framework/tensor_impl.h
+++ b/oneflow/core/framework/tensor_impl.h
@@ -62,26 +62,33 @@ class TensorImpl {
   virtual Maybe<vm::EagerBlobObject> eager_blob_object() const = 0;
   virtual Maybe<VmLocalDepObject> compute_local_dep_object() const = 0;
   virtual Maybe<TensorStorage> tensor_storage() const { OF_UNIMPLEMENTED(); }
+  virtual Maybe<bool> has_eager_blob_object() const = 0;
 
   // Getters for autograd
-  const std::shared_ptr<Tensor>& acc_grad() const { return autograd_meta_->acc_grad(); }
-  const std::shared_ptr<TensorArg>& now_grad_arg() const { return autograd_meta_->now_grad_arg(); }
-  bool requires_grad() const { return autograd_meta_->requires_grad(); }
-  bool is_leaf() const { return autograd_meta_->is_leaf(); }
+  Maybe<Tensor> acc_grad() const;
+  Maybe<TensorArg> now_grad_arg() const;
+  bool requires_grad() const { return requires_grad_; }
+  bool is_leaf() const { return is_leaf_; }
   bool retain_grad() const { return autograd_meta_->retain_grad(); }
 
   // Setters for autograd
-  void set_acc_grad(const std::shared_ptr<Tensor>& grad) { autograd_meta_->set_acc_grad(grad); }
-  std::shared_ptr<Tensor> mut_acc_grad() { return autograd_meta_->mut_acc_grad(); }
-  void set_requires_grad(bool requires_grad) { autograd_meta_->set_requires_grad(requires_grad); }
-  void set_retain_grad(bool retain_grad) { autograd_meta_->set_retain_grad(retain_grad); }
-  void set_is_leaf(bool is_leaf) { autograd_meta_->set_is_leaf(is_leaf); }
+  Maybe<void> set_acc_grad(const std::shared_ptr<Tensor>& grad);
+  Maybe<Tensor> mut_acc_grad();
+  void set_requires_grad(bool requires_grad) { requires_grad_ = requires_grad; }
+  Maybe<void> set_retain_grad(bool retain_grad);
+  void set_is_leaf(bool is_leaf) { is_leaf_ = is_leaf; }
   std::shared_ptr<AutogradMeta> mut_autograd_meta() { return autograd_meta_; }
+  void set_autograd_meta(const std::shared_ptr<AutogradMeta>& autograd_meta) {
+    autograd_meta_ = autograd_meta;
+  }
+  bool has_autograd_meta() const { return autograd_meta_.get(); }
 
  protected:
-  TensorImpl(const std::shared_ptr<AutogradMeta>& autograd_meta) : autograd_meta_(autograd_meta) {}
+  TensorImpl(bool requires_grad, bool is_leaf) : requires_grad_(requires_grad), is_leaf_(is_leaf) {}
 
  protected:
+  bool requires_grad_;
+  bool is_leaf_;
   std::shared_ptr<AutogradMeta> autograd_meta_;
 };
 
@@ -106,8 +113,8 @@ class MirroredTensorImpl : public TensorImpl {
 
  protected:
   MirroredTensorImpl(const std::shared_ptr<const MirroredTensorMeta>& tensor_meta,
-                     const std::shared_ptr<AutogradMeta>& autograd_meta)
-      : TensorImpl(autograd_meta), tensor_meta_(tensor_meta) {}
+                     bool requires_grad, bool is_leaf)
+      : TensorImpl(requires_grad, is_leaf), tensor_meta_(tensor_meta) {}
 
   std::shared_ptr<const MirroredTensorMeta> tensor_meta_;
 };
@@ -134,6 +141,7 @@ class ConsistentTensorImpl : public TensorImpl {
   // Getters valid only for EagerMirroredTensorImpl
   Maybe<vm::EagerBlobObject> eager_blob_object() const override { OF_UNIMPLEMENTED(); }
   Maybe<VmLocalDepObject> compute_local_dep_object() const override { OF_UNIMPLEMENTED(); }
+  Maybe<bool> has_eager_blob_object() const override { OF_UNIMPLEMENTED(); }
 
   // Setters
   void set_consumer_parallel_distribution_constraint(Symbol<cfg::ParallelDistribution> val) {
@@ -146,9 +154,8 @@ class ConsistentTensorImpl : public TensorImpl {
   }
 
  protected:
-  ConsistentTensorImpl(Symbol<ConsistentTensorMeta> tensor_meta,
-                       const std::shared_ptr<AutogradMeta>& autograd_meta)
-      : TensorImpl(autograd_meta),
+  ConsistentTensorImpl(Symbol<ConsistentTensorMeta> tensor_meta, bool requires_grad, bool is_leaf)
+      : TensorImpl(requires_grad, is_leaf),
         tensor_meta_(tensor_meta),
         consumer_parallel_distribution_constraint_() {}
 
@@ -161,17 +168,18 @@ class LazyMirroredTensorImpl final : public MirroredTensorImpl {
   OF_DISALLOW_COPY_AND_MOVE(LazyMirroredTensorImpl);
   LazyMirroredTensorImpl(const std::shared_ptr<const MirroredTensorMeta>& tensor_meta,
                          bool requires_grad, bool is_leaf)
-      : MirroredTensorImpl(tensor_meta, NewAutogradMeta(requires_grad, is_leaf)) {}
+      : MirroredTensorImpl(tensor_meta, requires_grad, is_leaf) {}
   ~LazyMirroredTensorImpl() override = default;
 
   // Getters
-  const std::shared_ptr<const Shape>& shape() const { return tensor_meta()->shape_ptr(); }
+  const std::shared_ptr<const Shape>& shape() const override { return tensor_meta()->shape_ptr(); }
   bool is_lazy() const override { return true; }
 
   // Getters valid only for EagerMirroredTensorImpl
   Maybe<vm::EagerBlobObject> eager_blob_object() const override { OF_UNIMPLEMENTED(); }
   Maybe<VmLocalDepObject> compute_local_dep_object() const override { OF_UNIMPLEMENTED(); }
   Maybe<TensorStorage> tensor_storage() const override { OF_UNIMPLEMENTED(); }
+  Maybe<bool> has_eager_blob_object() const override { OF_UNIMPLEMENTED(); }
   Maybe<MirroredTensorImpl> detach() const override;
 };
 
@@ -179,8 +187,6 @@ class EagerMirroredTensorImpl final : public MirroredTensorImpl {
  public:
   OF_DISALLOW_COPY_AND_MOVE(EagerMirroredTensorImpl);
   EagerMirroredTensorImpl();
-  EagerMirroredTensorImpl(const std::shared_ptr<const MirroredTensorMeta>& tensor_meta,
-                          const std::shared_ptr<AutogradMeta>& autograd_meta);
   EagerMirroredTensorImpl(const std::shared_ptr<const MirroredTensorMeta>& tensor_meta,
                           bool requires_grad, bool is_leaf);
   EagerMirroredTensorImpl(const std::shared_ptr<const MirroredTensorMeta>& tensor_meta,
@@ -203,6 +209,7 @@ class EagerMirroredTensorImpl final : public MirroredTensorImpl {
     CHECK_OR_RETURN(eager_blob_object_);
     return tensor_storage_;
   }
+  Maybe<bool> has_eager_blob_object() const override { return eager_blob_object_.get(); }
 
   // Setters
   TensorStorage* mut_tensor_storage() { return tensor_storage_.get(); }
@@ -226,7 +233,7 @@ class LazyConsistentTensorImpl final : public ConsistentTensorImpl {
   OF_DISALLOW_COPY_AND_MOVE(LazyConsistentTensorImpl);
   LazyConsistentTensorImpl(Symbol<ConsistentTensorMeta> consistent_tensor_meta, bool requires_grad,
                            bool is_leaf)
-      : ConsistentTensorImpl(consistent_tensor_meta, NewAutogradMeta(requires_grad, is_leaf)) {}
+      : ConsistentTensorImpl(consistent_tensor_meta, requires_grad, is_leaf) {}
   ~LazyConsistentTensorImpl() override = default;
 
   // Getters
@@ -262,8 +269,8 @@ class EagerConsistentTensorImpl final : public ConsistentTensorImpl {
                                                         Symbol<Device>, int64_t, bool, bool);
 
  private:
-  EagerConsistentTensorImpl(Symbol<ConsistentTensorMeta> consistent_tensor_meta,
-                            const std::shared_ptr<AutogradMeta>& autograd_meta,
+  EagerConsistentTensorImpl(Symbol<ConsistentTensorMeta> consistent_tensor_meta, bool requires_grad,
+                            bool is_leaf,
                             const std::shared_ptr<MirroredTensor>& cur_rank_phy_tensor);
 
   std::shared_ptr<MirroredTensor> cur_rank_phy_tensor_;
diff --git a/oneflow/core/functional/functional_api.yaml b/oneflow/core/functional/functional_api.yaml
index 273b2635b..cec427e3a 100644
--- a/oneflow/core/functional/functional_api.yaml
+++ b/oneflow/core/functional/functional_api.yaml
@@ -153,7 +153,7 @@
   bind_python: True
 
 - name: "relu"
-  signature: "Tensor Relu(Tensor x)"
+  signature: "Tensor Relu(Tensor x, *, Bool inplace=False)"
   bind_python: True
 
 - name: "relu_grad"
diff --git a/oneflow/core/functional/impl/activation_functor.cpp b/oneflow/core/functional/impl/activation_functor.cpp
index f54b4a6d2..7f8bcd10d 100644
--- a/oneflow/core/functional/impl/activation_functor.cpp
+++ b/oneflow/core/functional/impl/activation_functor.cpp
@@ -25,6 +25,7 @@ limitations under the License.
 #include "oneflow/core/framework/tensor_tuple.h"
 #include "oneflow/core/functional/function_library.h"
 #include "oneflow/core/functional/scalar.h"
+#include "oneflow/core/autograd/autograd_mode.h"
 
 namespace oneflow {
 namespace one {
@@ -32,9 +33,24 @@ namespace functional {
 
 namespace impl {
 
-class ReluFunctor : public UnaryFunctor {
+class ReluFunctor {
  public:
-  ReluFunctor() { op_ = CHECK_JUST(one::OpBuilder("relu").Input("in").Output("out").Build()); }
+  ReluFunctor() {
+    op_ = CHECK_JUST(one::OpBuilder("relu").Input("in", 1).Output("out", 1).Build());
+  }
+  Maybe<Tensor> operator()(const std::shared_ptr<Tensor>& x, bool inplace) const {
+    if (inplace) {
+      std::shared_ptr<TensorTuple> outputs = std::make_shared<TensorTuple>(1);
+      outputs->at(0) = x;
+      JUST(JUST(OpInterpUtil::GetInterpreter())->Apply(*op_, {x}, outputs.get(), AttrMap{}));
+      return outputs->at(0);
+    } else {
+      return OpInterpUtil::Dispatch<Tensor>(*op_, {x});
+    }
+  }
+
+ private:
+  std::shared_ptr<OpExpr> op_;
 };
 
 class ReluGradFunctor : public BinaryFunctor {
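
At the Python level the inplace branch above behaves as the tests later in this patch exercise it: passing inplace=True makes relu write its result into the input tensor instead of dispatching to a fresh output. A usage sketch, assuming eager mode:

    import numpy as np
    import oneflow as flow  # or the experimental namespace used by the test files

    x0 = flow.Tensor(np.array([-3.0, 2.0]), dtype=flow.float32, requires_grad=True)
    x = x0 + 1                      # non-leaf tensor, so in-place is allowed
    flow.F.relu(x, inplace=True)    # the result is written into x's own buffer
    assert np.allclose(x.numpy(), np.array([0.0, 3.0]))
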
diff --git a/oneflow/python/framework/tensor.py b/oneflow/python/framework/tensor.py
index d79cbcd74..429ed1032 100644
--- a/oneflow/python/framework/tensor.py
+++ b/oneflow/python/framework/tensor.py
@@ -322,6 +322,13 @@ class Tensor:
         else:
             return None
 
+    @_auto_determine
+    def clone(self):
+        if self._local_or_consistent_tensor is not None:
+            return flow.Tensor(self._local_or_consistent_tensor.clone())
+        else:
+            return None
+
     def requires_grad_(self, requires_grad=True):
         self.requires_grad = requires_grad
 
diff --git a/oneflow/python/nn/modules/activation.py b/oneflow/python/nn/modules/activation.py
index 40783227e..9b4fd61c0 100644
--- a/oneflow/python/nn/modules/activation.py
+++ b/oneflow/python/nn/modules/activation.py
@@ -133,8 +133,15 @@ class ReLU(Module):
 
     def __init__(self, inplace: bool = False):
         super().__init__()
+        self._inplace = inplace
 
     def forward(self, x):
+        if self._inplace:
+            if x.requires_grad and x.is_leaf:
+                raise RuntimeError(
+                    "a leaf Variable that requires grad is being used in an in-place operation."
+                )
+            return flow.F.relu(x, inplace=True)
         return flow.F.relu(x)
 
 
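
A usage sketch of the module-level behavior added above (same assumptions as the previous sketch): ReLU(inplace=True) mutates a non-leaf input in place and refuses a leaf tensor that requires grad.

    import numpy as np
    import oneflow as flow  # or the experimental namespace used by the test files

    m = flow.nn.ReLU(inplace=True)

    leaf = flow.Tensor(np.array([-3.0, 1.0]), dtype=flow.float32, requires_grad=True)
    try:
        m(leaf)                  # rejected: leaf tensor that requires grad
    except RuntimeError as err:
        print(err)

    x = leaf + 1                 # non-leaf: in-place ReLU mutates x directly
    m(x)
    assert np.allclose(x.numpy(), np.array([0.0, 2.0]))
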
diff --git a/oneflow/python/test/modules/test_activation.py b/oneflow/python/test/modules/test_activation.py
index 385c855df..fd9f74e2f 100644
--- a/oneflow/python/test/modules/test_activation.py
+++ b/oneflow/python/test/modules/test_activation.py
@@ -38,6 +38,19 @@ def _test_relu_impl(test_case, shape, device):
     of_out.backward()
     test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out > 0, 1e-5, 1e-5))
 
+    inplace_m = flow.nn.ReLU(inplace=True)
+    of_input = flow.Tensor(
+        np_input, dtype=flow.float32, device=flow.device(device), requires_grad=True
+    )
+    of_input_inplace = of_input + 1
+    inplace_m(of_input_inplace)
+    np_out = np.maximum(0, np_input + 1)
+    test_case.assertTrue(np.allclose(of_input_inplace.numpy(), np_out, 1e-5, 1e-5))
+
+    of_out_inplace = of_input_inplace.sum()
+    of_out_inplace.backward()
+    test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out > 0, 1e-5, 1e-5))
+
 
 @unittest.skipIf(
     not flow.unittest.env.eager_execution_enabled(),
diff --git a/oneflow/python/test/tensor/test_tensor.py b/oneflow/python/test/tensor/test_tensor.py
index 9e8141f14..5596eab2e 100644
--- a/oneflow/python/test/tensor/test_tensor.py
+++ b/oneflow/python/test/tensor/test_tensor.py
@@ -763,6 +763,22 @@ class TestTensor(flow.unittest.TestCase):
         test_case.assertEqual(z.is_leaf, True)
         test_case.assertEqual(z.grad_fn, None)
 
+    @unittest.skipIf(
+        not flow.unittest.env.eager_execution_enabled(),
+        "numpy doesn't work in lazy mode",
+    )
+    def test_tensor_clone(test_case):
+        shape = (2, 3, 4, 5)
+        x = flow.Tensor(
+            np.random.randn(*shape), dtype=flow.float32, requires_grad=True,
+        )
+        y = x.clone()
+        test_case.assertTrue(np.allclose(y.numpy(), x.numpy(), 1e-4, 1e-4))
+        test_case.assertEqual(y.requires_grad, True)
+        test_case.assertEqual(y.is_leaf, False)
+        # Cannot print Copy grad function
+        test_case.assertTrue(y.grad_fn != None)
+
     @unittest.skipIf(
         not flow.unittest.env.eager_execution_enabled(),
         "numpy doesn't work in lazy mode",
-- 
GitLab