From 2e75393e58349143a14c1e35cc28f4acb1c2411a Mon Sep 17 00:00:00 2001 From: poohRui <32978179+poohRui@users.noreply.github.com> Date: Thu, 8 Jul 2021 01:01:27 +0800 Subject: [PATCH] Support inplace operations (#5204) * support inplace forward * support inplace backward * add test case * add test case for clone * inplace is not support for leaf nodes * refine clone * add checks * refine * forbid clone with no grad * Separate autograd meta to tensor (#5267) * separate autograd meta * minor fix * fix acc_grad interface * fix acc_grad with null * minor fix * inplace without clone * refine * minor fix * remove maybe from constructor * change from create to set * fix merge bugs * fix merge bug * remove inplace flag in local_call_opkernel_phy_instr_operand * remove out-date codes * refine code * add JUST * fix merge master bug * revert autograd engine input_grad check * fix bug in tensor_hook Co-authored-by: wyg1997 <wyg19970408@gmail.com> Co-authored-by: Houjiang Chen <chenhoujiangcug@gmail.com> Co-authored-by: oneflow-ci-bot <69100618+oneflow-ci-bot@users.noreply.github.com> --- oneflow/api/python/framework/tensor.cpp | 4 +- oneflow/core/autograd/autograd_engine.cpp | 39 ++++++---- .../core/eager/opkernel_instruction_type.cpp | 2 +- .../eager_mirrored_op_interpreter.cpp | 15 ++-- oneflow/core/framework/tensor.cpp | 21 ++++- oneflow/core/framework/tensor.h | 76 ++++++++++++++----- oneflow/core/framework/tensor_impl.cpp | 62 ++++++++++----- oneflow/core/framework/tensor_impl.h | 51 +++++++------ oneflow/core/functional/functional_api.yaml | 2 +- .../functional/impl/activation_functor.cpp | 20 ++++- oneflow/python/framework/tensor.py | 7 ++ oneflow/python/nn/modules/activation.py | 7 ++ .../python/test/modules/test_activation.py | 13 ++++ oneflow/python/test/tensor/test_tensor.py | 16 ++++ 14 files changed, 245 insertions(+), 90 deletions(-) diff --git a/oneflow/api/python/framework/tensor.cpp b/oneflow/api/python/framework/tensor.cpp index 332e21a73..f7249a663 100644 --- a/oneflow/api/python/framework/tensor.cpp +++ b/oneflow/api/python/framework/tensor.cpp @@ -215,6 +215,7 @@ void SpecializedDef(py::class_<MirroredTensor, Tensor, std::shared_ptr<MirroredT api->def("zeros_", &ApiEagerMirroredTensorZeros); api->def("_register_hook", [](const std::shared_ptr<MirroredTensor>& self, const AutogradMeta::Hook& hook) -> void { + if (!self->grad_fn_node()) { CHECK_JUST(AddAccumulateFunctionNode(self)); } self->mut_autograd_meta()->add_hook(hook); }); } @@ -256,9 +257,10 @@ void ExportTensor(py::module& m, const char* name) { // Methods of pytorch .def("retain_grad", [](T& t) { - if (!t.is_leaf()) { t.set_retain_grad(true); } + if (!t.is_leaf()) { t.set_retain_grad(true).GetOrThrow(); } }) .def("detach", [](const T& t) { return t.api_detach().GetPtrOrThrow(); }) + .def("clone", [](const T& t) { return t.api_clone().GetPtrOrThrow(); }) // OneFlow tensor properties other than pytorch tensor .def_property_readonly("is_lazy", &T::is_lazy) .def_property_readonly("is_consistent", &T::is_consistent); diff --git a/oneflow/core/autograd/autograd_engine.cpp b/oneflow/core/autograd/autograd_engine.cpp index 122a776e6..a7b38fcff 100644 --- a/oneflow/core/autograd/autograd_engine.cpp +++ b/oneflow/core/autograd/autograd_engine.cpp @@ -64,8 +64,8 @@ StackFunctionNode::StackFunctionNode( input_meta_datas_.resize(inputs.size()); next_functions_->reserve(inputs.size()); for (int i = 0; i < inputs.size(); ++i) { - input_meta_datas_.at(i) = inputs.at(i)->mut_autograd_meta(); - if (input_meta_datas_.at(i)->requires_grad()) 
{ + if (inputs.at(i)->requires_grad()) { + input_meta_datas_.at(i) = inputs.at(i)->mut_autograd_meta(); next_functions_->emplace_back(inputs.at(i)->mut_grad_fn_node()); } } @@ -73,6 +73,9 @@ StackFunctionNode::StackFunctionNode( output_meta_datas_.resize(outputs.size()); output_tensor_infos_.reserve(outputs.size()); for (int i = 0; i < outputs.size(); ++i) { + const auto& autograd_meta = + NewAutogradMeta(outputs.at(i)->requires_grad(), outputs.at(i)->is_leaf()); + outputs.at(i)->set_autograd_meta(autograd_meta); output_meta_datas_.at(i) = outputs.at(i)->mut_autograd_meta(); output_tensor_infos_.emplace_back(TensorInfo(*outputs.at(i))); } @@ -126,6 +129,7 @@ Maybe<bool> FunctionNode::Apply(bool create_graph) { JUST((*backward_fn_)(output_grads, &input_grads, create_graph)); for (int i = 0; i < input_meta_datas_.size(); ++i) { if (input_grads.at(i)) { + CHECK_NOTNULL_OR_RETURN(input_meta_datas_.at(i)); JUST(input_meta_datas_.at(i)->now_grad_arg()->PushPartialTensor(input_grads.at(i))); } } @@ -148,7 +152,7 @@ Maybe<void> StackAutogradEngine::RunBackwardAndSaveGrads4LeafTensor(const Tensor bool create_graph) { ClearReleasedFunctionNodes(); for (int i = 0; i < outputs.size(); ++i) { - JUST(outputs.at(i)->now_grad_arg()->PushPartialTensor(out_grads.at(i))); + JUST(JUST(outputs.at(i)->now_grad_arg())->PushPartialTensor(out_grads.at(i))); } // Runs each FunctionNode for (const auto& weak_func_node : node_list_) { @@ -173,10 +177,10 @@ Maybe<TensorTuple> StackAutogradEngine::RunBackwardAndReturnInputsTensorGrad( std::vector<bool> ori_retain_grad(inputs.size()); for (int i = 0; i < inputs.size(); ++i) { ori_retain_grad.at(i) = inputs.at(i)->retain_grad(); - inputs.at(i)->set_retain_grad(true); + JUST(inputs.at(i)->set_retain_grad(true)); } for (int i = 0; i < outputs.size(); ++i) { - JUST(outputs.at(i)->now_grad_arg()->PushPartialTensor(out_grads.at(i))); + JUST(JUST(outputs.at(i)->now_grad_arg())->PushPartialTensor(out_grads.at(i))); } // Runs each FunctionNode for (const auto& weak_func_node : node_list_) { @@ -190,10 +194,10 @@ Maybe<TensorTuple> StackAutogradEngine::RunBackwardAndReturnInputsTensorGrad( } // Gets input grads and resume retain_grad for (int i = 0; i < inputs.size(); ++i) { - input_now_grads->at(i) = inputs.at(i)->acc_grad(); + input_now_grads->at(i) = JUST(inputs.at(i)->acc_grad()); if (!ori_retain_grad.at(i)) { - inputs.at(i)->set_acc_grad(nullptr); - inputs.at(i)->set_retain_grad(false); + JUST(inputs.at(i)->set_acc_grad(nullptr)); + JUST(inputs.at(i)->set_retain_grad(false)); } } if (!retain_graph) { ClearEngine(); } @@ -241,8 +245,8 @@ GraphFunctionNode::GraphFunctionNode( input_meta_datas_.resize(inputs.size()); next_functions_->reserve(inputs.size()); for (int i = 0; i < inputs.size(); ++i) { - input_meta_datas_.at(i) = inputs.at(i)->mut_autograd_meta(); - if (input_meta_datas_.at(i)->requires_grad()) { + if (inputs.at(i)->requires_grad()) { + input_meta_datas_.at(i) = inputs.at(i)->mut_autograd_meta(); next_functions_->emplace_back(inputs.at(i)->mut_grad_fn_node()); } } @@ -250,6 +254,9 @@ GraphFunctionNode::GraphFunctionNode( output_meta_datas_.resize(outputs.size()); output_tensor_infos_.reserve(outputs.size()); for (int i = 0; i < outputs.size(); ++i) { + const auto& autograd_meta = + NewAutogradMeta(outputs.at(i)->requires_grad(), outputs.at(i)->is_leaf()); + outputs.at(i)->set_autograd_meta(autograd_meta); output_meta_datas_.at(i) = outputs.at(i)->mut_autograd_meta(); output_tensor_infos_.emplace_back(TensorInfo(*outputs.at(i))); } @@ -373,7 +380,7 @@ Maybe<void> 
GraphAutogradEngine::RunBackwardAndSaveGrads4LeafTensor(const Tensor bool retain_graph, bool create_graph) { for (int i = 0; i < outputs.size(); ++i) { - JUST(outputs.at(i)->now_grad_arg()->PushPartialTensor(out_grads.at(i))); + JUST(JUST(outputs.at(i)->now_grad_arg())->PushPartialTensor(out_grads.at(i))); } GraphTask graph_task(outputs, retain_graph, create_graph); JUST(graph_task.ComputeDependencies()); @@ -389,10 +396,10 @@ Maybe<TensorTuple> GraphAutogradEngine::RunBackwardAndReturnInputsTensorGrad( std::vector<bool> ori_retain_grad(inputs.size()); for (int i = 0; i < inputs.size(); ++i) { ori_retain_grad.at(i) = inputs.at(i)->retain_grad(); - inputs.at(i)->set_retain_grad(true); + JUST(inputs.at(i)->set_retain_grad(true)); } for (int i = 0; i < outputs.size(); ++i) { - JUST(outputs.at(i)->now_grad_arg()->PushPartialTensor(out_grads.at(i))); + JUST(JUST(outputs.at(i)->now_grad_arg())->PushPartialTensor(out_grads.at(i))); } JUST(graph_task.ComputeDependenciesAndPruneNode(inputs)); @@ -400,10 +407,10 @@ Maybe<TensorTuple> GraphAutogradEngine::RunBackwardAndReturnInputsTensorGrad( // Gets input grads and resume retain_grad for (int i = 0; i < inputs.size(); ++i) { - input_now_grads->at(i) = inputs.at(i)->acc_grad(); + input_now_grads->at(i) = JUST(inputs.at(i)->acc_grad()); if (!ori_retain_grad.at(i)) { - inputs.at(i)->set_acc_grad(nullptr); - inputs.at(i)->set_retain_grad(false); + JUST(inputs.at(i)->set_acc_grad(nullptr)); + JUST(inputs.at(i)->set_retain_grad(false)); } } return input_now_grads; diff --git a/oneflow/core/eager/opkernel_instruction_type.cpp b/oneflow/core/eager/opkernel_instruction_type.cpp index be3d853ea..58b3b676b 100644 --- a/oneflow/core/eager/opkernel_instruction_type.cpp +++ b/oneflow/core/eager/opkernel_instruction_type.cpp @@ -512,7 +512,7 @@ struct LocalCallOpKernelUtil final { static inline Maybe<void> InitOutputBlobs(LocalCallOpKernelPhyInstrOperand* operand) { JUST(operand->ForEachOutputTensor([&](vm::EagerBlobObject* blob_object) -> Maybe<void> { CHECK_OR_RETURN(static_cast<bool>(blob_object)); - JUST(blob_object->InitBlob()); + JUST(blob_object->TryInitBlob()); return Maybe<void>::Ok(); })); return Maybe<void>::Ok(); diff --git a/oneflow/core/framework/op_interpreter/eager_mirrored_op_interpreter.cpp b/oneflow/core/framework/op_interpreter/eager_mirrored_op_interpreter.cpp index e045100b7..cccc060c7 100644 --- a/oneflow/core/framework/op_interpreter/eager_mirrored_op_interpreter.cpp +++ b/oneflow/core/framework/op_interpreter/eager_mirrored_op_interpreter.cpp @@ -59,14 +59,17 @@ Maybe<void> NaiveInterpret(const UserOpExpr& user_op_expr, const TensorTuple& in } input_eager_blob_objects->at(i) = JUST(inputs.at(i)->eager_blob_object()); } + std::shared_ptr<EagerBlobObjectList> output_eager_blob_objects = + std::make_shared<EagerBlobObjectList>(outputs->size()); for (int i = 0; i < outputs->size(); i++) { if (!outputs->at(i)) { outputs->at(i) = std::make_shared<MirroredTensor>(std::make_shared<EagerMirroredTensorImpl>()); } + if (JUST(outputs->at(i)->has_eager_blob_object())) { + output_eager_blob_objects->at(i) = JUST(outputs->at(i)->eager_blob_object()); + } } - std::shared_ptr<EagerBlobObjectList> output_eager_blob_objects = - std::make_shared<EagerBlobObjectList>(outputs->size()); Symbol<Device> op_device; std::shared_ptr<const ParallelDesc> op_parallel_desc; bool need_check_mem_case = true; @@ -102,9 +105,11 @@ Maybe<void> NaiveInterpret(const UserOpExpr& user_op_expr, const TensorTuple& in })); for (int i = 0; i < output_eager_blob_objects->size(); i++) 
{ - auto* tensor_impl = JUST(TensorImpl4Tensor(outputs->at(i))); - JUST(tensor_impl->InitEagerBlobObject(JUST(outputs->at(i)->device())->mem_case())); - output_eager_blob_objects->at(i) = JUST(tensor_impl->eager_blob_object()); + if (!output_eager_blob_objects->at(i)) { + auto* tensor_impl = JUST(TensorImpl4Tensor(outputs->at(i))); + JUST(tensor_impl->InitEagerBlobObject(JUST(outputs->at(i)->device())->mem_case())); + output_eager_blob_objects->at(i) = JUST(tensor_impl->eager_blob_object()); + } } const auto& kernel = JUST(user_op_expr.MutKernel4Device(*op_device)); diff --git a/oneflow/core/framework/tensor.cpp b/oneflow/core/framework/tensor.cpp index 8762d3671..4fccd92de 100644 --- a/oneflow/core/framework/tensor.cpp +++ b/oneflow/core/framework/tensor.cpp @@ -20,6 +20,9 @@ limitations under the License. #include "oneflow/core/framework/tensor_tuple.h" #include "oneflow/core/autograd/autograd_engine.h" #include "oneflow/core/framework/op_interpreter/eager_mirrored_op_interpreter.h" +#include "oneflow/core/framework/op_interpreter/op_interpreter_util.h" +#include "oneflow/core/framework/op_builder.h" +#include "oneflow/core/framework/op_expr.h" namespace oneflow { @@ -51,8 +54,7 @@ namespace one { const auto& blob_desc = eager_blob_object->blob_desc(); const auto& tensor_meta = std::make_shared<MirroredTensorMeta>(blob_desc.shape_ptr(), blob_desc.data_type(), device); - const auto& autograd_meta = std::make_shared<AutogradMeta>(requires_grad, is_leaf); - auto* tensor_impl = new EagerMirroredTensorImpl(tensor_meta, autograd_meta); + auto* tensor_impl = new EagerMirroredTensorImpl(tensor_meta, requires_grad, is_leaf); JUST(tensor_impl->InitEagerBlobObjectAndTensorStorage(eager_blob_object, tensor_storage)); return std::make_shared<MirroredTensor>(std::shared_ptr<MirroredTensorImpl>(tensor_impl)); } @@ -74,6 +76,21 @@ Maybe<MirroredTensor> MirroredTensor::api_detach() const { return std::make_shared<MirroredTensor>(JUST(impl_->detach())); } +Maybe<Tensor> MirroredTensor::clone() const { + const auto& device_type = JUST(this->device())->type(); + int64_t device_id = JUST(this->device())->device_id(); + std::shared_ptr<OpExpr> copy_op_ = JUST(one::OpBuilder("copy") + .Input("in", 1) + .Attr("device_type", device_type) + .Attr("device_id", device_id) + .Output("out", 1) + .Build()); + std::shared_ptr<MirroredTensor> input = + std::const_pointer_cast<MirroredTensor>(shared_from_this()); + const auto& output = JUST(OpInterpUtil::Dispatch<Tensor>(*copy_op_, {input})); + return output; +} + Maybe<ConsistentTensor> ConsistentTensor::MakeTensor( const std::shared_ptr<const Shape>& shape, DataType dtype, Symbol<cfg::ParallelDistribution> parallel_distribution, Symbol<ParallelDesc> parallel_desc, diff --git a/oneflow/core/framework/tensor.h b/oneflow/core/framework/tensor.h index f011fa758..fa3f744ee 100644 --- a/oneflow/core/framework/tensor.h +++ b/oneflow/core/framework/tensor.h @@ -58,6 +58,7 @@ class Tensor { virtual Maybe<EagerMirroredTensorImpl*> mut_eager_mirrored_tensor_impl() { OF_UNIMPLEMENTED(); } virtual Maybe<vm::EagerBlobObject> eager_blob_object() const = 0; virtual Maybe<VmLocalDepObject> compute_local_dep_object() const = 0; + virtual Maybe<bool> has_eager_blob_object() const = 0; virtual Maybe<TensorStorage> tensor_storage() const { OF_UNIMPLEMENTED(); } // Getters/Setters valid only for EagerConsistentTensor @@ -76,19 +77,22 @@ class Tensor { virtual bool is_leaf() const = 0; virtual bool retain_grad() const = 0; virtual std::shared_ptr<const FunctionNode> grad_fn_node() const = 0; 
- virtual const std::shared_ptr<Tensor>& acc_grad() const = 0; - virtual const std::shared_ptr<TensorArg>& now_grad_arg() const = 0; + virtual Maybe<Tensor> acc_grad() const = 0; + virtual Maybe<TensorArg> now_grad_arg() const = 0; virtual Maybe<Tensor> detach() const = 0; + virtual Maybe<Tensor> clone() const = 0; // Setters for autograd virtual void set_requires_grad(bool requires_grad) = 0; - virtual void set_retain_grad(bool retain_grad) = 0; + virtual Maybe<void> set_retain_grad(bool retain_grad) = 0; virtual void set_grad_fn_node(const std::shared_ptr<FunctionNode>& grad_fn_node) = 0; virtual const std::shared_ptr<FunctionNode>& mut_grad_fn_node() = 0; - virtual void set_acc_grad(const std::shared_ptr<Tensor>& grad) = 0; - virtual std::shared_ptr<Tensor> mut_acc_grad() = 0; + virtual Maybe<void> set_acc_grad(const std::shared_ptr<Tensor>& grad) = 0; + virtual Maybe<Tensor> mut_acc_grad() = 0; virtual void set_is_leaf(bool is_leaf) = 0; virtual std::shared_ptr<AutogradMeta> mut_autograd_meta() = 0; + virtual bool has_autograd_meta() const = 0; + virtual void set_autograd_meta(const std::shared_ptr<AutogradMeta>& autograd_meta) = 0; virtual user_op::TensorDesc* mut_tensor_meta() = 0; @@ -97,7 +101,7 @@ class Tensor { }; template<typename DerivedT> -class TensorIf : public Tensor, public std::enable_shared_from_this<TensorIf<DerivedT>> { +class TensorIf : public Tensor { public: virtual ~TensorIf() = default; @@ -113,8 +117,12 @@ class TensorIf : public Tensor, public std::enable_shared_from_this<TensorIf<Der std::shared_ptr<const FunctionNode> grad_fn_node() const override { return grad_fn_node_; } // used by pybind11 only Maybe<DerivedT> api_acc_grad() const { - const std::shared_ptr<Tensor>& tensor = acc_grad(); - return cast_for_api(tensor); + if (has_autograd_meta()) { + const std::shared_ptr<Tensor>& tensor = JUST(acc_grad()); + return cast_for_api(tensor); + } else { + return std::shared_ptr<DerivedT>(); + } } // Setters for autograd @@ -130,6 +138,10 @@ class TensorIf : public Tensor, public std::enable_shared_from_this<TensorIf<Der // Operators for tensor // used by pybind11 only virtual Maybe<DerivedT> api_detach() const = 0; + Maybe<DerivedT> api_clone() const { + const std::shared_ptr<Tensor>& tensor = JUST(clone()); + return cast_for_api(tensor); + } protected: TensorIf() = default; @@ -144,7 +156,8 @@ class TensorIf : public Tensor, public std::enable_shared_from_this<TensorIf<Der } }; -class MirroredTensor final : public TensorIf<MirroredTensor> { +class MirroredTensor final : public TensorIf<MirroredTensor>, + public std::enable_shared_from_this<MirroredTensor> { public: OF_DISALLOW_COPY_AND_MOVE(MirroredTensor); MirroredTensor() = default; @@ -177,24 +190,34 @@ class MirroredTensor final : public TensorIf<MirroredTensor> { return impl_->compute_local_dep_object(); } Maybe<TensorStorage> tensor_storage() const override { return impl_->tensor_storage(); } + Maybe<bool> has_eager_blob_object() const override { return impl_->has_eager_blob_object(); } // Getters for autograd - const std::shared_ptr<Tensor>& acc_grad() const override { return impl_->acc_grad(); } - const std::shared_ptr<TensorArg>& now_grad_arg() const override { return impl_->now_grad_arg(); } + Maybe<Tensor> acc_grad() const override { return impl_->acc_grad(); } + Maybe<TensorArg> now_grad_arg() const override { return impl_->now_grad_arg(); } bool requires_grad() const override { return impl_->requires_grad(); } bool is_leaf() const override { return impl_->is_leaf(); } bool retain_grad() const override 
{ return impl_->retain_grad(); } + bool has_autograd_meta() const override { return impl_->has_autograd_meta(); } // Setters for autograd - void set_acc_grad(const std::shared_ptr<Tensor>& grad) override { impl_->set_acc_grad(grad); } + Maybe<void> set_acc_grad(const std::shared_ptr<Tensor>& grad) override { + return impl_->set_acc_grad(grad); + } void set_requires_grad(bool requires_grad) override { impl_->set_requires_grad(requires_grad); } - void set_retain_grad(bool retain_grad) override { impl_->set_retain_grad(retain_grad); } - std::shared_ptr<Tensor> mut_acc_grad() override { return impl_->mut_acc_grad(); } + Maybe<void> set_retain_grad(bool retain_grad) override { + return impl_->set_retain_grad(retain_grad); + } + Maybe<Tensor> mut_acc_grad() override { return impl_->mut_acc_grad(); } void set_is_leaf(bool is_leaf) override { impl_->set_is_leaf(is_leaf); } std::shared_ptr<AutogradMeta> mut_autograd_meta() override { return impl_->mut_autograd_meta(); } + void set_autograd_meta(const std::shared_ptr<AutogradMeta>& autograd_meta) override { + impl_->set_autograd_meta(autograd_meta); + } // Operators for tensor Maybe<MirroredTensor> api_detach() const override; + Maybe<Tensor> clone() const override; static Maybe<MirroredTensor> MakeTensor(const std::shared_ptr<const Shape>& shape, DataType dtype, const Symbol<Device>& device, bool is_lazy, @@ -234,7 +257,9 @@ class ConsistentTensor final : public TensorIf<ConsistentTensor> { const override { return impl_->consumer_parallel_distribution_constraint(); } - Maybe<MirroredTensor> cur_rank_phy_tensor() const { return impl_->cur_rank_phy_tensor(); } + Maybe<MirroredTensor> cur_rank_phy_tensor() const override { + return impl_->cur_rank_phy_tensor(); + } int64_t ndim() const override; bool is_cuda() const override; int64_t dim(int64_t index) const override; @@ -249,6 +274,8 @@ class ConsistentTensor final : public TensorIf<ConsistentTensor> { return impl_->compute_local_dep_object(); } const TensorMeta& tensor_meta() const override { return *impl_->tensor_meta(); } + Maybe<TensorStorage> tensor_storage() const override { return impl_->tensor_storage(); } + Maybe<bool> has_eager_blob_object() const override { return impl_->has_eager_blob_object(); } // Setters Maybe<void> set_consumer_parallel_distribution_constraint( @@ -258,22 +285,31 @@ class ConsistentTensor final : public TensorIf<ConsistentTensor> { } // Getters for autograd - const std::shared_ptr<Tensor>& acc_grad() const override { return impl_->acc_grad(); } - const std::shared_ptr<TensorArg>& now_grad_arg() const override { return impl_->now_grad_arg(); } + Maybe<Tensor> acc_grad() const override { return impl_->acc_grad(); } + Maybe<TensorArg> now_grad_arg() const override { return impl_->now_grad_arg(); } bool requires_grad() const override { return impl_->requires_grad(); } bool is_leaf() const override { return impl_->is_leaf(); } bool retain_grad() const override { return impl_->retain_grad(); } + bool has_autograd_meta() const override { return impl_->has_autograd_meta(); } // Setters for autograd - void set_acc_grad(const std::shared_ptr<Tensor>& grad) override { impl_->set_acc_grad(grad); } - std::shared_ptr<Tensor> mut_acc_grad() override { return impl_->mut_acc_grad(); } + Maybe<void> set_acc_grad(const std::shared_ptr<Tensor>& grad) override { + return impl_->set_acc_grad(grad); + } + Maybe<Tensor> mut_acc_grad() override { return impl_->mut_acc_grad(); } void set_requires_grad(bool requires_grad) override { impl_->set_requires_grad(requires_grad); } - void 
set_retain_grad(bool retain_grad) override { impl_->set_retain_grad(retain_grad); } + Maybe<void> set_retain_grad(bool retain_grad) override { + return impl_->set_retain_grad(retain_grad); + } void set_is_leaf(bool is_leaf) override { impl_->set_is_leaf(is_leaf); } std::shared_ptr<AutogradMeta> mut_autograd_meta() override { return impl_->mut_autograd_meta(); } + void set_autograd_meta(const std::shared_ptr<AutogradMeta>& autograd_meta) override { + impl_->set_autograd_meta(autograd_meta); + } // Operators for tensor virtual Maybe<ConsistentTensor> api_detach() const override; + Maybe<Tensor> clone() const override { return Error::Unimplemented(); } static Maybe<ConsistentTensor> MakeTensor(const std::shared_ptr<const Shape>& shape, DataType dtype, diff --git a/oneflow/core/framework/tensor_impl.cpp b/oneflow/core/framework/tensor_impl.cpp index b2828cda8..bfd9069b1 100644 --- a/oneflow/core/framework/tensor_impl.cpp +++ b/oneflow/core/framework/tensor_impl.cpp @@ -32,6 +32,33 @@ limitations under the License. namespace oneflow { namespace one { +Maybe<Tensor> TensorImpl::acc_grad() const { + CHECK_NOTNULL_OR_RETURN(autograd_meta_); + return autograd_meta_->acc_grad(); +} + +Maybe<TensorArg> TensorImpl::now_grad_arg() const { + CHECK_NOTNULL_OR_RETURN(autograd_meta_); + return autograd_meta_->now_grad_arg(); +} + +Maybe<void> TensorImpl::set_acc_grad(const std::shared_ptr<Tensor>& grad) { + CHECK_NOTNULL_OR_RETURN(autograd_meta_); + autograd_meta_->set_acc_grad(grad); + return Maybe<void>::Ok(); +} + +Maybe<Tensor> TensorImpl::mut_acc_grad() { + CHECK_NOTNULL_OR_RETURN(autograd_meta_); + return autograd_meta_->mut_acc_grad(); +} + +Maybe<void> TensorImpl::set_retain_grad(bool retain_grad) { + CHECK_NOTNULL_OR_RETURN(autograd_meta_); + autograd_meta_->set_retain_grad(retain_grad); + return Maybe<void>::Ok(); +} + namespace { std::shared_ptr<const MirroredTensorMeta> NewDefaultMirroredTensorMeta() { @@ -48,24 +75,19 @@ Maybe<MirroredTensorImpl> LazyMirroredTensorImpl::detach() const { } EagerMirroredTensorImpl::EagerMirroredTensorImpl() - : MirroredTensorImpl(NewDefaultMirroredTensorMeta(), NewAutogradMeta(false, false)) {} - -EagerMirroredTensorImpl::EagerMirroredTensorImpl( - const std::shared_ptr<const MirroredTensorMeta>& tensor_meta, - const std::shared_ptr<AutogradMeta>& autograd_meta) - : MirroredTensorImpl(tensor_meta, autograd_meta) {} + : MirroredTensorImpl(NewDefaultMirroredTensorMeta(), false, false) {} EagerMirroredTensorImpl::EagerMirroredTensorImpl( const std::shared_ptr<const MirroredTensorMeta>& tensor_meta, bool requires_grad, bool is_leaf) - : MirroredTensorImpl(tensor_meta, NewAutogradMeta(requires_grad, is_leaf)) {} + : MirroredTensorImpl(tensor_meta, requires_grad, is_leaf) {} EagerMirroredTensorImpl::~EagerMirroredTensorImpl() {} EagerMirroredTensorImpl::EagerMirroredTensorImpl( const std::shared_ptr<const MirroredTensorMeta>& tensor_meta, std::shared_ptr<TensorStorage> tensor_storage, bool requires_grad, bool is_leaf) - : MirroredTensorImpl(tensor_meta, NewAutogradMeta(requires_grad, is_leaf)), - tensor_storage_(tensor_storage) {} + : MirroredTensorImpl(tensor_meta, requires_grad, is_leaf), tensor_storage_(tensor_storage) {} + Maybe<void> EagerMirroredTensorImpl::UpdateTensorStorage() { const auto& eager_blob_object = eager_blob_object_; tensor_storage_ = std::make_shared<TensorStorage>(eager_blob_object->tensor_buffer()); @@ -169,10 +191,10 @@ size_t ConsistentTensorMeta::CalcHashValue() const { } EagerConsistentTensorImpl::EagerConsistentTensorImpl( - 
Symbol<ConsistentTensorMeta> consistent_tensor_meta, - const std::shared_ptr<AutogradMeta>& autograd_meta, + Symbol<ConsistentTensorMeta> consistent_tensor_meta, bool requires_grad, bool is_leaf, const std::shared_ptr<MirroredTensor>& cur_rank_phy_tensor) - : ConsistentTensorImpl(consistent_tensor_meta, autograd_meta), + : ConsistentTensorImpl(consistent_tensor_meta, cur_rank_phy_tensor->requires_grad(), + cur_rank_phy_tensor->is_leaf()), cur_rank_phy_tensor_(cur_rank_phy_tensor) {} /*static*/ Maybe<EagerConsistentTensorImpl> EagerConsistentTensorImpl::New( @@ -193,8 +215,9 @@ EagerConsistentTensorImpl::EagerConsistentTensorImpl( const auto& dtype = cur_rank_phy_tensor->dtype(); Symbol<ConsistentTensorMeta> consistent_tensor_meta( ConsistentTensorMeta(shape, dtype, parallel_distribution, parallel_desc)); - return std::shared_ptr<EagerConsistentTensorImpl>(new EagerConsistentTensorImpl( - consistent_tensor_meta, cur_rank_phy_tensor->mut_autograd_meta(), cur_rank_phy_tensor)); + return std::shared_ptr<EagerConsistentTensorImpl>( + new EagerConsistentTensorImpl(consistent_tensor_meta, cur_rank_phy_tensor->requires_grad(), + cur_rank_phy_tensor->is_leaf(), cur_rank_phy_tensor)); } /*static*/ Maybe<EagerConsistentTensorImpl> EagerConsistentTensorImpl::New( @@ -219,21 +242,20 @@ EagerConsistentTensorImpl::EagerConsistentTensorImpl( JUST(GetPhysicalShape(*shape, *parallel_distribution, *parallel_desc, parallel_id)); const auto& cur_rank_phy_tensor_meta = std::make_shared<MirroredTensorMeta>(cur_rank_phy_shape, dtype, device); - const auto& autograd_meta = NewAutogradMeta(requires_grad, is_leaf); auto cur_rank_phy_tensor_impl = - std::make_shared<EagerMirroredTensorImpl>(cur_rank_phy_tensor_meta, autograd_meta); + std::make_shared<EagerMirroredTensorImpl>(cur_rank_phy_tensor_meta, requires_grad, is_leaf); JUST(cur_rank_phy_tensor_impl->InitEagerBlobObject(device->mem_case())); const auto& cur_rank_phy_tensor = std::make_shared<MirroredTensor>(cur_rank_phy_tensor_impl); - auto* tensor_impl = new EagerConsistentTensorImpl( - consistent_tensor_meta, cur_rank_phy_tensor->mut_autograd_meta(), cur_rank_phy_tensor); + auto* tensor_impl = + new EagerConsistentTensorImpl(consistent_tensor_meta, cur_rank_phy_tensor->requires_grad(), + cur_rank_phy_tensor->is_leaf(), cur_rank_phy_tensor); return std::shared_ptr<EagerConsistentTensorImpl>(tensor_impl); } /*static*/ Maybe<EagerConsistentTensorImpl> EagerConsistentTensorImpl::NewWithoutPhyTensor( Symbol<ConsistentTensorMeta> consistent_tensor_meta, Symbol<Device> device, int64_t parallel_id, bool requires_grad, bool is_leaf) { - const auto& autograd_meta = NewAutogradMeta(requires_grad, is_leaf); - auto* tensor_impl = new EagerConsistentTensorImpl(consistent_tensor_meta, autograd_meta, + auto* tensor_impl = new EagerConsistentTensorImpl(consistent_tensor_meta, requires_grad, is_leaf, std::shared_ptr<MirroredTensor>()); return std::shared_ptr<EagerConsistentTensorImpl>(tensor_impl); } diff --git a/oneflow/core/framework/tensor_impl.h b/oneflow/core/framework/tensor_impl.h index 707bc01b1..213e0fe09 100644 --- a/oneflow/core/framework/tensor_impl.h +++ b/oneflow/core/framework/tensor_impl.h @@ -62,26 +62,33 @@ class TensorImpl { virtual Maybe<vm::EagerBlobObject> eager_blob_object() const = 0; virtual Maybe<VmLocalDepObject> compute_local_dep_object() const = 0; virtual Maybe<TensorStorage> tensor_storage() const { OF_UNIMPLEMENTED(); } + virtual Maybe<bool> has_eager_blob_object() const = 0; // Getters for autograd - const std::shared_ptr<Tensor>& acc_grad() 
const { return autograd_meta_->acc_grad(); } - const std::shared_ptr<TensorArg>& now_grad_arg() const { return autograd_meta_->now_grad_arg(); } - bool requires_grad() const { return autograd_meta_->requires_grad(); } - bool is_leaf() const { return autograd_meta_->is_leaf(); } + Maybe<Tensor> acc_grad() const; + Maybe<TensorArg> now_grad_arg() const; + bool requires_grad() const { return requires_grad_; } + bool is_leaf() const { return is_leaf_; } bool retain_grad() const { return autograd_meta_->retain_grad(); } // Setters for autograd - void set_acc_grad(const std::shared_ptr<Tensor>& grad) { autograd_meta_->set_acc_grad(grad); } - std::shared_ptr<Tensor> mut_acc_grad() { return autograd_meta_->mut_acc_grad(); } - void set_requires_grad(bool requires_grad) { autograd_meta_->set_requires_grad(requires_grad); } - void set_retain_grad(bool retain_grad) { autograd_meta_->set_retain_grad(retain_grad); } - void set_is_leaf(bool is_leaf) { autograd_meta_->set_is_leaf(is_leaf); } + Maybe<void> set_acc_grad(const std::shared_ptr<Tensor>& grad); + Maybe<Tensor> mut_acc_grad(); + void set_requires_grad(bool requires_grad) { requires_grad_ = requires_grad; } + Maybe<void> set_retain_grad(bool retain_grad); + void set_is_leaf(bool is_leaf) { is_leaf_ = is_leaf; } std::shared_ptr<AutogradMeta> mut_autograd_meta() { return autograd_meta_; } + void set_autograd_meta(const std::shared_ptr<AutogradMeta>& autograd_meta) { + autograd_meta_ = autograd_meta; + } + bool has_autograd_meta() const { return autograd_meta_.get(); } protected: - TensorImpl(const std::shared_ptr<AutogradMeta>& autograd_meta) : autograd_meta_(autograd_meta) {} + TensorImpl(bool requires_grad, bool is_leaf) : requires_grad_(requires_grad), is_leaf_(is_leaf) {} protected: + bool requires_grad_; + bool is_leaf_; std::shared_ptr<AutogradMeta> autograd_meta_; }; @@ -106,8 +113,8 @@ class MirroredTensorImpl : public TensorImpl { protected: MirroredTensorImpl(const std::shared_ptr<const MirroredTensorMeta>& tensor_meta, - const std::shared_ptr<AutogradMeta>& autograd_meta) - : TensorImpl(autograd_meta), tensor_meta_(tensor_meta) {} + bool requires_grad, bool is_leaf) + : TensorImpl(requires_grad, is_leaf), tensor_meta_(tensor_meta) {} std::shared_ptr<const MirroredTensorMeta> tensor_meta_; }; @@ -134,6 +141,7 @@ class ConsistentTensorImpl : public TensorImpl { // Getters valid only for EagerMirroredTensorImpl Maybe<vm::EagerBlobObject> eager_blob_object() const override { OF_UNIMPLEMENTED(); } Maybe<VmLocalDepObject> compute_local_dep_object() const override { OF_UNIMPLEMENTED(); } + Maybe<bool> has_eager_blob_object() const override { OF_UNIMPLEMENTED(); } // Setters void set_consumer_parallel_distribution_constraint(Symbol<cfg::ParallelDistribution> val) { @@ -146,9 +154,8 @@ class ConsistentTensorImpl : public TensorImpl { } protected: - ConsistentTensorImpl(Symbol<ConsistentTensorMeta> tensor_meta, - const std::shared_ptr<AutogradMeta>& autograd_meta) - : TensorImpl(autograd_meta), + ConsistentTensorImpl(Symbol<ConsistentTensorMeta> tensor_meta, bool requires_grad, bool is_leaf) + : TensorImpl(requires_grad, is_leaf), tensor_meta_(tensor_meta), consumer_parallel_distribution_constraint_() {} @@ -161,17 +168,18 @@ class LazyMirroredTensorImpl final : public MirroredTensorImpl { OF_DISALLOW_COPY_AND_MOVE(LazyMirroredTensorImpl); LazyMirroredTensorImpl(const std::shared_ptr<const MirroredTensorMeta>& tensor_meta, bool requires_grad, bool is_leaf) - : MirroredTensorImpl(tensor_meta, NewAutogradMeta(requires_grad, is_leaf)) {} + : 
MirroredTensorImpl(tensor_meta, requires_grad, is_leaf) {} ~LazyMirroredTensorImpl() override = default; // Getters - const std::shared_ptr<const Shape>& shape() const { return tensor_meta()->shape_ptr(); } + const std::shared_ptr<const Shape>& shape() const override { return tensor_meta()->shape_ptr(); } bool is_lazy() const override { return true; } // Getters valid only for EagerMirroredTensorImpl Maybe<vm::EagerBlobObject> eager_blob_object() const override { OF_UNIMPLEMENTED(); } Maybe<VmLocalDepObject> compute_local_dep_object() const override { OF_UNIMPLEMENTED(); } Maybe<TensorStorage> tensor_storage() const override { OF_UNIMPLEMENTED(); } + Maybe<bool> has_eager_blob_object() const override { OF_UNIMPLEMENTED(); } Maybe<MirroredTensorImpl> detach() const override; }; @@ -179,8 +187,6 @@ class EagerMirroredTensorImpl final : public MirroredTensorImpl { public: OF_DISALLOW_COPY_AND_MOVE(EagerMirroredTensorImpl); EagerMirroredTensorImpl(); - EagerMirroredTensorImpl(const std::shared_ptr<const MirroredTensorMeta>& tensor_meta, - const std::shared_ptr<AutogradMeta>& autograd_meta); EagerMirroredTensorImpl(const std::shared_ptr<const MirroredTensorMeta>& tensor_meta, bool requires_grad, bool is_leaf); EagerMirroredTensorImpl(const std::shared_ptr<const MirroredTensorMeta>& tensor_meta, @@ -203,6 +209,7 @@ class EagerMirroredTensorImpl final : public MirroredTensorImpl { CHECK_OR_RETURN(eager_blob_object_); return tensor_storage_; } + Maybe<bool> has_eager_blob_object() const override { return eager_blob_object_.get(); } // Setters TensorStorage* mut_tensor_storage() { return tensor_storage_.get(); } @@ -226,7 +233,7 @@ class LazyConsistentTensorImpl final : public ConsistentTensorImpl { OF_DISALLOW_COPY_AND_MOVE(LazyConsistentTensorImpl); LazyConsistentTensorImpl(Symbol<ConsistentTensorMeta> consistent_tensor_meta, bool requires_grad, bool is_leaf) - : ConsistentTensorImpl(consistent_tensor_meta, NewAutogradMeta(requires_grad, is_leaf)) {} + : ConsistentTensorImpl(consistent_tensor_meta, requires_grad, is_leaf) {} ~LazyConsistentTensorImpl() override = default; // Getters @@ -262,8 +269,8 @@ class EagerConsistentTensorImpl final : public ConsistentTensorImpl { Symbol<Device>, int64_t, bool, bool); private: - EagerConsistentTensorImpl(Symbol<ConsistentTensorMeta> consistent_tensor_meta, - const std::shared_ptr<AutogradMeta>& autograd_meta, + EagerConsistentTensorImpl(Symbol<ConsistentTensorMeta> consistent_tensor_meta, bool requires_grad, + bool is_leaf, const std::shared_ptr<MirroredTensor>& cur_rank_phy_tensor); std::shared_ptr<MirroredTensor> cur_rank_phy_tensor_; diff --git a/oneflow/core/functional/functional_api.yaml b/oneflow/core/functional/functional_api.yaml index 273b2635b..cec427e3a 100644 --- a/oneflow/core/functional/functional_api.yaml +++ b/oneflow/core/functional/functional_api.yaml @@ -153,7 +153,7 @@ bind_python: True - name: "relu" - signature: "Tensor Relu(Tensor x)" + signature: "Tensor Relu(Tensor x, *, Bool inplace=False)" bind_python: True - name: "relu_grad" diff --git a/oneflow/core/functional/impl/activation_functor.cpp b/oneflow/core/functional/impl/activation_functor.cpp index f54b4a6d2..7f8bcd10d 100644 --- a/oneflow/core/functional/impl/activation_functor.cpp +++ b/oneflow/core/functional/impl/activation_functor.cpp @@ -25,6 +25,7 @@ limitations under the License. 
#include "oneflow/core/framework/tensor_tuple.h" #include "oneflow/core/functional/function_library.h" #include "oneflow/core/functional/scalar.h" +#include "oneflow/core/autograd/autograd_mode.h" namespace oneflow { namespace one { @@ -32,9 +33,24 @@ namespace functional { namespace impl { -class ReluFunctor : public UnaryFunctor { +class ReluFunctor { public: - ReluFunctor() { op_ = CHECK_JUST(one::OpBuilder("relu").Input("in").Output("out").Build()); } + ReluFunctor() { + op_ = CHECK_JUST(one::OpBuilder("relu").Input("in", 1).Output("out", 1).Build()); + } + Maybe<Tensor> operator()(const std::shared_ptr<Tensor>& x, bool inplace) const { + if (inplace) { + std::shared_ptr<TensorTuple> outputs = std::make_shared<TensorTuple>(1); + outputs->at(0) = x; + JUST(JUST(OpInterpUtil::GetInterpreter())->Apply(*op_, {x}, outputs.get(), AttrMap{})); + return outputs->at(0); + } else { + return OpInterpUtil::Dispatch<Tensor>(*op_, {x}); + } + } + + private: + std::shared_ptr<OpExpr> op_; }; class ReluGradFunctor : public BinaryFunctor { diff --git a/oneflow/python/framework/tensor.py b/oneflow/python/framework/tensor.py index d79cbcd74..429ed1032 100644 --- a/oneflow/python/framework/tensor.py +++ b/oneflow/python/framework/tensor.py @@ -322,6 +322,13 @@ class Tensor: else: return None + @_auto_determine + def clone(self): + if self._local_or_consistent_tensor is not None: + return flow.Tensor(self._local_or_consistent_tensor.clone()) + else: + return None + def requires_grad_(self, requires_grad=True): self.requires_grad = requires_grad diff --git a/oneflow/python/nn/modules/activation.py b/oneflow/python/nn/modules/activation.py index 40783227e..9b4fd61c0 100644 --- a/oneflow/python/nn/modules/activation.py +++ b/oneflow/python/nn/modules/activation.py @@ -133,8 +133,15 @@ class ReLU(Module): def __init__(self, inplace: bool = False): super().__init__() + self._inplace = inplace def forward(self, x): + if self._inplace: + if x.requires_grad and x.is_leaf: + raise RuntimeError( + "a leaf Variable that requires grad is being used in an in-place operation." 
+ ) + return flow.F.relu(x, inplace=True) return flow.F.relu(x) diff --git a/oneflow/python/test/modules/test_activation.py b/oneflow/python/test/modules/test_activation.py index 385c855df..fd9f74e2f 100644 --- a/oneflow/python/test/modules/test_activation.py +++ b/oneflow/python/test/modules/test_activation.py @@ -38,6 +38,19 @@ def _test_relu_impl(test_case, shape, device): of_out.backward() test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out > 0, 1e-5, 1e-5)) + inplace_m = flow.nn.ReLU(inplace=True) + of_input = flow.Tensor( + np_input, dtype=flow.float32, device=flow.device(device), requires_grad=True + ) + of_input_inplace = of_input + 1 + inplace_m(of_input_inplace) + np_out = np.maximum(0, np_input + 1) + test_case.assertTrue(np.allclose(of_input_inplace.numpy(), np_out, 1e-5, 1e-5)) + + of_out_inplace = of_input_inplace.sum() + of_out_inplace.backward() + test_case.assertTrue(np.allclose(of_input.grad.numpy(), np_out > 0, 1e-5, 1e-5)) + @unittest.skipIf( not flow.unittest.env.eager_execution_enabled(), diff --git a/oneflow/python/test/tensor/test_tensor.py b/oneflow/python/test/tensor/test_tensor.py index 9e8141f14..5596eab2e 100644 --- a/oneflow/python/test/tensor/test_tensor.py +++ b/oneflow/python/test/tensor/test_tensor.py @@ -763,6 +763,22 @@ class TestTensor(flow.unittest.TestCase): test_case.assertEqual(z.is_leaf, True) test_case.assertEqual(z.grad_fn, None) + @unittest.skipIf( + not flow.unittest.env.eager_execution_enabled(), + "numpy doesn't work in lazy mode", + ) + def test_tensor_clone(test_case): + shape = (2, 3, 4, 5) + x = flow.Tensor( + np.random.randn(*shape), dtype=flow.float32, requires_grad=True, + ) + y = x.clone() + test_case.assertTrue(np.allclose(y.numpy(), x.numpy(), 1e-4, 1e-4)) + test_case.assertEqual(y.requires_grad, True) + test_case.assertEqual(y.is_leaf, False) + # Cannot print Copy grad function + test_case.assertTrue(y.grad_fn != None) + @unittest.skipIf( not flow.unittest.env.eager_execution_enabled(), "numpy doesn't work in lazy mode", -- GitLab
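
Below is a minimal usage sketch, distilled from the test cases in this patch (test_tensor_clone and _test_relu_impl), of the user-facing behavior the commit adds: Tensor.clone(), nn.ReLU(inplace=True), and the guard that rejects in-place ops on leaf tensors that require grad. It is not part of the patch. It assumes a OneFlow build containing this commit with eager execution already enabled (the tests above skip otherwise); the oneflow.experimental import path is an assumption taken from the test conventions of this era, not from the diff itself.

    # Usage sketch for the features added by this commit (assumptions noted above).
    import numpy as np
    import oneflow.experimental as flow  # import path is an assumption

    # clone(): copies the tensor through a "copy" op, so the result takes part in
    # autograd but is no longer a leaf (see test_tensor_clone).
    x = flow.Tensor(np.random.randn(2, 3), dtype=flow.float32, requires_grad=True)
    y = x.clone()
    assert y.requires_grad and not y.is_leaf and y.grad_fn is not None

    # nn.ReLU(inplace=True): rewrites a non-leaf input in place; gradients still
    # reach the original leaf tensor (see _test_relu_impl).
    m = flow.nn.ReLU(inplace=True)
    a = flow.Tensor(np.random.randn(2, 3), dtype=flow.float32, requires_grad=True)
    b = a + 1          # non-leaf input, so in-place modification is allowed
    m(b)               # b now holds relu(a + 1)
    b.sum().backward()
    print(a.grad.numpy())  # 1.0 where a + 1 > 0, else 0.0

    # Applying an in-place op directly to a leaf tensor that requires grad raises,
    # mirroring PyTorch's behavior.
    try:
        m(a)
    except RuntimeError as e:
        print(e)  # "a leaf Variable that requires grad is being used in an in-place operation."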