diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f51e67f580ed201de6593ecea7199f52e6b9d32..c1933f0d070caa69392652df250ac873460fde3d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -120,7 +120,7 @@ if(WIN32) #set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS} /DEBUG:FASTLINK") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /D_ITERATOR_DEBUG_LEVEL=0") else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wno-sign-compare -Wno-unused-function -fPIC -Werror=return-type") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -Wall -Wno-sign-compare -Wno-unused-function -fPIC") endif() if (THIRD_PARTY) diff --git a/oneflow/core/common/error.proto b/oneflow/core/common/error.proto index b7d2471add4d3851da208ab552f501386f7195a3..7040f3344cb8de619533e22fe20dd164816e15a9 100644 --- a/oneflow/core/common/error.proto +++ b/oneflow/core/common/error.proto @@ -57,7 +57,7 @@ enum JobBuildAndInferError { kLogicalBlobNameInvalid = 402; kOpNameExist = 450; - kOpConfDeviceTypeNoSet = 460; + kOpConfDeviceTagNoSet = 460; kPlacementError = 470; kBlobSplitAxisInferError = 480; kUnknownJobBuildAndInferError = 500; diff --git a/oneflow/core/framework/to_string.cpp b/oneflow/core/framework/to_string.cpp index 3ea283be969a470e0f99fa6946903b7de35d257d..ddc75b751e45aced86909a43bc1d2b057ba786c4 100644 --- a/oneflow/core/framework/to_string.cpp +++ b/oneflow/core/framework/to_string.cpp @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "oneflow/core/common/util.h" #include "oneflow/core/framework/to_string.h" namespace oneflow { @@ -20,7 +21,7 @@ namespace oneflow { Maybe<const char*> DeviceTag4DeviceType(DeviceType device_type) { if (device_type == kCPU) { return "cpu"; } if (device_type == kGPU) { return "gpu"; } - return Error::DeviceTagNotFound() << "invalid"; + return Error::DeviceTagNotFound() << "invalid_device"; } Maybe<DeviceType> DeviceType4DeviceTag(const std::string& device_tag) { diff --git a/oneflow/core/framework/user_op_hob.cpp b/oneflow/core/framework/user_op_hob.cpp index a89bc28e75c293a3f45ea44384b8cc6cfe26c641..36f70a2db3dd0c1fcc258ff12409fcb2de0bbe2c 100644 --- a/oneflow/core/framework/user_op_hob.cpp +++ b/oneflow/core/framework/user_op_hob.cpp @@ -40,13 +40,6 @@ hob::BoolFunctorPtr<KernelRegContext> HobFalse() { return krbf_ptr; } -hob::HobContextGetter<KernelRegContext, DeviceType> HobDeviceType() { - std::ostringstream string_stream; - string_stream << "device_type"; - return hob::HobContextGetter<KernelRegContext, DeviceType>( - string_stream.str(), [](const KernelRegContext& ctx) { return ctx.device_type(); }); -} - hob::HobContextGetter<KernelRegContext, DataType> HobDataType(const std::string& tensor_name, int tensor_idx) { std::ostringstream string_stream; @@ -58,6 +51,14 @@ hob::HobContextGetter<KernelRegContext, DataType> HobDataType(const std::string& }); } +HobStringContextGetter<KernelRegContext> HobDeviceTag() { + std::ostringstream string_stream; + string_stream << "device_tag"; + return HobStringContextGetter<KernelRegContext>( + string_stream.str(), + [](const KernelRegContext& ctx) -> const std::string& { return ctx.device_tag(); }); +} + } // namespace user_op } // namespace oneflow diff --git a/oneflow/core/framework/user_op_hob.h b/oneflow/core/framework/user_op_hob.h index 3ec831d614da851fa8f853b9c4fbe8df7859b848..a1a161d2697109c58a0bf6c488e1231fe0d53214 100644 --- 
a/oneflow/core/framework/user_op_hob.h +++ b/oneflow/core/framework/user_op_hob.h @@ -16,8 +16,9 @@ limitations under the License. #ifndef ONEFLOW_CORE_FRAMEWORK_USER_OP_HOB_H_ #define ONEFLOW_CORE_FRAMEWORK_USER_OP_HOB_H_ -#include "oneflow/core/common/high_order_bool.h" #include "oneflow/core/common/data_type.h" +#include "oneflow/core/common/high_order_bool.h" +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/framework/user_op_registry_manager.h" namespace oneflow { @@ -28,8 +29,6 @@ hob::BoolFunctorPtr<KernelRegContext> HobTrue(); hob::BoolFunctorPtr<KernelRegContext> HobFalse(); -hob::HobContextGetter<KernelRegContext, DeviceType> HobDeviceType(); - hob::HobContextGetter<KernelRegContext, DataType> HobDataType(const std::string& tensor_name, int tensor_idx); @@ -47,6 +46,46 @@ hob::HobContextGetter<user_op::KernelRegContext, T> HobAttr(const std::string& a }); } +template<typename ContextT> +class HobStringContextGetter final { + public: + HobStringContextGetter(const DeviceType& device_type) { + std::string str = ToString(device_type); + debug_str_ = str; + context_getter_ = [str](const ContextT&) -> const std::string& { return str; }; + } + HobStringContextGetter(const char* const_value) { + std::string str(const_value); + debug_str_ = str; + context_getter_ = [str](const ContextT&) -> const std::string& { return str; }; + } + HobStringContextGetter(const std::string& const_value) + : debug_str_(const_value), + context_getter_( + [const_value](const ContextT&) -> const std::string& { return const_value; }) {} + HobStringContextGetter(const std::string& debug_str, + const std::function<const std::string&(const ContextT&)>& context_getter) + : debug_str_(debug_str), context_getter_(context_getter) {} + + hob::BoolFunctorPtr<ContextT> operator==(const HobStringContextGetter& other) const { + std::ostringstream string_stream; + string_stream << debug_str_ << " == " << other.debug_str_; + std::function<std::string(const ContextT&)> l_fn = this->context_getter_; + std::function<std::string(const ContextT&)> r_fn = other.context_getter_; + std::shared_ptr<const hob::BoolFunctor<ContextT>> krbf_ptr = + std::make_shared<const hob::HighOrderBoolFunctor<ContextT>>( + string_stream.str(), + [l_fn, r_fn](const ContextT& ctx) { return l_fn(ctx) == r_fn(ctx); }); + return krbf_ptr; + } + + private: + std::string debug_str_; + std::function<const std::string&(const ContextT&)> context_getter_; +}; + +HobStringContextGetter<KernelRegContext> HobDeviceTag(); + } // namespace user_op } // namespace oneflow diff --git a/oneflow/core/framework/user_op_kernel_registry.h b/oneflow/core/framework/user_op_kernel_registry.h index 7265ff53f9a4275510e9627b1d4803dad61cbf20..412e0e2981771c65000c6d73e061d0d68ead9e00 100644 --- a/oneflow/core/framework/user_op_kernel_registry.h +++ b/oneflow/core/framework/user_op_kernel_registry.h @@ -37,6 +37,7 @@ class KernelRegContext { virtual ~KernelRegContext() = default; virtual DeviceType device_type() const = 0; + virtual const std::string& device_tag() const = 0; virtual const ParallelContext& parallel_ctx() const = 0; virtual const TensorDesc* TensorDesc4ArgNameAndIndex(const std::string&, int32_t) const = 0; diff --git a/oneflow/core/graph/boxing/collective_boxing_sub_task_graph_builder.cpp b/oneflow/core/graph/boxing/collective_boxing_sub_task_graph_builder.cpp index 6624e6827d0989b2e3b113091b852d37bd8b0c95..280a167d9be60143a90905781a0ffdaa0a8cb446 100644 --- a/oneflow/core/graph/boxing/collective_boxing_sub_task_graph_builder.cpp +++ 
b/oneflow/core/graph/boxing/collective_boxing_sub_task_graph_builder.cpp @@ -13,10 +13,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "oneflow/core/framework/to_string.h" +#include "oneflow/core/graph/boxing/chain_sub_task_graph_builder.h" #include "oneflow/core/graph/boxing/collective_boxing_sub_task_graph_builder.h" #include "oneflow/core/graph/boxing/sub_task_graph_builder_util.h" #include "oneflow/core/graph/collective_boxing_task_node.h" -#include "oneflow/core/graph/boxing/chain_sub_task_graph_builder.h" #include "oneflow/core/graph/slice_boxing_task_node.h" namespace oneflow { @@ -31,7 +32,7 @@ void NcclInitCollectiveNode(CollectiveBoxingGenericTaskNode* node, const BlobDesc& logical_blob_desc, OpType op_type, int64_t root) { OperatorConf op_conf; op_conf.set_name(name); - op_conf.set_device_type(DeviceType::kGPU); + op_conf.set_device_tag(CHECK_JUST(DeviceTag4DeviceType(DeviceType::kGPU))); CollectiveBoxingGenericOpConf* conf = op_conf.mutable_collective_boxing_generic_conf(); *conf->mutable_lbi() = lbi; RankDesc* rank_desc = conf->mutable_rank_desc(); diff --git a/oneflow/core/graph/boxing_identity_compute_task_node.cpp b/oneflow/core/graph/boxing_identity_compute_task_node.cpp index ea4a7e7d4fd79609060ac0fad5bf034e11da7f03..035700f09d395ad97abd3fd497d1892f321fd3c6 100644 --- a/oneflow/core/graph/boxing_identity_compute_task_node.cpp +++ b/oneflow/core/graph/boxing_identity_compute_task_node.cpp @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/graph/boxing_identity_compute_task_node.h" #include "oneflow/core/graph/logical_node.h" @@ -41,7 +42,7 @@ void BoxingIdentityCompTaskNode::BuildExecGphAndRegst() { ExecNode* node = mut_exec_gph().NewNode(); OperatorConf op_conf; op_conf.set_name("System-Boxing-Identity-" + NewUniqueId()); - op_conf.set_device_type(this->device_type()); + op_conf.set_device_tag(CHECK_JUST(DeviceTag4DeviceType(this->device_type()))); *op_conf.mutable_boxing_identity_conf()->mutable_lbi() = lbi_; std::shared_ptr<Operator> sole_op = ConstructOp(op_conf, &GlobalJobDesc()); node->mut_op() = sole_op; diff --git a/oneflow/core/graph/chain_graph.cpp b/oneflow/core/graph/chain_graph.cpp index 1bcc6c8988994d621726967aa7ca6ab35dd27fc3..9a1c898de2cab605e2a50d6f8977cba7f3642cb4 100644 --- a/oneflow/core/graph/chain_graph.cpp +++ b/oneflow/core/graph/chain_graph.cpp @@ -20,6 +20,7 @@ limitations under the License. 
#include "oneflow/core/thread/thread_pool.h" #include "oneflow/core/common/blocking_counter.h" #include "oneflow/core/framework/config_def.h" +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/job/global_for.h" namespace oneflow { @@ -180,10 +181,8 @@ void CollectIgnoreTaskEdgesInFirstMergedChains(const std::vector<std::vector<Tas if (fw_node == nullptr) { continue; } if (fw_node->logical_node()->op_vec().size() != 1) { continue; } const auto& src_op = *fw_node->logical_node()->SoleOp(); - if (src_op.op_conf().has_variable_conf() - && src_op.op_conf().device_type() == DeviceType::kGPU) { - return true; - } + DeviceType device_type = CHECK_JUST(DeviceType4DeviceTag(src_op.op_conf().device_tag())); + if (src_op.op_conf().has_variable_conf() && device_type == DeviceType::kGPU) { return true; } } return false; }; diff --git a/oneflow/core/graph/copy_task_node.cpp b/oneflow/core/graph/copy_task_node.cpp index 70a658ba0a1caf64b6f68f70f0fadb766a842c74..39ec0da62807b7a5984cb809c7f95ca238e85aa7 100644 --- a/oneflow/core/graph/copy_task_node.cpp +++ b/oneflow/core/graph/copy_task_node.cpp @@ -13,9 +13,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/graph/copy_task_node.h" -#include "oneflow/core/operator/operator.h" #include "oneflow/core/job/thrd_id_generator.h" +#include "oneflow/core/operator/operator.h" namespace oneflow { @@ -79,7 +80,7 @@ void CopyHdTaskNode::InitProducedRegstMemCase(MemoryCase* mem_case) { OperatorConf CopyHdTaskNode::NewCopyOpConf() { OperatorConf conf; conf.set_name("copy_hd_" + NewUniqueId()); - conf.set_device_type(device_type()); + conf.set_device_tag(CHECK_JUST(DeviceTag4DeviceType(device_type()))); conf.mutable_copy_hd_conf()->set_type(copy_type_); auto in_regst = GetSoleConsumedRegst("copy_in"); if (in_regst->NumOfLbi() == 1) { @@ -141,7 +142,7 @@ void CopyCommNetTaskNode::PinConsumedRegstMemCase(MemoryCase* mem_case) { OperatorConf CopyCommNetTaskNode::NewCopyOpConf() { OperatorConf conf; conf.set_name("copy_comm_net_" + NewUniqueId()); - conf.set_device_type(device_type()); + conf.set_device_tag(CHECK_JUST(DeviceTag4DeviceType(this->device_type()))); conf.mutable_copy_comm_net_conf(); return conf; } diff --git a/oneflow/core/graph/logical_graph.cpp b/oneflow/core/graph/logical_graph.cpp index 7142edab5ec83cbd37204fb3a957fe47ab6dfe82..ede44cc8532230954daf391ddeae615cb8aebb38 100644 --- a/oneflow/core/graph/logical_graph.cpp +++ b/oneflow/core/graph/logical_graph.cpp @@ -13,12 +13,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "oneflow/core/common/balanced_splitter.h" +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/graph/logical_graph.h" #include "oneflow/core/graph/op_graph.h" +#include "oneflow/core/job/global_for.h" #include "oneflow/core/operator/operator.h" #include "oneflow/core/operator/op_conf_util.h" -#include "oneflow/core/common/balanced_splitter.h" -#include "oneflow/core/job/global_for.h" namespace oneflow { @@ -63,7 +64,7 @@ void LogicalGraph::NaiveBuildFwStruct( auto parallel_desc_ptr_it = name2parallel_desc.find(cur_op_conf.name()); CHECK(parallel_desc_ptr_it != name2parallel_desc.end()); const std::shared_ptr<ParallelDesc>& parallel_desc_ptr = parallel_desc_ptr_it->second; - cur_op_conf.set_device_type(parallel_desc_ptr->device_type()); + cur_op_conf.set_device_tag(CHECK_JUST(DeviceTag4DeviceType(parallel_desc_ptr->device_type()))); std::shared_ptr<Operator> cur_op = ConstructOp(cur_op_conf, &GlobalJobDesc()); LogicalNode* cur_node = cur_op->NewProperLogicalNode(); AddAllocatedNode(cur_node); diff --git a/oneflow/core/graph/slice_boxing_task_node.cpp b/oneflow/core/graph/slice_boxing_task_node.cpp index 2362705cb96e64eda8b11727b1b41e7e3d4d808f..50428f43b2707fd420ab1523bae717921a129e5b 100644 --- a/oneflow/core/graph/slice_boxing_task_node.cpp +++ b/oneflow/core/graph/slice_boxing_task_node.cpp @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/graph/slice_boxing_task_node.h" namespace oneflow { @@ -102,7 +103,7 @@ void SliceBoxingTaskNode::SetOutShape(const Shape& shape) { out_shape_ = shape; OperatorConf SliceBoxingTaskNode::GetBoxingOpConf() { OperatorConf op_conf{}; - op_conf.set_device_type(device_type()); + op_conf.set_device_tag(CHECK_JUST(DeviceTag4DeviceType(device_type()))); SliceBoxingConf boxing_conf{}; *boxing_conf.mutable_lbi() = lbi_; out_slice_.ToProto(boxing_conf.mutable_out_slice()); diff --git a/oneflow/core/job/job_build_and_infer_ctx.cpp b/oneflow/core/job/job_build_and_infer_ctx.cpp index 682d95b0f693f8a435cb2c3065a4834f388be8ba..d4fdc6b101a5678a0f4956144d8b063a563c298b 100644 --- a/oneflow/core/job/job_build_and_infer_ctx.cpp +++ b/oneflow/core/job/job_build_and_infer_ctx.cpp @@ -13,17 +13,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "oneflow/core/job/job_build_and_infer_ctx.h" -#include "oneflow/core/job_rewriter/op_graph_pass.h" -#include "oneflow/core/job_rewriter/autograd.h" -#include "oneflow/core/framework/config_def.h" #include "oneflow/core/common/protobuf.h" -#include "oneflow/core/job/mirrored_sig_infer_hint.h" -#include "oneflow/core/job/foreign_callback.h" #include "oneflow/core/eager/eager_symbol_storage.h" +#include "oneflow/core/framework/config_def.h" +#include "oneflow/core/framework/to_string.h" +#include "oneflow/core/job/foreign_callback.h" +#include "oneflow/core/job/job_build_and_infer_ctx.h" +#include "oneflow/core/job/mirrored_sig_infer_hint.h" #include "oneflow/core/job/scope.h" -#include <google/protobuf/text_format.h> +#include "oneflow/core/job_rewriter/autograd.h" +#include "oneflow/core/job_rewriter/op_graph_pass.h" #include "oneflow/user/summary/summary_converter.h" + +#include <google/protobuf/text_format.h> #include <json.hpp> namespace oneflow { @@ -505,9 +507,9 @@ Maybe<OpAttribute> JobBuildAndInferCtx::AddAndInferOp(const OperatorConf& op_con CHECK_OR_RETURN(op_name2op_.find(op_name) == op_name2op_.end()) << JobBuildAndInferError::kOpNameExist << "op_name: " << op_name << " already exist in job: " << job_->job_conf().job_name(); - CHECK_NE_OR_RETURN(op_conf.device_type(), DeviceType::kInvalidDevice) - << JobBuildAndInferError::kOpConfDeviceTypeNoSet << "op_name: " << op_name - << " not set device type"; + CHECK_NE_OR_RETURN(op_conf.device_tag(), "invalid_device") + << JobBuildAndInferError::kOpConfDeviceTagNoSet << "op_name: " << op_name + << " not set device tag"; op_name2op_.emplace(op_name, ConstructOp(op_conf, job_desc)); Operator* op = op_name2op_.at(op_name).get(); @@ -836,7 +838,7 @@ Maybe<LogicalBlobId> LazyJobBuildAndInferCtx::FindOrCreateMirroredLbiFromCompati lbi_vec->push_back(sub_lbi); }; OperatorConf op_conf; - op_conf.set_device_type(parallel_desc.device_type()); + op_conf.set_device_tag(CHECK_JUST(DeviceTag4DeviceType(parallel_desc.device_type()))); if (sbp.has_broadcast_parallel()) { op_conf.set_name(kAutoMirroredBlobNamePrefix + "-DistributeClone-" + NewUniqueId()); auto* distribute_clone = op_conf.mutable_distribute_clone_conf(); @@ -890,7 +892,8 @@ Maybe<LogicalBlobId> EagerJobBuildAndInferCtx::FindOrCreateMirroredLbiFromCompat CHECK_OR_RETURN(producer_op_conf.has_scope_symbol_id()); op_conf.set_scope_symbol_id(producer_op_conf.scope_symbol_id()); } - op_conf.set_device_type(parallel_desc.device_type()); + // const char* device_tag = JUST(DeviceTag4DeviceType(parallel_desc.device_type())); + op_conf.set_device_tag(JUST(DeviceTag4DeviceType(parallel_desc.device_type()))); op_conf.set_name(kAutoMirroredBlobNamePrefix + "-CastToMirrored-" + NewUniqueId()); auto* cast_to_mirrored_conf = op_conf.mutable_cast_to_mirrored_conf(); cast_to_mirrored_conf->set_in(lbn); diff --git a/oneflow/core/job/parallel_desc.cpp b/oneflow/core/job/parallel_desc.cpp index 016924b01d76d67108fa1cb298332db4194c8236..d829b8ad7191ccd0e790e190c296b8f678b667a9 100644 --- a/oneflow/core/job/parallel_desc.cpp +++ b/oneflow/core/job/parallel_desc.cpp @@ -157,10 +157,9 @@ Maybe<void> ParallelDesc::CheckWithResourceDesc(const ResourceDesc& resource_des ParallelConf ParallelDesc::GetParallelIdOnlyParallelConf(int64_t parallel_id) const { ParallelConf parallel_conf; - const char* device_tag = CHECK_JUST(DeviceTag4DeviceType(device_type())); std::string machine_id = std::to_string(MachineIdForParallelId(parallel_id)); std::string device_id = 
std::to_string(DeviceIdForParallelId(parallel_id)); - parallel_conf.set_device_tag(device_tag); + parallel_conf.set_device_tag(CHECK_JUST(DeviceTag4DeviceType(device_type()))); parallel_conf.add_device_name(machine_id + ":" + device_id); return parallel_conf; } diff --git a/oneflow/core/job/scope.cpp b/oneflow/core/job/scope.cpp index cb0c39146a0dd82f6ffcee25800bc8052d3375b8..def4e6cf09fa212fefc35d2946d7d587aef7069e 100644 --- a/oneflow/core/job/scope.cpp +++ b/oneflow/core/job/scope.cpp @@ -13,9 +13,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/job/scope.h" -#include "oneflow/core/vm/symbol_storage.h" #include "oneflow/core/operator/operator.h" +#include "oneflow/core/vm/symbol_storage.h" namespace oneflow { @@ -42,7 +43,7 @@ Maybe<const JobDesc*> Scope::job_desc() const { } Maybe<int64_t> Scope::GetParallelDescSymbolId(const OperatorConf& op_conf) const { - if (op_conf.device_type() == DeviceType::kCPU || IsCpuOnly(op_conf)) { + if (op_conf.device_tag() == "cpu" || IsCpuOnly(op_conf)) { return scope_proto_.host_parallel_desc_symbol_id(); } else { return scope_proto_.device_parallel_desc_symbol_id(); @@ -50,7 +51,7 @@ Maybe<int64_t> Scope::GetParallelDescSymbolId(const OperatorConf& op_conf) const } Maybe<const ParallelDesc*> Scope::GetParallelDesc(const OperatorConf& op_conf) const { - if (op_conf.device_type() == DeviceType::kCPU || IsCpuOnly(op_conf)) { + if (op_conf.device_tag() == "cpu" || IsCpuOnly(op_conf)) { return host_parallel_desc_.get(); } else { return device_parallel_desc_.get(); diff --git a/oneflow/core/job_rewriter/dump_variable_info_pass.cpp b/oneflow/core/job_rewriter/dump_variable_info_pass.cpp index 12a2bfe47d2acc1d41edc660b24c61398d5b4a6a..91946ea724529f3cf1d84171844003ae21959084 100644 --- a/oneflow/core/job_rewriter/dump_variable_info_pass.cpp +++ b/oneflow/core/job_rewriter/dump_variable_info_pass.cpp @@ -13,8 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "oneflow/core/job_rewriter/op_graph_pass.h" +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/job/global_for.h" +#include "oneflow/core/job_rewriter/op_graph_pass.h" namespace oneflow { @@ -35,19 +36,19 @@ Maybe<void> DumpVariableInfoPass::Apply(const OpGraph& op_graph, JobBuilder* job const std::string sep = "\t"; auto log_stream = TeePersistentLogStream::Create("variable_table_" + std::to_string(GlobalJobDesc().job_id())); - (*log_stream) << "id" << sep << "name" << sep << "device_type" << sep << "parallel_num" << sep + (*log_stream) << "id" << sep << "name" << sep << "device_tag" << sep << "parallel_num" << sep << "distribute" << sep << "data_type" << sep << "shape" << sep << "elem_cnt" << sep << "size" << "\n"; - op_graph.TopoForEachNode([&](const OpNode* node) { + JUST(op_graph.TopoForEachNodeWithErrorCaptured([&](const OpNode* node) -> Maybe<void> { const OperatorConf& op_conf = node->op().op_conf(); - if (!op_conf.has_variable_conf()) { return; } + if (!op_conf.has_variable_conf()) { return Maybe<void>::Ok(); } const VariableOpConf& conf = op_conf.variable_conf(); (*log_stream) << std::to_string(cnt); (*log_stream) << sep; (*log_stream) << op_conf.name(); (*log_stream) << sep; - (*log_stream) << DeviceType_Name(op_conf.device_type()); + (*log_stream) << op_conf.device_tag(); (*log_stream) << sep; (*log_stream) << std::to_string(node->parallel_desc().parallel_num()); (*log_stream) << sep; @@ -67,7 +68,8 @@ Maybe<void> DumpVariableInfoPass::Apply(const OpGraph& op_graph, JobBuilder* job (*log_stream) << std::to_string(shape.elem_cnt() * GetSizeOfDataType(conf.data_type())); (*log_stream) << "\n"; cnt += 1; - }); + return Maybe<void>::Ok(); + })); return Maybe<void>::Ok(); } diff --git a/oneflow/core/kernel/arg_where_kernel.cpp b/oneflow/core/kernel/arg_where_kernel.cpp index 36766485ccbf284483ff85364f935742e3236455..f81262214ff56547842af9e074520e382c5c3d43 100644 --- a/oneflow/core/kernel/arg_where_kernel.cpp +++ b/oneflow/core/kernel/arg_where_kernel.cpp @@ -13,9 +13,10 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "oneflow/core/kernel/kernel.h" -#include "oneflow/core/kernel/arg_where_kernel_util.h" #include "oneflow/core/common/nd_index_offset_helper.h" +#include "oneflow/core/framework/to_string.h" +#include "oneflow/core/kernel/arg_where_kernel_util.h" +#include "oneflow/core/kernel/kernel.h" namespace oneflow { @@ -40,14 +41,14 @@ class ArgWhereKernel : public KernelIf<DeviceType::kCPU> { } }; -#define REGISTER_ARG_WHERE_KERNEL(device_type_v, dtype, itype, ndims) \ - NEW_REGISTER_KERNEL(OperatorConf::kArgWhereConf, \ - ArgWhereKernel<device_type_v, dtype, itype, ndims>) \ - .SetIsMatchedPred([](const KernelConf& conf) { \ - return (device_type_v == conf.op_attribute().op_conf().device_type()) \ - && (GetDataType<itype>::value == conf.data_type()) \ - && (GetDataType<dtype>::value == conf.arg_where_conf().in_data_type()) \ - && (ndims == conf.arg_where_conf().num_axes()); \ +#define REGISTER_ARG_WHERE_KERNEL(device_type_v, dtype, itype, ndims) \ + NEW_REGISTER_KERNEL(OperatorConf::kArgWhereConf, \ + ArgWhereKernel<device_type_v, dtype, itype, ndims>) \ + .SetIsMatchedPred([](const KernelConf& conf) -> bool { \ + return (conf.op_attribute().op_conf().device_tag() == ToString(device_type_v)) \ + && (GetDataType<itype>::value == conf.data_type()) \ + && (GetDataType<dtype>::value == conf.arg_where_conf().in_data_type()) \ + && (ndims == conf.arg_where_conf().num_axes()); \ }); #define REGISTER_ARG_WHERE_KERNELS_AT_NDIMS(device_type_v, dtype, itype) \ diff --git a/oneflow/core/kernel/indexed_slices_lazy_adam_model_update_kernel.cpp b/oneflow/core/kernel/indexed_slices_lazy_adam_model_update_kernel.cpp index 34bef5af046ee05635b510d44a9b02964ffda3c7..a0ea46db6d446d47346bc4ee25e0271ab4a83d41 100644 --- a/oneflow/core/kernel/indexed_slices_lazy_adam_model_update_kernel.cpp +++ b/oneflow/core/kernel/indexed_slices_lazy_adam_model_update_kernel.cpp @@ -13,10 +13,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/kernel/kernel.h" #include "oneflow/core/kernel/kernel_context.h" -#include "oneflow/core/kernel/indexed_slices_reduce_sum_kernel_util.h" #include "oneflow/core/kernel/indexed_slices_lazy_adam_model_update_kernel_util.h" +#include "oneflow/core/kernel/indexed_slices_reduce_sum_kernel_util.h" namespace oneflow { @@ -80,7 +81,7 @@ void IndexedSlicesLazyAdamMdUpdateKernel<device_type, T, K>::ForwardDataContent( OF_PP_PAIR_FIRST(indices_type_pair)>) \ .SetIsMatchedPred([](const KernelConf& kernel_conf) -> bool { \ return ( \ - (kernel_conf.op_attribute().op_conf().device_type() == device_type_v) \ + (kernel_conf.op_attribute().op_conf().device_tag() == ToString(device_type_v)) \ && ((OF_PP_PAIR_SECOND(data_type_pair)) == kernel_conf.data_type()) \ && (OF_PP_PAIR_SECOND(indices_type_pair) \ == kernel_conf.indexed_slices_lazy_adam_model_update_conf().indices_data_type())); \ diff --git a/oneflow/core/kernel/indexed_slices_momentum_model_update_kernel.cpp b/oneflow/core/kernel/indexed_slices_momentum_model_update_kernel.cpp index aa70b5cd33506f69c62425c6df4ad680c1fbf84f..771df11cfa17814d6ae77d3dcbaf3f04989108e2 100644 --- a/oneflow/core/kernel/indexed_slices_momentum_model_update_kernel.cpp +++ b/oneflow/core/kernel/indexed_slices_momentum_model_update_kernel.cpp @@ -13,10 +13,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and limitations under the License. */ +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/kernel/kernel.h" #include "oneflow/core/kernel/kernel_context.h" -#include "oneflow/core/kernel/indexed_slices_reduce_sum_kernel_util.h" #include "oneflow/core/kernel/indexed_slices_momentum_model_update_kernel_util.h" +#include "oneflow/core/kernel/indexed_slices_reduce_sum_kernel_util.h" namespace oneflow { @@ -75,7 +76,7 @@ void IndexedSlicesMomentumMdUpdateKernel<device_type, T, K>::ForwardDataContent( OF_PP_PAIR_FIRST(indices_type_pair)>) \ .SetIsMatchedPred([](const KernelConf& kernel_conf) -> bool { \ return ( \ - (kernel_conf.op_attribute().op_conf().device_type() == device_type_v) \ + (kernel_conf.op_attribute().op_conf().device_tag() == ToString(device_type_v)) \ && ((OF_PP_PAIR_SECOND(data_type_pair)) == kernel_conf.data_type()) \ && (OF_PP_PAIR_SECOND(indices_type_pair) \ == kernel_conf.indexed_slices_momentum_model_update_conf().indices_data_type())); \ diff --git a/oneflow/core/kernel/indexed_slices_naive_model_update_kernel.cpp b/oneflow/core/kernel/indexed_slices_naive_model_update_kernel.cpp index 4f2a6ac519734e55a12163c385b1d32c2ab7b53f..434c6c4d800fdb087af397fcabc5255bd0854379 100644 --- a/oneflow/core/kernel/indexed_slices_naive_model_update_kernel.cpp +++ b/oneflow/core/kernel/indexed_slices_naive_model_update_kernel.cpp @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/kernel/kernel.h" #include "oneflow/core/kernel/indexed_slices_naive_model_update_kernel_util.h" @@ -59,7 +60,7 @@ void IndexedSlicesNaiveMdUpdateKernel<device_type, T, K>::ForwardDataContent( IndexedSlicesNaiveMdUpdateKernel<device_type_v, OF_PP_PAIR_FIRST(data_type_pair), \ OF_PP_PAIR_FIRST(indices_type_pair)>) \ .SetIsMatchedPred([](const KernelConf& kernel_conf) -> bool { \ - return ((kernel_conf.op_attribute().op_conf().device_type() == device_type_v) \ + return ((kernel_conf.op_attribute().op_conf().device_tag() == ToString(device_type_v)) \ && ((OF_PP_PAIR_SECOND(data_type_pair)) == kernel_conf.data_type()) \ && (OF_PP_PAIR_SECOND(indices_type_pair) \ == kernel_conf.indexed_slices_naive_model_update_conf().indices_data_type())); \ diff --git a/oneflow/core/kernel/indexed_slices_reduce_sum_kernel.cpp b/oneflow/core/kernel/indexed_slices_reduce_sum_kernel.cpp index fe6a5a1c9b9d629bcd3fa9c652a39ddff0717407..251df60a24b07328a20da697cde2c3c8edc41fbe 100644 --- a/oneflow/core/kernel/indexed_slices_reduce_sum_kernel.cpp +++ b/oneflow/core/kernel/indexed_slices_reduce_sum_kernel.cpp @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/kernel/kernel.h" #include "oneflow/core/kernel/kernel_context.h" #include "oneflow/core/kernel/indexed_slices_reduce_sum_kernel_util.h" @@ -54,17 +55,17 @@ void IndexedSlicesReduceSumKernel<device_type, T, K>::ForwardDataContent( workspace_ptr, workspace_size_in_bytes); } -#define MAKE_INDEXED_SLICES_REDUCE_SUM_KERNEL_ENTRY(device_type_v, data_type_pair, \ - indices_type_pair) \ - NEW_REGISTER_KERNEL( \ - OperatorConf::kIndexedSlicesReduceSumConf, \ - IndexedSlicesReduceSumKernel<device_type_v, OF_PP_PAIR_FIRST(data_type_pair), \ - OF_PP_PAIR_FIRST(indices_type_pair)>) \ - .SetIsMatchedPred([](const KernelConf& kernel_conf) -> bool { \ - return ((kernel_conf.op_attribute().op_conf().device_type() == device_type_v) \ - && ((OF_PP_PAIR_SECOND(data_type_pair)) == kernel_conf.data_type()) \ - && (OF_PP_PAIR_SECOND(indices_type_pair) \ - == kernel_conf.indexed_slices_reduce_sum_conf().indices_data_type())); \ +#define MAKE_INDEXED_SLICES_REDUCE_SUM_KERNEL_ENTRY(device_type_v, data_type_pair, \ + indices_type_pair) \ + NEW_REGISTER_KERNEL( \ + OperatorConf::kIndexedSlicesReduceSumConf, \ + IndexedSlicesReduceSumKernel<device_type_v, OF_PP_PAIR_FIRST(data_type_pair), \ + OF_PP_PAIR_FIRST(indices_type_pair)>) \ + .SetIsMatchedPred([](const KernelConf& kernel_conf) -> bool { \ + return ((kernel_conf.op_attribute().op_conf().device_tag() == ToString(device_type_v)) \ + && ((OF_PP_PAIR_SECOND(data_type_pair)) == kernel_conf.data_type()) \ + && (OF_PP_PAIR_SECOND(indices_type_pair) \ + == kernel_conf.indexed_slices_reduce_sum_conf().indices_data_type())); \ }); OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(MAKE_INDEXED_SLICES_REDUCE_SUM_KERNEL_ENTRY, DEVICE_TYPE_SEQ, diff --git a/oneflow/core/kernel/kernel.h b/oneflow/core/kernel/kernel.h index aaa3c49b7719570b6d48c921601933ce8c3f63b4..d90d7ddfe2fc520fcd12376fbf6becd8909a7124 100644 --- a/oneflow/core/kernel/kernel.h +++ b/oneflow/core/kernel/kernel.h @@ -16,16 +16,17 @@ limitations under the License. 
#ifndef ONEFLOW_CORE_KERNEL_KERNEL_H_ #define ONEFLOW_CORE_KERNEL_KERNEL_H_ +#include "oneflow/core/common/protobuf.h" +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/job/job.pb.h" #include "oneflow/core/job/resource.pb.h" #include "oneflow/core/kernel/kernel.pb.h" +#include "oneflow/core/kernel/kernel_registration.h" #include "oneflow/core/kernel/kernel_util.h" #include "oneflow/core/operator/operator.h" +#include "oneflow/core/operator/op_conf_util.h" #include "oneflow/core/persistence/snapshot.h" #include "oneflow/core/register/blob.h" -#include "oneflow/core/common/protobuf.h" -#include "oneflow/core/operator/op_conf_util.h" -#include "oneflow/core/kernel/kernel_registration.h" namespace oneflow { @@ -210,18 +211,19 @@ std::unique_ptr<const Kernel> ConstructKernel(const JobDesc* job_desc, const Ker {GetHashKey(device_type, OF_PP_PAIR_SECOND(data_type_pair)), \ []() { return new kernel_class<device_type, OF_PP_PAIR_FIRST(data_type_pair)>(); }}, -#define ADD_DEFAULT_KERNEL_CREATOR(op_type_case, kernel_class, data_type_seq) \ - namespace { \ - \ - Kernel* OF_PP_CAT(CreateKernel, __LINE__)(const KernelConf& kernel_conf) { \ - static const HashMap<std::string, std::function<Kernel*()>> creators = { \ - OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(MAKE_KERNEL_CREATOR_ENTRY, (kernel_class), \ - DEVICE_TYPE_SEQ, data_type_seq)}; \ - return creators.at(GetHashKey(kernel_conf.op_attribute().op_conf().device_type(), \ - kernel_conf.data_type()))(); \ - } \ - \ - REGISTER_KERNEL_CREATOR(op_type_case, OF_PP_CAT(CreateKernel, __LINE__)); \ +#define ADD_DEFAULT_KERNEL_CREATOR(op_type_case, kernel_class, data_type_seq) \ + namespace { \ + \ + Kernel* OF_PP_CAT(CreateKernel, __LINE__)(const KernelConf& kernel_conf) { \ + static const HashMap<std::string, std::function<Kernel*()>> creators = { \ + OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(MAKE_KERNEL_CREATOR_ENTRY, (kernel_class), \ + DEVICE_TYPE_SEQ, data_type_seq)}; \ + DeviceType device_type = \ + CHECK_JUST(DeviceType4DeviceTag(kernel_conf.op_attribute().op_conf().device_tag())); \ + return creators.at(GetHashKey(device_type, kernel_conf.data_type()))(); \ + } \ + \ + REGISTER_KERNEL_CREATOR(op_type_case, OF_PP_CAT(CreateKernel, __LINE__)); \ } #define MAKE_DEVICE_TYPE_KERNEL_CREATOR_ENTRY(kernel_class, device_type) \ @@ -234,7 +236,9 @@ std::unique_ptr<const Kernel> ConstructKernel(const JobDesc* job_desc, const Ker static const HashMap<int, std::function<Kernel*()>> creators = { \ OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(MAKE_DEVICE_TYPE_KERNEL_CREATOR_ENTRY, (kernel_class), \ DEVICE_TYPE_SEQ)}; \ - return creators.at(kernel_conf.op_attribute().op_conf().device_type())(); \ + DeviceType device_type = \ + CHECK_JUST(DeviceType4DeviceTag(kernel_conf.op_attribute().op_conf().device_tag())); \ + return creators.at(device_type)(); \ } \ \ REGISTER_KERNEL_CREATOR(op_type_case, OF_PP_CAT(CreateKernel, __LINE__)); \ @@ -257,20 +261,21 @@ std::unique_ptr<const Kernel> ConstructKernel(const JobDesc* job_desc, const Ker REGISTER_KERNEL_CREATOR(op_type_case, CreateKernel); \ } -#define ADD_DEFAULT_KERNEL_CREATOR_WITH_GPU_HALF(op_type_case, kernel_class, data_type_seq) \ - namespace { \ - \ - Kernel* OF_PP_CAT(CreateKernel, __LINE__)(const KernelConf& kernel_conf) { \ - static const HashMap<std::string, std::function<Kernel*()>> creators = { \ - OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(MAKE_KERNEL_CREATOR_ENTRY, (kernel_class), \ - DEVICE_TYPE_SEQ, data_type_seq) \ - MAKE_KERNEL_CREATOR_ENTRY(kernel_class, DeviceType::kGPU, \ - (float16, DataType::kFloat16))}; \ - return 
creators.at(GetHashKey(kernel_conf.op_attribute().op_conf().device_type(), \ - kernel_conf.data_type()))(); \ - } \ - \ - REGISTER_KERNEL_CREATOR(op_type_case, OF_PP_CAT(CreateKernel, __LINE__)); \ +#define ADD_DEFAULT_KERNEL_CREATOR_WITH_GPU_HALF(op_type_case, kernel_class, data_type_seq) \ + namespace { \ + \ + Kernel* OF_PP_CAT(CreateKernel, __LINE__)(const KernelConf& kernel_conf) { \ + static const HashMap<std::string, std::function<Kernel*()>> creators = { \ + OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(MAKE_KERNEL_CREATOR_ENTRY, (kernel_class), \ + DEVICE_TYPE_SEQ, data_type_seq) \ + MAKE_KERNEL_CREATOR_ENTRY(kernel_class, DeviceType::kGPU, \ + (float16, DataType::kFloat16))}; \ + DeviceType device_type = \ + CHECK_JUST(DeviceType4DeviceTag(kernel_conf.op_attribute().op_conf().device_tag())); \ + return creators.at(GetHashKey(device_type, kernel_conf.data_type()))(); \ + } \ + \ + REGISTER_KERNEL_CREATOR(op_type_case, OF_PP_CAT(CreateKernel, __LINE__)); \ } #endif // ONEFLOW_CORE_KERNEL_KERNEL_H_ diff --git a/oneflow/core/kernel/kernel_registration.h b/oneflow/core/kernel/kernel_registration.h index 8a19ed98dac78bf1fb20f78a2566eb15c1b9e06b..811c77e13ace363653c54cb5556eac1d3f332fbf 100644 --- a/oneflow/core/kernel/kernel_registration.h +++ b/oneflow/core/kernel/kernel_registration.h @@ -16,13 +16,14 @@ limitations under the License. #ifndef ONEFLOW_CORE_KERNEL_KERNEL_REGISTRATION_H_ #define ONEFLOW_CORE_KERNEL_KERNEL_REGISTRATION_H_ +#include "oneflow/core/common/data_type.h" #include "oneflow/core/common/data_type.pb.h" #include "oneflow/core/common/device_type.pb.h" -#include "oneflow/core/common/util.h" #include "oneflow/core/common/str_util.h" -#include "oneflow/core/common/data_type.h" -#include "oneflow/core/operator/op_conf_util.h" +#include "oneflow/core/common/util.h" +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/kernel/kernel.pb.h" +#include "oneflow/core/operator/op_conf_util.h" namespace oneflow { @@ -79,20 +80,20 @@ Kernel* CreateKernel(const KernelConf& kernel_conf); kernel_registration::KernelRegistrarBuilder(op_type).SetCreateFn( \ []() { return new __VA_ARGS__(); }) -#define REGISTER_KERNEL_WITH_NOTHING(op_type, ...) \ - NEW_REGISTER_KERNEL(op_type, __VA_ARGS__).SetIsMatchedPred([](const KernelConf&) { \ - return true; \ +#define REGISTER_KERNEL_WITH_NOTHING(op_type, ...) \ + NEW_REGISTER_KERNEL(op_type, __VA_ARGS__).SetIsMatchedPred([](const KernelConf&) -> bool { \ + return true; \ }); -#define REGISTER_KERNEL_WITH_DEVICE_AND_DTYPE(op_type, device, dtype, ...) \ - NEW_REGISTER_KERNEL(op_type, __VA_ARGS__).SetIsMatchedPred([](const KernelConf& conf) { \ - return (device == conf.op_attribute().op_conf().device_type()) \ - && (GetDataType<dtype>::value == conf.data_type()); \ +#define REGISTER_KERNEL_WITH_DEVICE_AND_DTYPE(op_type, device, dtype, ...) \ + NEW_REGISTER_KERNEL(op_type, __VA_ARGS__).SetIsMatchedPred([](const KernelConf& conf) -> bool { \ + return (ToString(device) == conf.op_attribute().op_conf().device_tag()) \ + && (GetDataType<dtype>::value == conf.data_type()); \ }); -#define REGISTER_KERNEL_WITH_DEVICE(op_type, device, ...) \ - NEW_REGISTER_KERNEL(op_type, __VA_ARGS__).SetIsMatchedPred([](const KernelConf& conf) { \ - return (device == conf.op_attribute().op_conf().device_type()); \ +#define REGISTER_KERNEL_WITH_DEVICE(op_type, device, ...) 
\ + NEW_REGISTER_KERNEL(op_type, __VA_ARGS__).SetIsMatchedPred([](const KernelConf& conf) -> bool { \ + return (ToString(device) == conf.op_attribute().op_conf().device_tag()); \ }); #define REGISTER_KERNEL_HELPER_CPU_FLOATING(op_type, kernel) \ diff --git a/oneflow/core/kernel/normal_model_update_kernel.h b/oneflow/core/kernel/normal_model_update_kernel.h index 40b8f30a622a4db9665f540745ad70bbec1f3f79..d6108418f36c538050978d2a7cf8dc4e17f3267a 100644 --- a/oneflow/core/kernel/normal_model_update_kernel.h +++ b/oneflow/core/kernel/normal_model_update_kernel.h @@ -16,6 +16,7 @@ limitations under the License. #ifndef ONEFLOW_CORE_KERNEL_NORMAL_MODEL_UPDATE_KERNEL_H_ #define ONEFLOW_CORE_KERNEL_NORMAL_MODEL_UPDATE_KERNEL_H_ +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/kernel/kernel.h" namespace oneflow { @@ -49,13 +50,14 @@ class NormalMdUpdateKernel : public KernelIf<device_type> { #define DECLARE_MDUPDT_KERNEL_CREATOR(x) Kernel* Create##x##MdUpdtKernel(const KernelConf&); -#define DEFINE_MDUPDT_KERNEL_CREATOR(x) \ - Kernel* Create##x##MdUpdtKernel(const KernelConf& kernel_conf) { \ - static const HashMap<std::string, std::function<Kernel*()>> creators = { \ - OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(MAKE_KERNEL_CREATOR_ENTRY, (x##MdUpdateKernel), \ - DEVICE_TYPE_SEQ, FLOATING_DATA_TYPE_SEQ)}; \ - return creators.at(GetHashKey(kernel_conf.op_attribute().op_conf().device_type(), \ - kernel_conf.data_type()))(); \ +#define DEFINE_MDUPDT_KERNEL_CREATOR(x) \ + Kernel* Create##x##MdUpdtKernel(const KernelConf& kernel_conf) { \ + static const HashMap<std::string, std::function<Kernel*()>> creators = { \ + OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(MAKE_KERNEL_CREATOR_ENTRY, (x##MdUpdateKernel), \ + DEVICE_TYPE_SEQ, FLOATING_DATA_TYPE_SEQ)}; \ + DeviceType device_type = \ + CHECK_JUST(DeviceType4DeviceTag(kernel_conf.op_attribute().op_conf().device_tag())); \ + return creators.at(GetHashKey(device_type, kernel_conf.data_type()))(); \ } } // namespace oneflow diff --git a/oneflow/core/kernel/sigmoid_cross_entropy_kernel.cu b/oneflow/core/kernel/sigmoid_cross_entropy_kernel.cu index a79a3c6f22a3f315e805c4f2a26c146f328cc1a0..98b07f23c19c39304016ee1feee3b5424de3157b 100644 --- a/oneflow/core/kernel/sigmoid_cross_entropy_kernel.cu +++ b/oneflow/core/kernel/sigmoid_cross_entropy_kernel.cu @@ -86,17 +86,17 @@ class SigmoidCrossEntropyGradGpuKernel final : public KernelIf<DeviceType::kGPU> #define REGISTER_SIGMOID_CROSS_ENTROPY_GPU_KERNEL(dtype, ltype) \ NEW_REGISTER_KERNEL(OperatorConf::kSigmoidCrossEntropyConf, \ SigmoidCrossEntropyGpuKernel<dtype, ltype>) \ - .SetIsMatchedPred([](const KernelConf& conf) { \ - return ((conf.op_attribute().op_conf().device_type() == DeviceType::kGPU) \ + .SetIsMatchedPred([](const KernelConf& conf) -> bool { \ + return ((conf.op_attribute().op_conf().device_tag() == "gpu") \ && (conf.data_type() == GetDataType<dtype>::value) \ && (GetDataType<ltype>::value \ == conf.op_attribute().op_conf().sigmoid_cross_entropy_conf().label_type())); \ }); \ NEW_REGISTER_KERNEL(OperatorConf::kSigmoidCrossEntropyGradConf, \ SigmoidCrossEntropyGradGpuKernel<dtype, ltype>) \ - .SetIsMatchedPred([](const KernelConf& conf) { \ + .SetIsMatchedPred([](const KernelConf& conf) -> bool { \ return ( \ - (conf.op_attribute().op_conf().device_type() == DeviceType::kGPU) \ + (conf.op_attribute().op_conf().device_tag() == "gpu") \ && (conf.data_type() == GetDataType<dtype>::value) \ && (GetDataType<ltype>::value \ == conf.op_attribute().op_conf().sigmoid_cross_entropy_grad_conf().label_type())); 
\ diff --git a/oneflow/core/kernel/sync_dynamic_resize_kernel.cpp b/oneflow/core/kernel/sync_dynamic_resize_kernel.cpp index 0ae4b53521ead0e7277ff08b4af1389bd038ff1b..b568a28890fb5b618e712096b02af9874f6b9f22 100644 --- a/oneflow/core/kernel/sync_dynamic_resize_kernel.cpp +++ b/oneflow/core/kernel/sync_dynamic_resize_kernel.cpp @@ -13,15 +13,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "oneflow/core/common/util.h" +#include "oneflow/core/device/cuda_util.h" +#include "oneflow/core/framework/to_string.h" +#include "oneflow/core/kernel/kernel.h" +#include "oneflow/core/register/register_desc.h" + #include <cstddef> #include <cstdint> #include <memory> #include <mutex> #include <queue> -#include "oneflow/core/common/util.h" -#include "oneflow/core/device/cuda_util.h" -#include "oneflow/core/kernel/kernel.h" -#include "oneflow/core/register/register_desc.h" namespace oneflow { @@ -101,7 +103,7 @@ class SyncDynamicResizeGPUKernel final : public KernelIf<DeviceType::kGPU> { #define REGISTER_SYNC_DYNAMIC_RESIZE_GPU_KERNEL(stype) \ NEW_REGISTER_KERNEL(OperatorConf::kSyncDynamicResizeConf, SyncDynamicResizeGPUKernel<stype>) \ .SetIsMatchedPred([](const KernelConf& kernel_conf) { \ - return (kernel_conf.op_attribute().op_conf().device_type() == DeviceType::kGPU \ + return (kernel_conf.op_attribute().op_conf().device_tag() == "gpu" \ && GetDataType<stype>::value \ == kernel_conf.sync_dynamic_resize_conf().size_data_type()); \ }) @@ -139,7 +141,7 @@ class SyncDynamicResizeCPUKernel final : public KernelIf<DeviceType::kCPU> { #define REGISTER_SYNC_DYNAMIC_RESIZE_CPU_KERNEL(stype) \ NEW_REGISTER_KERNEL(OperatorConf::kSyncDynamicResizeConf, SyncDynamicResizeCPUKernel<stype>) \ .SetIsMatchedPred([](const KernelConf& kernel_conf) { \ - return (kernel_conf.op_attribute().op_conf().device_type() == DeviceType::kCPU \ + return (kernel_conf.op_attribute().op_conf().device_tag() == "cpu" \ && GetDataType<stype>::value \ == kernel_conf.sync_dynamic_resize_conf().size_data_type()); \ }) diff --git a/oneflow/core/kernel/tensor_buffer_to_tensor_list_kernel.cpp b/oneflow/core/kernel/tensor_buffer_to_tensor_list_kernel.cpp index 7b065fb3819067bfe39cc51b6aa4bcfc3ed3bede..da16f4b655691d02da8e28f81589376231f09d86 100644 --- a/oneflow/core/kernel/tensor_buffer_to_tensor_list_kernel.cpp +++ b/oneflow/core/kernel/tensor_buffer_to_tensor_list_kernel.cpp @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/kernel/kernel.h" #include "oneflow/core/kernel/kernel_util.h" #include "oneflow/core/common/tensor_buffer.h" @@ -81,12 +82,12 @@ void TensorBufferToTensorListKernel<T>::ForwardDataContent( CHECK_EQ(out_blob->total_num_of_tensors(), in_blob->shape().elem_cnt()); } -#define REGISTER_TENSOR_BUFFER_TO_TENSOR_LIST_KERNEL(dtype) \ - NEW_REGISTER_KERNEL(OperatorConf::kTensorBufferToTensorListConf, \ - TensorBufferToTensorListKernel<dtype>) \ - .SetIsMatchedPred([](const KernelConf& conf) { \ - return (conf.op_attribute().op_conf().device_type() == DeviceType::kCPU) \ - && (conf.data_type() == GetDataType<dtype>::value); \ +#define REGISTER_TENSOR_BUFFER_TO_TENSOR_LIST_KERNEL(dtype) \ + NEW_REGISTER_KERNEL(OperatorConf::kTensorBufferToTensorListConf, \ + TensorBufferToTensorListKernel<dtype>) \ + .SetIsMatchedPred([](const KernelConf& conf) { \ + return (conf.op_attribute().op_conf().device_tag() == "cpu") \ + && (conf.data_type() == GetDataType<dtype>::value); \ }); REGISTER_TENSOR_BUFFER_TO_TENSOR_LIST_KERNEL(char) diff --git a/oneflow/core/kernel/tensor_list_to_tensor_buffer_kernel.cpp b/oneflow/core/kernel/tensor_list_to_tensor_buffer_kernel.cpp index 0b63ec43259f5a09d7cb99a54cbe99dc4a9f777c..b617ff4e1dcd7c76f7cc91c743796459b0cbcb7d 100644 --- a/oneflow/core/kernel/tensor_list_to_tensor_buffer_kernel.cpp +++ b/oneflow/core/kernel/tensor_list_to_tensor_buffer_kernel.cpp @@ -13,8 +13,9 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "oneflow/core/kernel/kernel.h" #include "oneflow/core/common/tensor_buffer.h" +#include "oneflow/core/framework/to_string.h" +#include "oneflow/core/kernel/kernel.h" namespace oneflow { @@ -59,7 +60,7 @@ void TensorListToTensorBufferKernel::ForwardHeader( NEW_REGISTER_KERNEL(OperatorConf::kTensorListToTensorBufferConf, TensorListToTensorBufferKernel) .SetIsMatchedPred([](const KernelConf& conf) { - return (conf.op_attribute().op_conf().device_type() == DeviceType::kCPU) + return (conf.op_attribute().op_conf().device_tag() == "cpu") && (conf.data_type() == DataType::kTensorBuffer); }); diff --git a/oneflow/core/kernel/unique_with_counts_kernel.cpp b/oneflow/core/kernel/unique_with_counts_kernel.cpp index a5173fb71d86619fe4c970b4983e9031fcabc94f..0af8a190d4335a91aabe884ae29ee9d11c531b9c 100644 --- a/oneflow/core/kernel/unique_with_counts_kernel.cpp +++ b/oneflow/core/kernel/unique_with_counts_kernel.cpp @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/kernel/kernel.h" #include "oneflow/core/kernel/kernel_context.h" #include "oneflow/core/kernel/unique_kernel_util.h" @@ -57,7 +58,7 @@ void UniqueWithCountsKernel<device_type, T, K>::ForwardDataContent( UniqueWithCountsKernel<device_type_v, OF_PP_PAIR_FIRST(data_type_pair), \ OF_PP_PAIR_FIRST(indices_type_pair)>) \ .SetIsMatchedPred([](const KernelConf& kernel_conf) -> bool { \ - return ((kernel_conf.op_attribute().op_conf().device_type() == device_type_v) \ + return ((kernel_conf.op_attribute().op_conf().device_tag() == ToString(device_type_v)) \ && ((OF_PP_PAIR_SECOND(data_type_pair)) == kernel_conf.data_type()) \ && (OF_PP_PAIR_SECOND(indices_type_pair) \ == kernel_conf.unique_with_counts_conf().indices_data_type())); \ diff --git a/oneflow/core/kernel/user_kernel.cpp b/oneflow/core/kernel/user_kernel.cpp index f2e73da9bb57fdb99fa9955bd92f4efe536d47f0..ed9193c2cfebbbea3088013b0adde3e76f61ff3b 100644 --- a/oneflow/core/kernel/user_kernel.cpp +++ b/oneflow/core/kernel/user_kernel.cpp @@ -13,14 +13,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "oneflow/core/kernel/kernel.h" -#include "oneflow/core/kernel/eager_kernel.h" +#include "oneflow/core/framework/infer_util.h" #include "oneflow/core/framework/op_kernel.h" #include "oneflow/core/framework/op_kernel_infer_cache.h" -#include "oneflow/core/framework/user_op_registry_manager.h" #include "oneflow/core/framework/tensor.h" +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/framework/user_op_conf.h" -#include "oneflow/core/framework/infer_util.h" +#include "oneflow/core/framework/user_op_registry_manager.h" +#include "oneflow/core/kernel/eager_kernel.h" #include "oneflow/core/kernel/kernel.h" namespace oneflow { @@ -61,8 +61,8 @@ class UserKernelBaseContext { }; InitInOrOut(kernel_conf.op_attribute().op_conf().user_conf().input(), &inputs_); InitInOrOut(kernel_conf.op_attribute().op_conf().user_conf().output(), &outputs_); - - device_type_ = kernel_conf.op_attribute().op_conf().device_type(); + device_tag_ = kernel_conf.op_attribute().op_conf().device_tag(); + device_type_ = CHECK_JUST(DeviceType4DeviceTag(device_tag_)); parallel_ctx_ = kernel_conf.user_conf().parallel_ctx(); for (const auto& pair : kernel_conf.user_conf().bn_in_op2blob_desc()) { arg2tensor_desc_.emplace(GenUnRepeatedBn(pair.first), user_op::TensorDesc(pair.second)); @@ -71,6 +71,7 @@ class UserKernelBaseContext { ~UserKernelBaseContext() = default; DeviceType device_type() const { return device_type_; } + const std::string& device_tag() const { return device_tag_; } const ParallelContext& parallel_ctx() const { return parallel_ctx_; } const JobDesc& job_desc() const { return job_desc_; } const user_op::TensorDesc* TensorDesc4ArgNameAndIndex(const std::string& arg_name, @@ -87,6 +88,7 @@ class UserKernelBaseContext { ArgVec inputs_; ArgVec outputs_; DeviceType device_type_; + std::string device_tag_; ParallelContext parallel_ctx_; HashMap<std::pair<std::string, int32_t>, user_op::TensorDesc> arg2tensor_desc_; const JobDesc& job_desc_; @@ -378,6 +380,7 @@ class UserKernelRegContext final : public user_op::KernelRegContext { ~UserKernelRegContext() = default; DeviceType device_type() const override { return base_ctx_.device_type(); } + const std::string& device_tag() const override { return base_ctx_.device_tag(); } const ParallelContext& 
parallel_ctx() const override { return base_ctx_.parallel_ctx(); } const user_op::TensorDesc* TensorDesc4ArgNameAndIndex(const std::string& arg_name, int32_t index) const override { diff --git a/oneflow/core/operator/op_conf.proto b/oneflow/core/operator/op_conf.proto index d91b23df15c555fd33efdb8dda0080eec5f048be..a011031e674ae70cc00a5ba51524fd7690532991 100644 --- a/oneflow/core/operator/op_conf.proto +++ b/oneflow/core/operator/op_conf.proto @@ -976,7 +976,7 @@ message CastToStaticShapeOpConf { message OperatorConf { required string name = 1; optional bool trainable = 3 [default = true]; - optional DeviceType device_type = 4 [default = kInvalidDevice]; + optional string device_tag = 4 [default = "invalid_device"]; optional bool enable_cudnn = 5; optional int64 cudnn_buf_limit_mbyte = 6; repeated string ctrl_in_op_name = 7; diff --git a/oneflow/core/operator/operator.cpp b/oneflow/core/operator/operator.cpp index 362668402bb0fb6777181f59741130b311aa8c04..4bff087e029346c9788709240c6cb3aed3f74154 100644 --- a/oneflow/core/operator/operator.cpp +++ b/oneflow/core/operator/operator.cpp @@ -13,14 +13,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "oneflow/core/operator/operator.h" -#include "oneflow/core/graph/logical_node.h" #include "oneflow/core/common/balanced_splitter.h" +#include "oneflow/core/eager/eager_symbol_storage.h" +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/framework/user_op_registry_manager.h" -#include "oneflow/core/job/sbp_signature_builder.h" +#include "oneflow/core/graph/logical_node.h" #include "oneflow/core/job/mirrored_sig_infer_hint.h" -#include "oneflow/core/eager/eager_symbol_storage.h" +#include "oneflow/core/job/sbp_signature_builder.h" #include "oneflow/core/job/scope.h" +#include "oneflow/core/operator/operator.h" namespace oneflow { @@ -39,7 +40,8 @@ DataType GetDataTypeFromBnInOpVec( std::shared_ptr<Operator> CheckAndConstructOp(const OperatorConf& op_conf, const JobDesc* job_desc) { Operator* rptr = NewObj<Operator>(op_conf.op_type_case(), op_conf); - if (IsCpuOnly(op_conf)) { CHECK_EQ(op_conf.device_type(), DeviceType::kCPU); } + DeviceType device_type = CHECK_JUST(DeviceType4DeviceTag(op_conf.device_tag())); + if (IsCpuOnly(op_conf)) { CHECK_EQ(device_type, DeviceType::kCPU); } rptr->Init(op_conf, job_desc); return std::shared_ptr<Operator>(rptr); } @@ -72,6 +74,11 @@ LogicalBlobId* Operator::MutBnInOp2Lbi(const std::string& bn_in_op) { } } +DeviceType Operator::device_type() const { + DeviceType device_type = CHECK_JUST(DeviceType4DeviceTag(op_attribute_.op_conf().device_tag())); + return device_type; +} + const std::string& Operator::SoleIbn() const { CHECK_EQ(input_bns().size(), 1); return input_bns().Get(0); @@ -636,7 +643,7 @@ bool IsCpuOnly(const OperatorConf& op_conf) { std::shared_ptr<Operator> ConstructOp(const OperatorConf& op_conf, DeviceType device_type, const JobDesc* job_desc) { OperatorConf dev_op_conf = op_conf; - dev_op_conf.set_device_type(device_type); + dev_op_conf.set_device_tag(CHECK_JUST(DeviceTag4DeviceType(device_type))); return CheckAndConstructOp(dev_op_conf, job_desc); } diff --git a/oneflow/core/operator/operator.h b/oneflow/core/operator/operator.h index fcf198ad3d7c4ae2e7c206fd98a0d28ab4c5251f..6c8892edadc09c24189a065b2801f6b1df658003 100644 --- a/oneflow/core/operator/operator.h +++ b/oneflow/core/operator/operator.h @@ -59,7 +59,7 @@ class Operator { // 
Getters const std::string& op_name() const { return op_conf().name(); } - DeviceType device_type() const { return op_attribute_.op_conf().device_type(); } + DeviceType device_type() const; bool EnableCudnn() const { return op_conf().enable_cudnn(); } bool DevIsGpuAndEnableCudnn() const { return device_type() == DeviceType::kGPU && EnableCudnn(); } const OperatorConf& op_conf() const { return op_attribute_.op_conf(); } diff --git a/oneflow/core/operator/user_op.cpp b/oneflow/core/operator/user_op.cpp index a648c1ac8efce173fed955e95f47b842c53f5b6e..8858ae8d8e7c00f40ec5e2754fbecd862dac369f 100644 --- a/oneflow/core/operator/user_op.cpp +++ b/oneflow/core/operator/user_op.cpp @@ -13,12 +13,13 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "oneflow/core/operator/user_op.h" -#include "oneflow/core/operator/user_op_util.h" -#include "oneflow/core/framework/tensor_desc.h" +#include "oneflow/core/framework/batch_axis_context.h" #include "oneflow/core/framework/infer_util.h" #include "oneflow/core/framework/sbp_context.h" -#include "oneflow/core/framework/batch_axis_context.h" +#include "oneflow/core/framework/tensor_desc.h" +#include "oneflow/core/framework/to_string.h" +#include "oneflow/core/operator/user_op.h" +#include "oneflow/core/operator/user_op_util.h" namespace oneflow { @@ -53,7 +54,8 @@ class UserOpKernelRegContext final : public user_op::KernelRegContext { const auto& op_conf = user_op->op_conf(); CHECK(op_conf.has_user_conf()); - device_type_ = op_conf.device_type(); + device_tag_ = op_conf.device_tag(); + device_type_ = CHECK_JUST(DeviceType4DeviceTag(device_tag_)); parallel_ctx_ = parallel_ctx; auto InitInOrOut = [&](const PbMap<std::string, UserOpConf::ListString>& arg_map, @@ -85,6 +87,7 @@ class UserOpKernelRegContext final : public user_op::KernelRegContext { ~UserOpKernelRegContext() = default; DeviceType device_type() const override { return device_type_; } + const std::string& device_tag() const override { return device_tag_; } const ParallelContext& parallel_ctx() const override { return *parallel_ctx_; } const user_op::TensorDesc* TensorDesc4ArgNameAndIndex(const std::string& arg_name, int32_t index) const override { @@ -99,6 +102,7 @@ class UserOpKernelRegContext final : public user_op::KernelRegContext { ArgVec inputs_; ArgVec outputs_; DeviceType device_type_; + std::string device_tag_; const ParallelContext* parallel_ctx_; HashMap<std::pair<std::string, int32_t>, user_op::TensorDesc> arg2tensor_desc_; }; diff --git a/oneflow/python/eager/boxing_util.py b/oneflow/python/eager/boxing_util.py index bce9af4b9c682a7b9343e2e704d4565b365c8173..fb79e4e88cf5e86ff81f2bcc3cacfbd6995d00f0 100644 --- a/oneflow/python/eager/boxing_util.py +++ b/oneflow/python/eager/boxing_util.py @@ -505,6 +505,7 @@ def ConstructNaiveBoxingOpConf( ): op_conf = op_conf_pb.OperatorConf() op_conf.name = "undefined_boxing_op_name" + op_conf.device_tag = "cpu" op_conf.boxing_conf.lbi.op_name = "undefined_boxing_op_name" op_conf.boxing_conf.lbi.blob_name = "undefined_boxing_blob_name" op_conf.boxing_conf.in_num = in_parallel_num @@ -623,7 +624,7 @@ def BuildCopyHdInstruction(builder, produced_blob_object, to_device_tag): def _MakeCopyHdOpConfAndRetLbi(): op_conf = op_conf_pb.OperatorConf() op_conf.name = "copy_hd" - op_conf.device_type = c_api_util.DeviceType4DeviceTag("gpu") + op_conf.device_tag = "gpu" setattr(op_conf.copy_conf, "in", "%s/in" % op_conf.name) 
op_conf.copy_conf.out = "out" lbi = logical_blob_id_util.LogicalBlobId() @@ -669,6 +670,8 @@ def _AssignOpConf(): op_conf.name = "assign" op_conf.assign_conf.ref = "assign/ref" op_conf.assign_conf.value = "assign/value" + device_tag = oneflow.current_scope().device_parallel_desc_symbol.device_tag + op_conf.device_tag = device_tag return op_conf @@ -722,7 +725,7 @@ def ReplaceDeviceTag(parallel_desc_symbol, device_tag, builder=None): def _GetEagerNcclAllReduce(parallel_conf, ibn2blob_object): op_conf = op_conf_pb.OperatorConf() - op_conf.device_type = c_api_util.DeviceType4DeviceTag("gpu") + op_conf.device_tag = "gpu" op_conf.name = "eager_nccl_all_reduce" op_conf.user_conf.op_type_name = "eager_nccl_all_reduce" op_conf.user_conf.input["in"].s.append("eager_nccl_all_reduce/in_0") diff --git a/oneflow/python/eager/op_executor.py b/oneflow/python/eager/op_executor.py index 751dd41cf389601f34446c4aba6f2e2e7ef2c64d..f2f757d74c6a543ae2ca881b3db3c90a84a3ec43 100644 --- a/oneflow/python/eager/op_executor.py +++ b/oneflow/python/eager/op_executor.py @@ -21,7 +21,6 @@ import oneflow.core.operator.op_conf_pb2 as op_conf_util import oneflow.python.eager.vm_util as vm_util import oneflow.python.eager.boxing_util as boxing_util import oneflow.python.eager.symbol_storage as symbol_storage -import oneflow.python.framework.device_util as device_util import oneflow.python.framework.c_api_util as c_api_util import oneflow.python.framework.remote_blob as remote_blob_util import oneflow.python.framework.op_arg_util as op_arg_util @@ -363,7 +362,7 @@ def _GenModelInitOpConfAndRetLbi(var_op_conf): variable_op_conf.CopyFrom(var_op_conf.variable_conf) op_conf = op_conf_util.OperatorConf() op_conf.name = "model_init" - op_conf.device_type = device_util.DeviceType4DeviceTag("cpu") + op_conf.device_tag = "cpu" op_conf.model_init_conf.out.append("out_0") op_conf.model_init_conf.variable_op_name.append(var_op_conf.name) op_conf.model_init_conf.original_variable_conf.append(variable_op_conf) @@ -379,7 +378,7 @@ def _GenModelLoadOpConfAndRetLbi(var_op_conf, path_lbi): op_conf = op_conf_util.OperatorConf() op_conf.name = "model_load" - op_conf.device_type = device_util.DeviceType4DeviceTag("cpu") + op_conf.device_tag = "cpu" op_conf.model_load_conf.path = "{}/{}".format(path_lbi.op_name, path_lbi.blob_name) op_conf.model_load_conf.out.append("out_0") op_conf.model_load_conf.variable_op_name.append(var_op_conf.name) @@ -394,7 +393,7 @@ def _GenModelLoadOpConfAndRetLbi(var_op_conf, path_lbi): def _GenModelIOPathInputOpConfAndRetLbi(): op_conf = op_conf_util.OperatorConf() op_conf.name = "model_io_path_input" - op_conf.device_type = device_util.DeviceType4DeviceTag("cpu") + op_conf.device_tag = "cpu" op_conf.input_conf.out = "out" blob_conf = op_conf_util.InterfaceBlobConf() @@ -413,7 +412,7 @@ def _GenModelIOPathInputOpConfAndRetLbi(): def _GenModelSaveOpConf(var_blobs, path_lbi): op_conf = op_conf_util.OperatorConf() op_conf.name = "model_save" - op_conf.device_type = device_util.DeviceType4DeviceTag("cpu") + op_conf.device_tag = "cpu" op_conf.model_save_conf.path = "{}/{}".format(path_lbi.op_name, path_lbi.blob_name) for blob in var_blobs: getattr(op_conf.model_save_conf, "in").append(blob.logical_blob_name) diff --git a/oneflow/python/framework/c_api_util.py b/oneflow/python/framework/c_api_util.py index eccd887f92ff8782610ce63e34cc8305c59e2ee3..a03d0fb277cb0f250fc42767e739ffae2c72b1ba 100644 --- a/oneflow/python/framework/c_api_util.py +++ b/oneflow/python/framework/c_api_util.py @@ -554,15 +554,6 @@ def 
GetMachine2DeviceIdListOFRecordFromParallelConf(parallel_conf): return text_format.Parse(ofrecord, record_util.OFRecord()) -def DeviceType4DeviceTag(device_tag): - device_tag = str(device_tag) - device_type, error_str = oneflow_internal.DeviceType4DeviceTag(device_tag) - error = text_format.Parse(error_str, error_util.ErrorProto()) - if error.HasField("error_type"): - raise JobBuildAndInferError(error) - return device_type - - def GetFunctionConfigDef(): func_config_def, error_str = oneflow_internal.GetFunctionConfigDef() error = text_format.Parse(error_str, error_util.ErrorProto()) diff --git a/oneflow/python/framework/compile_context.py b/oneflow/python/framework/compile_context.py index fbb49e5e8165e6cd1a76ee3d80e5bf1801ba3084..27e518c671bf3e3a7b484da31b3a5518fffffcd2 100644 --- a/oneflow/python/framework/compile_context.py +++ b/oneflow/python/framework/compile_context.py @@ -61,9 +61,9 @@ def CurJobAddConsistentOp(op_conf, scope_symbol=None): if scope_symbol is None: scope_symbol = oneflow.current_scope() op_conf.scope_symbol_id = scope_symbol.symbol_id - if not op_conf.HasField("device_type"): + if not op_conf.HasField("device_tag"): device_tag = scope_symbol.device_parallel_desc_symbol.device_tag - op_conf.device_type = c_api_util.DeviceType4DeviceTag(device_tag) + op_conf.device_tag = device_tag return c_api_util.CurJobBuildAndInferCtx_AddAndInferConsistentOp(op_conf) @@ -72,7 +72,7 @@ def CurJobAddMirroredOp(op_conf, scope_symbol=None): if scope_symbol is None: scope_symbol = oneflow.current_scope() op_conf.scope_symbol_id = scope_symbol.symbol_id - if not op_conf.HasField("device_type"): + if not op_conf.HasField("device_tag"): device_tag = scope_symbol.device_parallel_desc_symbol.device_tag - op_conf.device_type = c_api_util.DeviceType4DeviceTag(device_tag) + op_conf.device_tag = device_tag return c_api_util.CurJobBuildAndInferCtx_AddAndInferMirroredOp(op_conf) diff --git a/oneflow/python/framework/device_util.py b/oneflow/python/framework/device_util.py deleted file mode 100644 index 65c4abd73ecf8e664f1570e7ceccc83c9ca8c85a..0000000000000000000000000000000000000000 --- a/oneflow/python/framework/device_util.py +++ /dev/null @@ -1,30 +0,0 @@ -""" -Copyright 2020 The OneFlow Authors. All rights reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" -from __future__ import absolute_import - -import oneflow.python.framework.c_api_util as c_api_util - - -def DeviceType4DeviceTag(device_tag): - global _device_tag2device_type - if device_tag not in _device_tag2device_type: - _device_tag2device_type[device_tag] = c_api_util.DeviceType4DeviceTag( - device_tag - ) - return _device_tag2device_type[device_tag] - - -_device_tag2device_type = {} diff --git a/oneflow/python/framework/placement_context.py b/oneflow/python/framework/placement_context.py index 086e09d2ff52404a0492cabc37d38cf7b70299e9..5f170801070156536372f0eb3b3966ba8c77edd8 100644 --- a/oneflow/python/framework/placement_context.py +++ b/oneflow/python/framework/placement_context.py @@ -20,7 +20,6 @@ import re import oneflow.core.job.placement_pb2 as placement_pb import oneflow.python.framework.c_api_util as c_api_util -import oneflow.python.framework.device_util as device_util import oneflow.python.framework.op_util as op_util import oneflow.python.framework.session_context as session_ctx import oneflow.python.framework.scope_util as scope_util @@ -77,9 +76,6 @@ class PlacementScope(object): self.GetDeviceTag4OpConf(op_conf), self.machine_device_ids_ ) - def GetDeviceType4OpConf(self, op_conf): - return device_util.DeviceType4DeviceTag(self.GetDeviceTag4OpConf(op_conf)) - def GetDeviceTag4OpConf(self, op_conf): return self.default_device_tag @@ -109,15 +105,6 @@ def PlacementScopeStackTop(): return session_ctx.GetDefaultSession().placement_scope_stack[0] -def CurPlacementGroupGetDeviceType(op_conf): - assert len(session_ctx.GetDefaultSession().placement_scope_stack) > 0 - return ( - session_ctx.GetDefaultSession() - .placement_scope_stack[0] - .GetDeviceType4OpConf(op_conf) - ) - - def ParallelConf4OpConf(op_conf): assert len(session_ctx.GetDefaultSession().placement_scope_stack) > 0 return ( diff --git a/oneflow/python/framework/placement_util.py b/oneflow/python/framework/placement_util.py index 22b950dd04d3f1f071a6d976a3d43e7aab5d08b5..10f71a88cffc78129d904a85325c529753271767 100644 --- a/oneflow/python/framework/placement_util.py +++ b/oneflow/python/framework/placement_util.py @@ -66,6 +66,10 @@ def deprecated_placement(*args, **kwargs): def api_placement( device_tag: str, machine_device_ids: str ) -> placement_ctx.PlacementScope: + from oneflow.python.compatibility import with_cuda + + if with_cuda == False: + device_tag = "cpu" func = enable_if.unique([GetPlacementScope, GetNormalModePlacementScope]) return func(device_tag, machine_device_ids) diff --git a/oneflow/python/oneflow_internal.h b/oneflow/python/oneflow_internal.h index b301fcff6f34f9d2655f33bc012845995582c08f..9331b77223cb749bb140be2bae1cd42bac34cfc3 100644 --- a/oneflow/python/oneflow_internal.h +++ b/oneflow/python/oneflow_internal.h @@ -108,12 +108,6 @@ void LaunchJob(const std::shared_ptr<oneflow::ForeignJobInstance>& cb, std::stri return oneflow::LaunchJob(cb).GetDataAndSerializedErrorProto(error_str); } -long DeviceType4DeviceTag(const std::string& device_tag, std::string* error_str) { - return oneflow::GetDeviceType4DeviceTag(device_tag) - .GetDataAndSerializedErrorProto(error_str, - static_cast<long>(oneflow::DeviceType::kInvalidDevice)); -} - std::string GetMachine2DeviceIdListOFRecordFromParallelConf(const std::string& parallel_conf, std::string* error_str) { return oneflow::GetSerializedMachineId2DeviceIdListOFRecord(parallel_conf) diff --git a/oneflow/python/oneflow_internal_helper.h b/oneflow/python/oneflow_internal_helper.h index 
7f97d97d10af77400621a5bfbbccdf575f9b14af..893c2315719bc2bd94d0192f7eea7866cbc78eb3 100644 --- a/oneflow/python/oneflow_internal_helper.h +++ b/oneflow/python/oneflow_internal_helper.h @@ -209,10 +209,6 @@ Maybe<void> LaunchJob(const std::shared_ptr<oneflow::ForeignJobInstance>& cb) { return Maybe<void>::Ok(); } -Maybe<long long> GetDeviceType4DeviceTag(const std::string& device_tag) { - return JUST(DeviceType4DeviceTag(device_tag)); -} - Maybe<std::string> GetSerializedMachineId2DeviceIdListOFRecord( const std::string& parallel_conf_str) { ParallelConf parallel_conf; diff --git a/oneflow/python/ops/__init__.py b/oneflow/python/ops/__init__.py index acfbfea1721edc59b3c95c63c524fd4e4f6176ae..c958e19d6f9912b283bcc3928ad3adc484388ac4 100644 --- a/oneflow/python/ops/__init__.py +++ b/oneflow/python/ops/__init__.py @@ -95,7 +95,7 @@ def _GetReturnOpConfAndOutLbiAndScope(remote_blob, allow_cpu_return_op=True): setattr(op_conf.return_conf, "in", remote_blob.unique_name) op_conf.return_conf.out = "out" if allow_cpu_return_op: - op_conf.device_type = c_api_util.DeviceType4DeviceTag("cpu") + op_conf.device_tag = "cpu" lbi = logical_blob_id_util.LogicalBlobId() lbi.op_name = op_conf.name diff --git a/oneflow/python/ops/user_op_builder.py b/oneflow/python/ops/user_op_builder.py index 92e3da2c378755d0843d19afc0e74e4791719cc6..31ea5342f1f0b4c7288f2e235a721dad0949eae1 100644 --- a/oneflow/python/ops/user_op_builder.py +++ b/oneflow/python/ops/user_op_builder.py @@ -50,6 +50,8 @@ class UserOp(object): self.op_conf_.name = op_name if op_type_name is not None: self.op_conf_.user_conf.op_type_name = op_type_name + device_tag = oneflow.current_scope().device_parallel_desc_symbol.device_tag + self.op_conf_.device_tag = device_tag self.output_arg_key_list_ = [] @property diff --git a/oneflow/user/kernels/add_n_kernel.cpp b/oneflow/user/kernels/add_n_kernel.cpp index c7d5d941e49ce1c3b27eb131894e30051f37d375..3e9ba9294cba21b2369dd9a2ecf0765808549290 100644 --- a/oneflow/user/kernels/add_n_kernel.cpp +++ b/oneflow/user/kernels/add_n_kernel.cpp @@ -57,7 +57,7 @@ class CpuAddNKernel : public user_op::OpKernel { #define REGISTER_CPU_ADDN_KERNEL(cpp_type, dtype) \ REGISTER_USER_KERNEL("add_n") \ .SetCreateFn<CpuAddNKernel<cpp_type>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("in", 0) == dtype)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ diff --git a/oneflow/user/kernels/add_n_kernel.cu b/oneflow/user/kernels/add_n_kernel.cu index 41d2223df24ef75878d25b57318935aeac382b49..0ae89377395663898e3054493049c3f8e61daa3f 100644 --- a/oneflow/user/kernels/add_n_kernel.cu +++ b/oneflow/user/kernels/add_n_kernel.cu @@ -116,7 +116,7 @@ class GpuAddNKernel : public user_op::OpKernel { #define REGISTER_GPU_ADDN_KERNEL(cpp_type, dtype) \ REGISTER_USER_KERNEL("add_n") \ .SetCreateFn<GpuAddNKernel<cpp_type>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("in", 0) == dtype)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ @@ -189,7 +189,7 @@ class GpuAddNHalfKernel : public user_op::OpKernel { REGISTER_USER_KERNEL("add_n") .SetCreateFn<GpuAddNHalfKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == 
"gpu") & (user_op::HobDataType("in", 0) == DataType::kFloat16)) .SetInplaceProposalFn([](const user_op::InferContext&, user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { diff --git a/oneflow/user/kernels/arg_sort_kernel.cpp b/oneflow/user/kernels/arg_sort_kernel.cpp index 4bd36041fe6db85537ffc3f4a4296d178f9b78e8..91a6e728af8a449a41f5d5faac77d2ad2210ad74 100644 --- a/oneflow/user/kernels/arg_sort_kernel.cpp +++ b/oneflow/user/kernels/arg_sort_kernel.cpp @@ -59,10 +59,10 @@ class CpuArgSortKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_CPU_ARG_SORT_KERNEL(dtype) \ - REGISTER_USER_KERNEL("arg_sort") \ - .SetCreateFn<CpuArgSortKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_CPU_ARG_SORT_KERNEL(dtype) \ + REGISTER_USER_KERNEL("arg_sort") \ + .SetCreateFn<CpuArgSortKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)); REGISTER_CPU_ARG_SORT_KERNEL(float) diff --git a/oneflow/user/kernels/arg_sort_kernel.cu b/oneflow/user/kernels/arg_sort_kernel.cu index 5e4b67317d7ba860615a8378cbb99fc0c29d27a5..9e3a3090df153c2e018cad338e62c971d34d4bcf 100644 --- a/oneflow/user/kernels/arg_sort_kernel.cu +++ b/oneflow/user/kernels/arg_sort_kernel.cu @@ -108,7 +108,7 @@ class GpuArgSortKernel final : public user_op::OpKernel { #define REGISTER_GPU_ARG_SORT_KERNEL(dtype) \ REGISTER_USER_KERNEL("arg_sort") \ .SetCreateFn<GpuArgSortKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ const Shape* in_shape = ctx->Shape4ArgNameAndIndex("in", 0); \ diff --git a/oneflow/user/kernels/argmax_kernel.cpp b/oneflow/user/kernels/argmax_kernel.cpp index 38fad3cf152854db1cedb784552ce44621be45d6..24697c2a18988e82cb39e88686e99d690d972876 100644 --- a/oneflow/user/kernels/argmax_kernel.cpp +++ b/oneflow/user/kernels/argmax_kernel.cpp @@ -55,7 +55,7 @@ class CpuArgMaxKernel final : public user_op::OpKernel { #define REGISTER_CPU_ARGMAX_KERNEL(dtype) \ REGISTER_USER_KERNEL("argmax").SetCreateFn<CpuArgMaxKernel<dtype>>().SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kCPU) \ + (user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)); REGISTER_CPU_ARGMAX_KERNEL(float) diff --git a/oneflow/user/kernels/argmax_kernel.cu b/oneflow/user/kernels/argmax_kernel.cu index a4bed87a594de36f1732ae8539f2850741524e07..15633d39760d5fba1cb1ebfa91793bcf8c91104c 100644 --- a/oneflow/user/kernels/argmax_kernel.cu +++ b/oneflow/user/kernels/argmax_kernel.cu @@ -148,7 +148,7 @@ class GpuArgMaxKernel final : public user_op::OpKernel { #define REGISTER_GPU_ARGMAX_KERNEL(dtype) \ REGISTER_USER_KERNEL("argmax") \ .SetCreateFn<GpuArgMaxKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ const Shape* in_shape = ctx->Shape4ArgNameAndIndex("in", 0); \ diff --git a/oneflow/user/kernels/batch_gather_kernel.cpp b/oneflow/user/kernels/batch_gather_kernel.cpp index ec2ed34c8db8b7d07cee04ecaded8a06eed7f347..df311c895bb673b9f84f36a8bf20bc60f651fede 100644 --- 
a/oneflow/user/kernels/batch_gather_kernel.cpp +++ b/oneflow/user/kernels/batch_gather_kernel.cpp @@ -46,7 +46,7 @@ class BatchGatherKernel final : public user_op::OpKernel { .SetCreateFn<BatchGatherKernel<device, OF_PP_PAIR_FIRST(out_dtype), \ OF_PP_PAIR_FIRST(indices_dtype)>>() \ .SetIsMatchedHob( \ - (user_op::HobDeviceType() == device) \ + (user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("out", 0) == OF_PP_PAIR_SECOND(out_dtype)) \ & (user_op::HobDataType("indices", 0) == OF_PP_PAIR_SECOND(indices_dtype))); diff --git a/oneflow/user/kernels/bernoulli_kernel.cpp b/oneflow/user/kernels/bernoulli_kernel.cpp index c6b129f5255df396ebdbdd406be96f6fe6387de8..222aa058f939a1a76533468f61ca7effc8da34cd 100644 --- a/oneflow/user/kernels/bernoulli_kernel.cpp +++ b/oneflow/user/kernels/bernoulli_kernel.cpp @@ -55,7 +55,7 @@ class BernoulliKerenl final : public user_op::OpKernel { REGISTER_USER_KERNEL("bernoulli") \ .SetCreateFn< \ BernoulliKerenl<OF_PP_PAIR_FIRST(in_dtype_pair), OF_PP_PAIR_FIRST(out_dtype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("in", 0) == OF_PP_PAIR_SECOND(in_dtype_pair)) \ & (user_op::HobDataType("out", 0) == OF_PP_PAIR_SECOND(out_dtype_pair))); diff --git a/oneflow/user/kernels/bias_add_kernel.h b/oneflow/user/kernels/bias_add_kernel.h index 25cee29ff5597fa5c1b63a3e182b41281bedf136..ad657720616072a5156ef976e219b4feb9f53373 100644 --- a/oneflow/user/kernels/bias_add_kernel.h +++ b/oneflow/user/kernels/bias_add_kernel.h @@ -59,7 +59,7 @@ class BiasAddUserKernel final : public user_op::OpKernel { #define REGISTER_BIAS_ADD_USER_KERNEL(op_device_type, dtype) \ REGISTER_USER_KERNEL("bias_add") \ .SetCreateFn<BiasAddUserKernel<DeviceType::k##op_device_type, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::k##op_device_type) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == DeviceType::k##op_device_type) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ diff --git a/oneflow/user/kernels/broadcast_div_grad_kernel.cpp b/oneflow/user/kernels/broadcast_div_grad_kernel.cpp index e97b61f1450aafaa180b1c58298058df364baf53..dfbb58355258a65c5b31f51dae20597fe575b168 100644 --- a/oneflow/user/kernels/broadcast_div_grad_kernel.cpp +++ b/oneflow/user/kernels/broadcast_div_grad_kernel.cpp @@ -60,7 +60,7 @@ class BroadcastDivGradKernel final : public user_op::OpKernel { #define REGISTER_BROADCAST_DIV_GRAD_KERNEL(device, dtype_pair) \ REGISTER_USER_KERNEL("broadcast_div_grad") \ .SetCreateFn<BroadcastDivGradKernel<device, OF_PP_PAIR_FIRST(dtype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("y", 0) == OF_PP_PAIR_SECOND(dtype_pair))) \ .SetInferTmpSizeFn([](oneflow::user_op::InferContext* ctx) { \ user_op::TensorDesc* z = ctx->TensorDesc4ArgNameAndIndex("z", 0); \ diff --git a/oneflow/user/kernels/broadcast_like_kernel.cpp b/oneflow/user/kernels/broadcast_like_kernel.cpp index 539e3bae330fbdb2e0599b647035237db603cf02..95a14a84109a28067778bdc3de232831992bd4c2 100644 --- a/oneflow/user/kernels/broadcast_like_kernel.cpp +++ b/oneflow/user/kernels/broadcast_like_kernel.cpp @@ -44,10 +44,10 @@ class BroadcastLikeKernel final : public user_op::OpKernel { } // namespace -#define 
REGISTER_BROADCAST_LIKE_XPU_KERNEL(device, dtype) \ - REGISTER_USER_KERNEL("broadcast_like") \ - .SetCreateFn<BroadcastLikeKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ +#define REGISTER_BROADCAST_LIKE_XPU_KERNEL(device, dtype) \ + REGISTER_USER_KERNEL("broadcast_like") \ + .SetCreateFn<BroadcastLikeKernel<device, dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value)); #ifdef WITH_CUDA diff --git a/oneflow/user/kernels/cast_kernel.cpp b/oneflow/user/kernels/cast_kernel.cpp index ba729c649be92b0e3117962ed121dcaf42cf2153..6e8e564daecd3467dce54dcfe482d3cbcfef4599 100644 --- a/oneflow/user/kernels/cast_kernel.cpp +++ b/oneflow/user/kernels/cast_kernel.cpp @@ -86,10 +86,10 @@ class CastKernel final : public OpKernel { #define REGISTER_CAST_KERNEL(device) \ REGISTER_USER_KERNEL("cast").SetCreateFn<CastKernel<device>>().SetIsMatchedHob( \ - user_op::HobDeviceType() == device); \ + user_op::HobDeviceTag() == device); \ REGISTER_USER_KERNEL("cast_like") \ .SetCreateFn<CastKernel<device>>() \ - .SetIsMatchedHob(user_op::HobDeviceType() == device); + .SetIsMatchedHob(user_op::HobDeviceTag() == device); REGISTER_CAST_KERNEL(DeviceType::kCPU) REGISTER_CAST_KERNEL(DeviceType::kGPU) diff --git a/oneflow/user/kernels/categorical_ordinal_encode_kernel.cpp b/oneflow/user/kernels/categorical_ordinal_encode_kernel.cpp index 575cf06e5a2f02251e525d3e97400de959846ade..ae28e68b77797422364b78ead0ff9615e8c1523f 100644 --- a/oneflow/user/kernels/categorical_ordinal_encode_kernel.cpp +++ b/oneflow/user/kernels/categorical_ordinal_encode_kernel.cpp @@ -45,7 +45,7 @@ class CategoricalOrdinalEncodeKernel final : public user_op::OpKernel { #define REGISTER_CATEGORICAL_ORDINAL_ENCODE_KERNEL(device, proto_type, cpp_type) \ REGISTER_USER_KERNEL("CategoricalOrdinalEncode") \ .SetCreateFn<CategoricalOrdinalEncodeKernel<device, cpp_type>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("in", 0) == proto_type)); REGISTER_CATEGORICAL_ORDINAL_ENCODE_KERNEL(DeviceType::kCPU, DataType::kInt32, int32_t); diff --git a/oneflow/user/kernels/clip_by_value_kernel.cpp b/oneflow/user/kernels/clip_by_value_kernel.cpp index 969e26760636f4b6de80363680eaea7b68ffaaa5..62ff5d549365cd3651a92019a6f5e19955422c3e 100644 --- a/oneflow/user/kernels/clip_by_value_kernel.cpp +++ b/oneflow/user/kernels/clip_by_value_kernel.cpp @@ -190,7 +190,7 @@ class ClipByScalarMaxGradKernel final : public user_op::OpKernel { #define REGISTER_CLIP_KERNEL(op_type_name, kernel_name, device_type_v, dtype) \ REGISTER_USER_KERNEL(#op_type_name) \ .SetCreateFn<kernel_name##Kernel<device_type_v, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device_type_v) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device_type_v) \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ @@ -201,7 +201,7 @@ class ClipByScalarMaxGradKernel final : public user_op::OpKernel { #define REGISTER_CLIP_GRAD_KERNEL(op_type_name, kernel_name, device_type_v, dtype) \ REGISTER_USER_KERNEL(#op_type_name) \ .SetCreateFn<kernel_name##GradKernel<device_type_v, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device_type_v) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device_type_v) \ & (user_op::HobDataType("dx", 0) == 
GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ diff --git a/oneflow/user/kernels/coco_reader_kernel.cpp b/oneflow/user/kernels/coco_reader_kernel.cpp index 4e900533d21e8c27651eb7ee85c210c94c8c77d1..fc43d60c5cc7e631d51d624ea1935b83adf12cd7 100644 --- a/oneflow/user/kernels/coco_reader_kernel.cpp +++ b/oneflow/user/kernels/coco_reader_kernel.cpp @@ -54,7 +54,7 @@ class COCOReaderKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("COCOReader") .SetCreateFn<COCOReaderKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("image", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("image_id", 0) == DataType::kInt64) & (user_op::HobDataType("image_size", 0) == DataType::kInt32) diff --git a/oneflow/user/kernels/concat_kernel.cpp b/oneflow/user/kernels/concat_kernel.cpp index a713b45bd0cbcdec04fbbfa28fa9f075cde9308f..3619dba8cbe9913613c0bf90862c3170081f8b78 100644 --- a/oneflow/user/kernels/concat_kernel.cpp +++ b/oneflow/user/kernels/concat_kernel.cpp @@ -78,7 +78,7 @@ class ConcatKernel final : public user_op::OpKernel { #define REGISTER_CONCAT_KERNEL(device, dtype) \ REGISTER_USER_KERNEL("concat").SetCreateFn<ConcatKernel<device, dtype>>().SetIsMatchedHob( \ - (user_op::HobDeviceType() == device) \ + (user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)); #define REGISTER_CONCAT_KERNEL_WITH_DEVICE(device) \ diff --git a/oneflow/user/kernels/constant_kernel.cpp b/oneflow/user/kernels/constant_kernel.cpp index bb226e3f94b68be0b506a2e65d0020f9925de8f4..0a654bea3eff30758350ca79517a31a793c97632 100644 --- a/oneflow/user/kernels/constant_kernel.cpp +++ b/oneflow/user/kernels/constant_kernel.cpp @@ -60,10 +60,10 @@ class ConstantKernel final : public OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_CONSTANT_XPU_KERNEL(device, dtype) \ - REGISTER_USER_KERNEL("constant") \ - .SetCreateFn<ConstantKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ +#define REGISTER_CONSTANT_XPU_KERNEL(device, dtype) \ + REGISTER_USER_KERNEL("constant") \ + .SetCreateFn<ConstantKernel<device, dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobAttr<DataType>("dtype") == GetDataType<dtype>::value)); #define REGISTER_CONSTANT_KERNEL(device, dtype_pair) \ diff --git a/oneflow/user/kernels/conv_cudnn_kernels.cpp b/oneflow/user/kernels/conv_cudnn_kernels.cpp index 77f33fdf52b05f90aaaa6d0b1713caec37e1c1e9..31892511a8e344b07c87fe68383b808542207c1f 100644 --- a/oneflow/user/kernels/conv_cudnn_kernels.cpp +++ b/oneflow/user/kernels/conv_cudnn_kernels.cpp @@ -190,7 +190,7 @@ class ConvGpuKernel final : public user_op::OpKernel { #define REGISTER_CONV_KERNEL(op_name, dtype, ndims) \ REGISTER_USER_KERNEL(#op_name) \ .SetCreateFn<ConvGpuKernel<dtype, ndims>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) -> size_t { \ const JobDesc& job_desc = ctx->job_desc(); \ @@ -248,7 +248,7 @@ class ConvDataGradGpuKernel final : public user_op::OpKernel { #define REGISTER_CONV_DATA_GRAD_FLOATING_KERNEL(dtype) \ REGISTER_USER_KERNEL("conv_data_grad") \ 
.SetCreateFn<ConvDataGradGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dy", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) -> size_t { \ const JobDesc& job_desc = ctx->job_desc(); \ @@ -300,7 +300,7 @@ class ConvFilterGradGpuKernel final : public user_op::OpKernel { #define REGISTER_CONV_FILTER_GRAD_FLOATING_KERNEL(dtype) \ REGISTER_USER_KERNEL("conv_filter_grad") \ .SetCreateFn<ConvFilterGradGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dy", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) -> size_t { \ const JobDesc& job_desc = ctx->job_desc(); \ @@ -371,10 +371,10 @@ class ConvBiasGradGpuKernel final : public user_op::OpKernel { } }; -#define REGISTER_CONV_BIAS_GRAD_FLOATING_KERNEL(dtype) \ - REGISTER_USER_KERNEL("conv_bias_grad") \ - .SetCreateFn<ConvBiasGradGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ +#define REGISTER_CONV_BIAS_GRAD_FLOATING_KERNEL(dtype) \ + REGISTER_USER_KERNEL("conv_bias_grad") \ + .SetCreateFn<ConvBiasGradGpuKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dy", 0) == GetDataType<dtype>::value)); REGISTER_CONV_BIAS_GRAD_FLOATING_KERNEL(float); diff --git a/oneflow/user/kernels/conv_kernels.cpp b/oneflow/user/kernels/conv_kernels.cpp index 5af68f5104f9ad8e0137eb95fe7eb99a54404ff8..922b7d0666594ecd9d50eac42ba7baa15c61c873 100644 --- a/oneflow/user/kernels/conv_kernels.cpp +++ b/oneflow/user/kernels/conv_kernels.cpp @@ -457,7 +457,7 @@ class ConvCpuKernel final : public user_op::OpKernel { #define REGISTER_CONV_KERNEL(op_name, dtype, ndims) \ REGISTER_USER_KERNEL(#op_name) \ .SetCreateFn<ConvCpuKernel<dtype, ndims>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobAttr<int32_t>("groups") == 1) \ & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) -> size_t { \ @@ -536,7 +536,7 @@ class ConvDataGradCpuKernel final : public user_op::OpKernel { #define REGISTER_CONV_DATA_GRAD_KERNEL(op_name, dtype) \ REGISTER_USER_KERNEL(#op_name) \ .SetCreateFn<ConvDataGradCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobAttr<int32_t>("groups") == 1) \ & (user_op::HobDataType("dy", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) -> size_t { \ @@ -604,7 +604,7 @@ class ConvFilterGradCpuKernel final : public user_op::OpKernel { #define REGISTER_CONV_FILTER_GRAD_KERNEL(op_name, dtype) \ REGISTER_USER_KERNEL(#op_name) \ .SetCreateFn<ConvFilterGradCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobAttr<int32_t>("groups") == 1) \ & (user_op::HobDataType("dy", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) -> size_t { \ @@ -672,7 +672,7 @@ class ConvBiasGradCpuKernel final : public user_op::OpKernel { #define REGISTER_CONV_BIAS_GRAD_KERNEL(op_name, dtype) \ REGISTER_USER_KERNEL(#op_name) \ 
.SetCreateFn<ConvBiasGradCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("dy", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) -> size_t { \ const auto& out_diff_shape = ctx->TensorDesc4ArgNameAndIndex("dy", 0)->shape(); \ diff --git a/oneflow/user/kernels/deconv_cudnn_kernel.cpp b/oneflow/user/kernels/deconv_cudnn_kernel.cpp index fd24256cc67434f25f5886dedb7835dccf598a01..a6785e29eabd586d9f5fad761863f3871df38a76 100644 --- a/oneflow/user/kernels/deconv_cudnn_kernel.cpp +++ b/oneflow/user/kernels/deconv_cudnn_kernel.cpp @@ -131,7 +131,7 @@ class DeConvGpuKernel final : public user_op::OpKernel { #define REGISTER_DECONV_KERNEL(op_name, dtype, ndims) \ REGISTER_USER_KERNEL(#op_name) \ .SetCreateFn<DeConvGpuKernel<dtype, ndims>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) -> size_t { \ const JobDesc& job_desc = ctx->job_desc(); \ diff --git a/oneflow/user/kernels/dropout_kernel.cpp b/oneflow/user/kernels/dropout_kernel.cpp index 4605e458df97c0be150052068833b239dece565c..2c7b6e6dd771f775a1d014b013188456b5526d9b 100644 --- a/oneflow/user/kernels/dropout_kernel.cpp +++ b/oneflow/user/kernels/dropout_kernel.cpp @@ -48,7 +48,7 @@ class DropoutKernelCPU final : public user_op::OpKernel { #define REGISTER_DROPOUT_KERNEL_CPU(dtype) \ REGISTER_USER_KERNEL("dropout") \ .SetCreateFn<DropoutKernelCPU<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ @@ -80,7 +80,7 @@ class DropoutGradKernelCPU final : public user_op::OpKernel { #define REGISTER_DROPOUT_GRAD_KERNEL_CPU(dtype) \ REGISTER_USER_KERNEL("dropout_grad") \ .SetCreateFn<DropoutGradKernelCPU<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ @@ -121,7 +121,7 @@ class RandomMaskLikeKernel final : public user_op::OpKernel { #define REGISTER_RANDOM_MASK_LIKE_KERNEL(device) \ REGISTER_USER_KERNEL("random_mask_like") \ .SetCreateFn<RandomMaskLikeKernel<device>>() \ - .SetIsMatchedHob(user_op::HobDeviceType() == device); + .SetIsMatchedHob(user_op::HobDeviceTag() == device); REGISTER_RANDOM_MASK_LIKE_KERNEL(DeviceType::kCPU) #ifdef WITH_CUDA diff --git a/oneflow/user/kernels/dropout_kernel.cu b/oneflow/user/kernels/dropout_kernel.cu index 283d5562e8bca8b6c7feb47a96e62d88ec52eab5..e1d217c9b9cf6f09dfd544a280aba9b512e1473b 100644 --- a/oneflow/user/kernels/dropout_kernel.cu +++ b/oneflow/user/kernels/dropout_kernel.cu @@ -85,7 +85,7 @@ class DropoutKernelGPU final : public user_op::OpKernel { #define REGISTER_DROPOUT_KERNEL_GPU(dtype) \ REGISTER_USER_KERNEL("dropout") \ .SetCreateFn<DropoutKernelGPU<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & 
(user_op::HobDataType("out", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ @@ -118,7 +118,7 @@ class DropoutGradKernelGPU final : public user_op::OpKernel { #define REGISTER_DROPOUT_GRAD_KERNEL_GPU(dtype) \ REGISTER_USER_KERNEL("dropout_grad") \ .SetCreateFn<DropoutGradKernelGPU<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ diff --git a/oneflow/user/kernels/eager_nccl_kernels.cu b/oneflow/user/kernels/eager_nccl_kernels.cu index 7ffb19edd6546b36109df7adb51350d40c149c6a..c448f753c51ea6e11776a391324e4cc053db0c94 100644 --- a/oneflow/user/kernels/eager_nccl_kernels.cu +++ b/oneflow/user/kernels/eager_nccl_kernels.cu @@ -50,6 +50,6 @@ class EagerNcclAllReduceKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("eager_nccl_all_reduce") .SetCreateFn<EagerNcclAllReduceKernel>() - .SetIsMatchedHob(user_op::HobDeviceType() == DeviceType::kGPU); + .SetIsMatchedHob(user_op::HobDeviceTag() == "gpu"); } // namespace oneflow diff --git a/oneflow/user/kernels/expand_dims_kernel.cpp b/oneflow/user/kernels/expand_dims_kernel.cpp index ffe221771af780f76efd4d147f8a5a4bed9bc35b..508b57dd75309ad3f5ab50666b96be518aa50e1f 100644 --- a/oneflow/user/kernels/expand_dims_kernel.cpp +++ b/oneflow/user/kernels/expand_dims_kernel.cpp @@ -21,7 +21,7 @@ namespace oneflow { #define REGISTER_EXPAND_DIMS_KERNEL(D) \ REGISTER_USER_KERNEL("expand_dims") \ .SetCreateFn<CopyDataContentKernel<DeviceType::D>>() \ - .SetIsMatchedHob(user_op::HobDeviceType() == DeviceType::D) \ + .SetIsMatchedHob(user_op::HobDeviceTag() == DeviceType::D) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ OF_RETURN_IF_ERROR(AddInplaceArgPairFn("out", 0, "in", 0, false)); \ diff --git a/oneflow/user/kernels/gather_kernel.cpp b/oneflow/user/kernels/gather_kernel.cpp index 4bea3f61352f8a3ff061b2f92ef5e7387c474b6f..ac141cbe54016d3e770f5a393c1d2d2c09dc269f 100644 --- a/oneflow/user/kernels/gather_kernel.cpp +++ b/oneflow/user/kernels/gather_kernel.cpp @@ -93,7 +93,7 @@ class GatherKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("gather") \ .SetCreateFn< \ GatherKernel<device, OF_PP_PAIR_FIRST(in_type), OF_PP_PAIR_FIRST(indices_type)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("in", 0) == OF_PP_PAIR_SECOND(in_type)) \ & (user_op::HobDataType("indices", 0) == OF_PP_PAIR_SECOND(indices_type))); diff --git a/oneflow/user/kernels/gelu_kernel.cpp b/oneflow/user/kernels/gelu_kernel.cpp index b9c7efa81dd5add328e3218613141509edb4e1ca..ac387442dddf2a78876e5807cc739bac38012936 100644 --- a/oneflow/user/kernels/gelu_kernel.cpp +++ b/oneflow/user/kernels/gelu_kernel.cpp @@ -41,7 +41,7 @@ class CpuGeluKernel final : public user_op::OpKernel { #define REGISTER_CPU_GELU_KERNEL(dtype) \ REGISTER_USER_KERNEL("gelu").SetCreateFn<CpuGeluKernel<dtype>>().SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kCPU) \ + (user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)); REGISTER_CPU_GELU_KERNEL(float) @@ -75,10 +75,10 @@ class 
CpuGeluGradKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_CPU_GELU_GRAD_KERNEL(dtype) \ - REGISTER_USER_KERNEL("gelu_grad") \ - .SetCreateFn<CpuGeluGradKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_CPU_GELU_GRAD_KERNEL(dtype) \ + REGISTER_USER_KERNEL("gelu_grad") \ + .SetCreateFn<CpuGeluGradKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)); REGISTER_CPU_GELU_GRAD_KERNEL(float) diff --git a/oneflow/user/kernels/gelu_kernel.cu b/oneflow/user/kernels/gelu_kernel.cu index 3f88ba1de621e0261c286b54fa0944954474bdf9..a998044d724eeda311ce5fe954f2af1ca3474a90 100644 --- a/oneflow/user/kernels/gelu_kernel.cu +++ b/oneflow/user/kernels/gelu_kernel.cu @@ -123,7 +123,7 @@ class GpuGeluKernel<float16> final : public user_op::OpKernel { #define REGISTER_GPU_GELU_KERNEL(dtype) \ REGISTER_USER_KERNEL("gelu").SetCreateFn<GpuGeluKernel<dtype>>().SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kGPU) \ + (user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)); REGISTER_GPU_GELU_KERNEL(float) @@ -174,10 +174,10 @@ class GpuGeluGradKernel<float16> final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_GPU_GELU_GRAD_KERNEL(dtype) \ - REGISTER_USER_KERNEL("gelu_grad") \ - .SetCreateFn<GpuGeluGradKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ +#define REGISTER_GPU_GELU_GRAD_KERNEL(dtype) \ + REGISTER_USER_KERNEL("gelu_grad") \ + .SetCreateFn<GpuGeluGradKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)); REGISTER_GPU_GELU_GRAD_KERNEL(float) diff --git a/oneflow/user/kernels/generate_random_batch_permutation_indices_kernel.cpp b/oneflow/user/kernels/generate_random_batch_permutation_indices_kernel.cpp index bba320f106a658cd1a9f0604a27a9e8494191490..73ca49245ff62774ee448f2ccb2a5973bf018a42 100644 --- a/oneflow/user/kernels/generate_random_batch_permutation_indices_kernel.cpp +++ b/oneflow/user/kernels/generate_random_batch_permutation_indices_kernel.cpp @@ -44,6 +44,6 @@ class GenerateRandomBatchPermutationIndicesCPUKernel final : public user_op::OpK REGISTER_USER_KERNEL("generate_random_batch_permutation_indices") .SetCreateFn<GenerateRandomBatchPermutationIndicesCPUKernel>() - .SetIsMatchedHob(user_op::HobDeviceType() == DeviceType::kCPU); + .SetIsMatchedHob(user_op::HobDeviceTag() == "cpu"); } // namespace oneflow diff --git a/oneflow/user/kernels/generate_random_batch_permutation_indices_kernel.cu b/oneflow/user/kernels/generate_random_batch_permutation_indices_kernel.cu index 96398345d5d8ff5639ff27e46130b660eee8fa13..db063686d94724c02ba93c687333098f4d645440 100644 --- a/oneflow/user/kernels/generate_random_batch_permutation_indices_kernel.cu +++ b/oneflow/user/kernels/generate_random_batch_permutation_indices_kernel.cu @@ -115,7 +115,7 @@ class GenerateRandomBatchPermutationIndicesGPUKernel final : public user_op::OpK REGISTER_USER_KERNEL("generate_random_batch_permutation_indices") .SetCreateFn<GenerateRandomBatchPermutationIndicesGPUKernel>() - .SetIsMatchedHob(user_op::HobDeviceType() == DeviceType::kGPU) + .SetIsMatchedHob(user_op::HobDeviceTag() == "gpu") .SetInferTmpSizeFn([](oneflow::user_op::InferContext* ctx) { 
const Shape* y_shape = ctx->Shape4ArgNameAndIndex("y", 0); const int32_t batch_size = y_shape->At(0); diff --git a/oneflow/user/kernels/heap_selection_top_k_kernel.cu b/oneflow/user/kernels/heap_selection_top_k_kernel.cu index 9e434dce1e035dbba14906dac742b8b30fccb51d..3e5df2bec786b588bfb464f0d4b8d8566592aa2e 100644 --- a/oneflow/user/kernels/heap_selection_top_k_kernel.cu +++ b/oneflow/user/kernels/heap_selection_top_k_kernel.cu @@ -216,7 +216,7 @@ class GpuHeapSelectionTopKKernel final : public user_op::OpKernel { #define REGISTER_GPU_HEAP_SELECTION_TOP_K_KERNEL(dtype) \ REGISTER_USER_KERNEL("top_k").SetCreateFn<GpuHeapSelectionTopKKernel<dtype>>().SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kGPU) & (user_op::HobAttr<int32_t>("k") <= 128) \ + (user_op::HobDeviceTag() == "gpu") & (user_op::HobAttr<int32_t>("k") <= 128) \ & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)); REGISTER_GPU_HEAP_SELECTION_TOP_K_KERNEL(float) diff --git a/oneflow/user/kernels/identity_kernel.cpp b/oneflow/user/kernels/identity_kernel.cpp index b7e5261ae8b4c07a095a2669eef1ae789ae6a23f..969360a33172442ff00fc25a4cb304115091542f 100644 --- a/oneflow/user/kernels/identity_kernel.cpp +++ b/oneflow/user/kernels/identity_kernel.cpp @@ -43,7 +43,7 @@ class IdentityKernel final : public user_op::OpKernel { #define REGISTER_IDENTITY_KERNEL(device) \ REGISTER_USER_KERNEL("identity") \ .SetCreateFn<IdentityKernel<device>>() \ - .SetIsMatchedHob(user_op::HobDeviceType() == device) \ + .SetIsMatchedHob(user_op::HobDeviceTag() == device) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ OF_RETURN_IF_ERROR(AddInplaceArgPairFn("out", 0, "in", 0, false)); \ diff --git a/oneflow/user/kernels/image_batch_align_kernel.cpp b/oneflow/user/kernels/image_batch_align_kernel.cpp index f001a73ea829a752a38ceffd1aef7d5f63b1712f..735029f0537b06a1db5a9042970b7df25a571040 100644 --- a/oneflow/user/kernels/image_batch_align_kernel.cpp +++ b/oneflow/user/kernels/image_batch_align_kernel.cpp @@ -102,7 +102,7 @@ class ImageBatchAlignKernel final : public user_op::OpKernel { #define REGISTER_IMAGE_BATCH_ALIGN_KERNEL(dtype) \ REGISTER_USER_KERNEL("image_batch_align") \ .SetCreateFn<ImageBatchAlignKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("in", 0) == DataType::kTensorBuffer) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)); diff --git a/oneflow/user/kernels/image_decode_kernel.cpp b/oneflow/user/kernels/image_decode_kernel.cpp index 312cf340944d9e17857233b95b3b7bc0ebd97aca..4627a9498559f6ac42381d3ea70eb238fa7b6386 100644 --- a/oneflow/user/kernels/image_decode_kernel.cpp +++ b/oneflow/user/kernels/image_decode_kernel.cpp @@ -86,7 +86,7 @@ class ImageDecodeKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("image_decode") .SetCreateFn<ImageDecodeKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("out", 0) == DataType::kTensorBuffer)); ; diff --git a/oneflow/user/kernels/image_object_preprocess_kernels.cpp b/oneflow/user/kernels/image_object_preprocess_kernels.cpp index 0e3e2d0ffdbe89163841115b4d9f0efc62d14f83..216101cb39b56422961ea32f75cbecf08053d140 100644 --- a/oneflow/user/kernels/image_object_preprocess_kernels.cpp +++ 
b/oneflow/user/kernels/image_object_preprocess_kernels.cpp @@ -441,7 +441,7 @@ MakeInplaceProposalFn(const std::string& input_arg_name) { REGISTER_USER_KERNEL("image_flip") .SetCreateFn<ImageFlipKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("flip_code", 0) == DataType::kInt8) & (user_op::HobDataType("out", 0) == DataType::kTensorBuffer)) @@ -449,7 +449,7 @@ REGISTER_USER_KERNEL("image_flip") REGISTER_USER_KERNEL("object_bbox_flip") .SetCreateFn<ObjectBboxFlipKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("bbox", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("image_size", 0) == DataType::kInt32) & (user_op::HobDataType("flip_code", 0) == DataType::kInt8) @@ -458,7 +458,7 @@ REGISTER_USER_KERNEL("object_bbox_flip") REGISTER_USER_KERNEL("object_bbox_scale") .SetCreateFn<ObjectBboxScaleKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("bbox", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("scale", 0) == DataType::kFloat) & (user_op::HobDataType("out", 0) == DataType::kTensorBuffer)) @@ -466,7 +466,7 @@ REGISTER_USER_KERNEL("object_bbox_scale") REGISTER_USER_KERNEL("object_segmentation_polygon_flip") .SetCreateFn<ObjectSegmentationPolygonFlipKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("poly", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("image_size", 0) == DataType::kInt32) & (user_op::HobDataType("flip_code", 0) == DataType::kInt8) @@ -475,7 +475,7 @@ REGISTER_USER_KERNEL("object_segmentation_polygon_flip") REGISTER_USER_KERNEL("object_segmentation_polygon_scale") .SetCreateFn<ObjectSegmentationPolygonScaleKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("poly", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("scale", 0) == DataType::kFloat) & (user_op::HobDataType("out", 0) == DataType::kTensorBuffer)) @@ -483,14 +483,14 @@ REGISTER_USER_KERNEL("object_segmentation_polygon_scale") REGISTER_USER_KERNEL("image_normalize") .SetCreateFn<ImageNormalize>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("out", 0) == DataType::kTensorBuffer)) .SetInplaceProposalFn(MakeInplaceProposalFn("in")); REGISTER_USER_KERNEL("object_segmentation_polygon_to_mask") .SetCreateFn<ObjectSegmentationPolygonToMask>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("poly", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("poly_index", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("image_size", 0) == DataType::kInt32) diff --git a/oneflow/user/kernels/image_preprocess_kernels.cpp b/oneflow/user/kernels/image_preprocess_kernels.cpp index 27fc4969758543c718e29990d03d4e504c130af4..ca1c884993392f35d1ac27a4b38e196fa0e55424 100644 --- a/oneflow/user/kernels/image_preprocess_kernels.cpp +++ b/oneflow/user/kernels/image_preprocess_kernels.cpp @@ 
-85,7 +85,7 @@ class ResizeToStaticShapeKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("image_resize") .SetCreateFn<ResizeToStaticShapeKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("out", 0) == DataType::kUInt8)); @@ -137,7 +137,7 @@ class ResizeShorterToTensorBufferKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("image_resize") .SetCreateFn<ResizeShorterToTensorBufferKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("out", 0) == DataType::kTensorBuffer)); @@ -314,7 +314,7 @@ class CropMirrorNormalizeFromStaticShapeToFloatKernel final : public user_op::Op REGISTER_USER_KERNEL("crop_mirror_normalize_from_uint8") .SetCreateFn<CropMirrorNormalizeFromStaticShapeToFloatKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == DataType::kUInt8) & (user_op::HobDataType("out", 0) == DataType::kFloat)); @@ -404,7 +404,7 @@ class CropMirrorNormalizeFromTensorBufferToFloatKernel final : public user_op::O REGISTER_USER_KERNEL("crop_mirror_normalize_from_tensorbuffer") .SetCreateFn<CropMirrorNormalizeFromTensorBufferToFloatKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("out", 0) == DataType::kFloat)); @@ -451,7 +451,7 @@ class CoinFlipKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("coin_flip") .SetCreateFn<CoinFlipKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("out", 0) == DataType::kInt8)); namespace { @@ -516,7 +516,7 @@ class ImageRandomCropKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("image_random_crop") .SetCreateFn<ImageRandomCropKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("out", 0) == DataType::kTensorBuffer)); diff --git a/oneflow/user/kernels/image_preprocess_kernels.cu b/oneflow/user/kernels/image_preprocess_kernels.cu index 6b0f10a4727d98140fcc0b9a339f70bd9ec4f35e..47aed7af5d86c0584d6e89ad5dc34a155252a3c4 100644 --- a/oneflow/user/kernels/image_preprocess_kernels.cu +++ b/oneflow/user/kernels/image_preprocess_kernels.cu @@ -188,7 +188,7 @@ class CropMirrorNormalizeGpuKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("crop_mirror_normalize_from_uint8") .SetCreateFn<CropMirrorNormalizeGpuKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") & (user_op::HobDataType("in", 0) == DataType::kUInt8) & (user_op::HobDataType("out", 0) == DataType::kFloat)); diff --git a/oneflow/user/kernels/image_target_resize_kernel.cpp b/oneflow/user/kernels/image_target_resize_kernel.cpp index c9f40d621d8eb7dc9d367f11e034a43bec5a7d5f..38bba2e1ec37854ca080f61184fe2c4857e0d411 100644 --- a/oneflow/user/kernels/image_target_resize_kernel.cpp +++ 
b/oneflow/user/kernels/image_target_resize_kernel.cpp @@ -121,7 +121,7 @@ class ImageTargetResizeKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("image_target_resize") .SetCreateFn<ImageTargetResizeKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("out", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("size", 0) == DataType::kInt32) diff --git a/oneflow/user/kernels/l2_normalize_kernel.cpp b/oneflow/user/kernels/l2_normalize_kernel.cpp index 5ae86c0b1a589e76bb40a32970bbb0c39986c6e1..0cd819735bbd86fba4e8cf15b48a93c4422f5aad 100644 --- a/oneflow/user/kernels/l2_normalize_kernel.cpp +++ b/oneflow/user/kernels/l2_normalize_kernel.cpp @@ -91,10 +91,10 @@ class CpuL2NormalizeKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_CPU_L2_NORMALIZE_KERNEL(dtype) \ - REGISTER_USER_KERNEL("l2_normalize") \ - .SetCreateFn<CpuL2NormalizeKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_CPU_L2_NORMALIZE_KERNEL(dtype) \ + REGISTER_USER_KERNEL("l2_normalize") \ + .SetCreateFn<CpuL2NormalizeKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value)); REGISTER_CPU_L2_NORMALIZE_KERNEL(float) @@ -122,10 +122,10 @@ class CpuL2NormalizeGradKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_CPU_L2_NORMALIZE_GRAD_KERNEL(dtype) \ - REGISTER_USER_KERNEL("l2_normalize_grad") \ - .SetCreateFn<CpuL2NormalizeGradKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_CPU_L2_NORMALIZE_GRAD_KERNEL(dtype) \ + REGISTER_USER_KERNEL("l2_normalize_grad") \ + .SetCreateFn<CpuL2NormalizeGradKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)); REGISTER_CPU_L2_NORMALIZE_GRAD_KERNEL(float) diff --git a/oneflow/user/kernels/l2_normalize_kernel.cu b/oneflow/user/kernels/l2_normalize_kernel.cu index e3d11f3e585bc8f0425bc205f132563f22f9ea0c..6133bd53df9e973a356865215b993db4f8f01e1b 100644 --- a/oneflow/user/kernels/l2_normalize_kernel.cu +++ b/oneflow/user/kernels/l2_normalize_kernel.cu @@ -104,10 +104,10 @@ class GpuL2NormalizeKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_GPU_L2_NORMALIZE_KERNEL(dtype) \ - REGISTER_USER_KERNEL("l2_normalize") \ - .SetCreateFn<GpuL2NormalizeKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ +#define REGISTER_GPU_L2_NORMALIZE_KERNEL(dtype) \ + REGISTER_USER_KERNEL("l2_normalize") \ + .SetCreateFn<GpuL2NormalizeKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value)); REGISTER_GPU_L2_NORMALIZE_KERNEL(float) @@ -136,10 +136,10 @@ class GpuL2NormalizeGradKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_GPU_L2_NORMALIZE_GRAD_KERNEL(dtype) \ - REGISTER_USER_KERNEL("l2_normalize_grad") \ - .SetCreateFn<GpuL2NormalizeGradKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ 
+#define REGISTER_GPU_L2_NORMALIZE_GRAD_KERNEL(dtype) \ + REGISTER_USER_KERNEL("l2_normalize_grad") \ + .SetCreateFn<GpuL2NormalizeGradKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)); REGISTER_GPU_L2_NORMALIZE_GRAD_KERNEL(float) diff --git a/oneflow/user/kernels/layer_norm_cpu_kernel.cpp b/oneflow/user/kernels/layer_norm_cpu_kernel.cpp index 7b79ea77d65e22435e0b4ff2644b6d34b1d73dca..0000279202c079af5906bbf0babb3e4005388e22 100644 --- a/oneflow/user/kernels/layer_norm_cpu_kernel.cpp +++ b/oneflow/user/kernels/layer_norm_cpu_kernel.cpp @@ -28,10 +28,10 @@ class LayerNormCpuKernel final : public user_op::OpKernel { void Compute(user_op::KernelComputeContext* ctx) const override { TODO(); }; }; -#define REGISTER_LAYER_NORM_CPU_KERNEL(dtype) \ - REGISTER_USER_KERNEL("layer_norm") \ - .SetCreateFn<LayerNormCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_LAYER_NORM_CPU_KERNEL(dtype) \ + REGISTER_USER_KERNEL("layer_norm") \ + .SetCreateFn<LayerNormCpuKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); REGISTER_LAYER_NORM_CPU_KERNEL(float) @@ -48,10 +48,10 @@ class LayerNormGradCpuKernel final : public user_op::OpKernel { void Compute(user_op::KernelComputeContext* ctx) const override { TODO(); }; }; -#define REGISTER_LAYER_NORM_GRAD_CPU_KERNEL(dtype) \ - REGISTER_USER_KERNEL("layer_norm_grad") \ - .SetCreateFn<LayerNormGradCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_LAYER_NORM_GRAD_CPU_KERNEL(dtype) \ + REGISTER_USER_KERNEL("layer_norm_grad") \ + .SetCreateFn<LayerNormGradCpuKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("dy", 0) == GetDataType<dtype>::value)); REGISTER_LAYER_NORM_GRAD_CPU_KERNEL(float) @@ -68,10 +68,10 @@ class LayerNormParamGradCpuKernel final : public user_op::OpKernel { void Compute(user_op::KernelComputeContext* ctx) const override { TODO(); }; }; -#define REGISTER_LAYER_NORM_PARAM_GRAD_CPU_KERNEL(dtype) \ - REGISTER_USER_KERNEL("layer_norm_param_grad") \ - .SetCreateFn<LayerNormParamGradCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_LAYER_NORM_PARAM_GRAD_CPU_KERNEL(dtype) \ + REGISTER_USER_KERNEL("layer_norm_param_grad") \ + .SetCreateFn<LayerNormParamGradCpuKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("dy", 0) == GetDataType<dtype>::value)); REGISTER_LAYER_NORM_PARAM_GRAD_CPU_KERNEL(float) diff --git a/oneflow/user/kernels/layer_norm_gpu_kernel.cpp b/oneflow/user/kernels/layer_norm_gpu_kernel.cpp index 42d472f7901e62e0e30080570812baedcab00107..71abf77556176d9b0b97927bbbfbfc2fb16c569f 100644 --- a/oneflow/user/kernels/layer_norm_gpu_kernel.cpp +++ b/oneflow/user/kernels/layer_norm_gpu_kernel.cpp @@ -121,7 +121,7 @@ class LayerNormGpuKernel final : public user_op::OpKernel { #define REGISTER_LAYER_NORM_GPU_KERNEL(dtype, bn_param_dtype) \ REGISTER_USER_KERNEL("layer_norm") \ .SetCreateFn<LayerNormGpuKernel<dtype, bn_param_dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](oneflow::user_op::InferContext* ctx) { \ user_op::TensorDesc* mean = 
ctx->TensorDesc4ArgNameAndIndex("mean", 0); \ @@ -174,7 +174,7 @@ class LayerNormGradGpuKernel final : public user_op::OpKernel { #define REGISTER_LAYER_NORM_GRAD_GPU_KERNEL(dtype, bn_param_dtype) \ REGISTER_USER_KERNEL("layer_norm_grad") \ .SetCreateFn<LayerNormGradGpuKernel<dtype, bn_param_dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dy", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](oneflow::user_op::InferContext* ctx) { \ user_op::TensorDesc* mean = ctx->TensorDesc4ArgNameAndIndex("mean", 0); \ @@ -243,10 +243,10 @@ class LayerNormParamGradGpuKernel final : public user_op::OpKernel { }; }; -#define REGISTER_LAYER_NORM_PARAM_GRAD_GPU_KERNEL(dtype) \ - REGISTER_USER_KERNEL("layer_norm_param_grad") \ - .SetCreateFn<LayerNormParamGradGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ +#define REGISTER_LAYER_NORM_PARAM_GRAD_GPU_KERNEL(dtype) \ + REGISTER_USER_KERNEL("layer_norm_param_grad") \ + .SetCreateFn<LayerNormParamGradGpuKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dy", 0) == GetDataType<dtype>::value)); REGISTER_LAYER_NORM_PARAM_GRAD_GPU_KERNEL(float) diff --git a/oneflow/user/kernels/leaky_relu_kernel.cpp b/oneflow/user/kernels/leaky_relu_kernel.cpp index 4bbf6f61a9045506118985abb0c54919a7916da4..18b971fd1d45e283951f3e170791c75fa11574d3 100644 --- a/oneflow/user/kernels/leaky_relu_kernel.cpp +++ b/oneflow/user/kernels/leaky_relu_kernel.cpp @@ -36,10 +36,10 @@ class CpuLeakyReluKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_CPU_LEAKY_RELU_KERNEL(dtype) \ - REGISTER_USER_KERNEL("leaky_relu") \ - .SetCreateFn<CpuLeakyReluKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_CPU_LEAKY_RELU_KERNEL(dtype) \ + REGISTER_USER_KERNEL("leaky_relu") \ + .SetCreateFn<CpuLeakyReluKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value)); REGISTER_CPU_LEAKY_RELU_KERNEL(float) @@ -66,10 +66,10 @@ class CpuLeakyReluGradKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_CPU_LEAKY_RELU_GRAD_KERNEL(dtype) \ - REGISTER_USER_KERNEL("leaky_relu_grad") \ - .SetCreateFn<CpuLeakyReluGradKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_CPU_LEAKY_RELU_GRAD_KERNEL(dtype) \ + REGISTER_USER_KERNEL("leaky_relu_grad") \ + .SetCreateFn<CpuLeakyReluGradKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)); REGISTER_CPU_LEAKY_RELU_GRAD_KERNEL(float) diff --git a/oneflow/user/kernels/leaky_relu_kernel.cu b/oneflow/user/kernels/leaky_relu_kernel.cu index fce60b156e63d840f039d0d435b5e7be51c88318..8ada7a0aa0474e1418eb77344a1fb5a79d2f34b1 100644 --- a/oneflow/user/kernels/leaky_relu_kernel.cu +++ b/oneflow/user/kernels/leaky_relu_kernel.cu @@ -50,10 +50,10 @@ class GpuLeakyReluKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_GPU_LEAKY_RELU_KERNEL(dtype) \ - REGISTER_USER_KERNEL("leaky_relu") \ - .SetCreateFn<GpuLeakyReluKernel<dtype>>() \ - 
.SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ +#define REGISTER_GPU_LEAKY_RELU_KERNEL(dtype) \ + REGISTER_USER_KERNEL("leaky_relu") \ + .SetCreateFn<GpuLeakyReluKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value)); REGISTER_GPU_LEAKY_RELU_KERNEL(float) @@ -78,10 +78,10 @@ class GpuLeakyReluGradKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_GPU_LEAKY_RELU_GRAD_KERNEL(dtype) \ - REGISTER_USER_KERNEL("leaky_relu_grad") \ - .SetCreateFn<GpuLeakyReluGradKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ +#define REGISTER_GPU_LEAKY_RELU_GRAD_KERNEL(dtype) \ + REGISTER_USER_KERNEL("leaky_relu_grad") \ + .SetCreateFn<GpuLeakyReluGradKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)); REGISTER_GPU_LEAKY_RELU_GRAD_KERNEL(float) diff --git a/oneflow/user/kernels/math_binary_broadcast_kernels.cpp b/oneflow/user/kernels/math_binary_broadcast_kernels.cpp index 068dbf9e2f5ab96ef2063976c13761d0af35285e..f6a320a3c0632a01e078fc9336fdc70aeacb1dec 100644 --- a/oneflow/user/kernels/math_binary_broadcast_kernels.cpp +++ b/oneflow/user/kernels/math_binary_broadcast_kernels.cpp @@ -52,7 +52,7 @@ class MathBinaryBroadcastKernel final : public user_op::OpKernel { device, OF_PP_PAIR_FIRST(data_type_pair), OF_PP_PAIR_FIRST(data_type_pair), \ &NdarrayUtil<device, OF_PP_PAIR_FIRST(data_type_pair)>::OF_PP_CAT( \ Broadcast, OF_PP_PAIR_SECOND(math_type_pair))>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("z", 0) == OF_PP_PAIR_SECOND(data_type_pair))); OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(REGISTER_MATH_BINARY_BROADCAST_KERNEL, @@ -71,7 +71,7 @@ OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(REGISTER_MATH_BINARY_BROADCAST_KERNEL, device, OF_PP_PAIR_FIRST(data_type_pair), int8_t, \ &NdarrayUtil<device, OF_PP_PAIR_FIRST(data_type_pair)>::OF_PP_CAT( \ Broadcast, OF_PP_PAIR_SECOND(math_type_pair))>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(data_type_pair)) \ & (user_op::HobDataType("z", 0) == DataType::kInt8)); diff --git a/oneflow/user/kernels/math_binary_elementwise_kernel.cpp b/oneflow/user/kernels/math_binary_elementwise_kernel.cpp index be0b74f7d8980f3c7997095c3bf7165d62213b4d..c615c3d661f4c10e4801324066010f92affb5b8d 100644 --- a/oneflow/user/kernels/math_binary_elementwise_kernel.cpp +++ b/oneflow/user/kernels/math_binary_elementwise_kernel.cpp @@ -92,20 +92,20 @@ class MathBinaryElementwiseYGradCpuKernel final : public user_op::OpKernel { .SetCreateFn< \ MathBinaryElementwiseCpuKernel<OF_PP_CAT(OF_PP_PAIR_SECOND(math_type_pair), Functor), \ OF_PP_PAIR_FIRST(data_type_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(data_type_pair))); \ \ REGISTER_USER_KERNEL((std::string("") + OF_PP_PAIR_FIRST(math_type_pair) + "_x_grad")) \ .SetCreateFn<MathBinaryElementwiseXGradCpuKernel< \ OF_PP_CAT(OF_PP_PAIR_SECOND(math_type_pair), Functor), \ OF_PP_PAIR_FIRST(data_type_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + 
.SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(data_type_pair))); \ REGISTER_USER_KERNEL((std::string("") + OF_PP_PAIR_FIRST(math_type_pair) + "_y_grad")) \ .SetCreateFn<MathBinaryElementwiseYGradCpuKernel< \ OF_PP_CAT(OF_PP_PAIR_SECOND(math_type_pair), Functor), \ OF_PP_PAIR_FIRST(data_type_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(data_type_pair))); OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(REGISTER_MATH_BINARY_ELEMENTWISE_CPU_KERNEL_AND_GRAD, diff --git a/oneflow/user/kernels/math_binary_elementwise_kernel.cu b/oneflow/user/kernels/math_binary_elementwise_kernel.cu index 7df6126b3994ec87c82daee74b07c706df9467ac..6dc1e8cea97737b20682e68c72edb45449d0afd8 100644 --- a/oneflow/user/kernels/math_binary_elementwise_kernel.cu +++ b/oneflow/user/kernels/math_binary_elementwise_kernel.cu @@ -108,20 +108,20 @@ class MathBinaryElementwiseYGradGpuKernel final : public user_op::OpKernel { .SetCreateFn< \ MathBinaryElementwiseGpuKernel<OF_PP_CAT(OF_PP_PAIR_SECOND(math_type_pair), Functor), \ OF_PP_PAIR_FIRST(data_type_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(data_type_pair))); \ \ REGISTER_USER_KERNEL((std::string("") + OF_PP_PAIR_FIRST(math_type_pair) + "_x_grad")) \ .SetCreateFn<MathBinaryElementwiseXGradGpuKernel< \ OF_PP_CAT(OF_PP_PAIR_SECOND(math_type_pair), Functor), \ OF_PP_PAIR_FIRST(data_type_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(data_type_pair))); \ REGISTER_USER_KERNEL((std::string("") + OF_PP_PAIR_FIRST(math_type_pair) + "_y_grad")) \ .SetCreateFn<MathBinaryElementwiseYGradGpuKernel< \ OF_PP_CAT(OF_PP_PAIR_SECOND(math_type_pair), Functor), \ OF_PP_PAIR_FIRST(data_type_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(data_type_pair))); OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(REGISTER_MATH_BINARY_ELEMENTWISE_GPU_KERNEL_AND_GRAD, @@ -205,18 +205,18 @@ class MathBinaryElementwiseYGradGpuHalfKernel final : public user_op::OpKernel { #define REGISTER_MATH_BINARY_ELEMENTWISE_GPU_HALF_KERNEL_AND_GRAD(math_type_str, math_func_prefix) \ REGISTER_USER_KERNEL(math_type_str) \ .SetCreateFn<MathBinaryElementwiseGpuHalfKernel<OF_PP_CAT(math_func_prefix, Functor)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == DataType::kFloat16)); \ \ REGISTER_USER_KERNEL((std::string("") + math_type_str + "_x_grad")) \ .SetCreateFn< \ MathBinaryElementwiseXGradGpuHalfKernel<OF_PP_CAT(math_func_prefix, Functor)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == DataType::kFloat16)); \ REGISTER_USER_KERNEL((std::string("") + math_type_str + "_y_grad")) \ .SetCreateFn< \ MathBinaryElementwiseYGradGpuHalfKernel<OF_PP_CAT(math_func_prefix, Functor)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & 
(user_op::HobDataType("x", 0) == DataType::kFloat16)); OF_PP_FOR_EACH_TUPLE(REGISTER_MATH_BINARY_ELEMENTWISE_GPU_HALF_KERNEL_AND_GRAD, diff --git a/oneflow/user/kernels/math_unary_elementwise_kernel.cpp b/oneflow/user/kernels/math_unary_elementwise_kernel.cpp index 6bc379c6c44eeb1e6cb4414c48398f097eaca0cc..8c2a2dee289baa9ebd8e06042fb9c2f373c70e57 100644 --- a/oneflow/user/kernels/math_unary_elementwise_kernel.cpp +++ b/oneflow/user/kernels/math_unary_elementwise_kernel.cpp @@ -64,7 +64,7 @@ class MathUnaryElementwiseGradCpuKernel final : public user_op::OpKernel { .SetCreateFn< \ MathUnaryElementwiseCpuKernel<OF_PP_CAT(OF_PP_PAIR_SECOND(math_type_pair), Functor), \ OF_PP_PAIR_FIRST(data_type_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(data_type_pair)) \ & (user_op::HobDataType("y", 0) == OF_PP_PAIR_SECOND(data_type_pair))); \ \ @@ -72,7 +72,7 @@ class MathUnaryElementwiseGradCpuKernel final : public user_op::OpKernel { .SetCreateFn< \ MathUnaryElementwiseGradCpuKernel<OF_PP_CAT(OF_PP_PAIR_SECOND(math_type_pair), Functor), \ OF_PP_PAIR_FIRST(data_type_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(data_type_pair))); OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(REGISTER_MATH_UNARY_ELEMENTWISE_CPU_KERNEL_AND_GRAD, diff --git a/oneflow/user/kernels/math_unary_elementwise_kernel.cu b/oneflow/user/kernels/math_unary_elementwise_kernel.cu index 13ccafee4bf3f5e61cc463f6c423fece725d6fb3..32144bffe3279f749b2b0fbbb22fba4300853f64 100644 --- a/oneflow/user/kernels/math_unary_elementwise_kernel.cu +++ b/oneflow/user/kernels/math_unary_elementwise_kernel.cu @@ -82,7 +82,7 @@ class MathUnaryElementwiseGradGpuKernel final : public user_op::OpKernel { .SetCreateFn< \ MathUnaryElementwiseGpuKernel<OF_PP_CAT(OF_PP_PAIR_SECOND(math_type_pair), Functor), \ OF_PP_PAIR_FIRST(data_type_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(data_type_pair)) \ & (user_op::HobDataType("y", 0) == OF_PP_PAIR_SECOND(data_type_pair))); \ \ @@ -90,7 +90,7 @@ class MathUnaryElementwiseGradGpuKernel final : public user_op::OpKernel { .SetCreateFn< \ MathUnaryElementwiseGradGpuKernel<OF_PP_CAT(OF_PP_PAIR_SECOND(math_type_pair), Functor), \ OF_PP_PAIR_FIRST(data_type_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(data_type_pair))); OF_PP_SEQ_PRODUCT_FOR_EACH_TUPLE(REGISTER_MATH_UNARY_ELEMENTWISE_GPU_KERNEL_AND_GRAD, @@ -144,13 +144,13 @@ class MathUnaryElementwiseGradGpuHalfKernel final : public user_op::OpKernel { #define REGISTER_MATH_UNARY_ELEMENTWISE_GPU_HALF_KERNEL_AND_GRAD(math_type_str, math_func_prefix) \ REGISTER_USER_KERNEL(math_type_str) \ .SetCreateFn<MathUnaryElementwiseGpuHalfKernel<OF_PP_CAT(math_func_prefix, Functor)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == DataType::kFloat16) \ & (user_op::HobDataType("y", 0) == DataType::kFloat16)); \ \ REGISTER_USER_KERNEL((std::string("") + math_type_str + "_grad")) \ 
.SetCreateFn<MathUnaryElementwiseGradGpuHalfKernel<OF_PP_CAT(math_func_prefix, Functor)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == DataType::kFloat16)); OF_PP_FOR_EACH_TUPLE(REGISTER_MATH_UNARY_ELEMENTWISE_GPU_HALF_KERNEL_AND_GRAD, diff --git a/oneflow/user/kernels/matmul_kernels.cpp b/oneflow/user/kernels/matmul_kernels.cpp index f81b044961de7ee5b726626a8bdba04b6f0e81b5..669d6ae0ddf4049010d0b5b004cdadd65c9e5e4c 100644 --- a/oneflow/user/kernels/matmul_kernels.cpp +++ b/oneflow/user/kernels/matmul_kernels.cpp @@ -62,10 +62,10 @@ class MatmulFloatingKernel final : public user_op::OpKernel { } }; -#define REGISTER_MATMUL_KERNEL(device, dtype) \ - REGISTER_USER_KERNEL("matmul") \ - .SetCreateFn<MatmulFloatingKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ +#define REGISTER_MATMUL_KERNEL(device, dtype) \ + REGISTER_USER_KERNEL("matmul") \ + .SetCreateFn<MatmulFloatingKernel<device, dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("a", 0) == GetDataType<dtype>::value)); REGISTER_MATMUL_KERNEL(DeviceType::kCPU, float); @@ -110,8 +110,7 @@ class MatmulGpuHalfKernel final : public user_op::OpKernel { #ifdef WITH_CUDA REGISTER_USER_KERNEL("matmul").SetCreateFn<MatmulGpuHalfKernel>().SetIsMatchedHob( - (user_op::HobDeviceType() == DeviceType::kGPU) - & (user_op::HobDataType("a", 0) == DataType::kFloat16)); + (user_op::HobDeviceTag() == "gpu") & (user_op::HobDataType("a", 0) == DataType::kFloat16)); #endif template<DeviceType device_type, typename T> @@ -147,7 +146,7 @@ class BatchMatmulFloatingKernel final : public user_op::OpKernel { #define REGISTER_BATCH_MATMUL_KERNEL(device, dtype) \ REGISTER_USER_KERNEL("batch_matmul") \ .SetCreateFn<BatchMatmulFloatingKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("a", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ user_op::TensorDesc* a = ctx->TensorDesc4ArgNameAndIndex("a", 0); \ @@ -203,7 +202,7 @@ class BatchMatmulGpuHalfKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("batch_matmul") .SetCreateFn<BatchMatmulGpuHalfKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") & (user_op::HobDataType("a", 0) == DataType::kFloat16)) .SetInferTmpSizeFn([](user_op::InferContext* ctx) { user_op::TensorDesc* a = ctx->TensorDesc4ArgNameAndIndex("a", 0); diff --git a/oneflow/user/kernels/multiply_kernel.cpp b/oneflow/user/kernels/multiply_kernel.cpp index 392cc4dd3f3b6022e0ca7c294a9c3d64a5bc2ce6..435601096b2c17d85da24a46dbc49ac8f84d7a6d 100644 --- a/oneflow/user/kernels/multiply_kernel.cpp +++ b/oneflow/user/kernels/multiply_kernel.cpp @@ -46,7 +46,7 @@ class MultiplyKernel final : public user_op::OpKernel { #define REGISTER_MULTIPLY_KERNEL(device, dtype_pair) \ REGISTER_USER_KERNEL("multiply") \ .SetCreateFn<MultiplyKernel<device, OF_PP_PAIR_FIRST(dtype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(dtype_pair))) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ diff --git a/oneflow/user/kernels/nd_index_slice_kernels.h 
b/oneflow/user/kernels/nd_index_slice_kernels.h index 77412f1abb7829e1a88b578546c1688ee83f1844..8c879d5a69018eedd31da2bfb1bb93c9fcb01c35 100644 --- a/oneflow/user/kernels/nd_index_slice_kernels.h +++ b/oneflow/user/kernels/nd_index_slice_kernels.h @@ -125,7 +125,7 @@ void TensorScatterNdAddKernel<device_type, T, I>::Compute( REGISTER_USER_KERNEL(#op_type_name) \ .SetCreateFn< \ op##Kernel<device_type_v, OF_PP_PAIR_FIRST(dtype_pair), OF_PP_PAIR_FIRST(itype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device_type_v) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device_type_v) \ & (user_op::HobDataType("indices", 0) == OF_PP_PAIR_SECOND(itype_pair)) \ & (user_op::HobDataType("out", 0) == OF_PP_PAIR_SECOND(dtype_pair))); @@ -134,7 +134,7 @@ void TensorScatterNdAddKernel<device_type, T, I>::Compute( REGISTER_USER_KERNEL(#op_type_name) \ .SetCreateFn<TensorScatterNd##opt##Kernel<device_type_v, OF_PP_PAIR_FIRST(dtype_pair), \ OF_PP_PAIR_FIRST(itype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device_type_v) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device_type_v) \ & (user_op::HobDataType("indices", 0) == OF_PP_PAIR_SECOND(itype_pair)) \ & (user_op::HobDataType("out", 0) == OF_PP_PAIR_SECOND(dtype_pair))) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ diff --git a/oneflow/user/kernels/normalization_kernel.cpp b/oneflow/user/kernels/normalization_kernel.cpp index e6301b1b9aac94a0a13104e3345f25e096b8994f..c7f06a24c80dca1e556d9e7f7667efc795027b09 100644 --- a/oneflow/user/kernels/normalization_kernel.cpp +++ b/oneflow/user/kernels/normalization_kernel.cpp @@ -185,7 +185,7 @@ class NormalizationInferenceKernel final : public user_op::OpKernel { #define REGISTER_BN_INFERENCE_KERNEL(dtype) \ REGISTER_USER_KERNEL("normalization") \ .SetCreateFn<NormalizationInferenceKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value) \ & (user_op::HobAttr<bool>("training") == false)); @@ -353,7 +353,7 @@ class NormalizationGradUserKernel final : public user_op::OpKernel { #define REGISTER_BN_TRAIN_KERNEL(dtype) \ REGISTER_USER_KERNEL("normalization") \ .SetCreateFn<NormalizationTrainKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value) \ & (user_op::HobAttr<bool>("training") == true)) \ .SetInferTmpSizeFn(InferTrainTmpSize); @@ -361,7 +361,7 @@ class NormalizationGradUserKernel final : public user_op::OpKernel { #define REGISTER_BN_GRAD_KERNEL(dtype) \ REGISTER_USER_KERNEL("normalization_grad") \ .SetCreateFn<NormalizationGradUserKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn(InferGradTmpSize); diff --git a/oneflow/user/kernels/ofrecord_decoder_kernels.cpp b/oneflow/user/kernels/ofrecord_decoder_kernels.cpp index b52235b49f2b11edb54f0472aac542716f11361b..b070e72f7078b35c1fe0e433f41c14e9eb87312c 100644 --- a/oneflow/user/kernels/ofrecord_decoder_kernels.cpp +++ b/oneflow/user/kernels/ofrecord_decoder_kernels.cpp @@ -106,7 +106,7 @@ class OFRecordRawDecoderKernel final : public user_op::OpKernel { #define REGISTER_RAW_DECODER_KERNEL(dtype) \ REGISTER_USER_KERNEL("ofrecord_raw_decoder") \ 
.SetCreateFn<OFRecordRawDecoderKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("in", 0) == DataType::kOFRecord) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)); @@ -211,7 +211,7 @@ class OFRecordImageDecoderRandomCropKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("ofrecord_image_decoder_random_crop") .SetCreateFn<OFRecordImageDecoderRandomCropKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == DataType::kOFRecord) & (user_op::HobDataType("out", 0) == DataType::kTensorBuffer)); @@ -243,7 +243,7 @@ class OFRecordImageDecoderKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("ofrecord_image_decoder") .SetCreateFn<OFRecordImageDecoderKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == DataType::kOFRecord) & (user_op::HobDataType("out", 0) == DataType::kTensorBuffer)); diff --git a/oneflow/user/kernels/ofrecord_image_classification_reader_kernel.cpp b/oneflow/user/kernels/ofrecord_image_classification_reader_kernel.cpp index 89723519ad29364d7ace277771ae579a1a6c36e9..17dd2d5fb2ec4941b5efa502648358c7e229ec89 100644 --- a/oneflow/user/kernels/ofrecord_image_classification_reader_kernel.cpp +++ b/oneflow/user/kernels/ofrecord_image_classification_reader_kernel.cpp @@ -55,7 +55,7 @@ class OFRecordImageClassificationReaderKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("ofrecord_image_classification_reader") .SetCreateFn<OFRecordImageClassificationReaderKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("image", 0) == DataType::kTensorBuffer) & (user_op::HobDataType("label", 0) == DataType::kTensorBuffer)); diff --git a/oneflow/user/kernels/ofrecord_reader_kernel.cpp b/oneflow/user/kernels/ofrecord_reader_kernel.cpp index c2d838b9ad6f73eea4e610bbbb4462b7f654f37b..5d329707d227859ca9d2f45ba422cb1927196fdc 100644 --- a/oneflow/user/kernels/ofrecord_reader_kernel.cpp +++ b/oneflow/user/kernels/ofrecord_reader_kernel.cpp @@ -54,7 +54,7 @@ class OFRecordReaderKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("OFRecordReader") .SetCreateFn<OFRecordReaderKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("out", 0) == DataType::kOFRecord)); } // namespace oneflow diff --git a/oneflow/user/kernels/one_hot_kernel.cpp b/oneflow/user/kernels/one_hot_kernel.cpp index f04c07e6c1c3edc4a9478d24ce2fd39627e38a09..37eba3f18c226b8da6a09d0a88db7ad166bc467a 100644 --- a/oneflow/user/kernels/one_hot_kernel.cpp +++ b/oneflow/user/kernels/one_hot_kernel.cpp @@ -54,7 +54,7 @@ class CpuOneHotKernel final : public user_op::OpKernel { #define REGISTER_CPU_ONE_HOT_KERNEL(dtype, itype) \ REGISTER_USER_KERNEL("one_hot").SetCreateFn<CpuOneHotKernel<dtype, itype>>().SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kCPU) \ + (user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("indices", 0) == GetDataType<itype>::value) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)); diff --git a/oneflow/user/kernels/one_hot_kernel.cu b/oneflow/user/kernels/one_hot_kernel.cu index
0221b2a97b5cdf9e3c69f247a41760f0d671f743..5fbe02c863ffad86362373c7e7b820a7a1d2f515 100644 --- a/oneflow/user/kernels/one_hot_kernel.cu +++ b/oneflow/user/kernels/one_hot_kernel.cu @@ -61,7 +61,7 @@ class GpuOneHotKernel final : public user_op::OpKernel { #define REGISTER_GPU_ONE_HOT_KERNEL(dtype, itype) \ REGISTER_USER_KERNEL("one_hot").SetCreateFn<GpuOneHotKernel<dtype, itype>>().SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kGPU) \ + (user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("indices", 0) == GetDataType<itype>::value) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)); diff --git a/oneflow/user/kernels/pad_kernel.cpp b/oneflow/user/kernels/pad_kernel.cpp index 51fb2c32c8c1a3b9a4304f278d7c7e29d49f3516..f059c5101d98af7c185c260ec26bd081eacebcfc 100644 --- a/oneflow/user/kernels/pad_kernel.cpp +++ b/oneflow/user/kernels/pad_kernel.cpp @@ -108,7 +108,7 @@ class PadKernel final : public user_op::OpKernel { #define REGISTER_PAD_KERNEL(dev, dtype) \ REGISTER_USER_KERNEL("pad").SetCreateFn<PadKernel<dev, dtype>>().SetIsMatchedHob( \ - (user_op::HobDeviceType() == dev) \ + (user_op::HobDeviceTag() == dev) \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value)); #ifdef WITH_CUDA @@ -164,10 +164,10 @@ class PadGradKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_PAD_GRAD_KERNEL(dev, dtype) \ - REGISTER_USER_KERNEL("pad_grad") \ - .SetCreateFn<PadGradKernel<dev, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == dev) \ +#define REGISTER_PAD_GRAD_KERNEL(dev, dtype) \ + REGISTER_USER_KERNEL("pad_grad") \ + .SetCreateFn<PadGradKernel<dev, dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == dev) \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)); #ifdef WITH_CUDA diff --git a/oneflow/user/kernels/pool_cpu_kernel.cpp b/oneflow/user/kernels/pool_cpu_kernel.cpp index 25776758668531ecac27e498930b627dfe87bd4b..34cbcf1635defecca3b0f3898839f65d96674891 100644 --- a/oneflow/user/kernels/pool_cpu_kernel.cpp +++ b/oneflow/user/kernels/pool_cpu_kernel.cpp @@ -580,51 +580,51 @@ class MaxPool3DGradCpuKernel final : public user_op::OpKernel { #define REGISTER_POOL_CPU_KERNEL(dtype) \ REGISTER_USER_KERNEL("avg_pool_1d") \ .SetCreateFn<AvgPool1DCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("avg_pool_1d_grad") \ .SetCreateFn<AvgPool1DGradCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("avg_pool_2d") \ .SetCreateFn<AvgPool2DCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("avg_pool_2d_grad") \ .SetCreateFn<AvgPool2DGradCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("avg_pool_3d") \ .SetCreateFn<AvgPool3DCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + 
.SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("avg_pool_3d_grad") \ .SetCreateFn<AvgPool3DGradCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("max_pool_1d") \ .SetCreateFn<MaxPool1DCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("max_pool_1d_grad") \ .SetCreateFn<MaxPool1DGradCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("max_pool_2d") \ .SetCreateFn<MaxPool2DCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("max_pool_2d_grad") \ .SetCreateFn<MaxPool2DGradCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("max_pool_3d") \ .SetCreateFn<MaxPool3DCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("max_pool_3d_grad") \ .SetCreateFn<MaxPool3DGradCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); REGISTER_POOL_CPU_KERNEL(float) diff --git a/oneflow/user/kernels/pool_gpu_kernel.cpp b/oneflow/user/kernels/pool_gpu_kernel.cpp index 916d458c403938439ebc88ffac0253b339f0a929..3734d4befccf8cd0540b71de291be0d498bb1144 100644 --- a/oneflow/user/kernels/pool_gpu_kernel.cpp +++ b/oneflow/user/kernels/pool_gpu_kernel.cpp @@ -389,51 +389,51 @@ class MaxPool3DGradGpuKernel final : public user_op::OpKernel { #define REGISTER_POOL_GPU_KERNEL(dtype) \ REGISTER_USER_KERNEL("avg_pool_1d") \ .SetCreateFn<AvgPool1DGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("avg_pool_1d_grad") \ .SetCreateFn<AvgPool1DGradGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("avg_pool_2d") \ .SetCreateFn<AvgPool2DGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("avg_pool_2d_grad") \ .SetCreateFn<AvgPool2DGradGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & 
(user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("avg_pool_3d") \ .SetCreateFn<AvgPool3DGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("avg_pool_3d_grad") \ .SetCreateFn<AvgPool3DGradGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("max_pool_1d") \ .SetCreateFn<MaxPool1DGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("max_pool_1d_grad") \ .SetCreateFn<MaxPool1DGradGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("max_pool_2d") \ .SetCreateFn<MaxPool2DGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("max_pool_2d_grad") \ .SetCreateFn<MaxPool2DGradGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("max_pool_3d") \ .SetCreateFn<MaxPool3DGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("max_pool_3d_grad") \ .SetCreateFn<MaxPool3DGradGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("x", 0) == GetDataType<dtype>::value)); REGISTER_POOL_GPU_KERNEL(float) diff --git a/oneflow/user/kernels/prelu_kernel.cpp b/oneflow/user/kernels/prelu_kernel.cpp index 89f4582446a5c90fcf79f981fd4d50fb6c2ad647..21092e6d5c5d9776eb03463d646d01bdc761ca27 100644 --- a/oneflow/user/kernels/prelu_kernel.cpp +++ b/oneflow/user/kernels/prelu_kernel.cpp @@ -49,7 +49,7 @@ class CpuPReluKernel final : public user_op::OpKernel { #define REGISTER_CPU_PRELU_KERNEL(dtype) \ REGISTER_USER_KERNEL("prelu") \ .SetCreateFn<CpuPReluKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ const Shape* in_shape = ctx->Shape4ArgNameAndIndex("x", 0); \ @@ -92,7 +92,7 @@ class CpuPReluXGradKernel final : public user_op::OpKernel { #define REGISTER_CPU_PRELU_X_GRAD_KERNEL(dtype) \ REGISTER_USER_KERNEL("prelu_x_grad") \ .SetCreateFn<CpuPReluXGradKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ const Shape* in_shape 
= ctx->Shape4ArgNameAndIndex("x", 0); \ @@ -137,7 +137,7 @@ class CpuPReluAlphaGradKernel final : public user_op::OpKernel { #define REGISTER_CPU_PRELU_ALPHA_GRAD_KERNEL(dtype) \ REGISTER_USER_KERNEL("prelu_alpha_grad") \ .SetCreateFn<CpuPReluAlphaGradKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("alpha_diff", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ const Shape* in_shape = ctx->Shape4ArgNameAndIndex("x", 0); \ diff --git a/oneflow/user/kernels/prelu_kernel.cu b/oneflow/user/kernels/prelu_kernel.cu index 9179a44611462ef5c22c2bdf67fedaa2580b8856..51a2a769a5e0a17b2963777258babbaa695f2f44 100644 --- a/oneflow/user/kernels/prelu_kernel.cu +++ b/oneflow/user/kernels/prelu_kernel.cu @@ -67,7 +67,7 @@ class GpuPReluKernel final : public user_op::OpKernel { #define REGISTER_GPU_PRELU_KERNEL(dtype) \ REGISTER_USER_KERNEL("prelu") \ .SetCreateFn<GpuPReluKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ const Shape* in_shape = ctx->Shape4ArgNameAndIndex("x", 0); \ @@ -106,7 +106,7 @@ class GpuPReluXGradKernel final : public user_op::OpKernel { #define REGISTER_GPU_PRELU_X_GRAD_KERNEL(dtype) \ REGISTER_USER_KERNEL("prelu_x_grad") \ .SetCreateFn<GpuPReluXGradKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ const Shape* in_shape = ctx->Shape4ArgNameAndIndex("x", 0); \ @@ -148,7 +148,7 @@ class GpuPReluAlphaGradKernel final : public user_op::OpKernel { #define REGISTER_GPU_PRELU_ALPHA_GRAD_KERNEL(dtype) \ REGISTER_USER_KERNEL("prelu_alpha_grad") \ .SetCreateFn<GpuPReluAlphaGradKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("alpha_diff", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ const Shape* in_shape = ctx->Shape4ArgNameAndIndex("x", 0); \ diff --git a/oneflow/user/kernels/radix_sort_top_k_kernel.cu b/oneflow/user/kernels/radix_sort_top_k_kernel.cu index 9daa98fbeb696e0e4be95ef2517f2935451fa439..bb50baa3b9ba11aeca6170a4d2abace3211fbc02 100644 --- a/oneflow/user/kernels/radix_sort_top_k_kernel.cu +++ b/oneflow/user/kernels/radix_sort_top_k_kernel.cu @@ -107,30 +107,29 @@ class GpuRadixSortTopKKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_GPU_RADIX_SORT_TOP_K_KERNEL(dtype) \ - REGISTER_USER_KERNEL("top_k") \ - .SetCreateFn<GpuRadixSortTopKKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ - & (user_op::HobAttr<int32_t>("k") > 128) \ - & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)) \ - .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ - const Shape* in_shape = ctx->Shape4ArgNameAndIndex("in", 0); \ - const int32_t elem_cnt = in_shape->elem_cnt(); \ - const int32_t instance_size = in_shape->dim_vec().back(); \ - const int32_t instance_num = elem_cnt / instance_size; \ - \ - /* Sorted In*/ \ - const int32_t 
sorted_in_aligned_bytes = GetCudaAlignedSize(elem_cnt * sizeof(dtype)); \ - /* Indices */ \ - const int32_t indices_aligned_bytes = GetCudaAlignedSize(elem_cnt * sizeof(int32_t)); \ - /* Sorted Indices */ \ - const int32_t sorted_indices_aligned_bytes = indices_aligned_bytes; \ - /* CUB Temp Storage */ \ - int32_t temp_storage_bytes = \ - InferTempStorageForSortPairsDescending<dtype, int32_t>(instance_num, instance_size); \ - \ - return sorted_in_aligned_bytes + indices_aligned_bytes + sorted_indices_aligned_bytes \ - + temp_storage_bytes; \ +#define REGISTER_GPU_RADIX_SORT_TOP_K_KERNEL(dtype) \ + REGISTER_USER_KERNEL("top_k") \ + .SetCreateFn<GpuRadixSortTopKKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") & (user_op::HobAttr<int32_t>("k") > 128) \ + & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)) \ + .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ + const Shape* in_shape = ctx->Shape4ArgNameAndIndex("in", 0); \ + const int32_t elem_cnt = in_shape->elem_cnt(); \ + const int32_t instance_size = in_shape->dim_vec().back(); \ + const int32_t instance_num = elem_cnt / instance_size; \ + \ + /* Sorted In*/ \ + const int32_t sorted_in_aligned_bytes = GetCudaAlignedSize(elem_cnt * sizeof(dtype)); \ + /* Indices */ \ + const int32_t indices_aligned_bytes = GetCudaAlignedSize(elem_cnt * sizeof(int32_t)); \ + /* Sorted Indices */ \ + const int32_t sorted_indices_aligned_bytes = indices_aligned_bytes; \ + /* CUB Temp Storage */ \ + int32_t temp_storage_bytes = \ + InferTempStorageForSortPairsDescending<dtype, int32_t>(instance_num, instance_size); \ + \ + return sorted_in_aligned_bytes + indices_aligned_bytes + sorted_indices_aligned_bytes \ + + temp_storage_bytes; \ }); REGISTER_GPU_RADIX_SORT_TOP_K_KERNEL(float) diff --git a/oneflow/user/kernels/reduce_kernel.cpp b/oneflow/user/kernels/reduce_kernel.cpp index 2036c98c1dd5179b85f7b757cd925b74d2bab037..3360dd3bb0c4d612ea04e5f3049c08f41cb0f38a 100644 --- a/oneflow/user/kernels/reduce_kernel.cpp +++ b/oneflow/user/kernels/reduce_kernel.cpp @@ -48,7 +48,7 @@ class ReduceKernel final : public user_op::OpKernel { #define REGISTER_REDUCE_XPU_KERNEL(op_name, binary_func, device, dtype) \ REGISTER_USER_KERNEL(op_name) \ .SetCreateFn<ReduceKernel<binary_func, device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("output_tensor", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ const Shape* in_shape = ctx->Shape4ArgNameAndIndex("input_tensor", 0); \ diff --git a/oneflow/user/kernels/reduce_like_kernels.cpp b/oneflow/user/kernels/reduce_like_kernels.cpp index 8a00ac2896ecbc53bf47be82909de2f8b0c8e9e7..85fb9658f38c4e9ca41d06c9871cb46d4e0e5a98 100644 --- a/oneflow/user/kernels/reduce_like_kernels.cpp +++ b/oneflow/user/kernels/reduce_like_kernels.cpp @@ -61,7 +61,7 @@ class ReduceSumLikeOpKernel final : public user_op::OpKernel { #define REGISTER_REDUCE_SUM_LIKE_KERNEL(device, data_type_pair) \ REGISTER_USER_KERNEL("reduce_sum_like") \ .SetCreateFn<ReduceSumLikeOpKernel<device, OF_PP_PAIR_FIRST(data_type_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("y", 0) == OF_PP_PAIR_SECOND(data_type_pair))) \ .SetInferTmpSizeFn(ReduceSumLikeInferTmpSize); diff --git a/oneflow/user/kernels/relu_kernel.cpp b/oneflow/user/kernels/relu_kernel.cpp index 
53dfdb9b5540482ca5592beb44989335da5753f6..759bf4b39bba429f0db4af3e0ac04509c2df6744 100644 --- a/oneflow/user/kernels/relu_kernel.cpp +++ b/oneflow/user/kernels/relu_kernel.cpp @@ -39,7 +39,7 @@ class ReluKernel final : public user_op::OpKernel { #define REGISTER_RELU_KERNEL(device, dtype) \ REGISTER_USER_KERNEL("relu") \ .SetCreateFn<ReluKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ @@ -76,7 +76,7 @@ class ReluGradKernel final : public user_op::OpKernel { #define REGISTER_RELU_GRAD_KERNEL(device, dtype) \ REGISTER_USER_KERNEL("relu_grad") \ .SetCreateFn<ReluGradKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ diff --git a/oneflow/user/kernels/reshape_kernel.cpp b/oneflow/user/kernels/reshape_kernel.cpp index e854ac5332dbf3a18aeb59e9274892d8024a533f..7a1f771ed7a1361a0c915f44f8b80615a5455676 100644 --- a/oneflow/user/kernels/reshape_kernel.cpp +++ b/oneflow/user/kernels/reshape_kernel.cpp @@ -21,7 +21,7 @@ namespace oneflow { #define REGISTER_RESHAPE_KERNEL(device) \ REGISTER_USER_KERNEL("reshape") \ .SetCreateFn<CopyDataContentKernel<device>>() \ - .SetIsMatchedHob(user_op::HobDeviceType() == device) \ + .SetIsMatchedHob(user_op::HobDeviceTag() == device) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ OF_RETURN_IF_ERROR(AddInplaceArgPairFn("out", 0, "in", 0, false)); \ diff --git a/oneflow/user/kernels/reshape_like_kernel.cpp b/oneflow/user/kernels/reshape_like_kernel.cpp index 4f5dc945519235bf156b079e1439085e1b3120b1..cf22daa70d57abdd0bc54a60bf2be0d0be9f101e 100644 --- a/oneflow/user/kernels/reshape_like_kernel.cpp +++ b/oneflow/user/kernels/reshape_like_kernel.cpp @@ -21,7 +21,7 @@ namespace oneflow { #define REGISTER_RESHAPE_LIKE_KERNEL(D) \ REGISTER_USER_KERNEL("reshape_like") \ .SetCreateFn<CopyDataContentKernel<DeviceType::D>>() \ - .SetIsMatchedHob(user_op::HobDeviceType() == DeviceType::D) \ + .SetIsMatchedHob(user_op::HobDeviceTag() == DeviceType::D) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ OF_RETURN_IF_ERROR(AddInplaceArgPairFn("out", 0, "in", 0, false)); \ diff --git a/oneflow/user/kernels/same_padding_kernel.cpp b/oneflow/user/kernels/same_padding_kernel.cpp index 9929d0df0fdbed439f626439e2de520db972c8f7..138fe30d3c520e05e5680c39ef8ccf8368a0a2ba 100644 --- a/oneflow/user/kernels/same_padding_kernel.cpp +++ b/oneflow/user/kernels/same_padding_kernel.cpp @@ -81,10 +81,10 @@ class SamePaddingKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_SAME_PADDING_KERNEL(dev, dtype) \ - REGISTER_USER_KERNEL("same_padding") \ - .SetCreateFn<SamePaddingKernel<dev, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == dev) \ +#define REGISTER_SAME_PADDING_KERNEL(dev, dtype) \ + REGISTER_USER_KERNEL("same_padding") \ + .SetCreateFn<SamePaddingKernel<dev, dtype>>() \ + 
.SetIsMatchedHob((user_op::HobDeviceTag() == dev) \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value)); #ifdef WITH_CUDA @@ -159,10 +159,10 @@ class SamePaddingGradKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_SAME_PADDING_GRAD_KERNEL(dev, dtype) \ - REGISTER_USER_KERNEL("same_padding_grad") \ - .SetCreateFn<SamePaddingGradKernel<dev, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == dev) \ +#define REGISTER_SAME_PADDING_GRAD_KERNEL(dev, dtype) \ + REGISTER_USER_KERNEL("same_padding_grad") \ + .SetCreateFn<SamePaddingGradKernel<dev, dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == dev) \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)); #ifdef WITH_CUDA diff --git a/oneflow/user/kernels/scalar_add_kernel.cpp b/oneflow/user/kernels/scalar_add_kernel.cpp index 8a17a70af7a0620cbe90f4a8475e14a4b6fb33e7..08544d2c897644bac7c3bcac5e4f683f77d85a75 100644 --- a/oneflow/user/kernels/scalar_add_kernel.cpp +++ b/oneflow/user/kernels/scalar_add_kernel.cpp @@ -48,7 +48,7 @@ class ScalarAddUserKernel final : public user_op::OpKernel { #define REGISTER_KERNEL(kernel_device_type, dtype) \ REGISTER_USER_KERNEL("scalar_add") \ .SetCreateFn<ScalarAddUserKernel<DeviceType::k##kernel_device_type, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::k##kernel_device_type) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == DeviceType::k##kernel_device_type) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ diff --git a/oneflow/user/kernels/scalar_by_tensor_kernel.cpp b/oneflow/user/kernels/scalar_by_tensor_kernel.cpp index efd4fae0ff3706f6a4d3d85e96d0ad0c238214a6..016844ca8b7798514e4b715408a4dfb80ebedffe 100644 --- a/oneflow/user/kernels/scalar_by_tensor_kernel.cpp +++ b/oneflow/user/kernels/scalar_by_tensor_kernel.cpp @@ -84,7 +84,7 @@ class ScalarAddByTensorKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL(OF_PP_PAIR_FIRST(scalar_by_tensor_pair)) \ .SetCreateFn<ScalarAddByTensorKernel<OF_PP_PAIR_SECOND(scalar_by_tensor_pair), device, \ OF_PP_PAIR_FIRST(dtype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("x", 0) == OF_PP_PAIR_SECOND(dtype_pair))) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ diff --git a/oneflow/user/kernels/scalar_mul_kernel.cpp b/oneflow/user/kernels/scalar_mul_kernel.cpp index f4bb2a7b2a578f8632f990bee339bfdf2e2ca93a..db19ae498acf6071c2f85d41177629efbb884454 100644 --- a/oneflow/user/kernels/scalar_mul_kernel.cpp +++ b/oneflow/user/kernels/scalar_mul_kernel.cpp @@ -49,7 +49,7 @@ class ScalarMulUserKernel final : public user_op::OpKernel { #define REGISTER_KERNEL(kernel_device_type, dtype) \ REGISTER_USER_KERNEL("scalar_mul") \ .SetCreateFn<ScalarMulUserKernel<DeviceType::k##kernel_device_type, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::k##kernel_device_type) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == DeviceType::k##kernel_device_type) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ diff --git 
a/oneflow/user/kernels/sigmoid_kernel.cpp b/oneflow/user/kernels/sigmoid_kernel.cpp index 9f34f0070a26cf6bbf39c4116bbafa9f4169565f..65c8b4cd692cd5f3ebf49b33211f4f1348e2e2a3 100644 --- a/oneflow/user/kernels/sigmoid_kernel.cpp +++ b/oneflow/user/kernels/sigmoid_kernel.cpp @@ -39,7 +39,7 @@ class SigmoidKernel final : public user_op::OpKernel { #define REGISTER_SIGMOID_KERNEL(device, dtype) \ REGISTER_USER_KERNEL("sigmoid") \ .SetCreateFn<SigmoidKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ @@ -76,7 +76,7 @@ class SigmoidGradKernel final : public user_op::OpKernel { #define REGISTER_SIGMOID_GRAD_KERNEL(device, dtype) \ REGISTER_USER_KERNEL("sigmoid_grad") \ .SetCreateFn<SigmoidGradKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ diff --git a/oneflow/user/kernels/slice_kernel.cu b/oneflow/user/kernels/slice_kernel.cu index 45668f5b266fdb836e5c5a625f932777bff15785..7c8cc019341dcecc6d64fabb7f34820b78498d3b 100644 --- a/oneflow/user/kernels/slice_kernel.cu +++ b/oneflow/user/kernels/slice_kernel.cu @@ -182,11 +182,11 @@ class SliceGradGpuKernel final : public user_op::OpKernel { #define REGISTER_SLICE_GPU_KERNEL(dtype) \ REGISTER_USER_KERNEL("slice_v2") \ .SetCreateFn<SliceGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value)); \ REGISTER_USER_KERNEL("slice_grad_v2") \ .SetCreateFn<SliceGradGpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)); REGISTER_SLICE_GPU_KERNEL(float) diff --git a/oneflow/user/kernels/smooth_l1_loss_kernel.cpp b/oneflow/user/kernels/smooth_l1_loss_kernel.cpp index dc71a79352d2a06cdd65f2985bd7e54c853c48a1..967e494519e08e1be048b61d70f436324df83ce6 100644 --- a/oneflow/user/kernels/smooth_l1_loss_kernel.cpp +++ b/oneflow/user/kernels/smooth_l1_loss_kernel.cpp @@ -43,10 +43,10 @@ class SmoothL1LossCPUKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_SMOOTH_L1_LOSS_CPU_KERNEL(dtype) \ - REGISTER_USER_KERNEL("smooth_l1_loss") \ - .SetCreateFn<SmoothL1LossCPUKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_SMOOTH_L1_LOSS_CPU_KERNEL(dtype) \ + REGISTER_USER_KERNEL("smooth_l1_loss") \ + .SetCreateFn<SmoothL1LossCPUKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("loss", 0) == GetDataType<dtype>::value)); REGISTER_SMOOTH_L1_LOSS_CPU_KERNEL(float) @@ -81,11 +81,11 @@ class SmoothL1LossGradCpuKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_SMOOTH_L1_LOSS_GRAD_CPU_KERNEL(dtype) \ - REGISTER_USER_KERNEL("smooth_l1_loss_grad") \ - 
.SetCreateFn<SmoothL1LossGradCpuKernel<dtype>>() \ - .SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_SMOOTH_L1_LOSS_GRAD_CPU_KERNEL(dtype) \ + REGISTER_USER_KERNEL("smooth_l1_loss_grad") \ + .SetCreateFn<SmoothL1LossGradCpuKernel<dtype>>() \ + .SetIsMatchedHob( \ + (user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("prediction_grad", 0) == GetDataType<dtype>::value)); REGISTER_SMOOTH_L1_LOSS_GRAD_CPU_KERNEL(float) diff --git a/oneflow/user/kernels/smooth_l1_loss_kernel.cu b/oneflow/user/kernels/smooth_l1_loss_kernel.cu index 0fbc890eaeaeec1f73cbf0a261875bad06e855b1..64cccc99f579f10b850c7d31202993adb4d63887 100644 --- a/oneflow/user/kernels/smooth_l1_loss_kernel.cu +++ b/oneflow/user/kernels/smooth_l1_loss_kernel.cu @@ -72,10 +72,10 @@ class SmoothL1LossGPUKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_SMOOTH_L1_LOSS_GPU_KERNEL(dtype) \ - REGISTER_USER_KERNEL("smooth_l1_loss") \ - .SetCreateFn<SmoothL1LossGPUKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ +#define REGISTER_SMOOTH_L1_LOSS_GPU_KERNEL(dtype) \ + REGISTER_USER_KERNEL("smooth_l1_loss") \ + .SetCreateFn<SmoothL1LossGPUKernel<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("loss", 0) == GetDataType<dtype>::value)); REGISTER_SMOOTH_L1_LOSS_GPU_KERNEL(float) @@ -103,11 +103,11 @@ class SmoothL1LossGradGpuKernel final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return false; } }; -#define REGISTER_SMOOTH_L1_LOSS_GRAD_GPU_KERNEL(dtype) \ - REGISTER_USER_KERNEL("smooth_l1_loss_grad") \ - .SetCreateFn<SmoothL1LossGradGpuKernel<dtype>>() \ - .SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kGPU) \ +#define REGISTER_SMOOTH_L1_LOSS_GRAD_GPU_KERNEL(dtype) \ + REGISTER_USER_KERNEL("smooth_l1_loss_grad") \ + .SetCreateFn<SmoothL1LossGradGpuKernel<dtype>>() \ + .SetIsMatchedHob( \ + (user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("prediction_grad", 0) == GetDataType<dtype>::value)); REGISTER_SMOOTH_L1_LOSS_GRAD_GPU_KERNEL(float) diff --git a/oneflow/user/kernels/softmax_cross_entropy_kernel.h b/oneflow/user/kernels/softmax_cross_entropy_kernel.h index 16411e4612d855362597a39ffbd76adb98db3318..e599a15d5820af1170bcf7737f60c265fa6a399c 100644 --- a/oneflow/user/kernels/softmax_cross_entropy_kernel.h +++ b/oneflow/user/kernels/softmax_cross_entropy_kernel.h @@ -57,7 +57,7 @@ class SoftmaxCrossEntropyKernel final : public user_op::OpKernel { #define REGISTER_SOFTMAX_CROSS_ENTROPY_KERNEL(device_type_v, dtype_pair) \ REGISTER_USER_KERNEL("softmax_cross_entropy") \ .SetCreateFn<SoftmaxCrossEntropyKernel<device_type_v, OF_PP_PAIR_FIRST(dtype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device_type_v) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device_type_v) \ & (user_op::HobDataType("label", 0) == OF_PP_PAIR_SECOND(dtype_pair)) \ & (user_op::HobDataType("out", 0) == OF_PP_PAIR_SECOND(dtype_pair))) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ @@ -92,7 +92,7 @@ class SoftmaxCrossEntropyGradKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("softmax_cross_entropy_grad") \ .SetCreateFn<SoftmaxCrossEntropyGradKernel<device_type_v, OF_PP_PAIR_FIRST(dtype_pair)>>() \ .SetIsMatchedHob( \ - (user_op::HobDeviceType() == device_type_v) \ + (user_op::HobDeviceTag() == device_type_v) \ & (user_op::HobDataType("label", 0) == 
OF_PP_PAIR_SECOND(dtype_pair)) \ & (user_op::HobDataType("prediction_diff", 0) == OF_PP_PAIR_SECOND(dtype_pair))) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ diff --git a/oneflow/user/kernels/softmax_kernel.cpp b/oneflow/user/kernels/softmax_kernel.cpp index 9189e6c4d248466a221d54df77bda39ce637d78c..b7f69bd9012351e85561d085439399d1847b435e 100644 --- a/oneflow/user/kernels/softmax_kernel.cpp +++ b/oneflow/user/kernels/softmax_kernel.cpp @@ -61,7 +61,7 @@ user_op::InferTmpSizeFn GenInferTmpSizeFn(const std::string& bn) { #define REGISTER_SOFTMAX_KERNEL(device, dtype) \ REGISTER_USER_KERNEL("softmax") \ .SetCreateFn<SoftmaxKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn(GenInferTmpSizeFn<dtype>("in")); @@ -104,7 +104,7 @@ class SoftmaxGradKernel final : public user_op::OpKernel { #define REGISTER_SOFTMAX_GRAD_KERNEL(device, dtype) \ REGISTER_USER_KERNEL("softmax_grad") \ .SetCreateFn<SoftmaxGradKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn(GenInferTmpSizeFn<dtype>("dx")); diff --git a/oneflow/user/kernels/sort_kernel.cpp b/oneflow/user/kernels/sort_kernel.cpp index 5bedb432112c52d64b8a7d7022210bfe5b86b199..57b30308b5d8ecaeca1168f741ab8ac3291d0591 100644 --- a/oneflow/user/kernels/sort_kernel.cpp +++ b/oneflow/user/kernels/sort_kernel.cpp @@ -52,7 +52,7 @@ class CpuSortKernel final : public user_op::OpKernel { #define REGISTER_CPU_SORT_KERNEL(dtype) \ REGISTER_USER_KERNEL("sort").SetCreateFn<CpuSortKernel<dtype>>().SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kCPU) \ + (user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)); REGISTER_CPU_SORT_KERNEL(float) diff --git a/oneflow/user/kernels/sort_kernel.cu b/oneflow/user/kernels/sort_kernel.cu index 8bd07555c6b54fe119c10b1a2920c345058fff6c..1e695c39a8b691776740f133950f2503a6c3ce16 100644 --- a/oneflow/user/kernels/sort_kernel.cu +++ b/oneflow/user/kernels/sort_kernel.cu @@ -54,7 +54,7 @@ class GpuSortKernel final : public user_op::OpKernel { #define REGISTER_GPU_SORT_KERNEL(dtype) \ REGISTER_USER_KERNEL("sort") \ .SetCreateFn<GpuSortKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ const Shape* in_shape = ctx->Shape4ArgNameAndIndex("in", 0); \ diff --git a/oneflow/user/kernels/sparse_cross_entropy_kernel.cpp b/oneflow/user/kernels/sparse_cross_entropy_kernel.cpp index f70639080d66a53b85e75caa18be1ffee81968ca..f0c6b5a69d44ad92e7c80e935ddd3f13d62e2fde 100644 --- a/oneflow/user/kernels/sparse_cross_entropy_kernel.cpp +++ b/oneflow/user/kernels/sparse_cross_entropy_kernel.cpp @@ -77,7 +77,7 @@ class SparseCrossEntropyMsKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL(kernel_name) \ .SetCreateFn<kernel_class<device_type_v, OF_PP_PAIR_FIRST(dtype_pair), \ OF_PP_PAIR_FIRST(ltype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device_type_v) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device_type_v) \ & (user_op::HobDataType("label", 0) == OF_PP_PAIR_SECOND(ltype_pair)) \ & 
(user_op::HobDataType("out", 0) == OF_PP_PAIR_SECOND(dtype_pair))); @@ -166,7 +166,7 @@ class SparseCrossEntropyMsGradKernel final : public user_op::OpKernel { .SetCreateFn<kernel_class<device_type_v, OF_PP_PAIR_FIRST(dtype_pair), \ OF_PP_PAIR_FIRST(ltype_pair)>>() \ .SetIsMatchedHob( \ - (user_op::HobDeviceType() == device_type_v) \ + (user_op::HobDeviceTag() == device_type_v) \ & (user_op::HobDataType("label", 0) == OF_PP_PAIR_SECOND(ltype_pair)) \ & (user_op::HobDataType("prediction_diff", 0) == OF_PP_PAIR_SECOND(dtype_pair))); diff --git a/oneflow/user/kernels/sparse_softmax_cross_entropy_kernel.cpp b/oneflow/user/kernels/sparse_softmax_cross_entropy_kernel.cpp index b56543eb945c6badaafb1276dad8b489e43afdaa..5c1164966a9ab4d6618963d247fcc4bbaf342c25 100644 --- a/oneflow/user/kernels/sparse_softmax_cross_entropy_kernel.cpp +++ b/oneflow/user/kernels/sparse_softmax_cross_entropy_kernel.cpp @@ -67,7 +67,7 @@ class SparseSoftmaxCrossEntropyMsKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL(kernel_name) \ .SetCreateFn<kernel_class<device_type_v, OF_PP_PAIR_FIRST(dtype_pair), \ OF_PP_PAIR_FIRST(ltype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device_type_v) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device_type_v) \ & (user_op::HobDataType("label", 0) == OF_PP_PAIR_SECOND(ltype_pair)) \ & (user_op::HobDataType("out", 0) == OF_PP_PAIR_SECOND(dtype_pair))) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ @@ -158,7 +158,7 @@ class SparseSoftmaxCrossEntropyMsGradKernel final : public user_op::OpKernel { .SetCreateFn<kernel_class<device_type_v, OF_PP_PAIR_FIRST(dtype_pair), \ OF_PP_PAIR_FIRST(ltype_pair)>>() \ .SetIsMatchedHob( \ - (user_op::HobDeviceType() == device_type_v) \ + (user_op::HobDeviceTag() == device_type_v) \ & (user_op::HobDataType("label", 0) == OF_PP_PAIR_SECOND(ltype_pair)) \ & (user_op::HobDataType("prediction_diff", 0) == OF_PP_PAIR_SECOND(dtype_pair))) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ diff --git a/oneflow/user/kernels/split_like_kernel.cpp b/oneflow/user/kernels/split_like_kernel.cpp index 641370d864e27f54e60fa26f1a609a5b4f17f3cd..4bcd10268de204da6c66cd43b6a1721006a93d8d 100644 --- a/oneflow/user/kernels/split_like_kernel.cpp +++ b/oneflow/user/kernels/split_like_kernel.cpp @@ -77,10 +77,10 @@ class SplitLikeKernel final : public user_op::OpKernel { } // namespace -#define REGISTER_SPLIT_LIKE_KERNEL(device, dtype) \ - REGISTER_USER_KERNEL("split_like") \ - .SetCreateFn<SplitLikeKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ +#define REGISTER_SPLIT_LIKE_KERNEL(device, dtype) \ + REGISTER_USER_KERNEL("split_like") \ + .SetCreateFn<SplitLikeKernel<device, dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)); #define REGISTER_SPLIT_LIKE_KERNEL_WITH_DEVICE(device) \ diff --git a/oneflow/user/kernels/squeeze_kernel.cpp b/oneflow/user/kernels/squeeze_kernel.cpp index 9e4502e877bf289c3388580687db064bc7782501..8c74c848290e1f19aaf4cfe36357b2764e0c60fe 100644 --- a/oneflow/user/kernels/squeeze_kernel.cpp +++ b/oneflow/user/kernels/squeeze_kernel.cpp @@ -21,7 +21,7 @@ namespace oneflow { #define REGISTER_SQUEEZE_KERNEL(D) \ REGISTER_USER_KERNEL("squeeze") \ .SetCreateFn<CopyDataContentKernel<DeviceType::D>>() \ - .SetIsMatchedHob(user_op::HobDeviceType() == DeviceType::D) \ + .SetIsMatchedHob(user_op::HobDeviceTag() == DeviceType::D) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ 
user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ OF_RETURN_IF_ERROR(AddInplaceArgPairFn("out", 0, "in", 0, false)); \ diff --git a/oneflow/user/kernels/summary_kernels.cpp b/oneflow/user/kernels/summary_kernels.cpp index 1e6ec552032bd0ecbe7ac8b21e6cfc1b0761bdfe..5c762eb957e4c645b6b0d8118bac2acbd8fcd374 100644 --- a/oneflow/user/kernels/summary_kernels.cpp +++ b/oneflow/user/kernels/summary_kernels.cpp @@ -51,10 +51,10 @@ class SummaryWriteScalar final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return true; } }; -#define REGISTER_SCALAR_USER_KERNEL(dtype) \ - REGISTER_USER_KERNEL("summary_write_scalar") \ - .SetCreateFn<SummaryWriteScalar<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_SCALAR_USER_KERNEL(dtype) \ + REGISTER_USER_KERNEL("summary_write_scalar") \ + .SetCreateFn<SummaryWriteScalar<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)); REGISTER_SCALAR_USER_KERNEL(double) @@ -77,7 +77,7 @@ class CreateSummaryWriter final : public user_op::OpKernel { REGISTER_USER_KERNEL("create_summary_writer") .SetCreateFn<CreateSummaryWriter>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU)); + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu")); class FlushSummaryWriter final : public user_op::OpKernel { public: @@ -93,7 +93,7 @@ class FlushSummaryWriter final : public user_op::OpKernel { REGISTER_USER_KERNEL("flush_summary_writer") .SetCreateFn<FlushSummaryWriter>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU)); + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu")); template<typename T> class SummaryWriteHistogram final : public user_op::OpKernel { @@ -117,10 +117,10 @@ class SummaryWriteHistogram final : public user_op::OpKernel { bool AlwaysComputeWhenAllOutputsEmpty() const override { return true; } }; -#define REGISTER_HISTOGRAM_USER_KERNEL(dtype) \ - REGISTER_USER_KERNEL("summary_write_histogram") \ - .SetCreateFn<SummaryWriteHistogram<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ +#define REGISTER_HISTOGRAM_USER_KERNEL(dtype) \ + REGISTER_USER_KERNEL("summary_write_histogram") \ + .SetCreateFn<SummaryWriteHistogram<dtype>>() \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)); REGISTER_HISTOGRAM_USER_KERNEL(double) @@ -152,7 +152,7 @@ class SummaryWritePb final : public user_op::OpKernel { REGISTER_USER_KERNEL("summary_write_pb") .SetCreateFn<SummaryWritePb<int8_t>>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == GetDataType<int8_t>::value)); template<typename T> @@ -179,7 +179,7 @@ class SummaryWriteImage final : public user_op::OpKernel { REGISTER_USER_KERNEL("summary_write_image") .SetCreateFn<SummaryWriteImage<uint8_t>>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == GetDataType<uint8_t>::value)); } // namespace summary diff --git a/oneflow/user/kernels/tanh_kernel.cpp b/oneflow/user/kernels/tanh_kernel.cpp index bac1e5cfd43a642b5d971180401b7f7978964b74..abf0af1f2e8be6560effcdd7178a3764f1bc3cbb 100644 --- a/oneflow/user/kernels/tanh_kernel.cpp +++ b/oneflow/user/kernels/tanh_kernel.cpp @@ -39,7 +39,7 @@ class TanHKernel final : public 
user_op::OpKernel { #define REGISTER_TANH_KERNEL(device, dtype) \ REGISTER_USER_KERNEL("tanh") \ .SetCreateFn<TanHKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("out", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ @@ -76,7 +76,7 @@ class TanHGradKernel final : public user_op::OpKernel { #define REGISTER_TANH_GRAD_KERNEL(device, dtype) \ REGISTER_USER_KERNEL("tanh_grad") \ .SetCreateFn<TanHGradKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value)) \ .SetInplaceProposalFn([](const user_op::InferContext&, \ user_op::AddInplaceArgPair AddInplaceArgPairFn) -> Maybe<void> { \ diff --git a/oneflow/user/kernels/tensor_buffer_kernels.cpp b/oneflow/user/kernels/tensor_buffer_kernels.cpp index b0c8b0d5b0cdbc4cc2f4d0266379e081e2195703..d0255e0e378d3e06c6a4beb6af66c1369f7212f2 100644 --- a/oneflow/user/kernels/tensor_buffer_kernels.cpp +++ b/oneflow/user/kernels/tensor_buffer_kernels.cpp @@ -59,7 +59,7 @@ class TensorBufferToTensorKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("tensor_buffer_to_tensor") .SetCreateFn<TensorBufferToTensorKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("in", 0) == DataType::kTensorBuffer)); class TensorToTensorBufferKernel final : public user_op::OpKernel { @@ -101,7 +101,7 @@ class TensorToTensorBufferKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("tensor_to_tensor_buffer") .SetCreateFn<TensorToTensorBufferKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("out", 0) == DataType::kTensorBuffer)); } // namespace diff --git a/oneflow/user/kernels/test_kernels.cpp b/oneflow/user/kernels/test_kernels.cpp index 0c3ce7ba31c6af19e6c30ade0d38af2230da056b..583decec11bc212a39c703417e5e49cecf52850c 100644 --- a/oneflow/user/kernels/test_kernels.cpp +++ b/oneflow/user/kernels/test_kernels.cpp @@ -127,7 +127,7 @@ class TestSourceGpuKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("TestSource") .SetCreateFn<TestSourceGpuKernel>() - .SetIsMatchedHob(user_op::HobDeviceType() == DeviceType::kGPU) + .SetIsMatchedHob(user_op::HobDeviceTag() == "gpu") .SetInferTmpSizeFn([](user_op::InferContext*) { return 0; }); class TestMultiOutputOrderKernel final : public user_op::OpKernel { @@ -150,7 +150,7 @@ class TestMultiOutputOrderKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("TestMultiOutputOrder") .SetCreateFn<TestMultiOutputOrderKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") & (user_op::HobDataType("in", 0) == DataType::kFloat)); class TestMultiInputFwKernel final : public user_op::OpKernel { @@ -170,7 +170,7 @@ class TestMultiInputFwKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("TestMultiInput") .SetCreateFn<TestMultiInputFwKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") & (user_op::HobDataType("x1", 0) == DataType::kFloat)); class TestMultiInputBwKernel final : public 
user_op::OpKernel { @@ -192,7 +192,7 @@ class TestMultiInputBwKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("TestMultiInputGrad") .SetCreateFn<TestMultiInputBwKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kGPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "gpu") & (user_op::HobDataType("x1", 0) == DataType::kFloat)); #endif @@ -233,7 +233,7 @@ class TestSourceKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("TestSource") .SetCreateFn<TestSourceKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("out", 0) == DataType::kFloat)) .SetInferTmpSizeFn([](user_op::InferContext*) { return 0; }); @@ -254,7 +254,7 @@ class TestSourceMultiGpuFixedOutNumKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("TestSourceMultiGpuFixedOutNum") .SetCreateFn<TestSourceMultiGpuFixedOutNumKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("out", 0) == DataType::kFloat)); class TestDynamicSourceKernel final : public user_op::OpKernel { @@ -273,7 +273,7 @@ class TestDynamicSourceKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("TestDynamicSource") .SetCreateFn<TestDynamicSourceKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("out", 0) == DataType::kFloat)); class TestRandomSourceKernel final : public user_op::OpKernel { @@ -301,7 +301,7 @@ class TestRandomSourceKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("TestRandomSource") .SetCreateFn<TestRandomSourceKernel>() - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") & (user_op::HobDataType("out", 0) == DataType::kFloat)); class TestDataTypeAttrKernel final : public user_op::OpKernel { diff --git a/oneflow/user/kernels/top_k_kernel.cpp b/oneflow/user/kernels/top_k_kernel.cpp index 819ece8441ac109549ea8978e7007f9ffb624858..fab1b6324b511af9edc7141818aef71035ec754d 100644 --- a/oneflow/user/kernels/top_k_kernel.cpp +++ b/oneflow/user/kernels/top_k_kernel.cpp @@ -99,7 +99,7 @@ class TopKCpuKernel final : public user_op::OpKernel { #define REGISTER_CPU_TOP_K_KERNEL(dtype) \ REGISTER_USER_KERNEL("top_k") \ .SetCreateFn<TopKCpuKernel<dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == DeviceType::kCPU) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == "cpu") \ & (user_op::HobDataType("in", 0) == GetDataType<dtype>::value)) \ .SetInferTmpSizeFn([](user_op::InferContext* ctx) { \ const Shape* in_shape = ctx->Shape4ArgNameAndIndex("in", 0); \ diff --git a/oneflow/user/kernels/transpose_kernel.cpp b/oneflow/user/kernels/transpose_kernel.cpp index b433f3a0050f7392c164929096794c3fd4a4455e..5a4dbfc91d2db203cd308958c96c4ebd3d0e1c92 100644 --- a/oneflow/user/kernels/transpose_kernel.cpp +++ b/oneflow/user/kernels/transpose_kernel.cpp @@ -43,7 +43,7 @@ class TransposeKernel final : public OpKernel { #define REGISTER_TRANSPOSE_KERNEL(device, dtype) \ REGISTER_USER_KERNEL("transpose") \ .SetCreateFn<TransposeKernel<device, dtype>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("input", 0) == GetDataType<dtype>::value) \ & (user_op::HobDataType("output", 0) == GetDataType<dtype>::value)); diff
--git a/oneflow/user/kernels/two_stage_reduce_kernel.cpp b/oneflow/user/kernels/two_stage_reduce_kernel.cpp index d9cecaf63989d956300511271c16f0dd2de0c2d9..5bd1ecad471573cc2ac464d5a339dee00ff07239 100644 --- a/oneflow/user/kernels/two_stage_reduce_kernel.cpp +++ b/oneflow/user/kernels/two_stage_reduce_kernel.cpp @@ -97,7 +97,7 @@ user_op::InferTmpSizeFn GenDeviceStageInferTmpSizeFn() { #define REGISTER_REDUCE_DEVICE_STAGE_KERNEL(op_name, binary_func, device, dtype_pair) \ REGISTER_USER_KERNEL(op_name) \ .SetCreateFn<ReduceDeviceStageKernel<binary_func, device, OF_PP_PAIR_FIRST(dtype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("out", 0) == OF_PP_PAIR_SECOND(dtype_pair))) \ .SetInferTmpSizeFn(GenDeviceStageInferTmpSizeFn<OF_PP_PAIR_FIRST(dtype_pair)>()); @@ -155,7 +155,7 @@ user_op::InferTmpSizeFn GenDeviceStageGradInferTmpSizeFn() { #define REGISTER_REDUCE_DEVICE_STAGE_GRAD_KERNEL(op_name, device, dtype_pair) \ REGISTER_USER_KERNEL(op_name) \ .SetCreateFn<ReduceDeviceStageGradKernel<device, OF_PP_PAIR_FIRST(dtype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("in_diff", 0) == OF_PP_PAIR_SECOND(dtype_pair))) \ .SetInferTmpSizeFn(GenDeviceStageGradInferTmpSizeFn<OF_PP_PAIR_FIRST(dtype_pair)>()); @@ -196,7 +196,7 @@ class ReduceGlobalStageKernel final : public OpKernel { #define REGISTER_REDUCE_GLOBAL_STAGE_KERNEL(op_name, binary_func, device, dtype_pair) \ REGISTER_USER_KERNEL(op_name) \ .SetCreateFn<ReduceGlobalStageKernel<binary_func, device, OF_PP_PAIR_FIRST(dtype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("out", 0) == OF_PP_PAIR_SECOND(dtype_pair))) \ .SetInferTmpSizeFn([](InferContext* ctx) { \ const Shape* in_shape = ctx->Shape4ArgNameAndIndex("in", 0); \ @@ -296,7 +296,7 @@ user_op::InferTmpSizeFn GenGlobalStageGradInferTmpSizeFn() { #define REGISTER_REDUCE_GLOBAL_STAGE_GRAD_KERNEL(op_name, device, dtype_pair) \ REGISTER_USER_KERNEL(op_name) \ .SetCreateFn<ReduceGlobalStageGradKernel<device, OF_PP_PAIR_FIRST(dtype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("in_diff", 0) == OF_PP_PAIR_SECOND(dtype_pair))) \ .SetInferTmpSizeFn(GenGlobalStageGradInferTmpSizeFn<OF_PP_PAIR_FIRST(dtype_pair)>()); diff --git a/oneflow/user/kernels/unsorted_batch_segment_sum_kernel.cpp b/oneflow/user/kernels/unsorted_batch_segment_sum_kernel.cpp index 191c63b3e268f186dbe3341c0bca5e8c88cd1e7c..198f22d5cb1564f5ec5dc6c3f4e155fb19873d7b 100644 --- a/oneflow/user/kernels/unsorted_batch_segment_sum_kernel.cpp +++ b/oneflow/user/kernels/unsorted_batch_segment_sum_kernel.cpp @@ -58,7 +58,7 @@ class UnsortedBatchSegmentSumKernel final : public user_op::OpKernel { .SetCreateFn<UnsortedBatchSegmentSumKernel<device, OF_PP_PAIR_FIRST(out_dtype), \ OF_PP_PAIR_FIRST(segment_ids_dtype)>>() \ .SetIsMatchedHob( \ - (user_op::HobDeviceType() == device) \ + (user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("segment_ids", 0) == OF_PP_PAIR_SECOND(segment_ids_dtype)) \ & (user_op::HobDataType("out", 0) == OF_PP_PAIR_SECOND(out_dtype))); diff --git a/oneflow/user/kernels/unsorted_segment_sum_kernel.cpp b/oneflow/user/kernels/unsorted_segment_sum_kernel.cpp index 
83f65c4c2a40666494b6afe86b95681d78438228..4b7aea8c40eb4a41de86832f17bb5511d2ea271f 100644 --- a/oneflow/user/kernels/unsorted_segment_sum_kernel.cpp +++ b/oneflow/user/kernels/unsorted_segment_sum_kernel.cpp @@ -95,7 +95,7 @@ class UnsortedSegmentSumKernel final : public user_op::OpKernel { .SetCreateFn<UnsortedSegmentSumKernel<device, OF_PP_PAIR_FIRST(out_type), \ OF_PP_PAIR_FIRST(segment_ids_type)>>() \ .SetIsMatchedHob( \ - (user_op::HobDeviceType() == device) \ + (user_op::HobDeviceTag() == device) \ & (user_op::HobDataType("segment_ids", 0) == OF_PP_PAIR_SECOND(segment_ids_type)) \ & (user_op::HobDataType("out", 0) == OF_PP_PAIR_SECOND(out_type))); diff --git a/oneflow/user/kernels/upsample_kernel.cu b/oneflow/user/kernels/upsample_kernel.cu index 4e973ecb5828940692ffb5b9d2ea98c631ce0690..78387c493820b1f2c319ffaa8efc6a725725fea6 100644 --- a/oneflow/user/kernels/upsample_kernel.cu +++ b/oneflow/user/kernels/upsample_kernel.cu @@ -190,13 +190,13 @@ class UpsampleNearestGradGPUKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("upsample") \ .SetCreateFn<UpsampleNearestGPUKernel<dtype>>() \ .SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kGPU) \ + (user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value) \ & (user_op::HobAttr<std::string>("interpolation") == std::string("nearest"))); \ REGISTER_USER_KERNEL("upsample_grad") \ .SetCreateFn<UpsampleNearestGradGPUKernel<dtype>>() \ .SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kGPU) \ + (user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value) \ & (user_op::HobAttr<std::string>("interpolation") == std::string("nearest"))); @@ -262,13 +262,13 @@ class UpsampleBilinearGradGPUKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("upsample") \ .SetCreateFn<UpsampleBilinearGPUKernel<dtype>>() \ .SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kGPU) \ + (user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("y", 0) == GetDataType<dtype>::value) \ & (user_op::HobAttr<std::string>("interpolation") == std::string("bilinear"))); \ REGISTER_USER_KERNEL("upsample_grad") \ .SetCreateFn<UpsampleBilinearGradGPUKernel<dtype>>() \ .SetIsMatchedHob( \ - (user_op::HobDeviceType() == DeviceType::kGPU) \ + (user_op::HobDeviceTag() == "gpu") \ & (user_op::HobDataType("dx", 0) == GetDataType<dtype>::value) \ & (user_op::HobAttr<std::string>("interpolation") == std::string("bilinear"))); diff --git a/oneflow/user/kernels/where_kernel.cpp b/oneflow/user/kernels/where_kernel.cpp index 7f07807051d5d5cf5e3a657dcbcb84a44e27ab2d..ea10e1db15166e61c73930e43d7f8b6402095a88 100644 --- a/oneflow/user/kernels/where_kernel.cpp +++ b/oneflow/user/kernels/where_kernel.cpp @@ -41,7 +41,7 @@ class WhereKernel final : public user_op::OpKernel { REGISTER_USER_KERNEL("where") \ .SetCreateFn<WhereKernel<device_type_v, OF_PP_PAIR_FIRST(dtype_pair), \ OF_PP_PAIR_FIRST(ctype_pair)>>() \ - .SetIsMatchedHob((user_op::HobDeviceType() == device_type_v) \ + .SetIsMatchedHob((user_op::HobDeviceTag() == device_type_v) \ & (user_op::HobDataType("condition", 0) == OF_PP_PAIR_SECOND(ctype_pair)) \ & (user_op::HobDataType("out", 0) == OF_PP_PAIR_SECOND(dtype_pair))); diff --git a/oneflow/user/kernels/zero_like_kernel.cpp b/oneflow/user/kernels/zero_like_kernel.cpp index 7895296d66dba8e0bcc000403774987872571a38..d2f821f6e0d771363c32cb950e86f2b90eace781 100644 --- a/oneflow/user/kernels/zero_like_kernel.cpp +++ 
b/oneflow/user/kernels/zero_like_kernel.cpp @@ -36,7 +36,7 @@ class ZeroLikeKernel final : public user_op::OpKernel { #define REGISTER_ZERO_LIKE_KERNEL(device_type_v) \ REGISTER_USER_KERNEL("zero_like") \ .SetCreateFn<ZeroLikeKernel<device_type_v>>() \ - .SetIsMatchedHob(user_op::HobDeviceType() == device_type_v); + .SetIsMatchedHob(user_op::HobDeviceTag() == device_type_v); REGISTER_ZERO_LIKE_KERNEL(DeviceType::kCPU) #ifdef WITH_CUDA diff --git a/oneflow/xrt/api.cpp b/oneflow/xrt/api.cpp index 88cd1509f519135c75ca828f2c60553d7ad33629..99fd5886a7a1df33902b6f02953fd16de5878694 100644 --- a/oneflow/xrt/api.cpp +++ b/oneflow/xrt/api.cpp @@ -13,15 +13,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include "oneflow/core/operator/operator.h" // GenLogicalBlobName, GenLogicalBlobId +#include "oneflow/core/framework/to_string.h" #include "oneflow/xrt/api.h" +#include "oneflow/xrt/build_graph.h" +#include "oneflow/xrt/utility/env.h" #include "absl/strings/str_cat.h" #include "glog/logging.h" -#include "oneflow/core/operator/operator.h" // GenLogicalBlobName, GenLogicalBlobId -#include "oneflow/xrt/build_graph.h" -#include "oneflow/xrt/utility/env.h" - #include <fstream> #include <mutex> @@ -122,6 +122,11 @@ std::string ExtractOpTypeAsString(const OperatorConf &conf) { } } +XrtDevice DeviceTagToXrtDevice(const std::string &device_tag) { + DeviceType device_type = CHECK_JUST(DeviceType4DeviceTag(device_tag)); + return DeviceTypeToXrtDevice(device_type); +} + XrtDevice DeviceTypeToXrtDevice(const DeviceType &device_type) { switch (device_type) { case DeviceType::kGPU: return XrtDevice::GPU_CUDA; diff --git a/oneflow/xrt/api.h b/oneflow/xrt/api.h index 28c77d7b0b6d70ba0169bdbacd4a9f326c54fbf0..f4b44a379dc89ba0cc24a4286fc545d02db873b9 100644 --- a/oneflow/xrt/api.h +++ b/oneflow/xrt/api.h @@ -33,6 +33,8 @@ std::string ExtractOpTypeAsString(const OperatorConf &conf); XrtDevice DeviceTypeToXrtDevice(const DeviceType &device_type); +XrtDevice DeviceTagToXrtDevice(const std::string &device_tag); + DeviceType XrtDeviceToDeviceType(const XrtDevice &device); XrtEngine StringToXrtEngine(const std::string &engine); diff --git a/oneflow/xrt/build_graph.h b/oneflow/xrt/build_graph.h index 19a524fc9948305844fbfcfd994ad3da32fa90e7..465e8b2ae966b60fda26a03638b86e903bca77d2 100644 --- a/oneflow/xrt/build_graph.h +++ b/oneflow/xrt/build_graph.h @@ -53,7 +53,7 @@ class GraphBuilder { void SetupXrtNode(XrtNode *node, const OperatorConf &node_conf) const { node->set_name(node_conf.name()); node->set_type(ExtractOpTypeAsString(node_conf)); - node->set_device(DeviceTypeToXrtDevice(node_conf.device_type())); + node->set_device(DeviceTagToXrtDevice(node_conf.device_tag())); } void SetupXrtNode(XrtNode *node, const XrtLaunchOpConf::Argument &arg_conf) const { diff --git a/oneflow/xrt/launch_op.cpp b/oneflow/xrt/launch_op.cpp index 8bc43ca9eaa489c00766ac7a9283570ce4695f0d..068f98c8ae4417b99de88360288f456bdb41e960 100644 --- a/oneflow/xrt/launch_op.cpp +++ b/oneflow/xrt/launch_op.cpp @@ -13,13 +13,14 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "absl/strings/str_cat.h" -#include "absl/strings/str_split.h" - +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/job/sbp_signature_builder.h" #include "oneflow/xrt/api.h" #include "oneflow/xrt/launch_op.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" + namespace oneflow { void XrtLaunchOp::InitFromOpConf() { @@ -61,8 +62,9 @@ Maybe<void> XrtLaunchOp::InferBlobDescs( // Run InferShape pass const auto &sbp_signatures = launch_conf.sbp_signatures(); auto options = xrt::CreateDefaultXrtPassOptions(); + DeviceType device_type = JUST(DeviceType4DeviceTag(op_conf().device_tag())); auto graph = - xrt::BuildXrtGraph(launch_conf.function(), op_conf().device_type(), this->job_desc()); + xrt::BuildXrtGraph(launch_conf.function(), device_type, this->job_desc()); xrt::RunXrtPass("InferShape", graph.get(), options, &this->job_desc(), parallel_ctx, &sbp_signatures, &blob_descs); } diff --git a/oneflow/xrt/passes/rebuild_job_pass.cpp b/oneflow/xrt/passes/rebuild_job_pass.cpp index 82736d5ebfb290639dd02e4e09aa60a5288f3057..b83cb16ebb6a672ae335f2f0eb5bd476201a8561 100644 --- a/oneflow/xrt/passes/rebuild_job_pass.cpp +++ b/oneflow/xrt/passes/rebuild_job_pass.cpp @@ -13,14 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "oneflow/xrt/passes/pass.h" - -#include <string> -#include <vector> -#include "absl/strings/str_cat.h" -#include "absl/strings/str_split.h" -#include "glog/logging.h" - +#include "oneflow/core/framework/to_string.h" #include "oneflow/core/job/job_builder.h" #include "oneflow/core/operator/op_conf.pb.h" #include "oneflow/xrt/api.h" @@ -28,9 +21,17 @@ limitations under the License. #include "oneflow/xrt/graph/graph.h" #include "oneflow/xrt/kernel/op_kernel.h" #include "oneflow/xrt/node_util.h" +#include "oneflow/xrt/passes/pass.h" #include "oneflow/xrt/types.h" #include "oneflow/xrt/utility/stl.h" +#include "absl/strings/str_cat.h" +#include "absl/strings/str_split.h" +#include "glog/logging.h" + +#include <string> +#include <vector> + namespace oneflow { namespace xrt { @@ -222,7 +223,7 @@ void FoldSubgraphBuilder::BuildXrtLaunchOps() { OperatorConf op_conf; op_conf.set_name(node->name()); DeviceType device_type = XrtDeviceToDeviceType(node->device()); - op_conf.set_device_type(device_type); + op_conf.set_device_tag(CHECK_JUST(DeviceTag4DeviceType(device_type))); XrtLaunchOpConf *launch_conf = op_conf.mutable_xrt_launch_conf(); // Add inputs and outputs in launch_conf