diff --git a/oneflow/core/actor/act_event.proto b/oneflow/core/actor/act_event.proto new file mode 100644 index 0000000000000000000000000000000000000000..d9b783986c3d09fdae494214e70b0087536a21af --- /dev/null +++ b/oneflow/core/actor/act_event.proto @@ -0,0 +1,10 @@ +syntax = "proto2"; +package oneflow; + +message ActEvent { + required int64 actor_id = 1; + required int64 work_stream_id = 2; + required int64 act_id = 3; + required double start_time = 4; + required double stop_time = 5; +} diff --git a/oneflow/core/actor/actor.cpp b/oneflow/core/actor/actor.cpp index 53cdfebeb0fa7d99367fda7c762e9067d39b7337..a2a83086942d819cb09c5628180f1088aee8e9af 100644 --- a/oneflow/core/actor/actor.cpp +++ b/oneflow/core/actor/actor.cpp @@ -1,9 +1,11 @@ +#include "oneflow/core/actor/act_event.pb.h" #include "oneflow/core/actor/actor.h" namespace oneflow { void Actor::Init(const TaskProto& task_proto, const ThreadCtx& thread_ctx) { actor_id_ = task_proto.task_id(); + act_id_ = 0; if (task_proto.has_parallel_ctx()) { parallel_ctx_.reset(new ParallelContext(task_proto.parallel_ctx())); } @@ -93,7 +95,23 @@ int Actor::HandlerZombie(const ActorMsg& msg) { } void Actor::ActUntilFail() { - while (IsReadReady() && IsWriteReady()) { Act(); } + while (IsReadReady() && IsWriteReady()) { + double start_time = GetCurTime(); + Act(); + if (RuntimeCtx::Singleton()->is_adjust_phase() == false) { continue; } + int64_t actor_id = actor_id_; + int64_t act_id = actor_id_++; + int64_t work_stream_id = device_ctx_->work_stream_id(); + device_ctx_->AddCallBack([start_time, act_id, actor_id, work_stream_id]() { + double stop_time = GetCurTime(); + ActEvent act_event; + act_event.set_actor_id(actor_id); + act_event.set_work_stream_id(work_stream_id); + act_event.set_act_id(act_id); + act_event.set_start_time(start_time); + act_event.set_stop_time(stop_time); + }); + } } bool Actor::IsWriteReady() { diff --git a/oneflow/core/actor/actor.h b/oneflow/core/actor/actor.h index 78ff562da70e969f511a7040026d42b61c3b83c1..ccb2a9f5eb12ac2a3ce59c2c8b737e4402d8aa5b 100644 --- a/oneflow/core/actor/actor.h +++ b/oneflow/core/actor/actor.h @@ -95,6 +95,7 @@ class Actor { private: int64_t actor_id_; + int64_t act_id_; std::unique_ptr<ParallelContext> parallel_ctx_; std::vector<ExecKernel> exec_kernel_vec_; HashMap<int64_t, std::vector<std::unique_ptr<Regst>>> produced_regsts_; diff --git a/oneflow/core/common/util.h b/oneflow/core/common/util.h index 0643329700a0700660298723161869cdc5fce2f6..77f21fac5b5944cf0aeffa1edce4d3b6a1b136a1 100644 --- a/oneflow/core/common/util.h +++ b/oneflow/core/common/util.h @@ -152,6 +152,10 @@ inline uint32_t NewRandomSeed() { void RedirectStdoutAndStderrToGlogDir(); void CloseStdoutAndStderr(); +inline double GetCurTime() { + return std::chrono::high_resolution_clock::now().time_since_epoch().count(); +} + } // namespace oneflow #endif // ONEFLOW_CORE_COMMON_UTIL_H_