diff --git a/oneflow/api/python/env/env.cpp b/oneflow/api/python/env/env.cpp index 50f324b35e53537894cdfd5f59a863d3aab4592e..887dccc85b1a8f5c85eca41da07598f848bf766e 100644 --- a/oneflow/api/python/env/env.cpp +++ b/oneflow/api/python/env/env.cpp @@ -26,7 +26,6 @@ ONEFLOW_API_PYBIND11_MODULE("", m) { m.def("IsEnvInited", &IsEnvInited); m.def("InitEnv", &InitEnv); - m.def("InitDefaultEnv", &InitDefaultEnv); m.def("DestroyEnv", &DestroyEnv, py::call_guard<py::gil_scoped_release>()); m.def("CurrentMachineId", &CurrentMachineId); diff --git a/oneflow/api/python/env/env.h b/oneflow/api/python/env/env.h index 6b75d399d61ac8ec550ac8ae35b3f78951517af1..87162d99775c377993d96b4b202facfa47036c6a 100644 --- a/oneflow/api/python/env/env.h +++ b/oneflow/api/python/env/env.h @@ -58,16 +58,7 @@ inline Maybe<void> SetIsMultiClient(bool is_multi_client) { return Maybe<void>::Ok(); } -inline Maybe<bool> IsEnvInited() { - return Global<EnvGlobalObjectsScope>::Get() != nullptr - && !JUST(Global<EnvGlobalObjectsScope>::Get()->is_default_physical_env()); -} - -inline Maybe<void> DestroyDefaultEnv() { - if (Global<EnvGlobalObjectsScope>::Get() == nullptr) { return Maybe<void>::Ok(); } - Global<EnvGlobalObjectsScope>::Delete(); - return Maybe<void>::Ok(); -} +inline Maybe<bool> IsEnvInited() { return Global<EnvGlobalObjectsScope>::Get() != nullptr; } inline Maybe<void> DestroyEnv() { if (Global<EnvGlobalObjectsScope>::Get() == nullptr) { return Maybe<void>::Ok(); } @@ -80,24 +71,10 @@ inline Maybe<void> DestroyEnv() { return Maybe<void>::Ok(); } -inline Maybe<void> InitDefaultEnv(const std::string& env_proto_str) { - EnvProto env_proto; - CHECK_OR_RETURN(TxtString2PbMessage(env_proto_str, &env_proto)) - << "failed to parse env_proto" << env_proto_str; - CHECK_ISNULL_OR_RETURN(Global<EnvGlobalObjectsScope>::Get()); - // Global<T>::New is not allowed to be called here - // because glog is not constructed yet and LOG(INFO) has bad bahavior - Global<EnvGlobalObjectsScope>::SetAllocated(new EnvGlobalObjectsScope()); - JUST(Global<EnvGlobalObjectsScope>::Get()->Init(env_proto)); - if (!GlobalProcessCtx::IsThisProcessMaster()) { JUST(Cluster::WorkerLoop()); } - return Maybe<void>::Ok(); -} - inline Maybe<void> InitEnv(const std::string& env_proto_str, bool is_multi_client) { EnvProto env_proto; CHECK_OR_RETURN(TxtString2PbMessage(env_proto_str, &env_proto)) << "failed to parse env_proto" << env_proto_str; - JUST(DestroyDefaultEnv()); CHECK_ISNULL_OR_RETURN(Global<EnvGlobalObjectsScope>::Get()); // Global<T>::New is not allowed to be called here // because glog is not constructed yet and LOG(INFO) has bad bahavior diff --git a/oneflow/api/python/env/env_api.h b/oneflow/api/python/env/env_api.h index 4226ed0528aaa57318c0de8440a093b37c0c2b7f..4e18a7e348888eaa6db5beba1eca051cb3b57c29 100644 --- a/oneflow/api/python/env/env_api.h +++ b/oneflow/api/python/env/env_api.h @@ -32,10 +32,6 @@ inline void InitEnv(const std::string& env_proto_str, bool is_multi_client) { return oneflow::InitEnv(env_proto_str, is_multi_client).GetOrThrow(); } -inline void InitDefaultEnv(const std::string& env_proto_str) { - return oneflow::InitDefaultEnv(env_proto_str).GetOrThrow(); -} - inline void DestroyEnv() { return oneflow::DestroyEnv().GetOrThrow(); } inline long long CurrentMachineId() { return oneflow::CurrentMachineId().GetOrThrow(); } diff --git a/oneflow/api/python/session/session.cpp b/oneflow/api/python/session/session.cpp index eca8ba9d34d6510c8de1300077175f794c5488f0..80c52306b752dcd9e4c68034ca3b2cf5c035ef0a 100644 --- a/oneflow/api/python/session/session.cpp +++ b/oneflow/api/python/session/session.cpp @@ -33,7 +33,7 @@ ONEFLOW_API_PYBIND11_MODULE("", m) { // multi-client lazy global session context m.def("CreateMultiClientSessionContext", &CreateMultiClientSessionContext); m.def("InitMultiClientSessionContext", &InitMultiClientSessionContext); - m.def("DestroyMultiClientSessionContext", &DestroyMultiClientSessionContext); + m.def("TryDestroyMultiClientSessionContext", &TryDestroyMultiClientSessionContext); using namespace oneflow; m.def("NewSessionId", &NewSessionId); diff --git a/oneflow/api/python/session/session.h b/oneflow/api/python/session/session.h index b5be7f52f2f89bb275c5e90c1ec656a1d5327842..3371f46c82e0c232afe841db458f7c7550cc07b8 100644 --- a/oneflow/api/python/session/session.h +++ b/oneflow/api/python/session/session.h @@ -126,8 +126,13 @@ inline Maybe<void> InitMultiClientSessionContext(const std::string& config_proto return Maybe<void>::Ok(); } -inline Maybe<void> DestroyMultiClientSessionContext() { - Global<MultiClientSessionContext>::Delete(); +inline Maybe<void> TryDestroyMultiClientSessionContext() { + // Global<T>::Delete is not allowed to be called here + // because glog is not constructed yet and LOG(INFO) has bad bahavior + if (Global<MultiClientSessionContext>::Get() != nullptr) { + delete Global<MultiClientSessionContext>::Get(); + Global<MultiClientSessionContext>::SetAllocated(nullptr); + } return Maybe<void>::Ok(); } diff --git a/oneflow/api/python/session/session_api.h b/oneflow/api/python/session/session_api.h index 1dc0b01569718e1cecd291ed48c9d54edfe33211..f89f0b53768b820f7cd860055905095c64bb84e7 100644 --- a/oneflow/api/python/session/session_api.h +++ b/oneflow/api/python/session/session_api.h @@ -42,8 +42,8 @@ inline void InitMultiClientSessionContext(const std::string& config_proto_str) { return oneflow::InitMultiClientSessionContext(config_proto_str).GetOrThrow(); } -inline void DestroyMultiClientSessionContext() { - return oneflow::DestroyMultiClientSessionContext().GetOrThrow(); +inline void TryDestroyMultiClientSessionContext() { + return oneflow::TryDestroyMultiClientSessionContext().GetOrThrow(); } #endif // ONEFLOW_API_PYTHON_SESSION_SESSION_API_H_ diff --git a/oneflow/compatible_single_client_python/framework/c_api_util.py b/oneflow/compatible_single_client_python/framework/c_api_util.py index 181035bdf1964ca38ff1660f045568d742c87367..d03da5a9822e8a237eb27304d57a3fdf9365a30d 100644 --- a/oneflow/compatible_single_client_python/framework/c_api_util.py +++ b/oneflow/compatible_single_client_python/framework/c_api_util.py @@ -47,12 +47,6 @@ def EnvResource(): return text_format.Parse(resource, resource_util.Resource()) -def InitDefaultEnv(env_proto): - assert type(env_proto) is env_pb2.EnvProto - env_proto_str = text_format.MessageToString(env_proto) - oneflow._oneflow_internal.InitDefaultEnv(env_proto_str) - - def InitEnv(env_proto, is_multi_client): assert type(env_proto) is env_pb2.EnvProto env_proto_str = text_format.MessageToString(env_proto) diff --git a/oneflow/compatible_single_client_python/framework/env_util.py b/oneflow/compatible_single_client_python/framework/env_util.py index 88342d88dc14f2511e330c7e4497c13892e1520b..71923f37887316872070ec1d24e7dfca2948a710 100644 --- a/oneflow/compatible_single_client_python/framework/env_util.py +++ b/oneflow/compatible_single_client_python/framework/env_util.py @@ -93,16 +93,6 @@ def env_init(is_multi_client): return True -def init_default_physical_env(): - default_physical_env_proto = _DefaultEnvProto() - log_dir = os.getenv("ONEFLOW_TEST_LOG_DIR") - if log_dir: - default_physical_env_proto.cpp_logging_conf.log_dir = log_dir - default_physical_env_proto.is_default_physical_env = True - CompleteEnvProto(default_physical_env_proto, False) - c_api_util.InitDefaultEnv(default_physical_env_proto) - - @oneflow_export("env.current_resource", "current_resource") def api_get_current_resource() -> resource_util.Resource: r"""Get current resources, such as:machine nums, cpu/gpu device nums, diff --git a/oneflow/compatible_single_client_python/framework/session_context.py b/oneflow/compatible_single_client_python/framework/session_context.py index f829a4655aac11faa855f2ee552f131537c2861a..1965ed3d2c15b1bf9f80d8ce89411c3e499c9f21 100644 --- a/oneflow/compatible_single_client_python/framework/session_context.py +++ b/oneflow/compatible_single_client_python/framework/session_context.py @@ -48,6 +48,13 @@ def TryCloseDefaultSession(): del _sess_id2sess[default_sess_id] +def TryCloseAllSession(): + global _sess_id2sess + for sess_id in _sess_id2sess.keys(): + _sess_id2sess[sess_id].TryClose() + _sess_id2sess.clear() + + def try_init_default_session(func): @functools.wraps(func) def Func(*args, **kwargs): diff --git a/oneflow/core/job/env.proto b/oneflow/core/job/env.proto index 09bd5fe367c81f6d66925d86de87c8b020de7807..e0de1932c3de5402c14c909da749a1187ab653a7 100644 --- a/oneflow/core/job/env.proto +++ b/oneflow/core/job/env.proto @@ -22,5 +22,4 @@ message EnvProto { optional int32 data_port = 3 [default = -1]; optional CppLoggingConf cpp_logging_conf = 4; optional BootstrapConf ctrl_bootstrap_conf = 5; - optional bool is_default_physical_env = 6 [default = false]; } diff --git a/oneflow/core/job/env_global_objects_scope.cpp b/oneflow/core/job/env_global_objects_scope.cpp index 42e79b945b56b842f45f93ae699def667986d5f2..2c263b8f59243115cc21ac4e851882bbd5f3b881 100644 --- a/oneflow/core/job/env_global_objects_scope.cpp +++ b/oneflow/core/job/env_global_objects_scope.cpp @@ -51,13 +51,8 @@ std::string LogDir(const std::string& log_dir) { return v; } -void InitLogging(const CppLoggingConf& logging_conf, bool default_physical_env) { - if (!default_physical_env) { - FLAGS_log_dir = LogDir(logging_conf.log_dir()); - } else { - std::string default_env_log_path = JoinPath(logging_conf.log_dir(), "default_physical_env_log"); - FLAGS_log_dir = LogDir(default_env_log_path); - } +void InitLogging(const CppLoggingConf& logging_conf) { + FLAGS_log_dir = LogDir(logging_conf.log_dir()); FLAGS_logtostderr = logging_conf.logtostderr(); FLAGS_logbuflevel = logging_conf.logbuflevel(); FLAGS_stderrthreshold = 1; // 1=WARNING @@ -111,8 +106,7 @@ void ClearAllSymbolAndIdCache() { } // namespace Maybe<void> EnvGlobalObjectsScope::Init(const EnvProto& env_proto) { - is_default_physical_env_ = env_proto.is_default_physical_env(); - InitLogging(env_proto.cpp_logging_conf(), JUST(is_default_physical_env_)); + InitLogging(env_proto.cpp_logging_conf()); #ifdef WITH_CUDA InitGlobalCudaDeviceProp(); #endif diff --git a/oneflow/core/job/env_global_objects_scope.h b/oneflow/core/job/env_global_objects_scope.h index 9218723197c253082cfe0afe2261c0d14a90da50..3eeeffb382113f45a5c533900f81565a95b59936 100644 --- a/oneflow/core/job/env_global_objects_scope.h +++ b/oneflow/core/job/env_global_objects_scope.h @@ -19,7 +19,6 @@ limitations under the License. #include "oneflow/core/common/maybe.h" #include "oneflow/core/job/env_desc.h" #include "oneflow/core/framework/device.h" -#include "oneflow/core/common/error.h" namespace oneflow { @@ -28,17 +27,15 @@ class ParallelDesc; class EnvGlobalObjectsScope final { public: OF_DISALLOW_COPY_AND_MOVE(EnvGlobalObjectsScope); - EnvGlobalObjectsScope() : is_default_physical_env_(Error::ValueError("Not initialized")) {} + EnvGlobalObjectsScope() = default; ~EnvGlobalObjectsScope(); Maybe<void> Init(const EnvProto& env_proto); - const Maybe<bool>& is_default_physical_env() const { return is_default_physical_env_; } const std::shared_ptr<const ParallelDesc>& MutParallelDesc4Device(const Device& device); private: std::mutex mutex_; - Maybe<bool> is_default_physical_env_; HashMap<Device, std::shared_ptr<const ParallelDesc>> device2parallel_desc_; }; diff --git a/oneflow/init.py b/oneflow/init.py index 58303f5f03b78b0d29271291e3c9c57d88619f4d..6fdeb5490c72285269415022d2b05fe9c740b4c8 100644 --- a/oneflow/init.py +++ b/oneflow/init.py @@ -83,14 +83,18 @@ oneflow._oneflow_internal.EnableEagerEnvironment(True) del env_util -# capture oneflow methods so that they can be still accessed after `del oneflow` def _SyncOnMasterFn(): import oneflow - if oneflow.python.framework.distribute.is_multi_client(): - oneflow._oneflow_internal.eager.multi_client.Sync() - elif oneflow.python.framework.distribute.get_rank() == 0: - oneflow._oneflow_internal.eager.single_client.Sync() + def Sync(): + if not oneflow._oneflow_internal.IsEnvInited(): + return + if oneflow.python.framework.distribute.is_multi_client(): + oneflow._oneflow_internal.eager.multi_client.Sync() + elif oneflow.python.framework.distribute.get_rank() == 0: + oneflow._oneflow_internal.eager.single_client.Sync() + + return Sync atexit.register(oneflow._oneflow_internal.SetShuttingDown) diff --git a/oneflow/python/framework/c_api_util.py b/oneflow/python/framework/c_api_util.py index d8fd22ee410432d01d0091b1d49572b83a8100df..17bb9ac5404b4ea47129144fa379d2ddf64e86bb 100644 --- a/oneflow/python/framework/c_api_util.py +++ b/oneflow/python/framework/c_api_util.py @@ -45,12 +45,6 @@ def EnvResource(): return text_format.Parse(resource, resource_util.Resource()) -def InitDefaultEnv(env_proto): - assert type(env_proto) is env_pb2.EnvProto - env_proto_str = text_format.MessageToString(env_proto) - oneflow._oneflow_internal.InitDefaultEnv(env_proto_str) - - def InitEnv(env_proto, is_multi_client): assert type(env_proto) is env_pb2.EnvProto env_proto_str = text_format.MessageToString(env_proto) diff --git a/oneflow/python/framework/env_util.py b/oneflow/python/framework/env_util.py index 870755c3022a301c3a621ecca176651cb6dfb980..b6249123049e9fa346fa16433bf26d39ce59e602 100644 --- a/oneflow/python/framework/env_util.py +++ b/oneflow/python/framework/env_util.py @@ -83,16 +83,6 @@ def env_init(): return True -def init_default_physical_env(): - default_physical_env_proto = _DefaultEnvProto() - log_dir = os.getenv("ONEFLOW_TEST_LOG_DIR") - if log_dir: - default_physical_env_proto.cpp_logging_conf.log_dir = log_dir - default_physical_env_proto.is_default_physical_env = True - CompleteEnvProto(default_physical_env_proto, False) - c_api_util.InitDefaultEnv(default_physical_env_proto) - - @oneflow_export("env.current_resource", "current_resource") def api_get_current_resource() -> resource_util.Resource: r"""Get current resources, such as:machine nums, cpu/gpu device nums, diff --git a/oneflow/python/framework/multi_client_session.py b/oneflow/python/framework/multi_client_session.py index 6dbd5e6a0f2be6fed7614794e8fb374d439a4186..f6dc2acc26c4d5ddd4ac8c9bbaf0e7c87c8e446c 100644 --- a/oneflow/python/framework/multi_client_session.py +++ b/oneflow/python/framework/multi_client_session.py @@ -53,7 +53,7 @@ class MultiClientSession(object): def TryClose(self): if self.status_ != self.Status.CLOSED: - oneflow._oneflow_internal.DestroyMultiClientSessionContext() + oneflow._oneflow_internal.TryDestroyMultiClientSessionContext() oneflow._oneflow_internal.ClearSessionById(self.id) self.status_ = self.Status.CLOSED diff --git a/oneflow/single_client_init.py b/oneflow/single_client_init.py index f69a16d54dee678505bd999b2678b7c96443eccf..e7fb1f7955ea0bd24ab347307f31ff37d11820c4 100644 --- a/oneflow/single_client_init.py +++ b/oneflow/single_client_init.py @@ -58,7 +58,6 @@ time.sleep(1) del time oneflow._oneflow_internal.SetIsMultiClient(False) -env_util.init_default_physical_env() session_context.OpenDefaultSession( session_util.Session(oneflow._oneflow_internal.NewSessionId()) ) @@ -114,12 +113,12 @@ INVALID_SPLIT_AXIS = oneflow._oneflow_internal.INVALID_SPLIT_AXIS import atexit from oneflow.compatible.single_client.python.framework.session_context import ( - TryCloseDefaultSession, + TryCloseAllSession, ) -atexit.register(TryCloseDefaultSession) +atexit.register(TryCloseAllSession) -del TryCloseDefaultSession +del TryCloseAllSession del atexit import sys