diff --git a/oneflow/core/common/util.cpp b/oneflow/core/common/util.cpp
index 674cd68353dddb2d2e71f8784c7a4f4a3526b7b5..d314150da8674ca402dab17f1b0b2162f6d15b65 100644
--- a/oneflow/core/common/util.cpp
+++ b/oneflow/core/common/util.cpp
@@ -104,4 +104,36 @@ size_t GetAvailableCpuMemSize() {
 
 bool IsKernelSafeInt32(int64_t n) { return n <= GetMaxVal<int32_t>() / 2; }
 
+namespace {
+
+// Returns true when lhs and rhs have the same length and match character by character
+// after each character has been lowercased with std::tolower.
+bool CaseInsensitiveStringEquals(const std::string& lhs, const std::string& rhs) {
+  // std::tolower is undefined for arguments that are neither EOF nor representable as
+  // unsigned char, so the characters are taken as unsigned char before the call.
+  return lhs.size() == rhs.size()
+         && std::equal(lhs.begin(), lhs.end(), rhs.begin(),
+                       [](unsigned char a, unsigned char b) {
+                         return std::tolower(a) == std::tolower(b);
+                       });
+}
+
+// Returns true when str is "1", "true", "yes", "on" or "y", compared case-insensitively.
+bool StringToBool(const std::string& str) {
+  return CaseInsensitiveStringEquals(str, "1") || CaseInsensitiveStringEquals(str, "true")
+         || CaseInsensitiveStringEquals(str, "yes") || CaseInsensitiveStringEquals(str, "on")
+         || CaseInsensitiveStringEquals(str, "y");
+}
+
+}  // namespace
+
+// Parses the environment variable env_var as a boolean.
+// Returns default_value when the variable is not set; otherwise returns true only if its
+// value is one of the spellings accepted by StringToBool, and false for anything else.
+bool ParseBooleanFromEnv(const std::string& env_var, bool default_value) {
+  const char* env_p = std::getenv(env_var.c_str());
+  if (env_p == nullptr) { return default_value; }
+  return StringToBool(env_p);
+}
+
 }  // namespace oneflow
diff --git a/oneflow/core/common/util.h b/oneflow/core/common/util.h
index 65f057db01f206f24864d04a36f78f77be643aac..5b411c2b9cf302c777c1101170ff8fe5151e4b1d 100644
--- a/oneflow/core/common/util.h
+++ b/oneflow/core/common/util.h
@@ -216,6 +216,11 @@ class RoundModeGuard final {
   int saved_mode_;
 };
 
+// Parses the environment variable env_var as a boolean. Returns default_value when the
+// variable is not set; otherwise returns true only for the values "1", "true", "yes", "on"
+// and "y" (compared case-insensitively) and false for anything else.
+bool ParseBooleanFromEnv(const std::string& env_var, bool default_value);
+
 }  // namespace oneflow
 
 #endif  // ONEFLOW_CORE_COMMON_UTIL_H_
diff --git a/oneflow/core/device/cuda_stream_handle.cpp b/oneflow/core/device/cuda_stream_handle.cpp
index b49812f7fc416e08208a87de3f86d8d79d59049a..7d053cf1e0974cb4e3147d23f6ec83eebe0f0e5d 100644
--- a/oneflow/core/device/cuda_stream_handle.cpp
+++ b/oneflow/core/device/cuda_stream_handle.cpp
@@ -89,7 +89,13 @@ const cudnnHandle_t* CudaStreamHandle::cudnn_handle() {
 void CudaStreamHandle::AddCallBack(std::function<void()> callback) {
   CudaCBEvent cb_event;
   cb_event.callback = std::move(callback);
-  OF_CUDA_CHECK(cudaEventCreateWithFlags(&(cb_event.event), cudaEventDisableTiming));
+  // cudaEventBlockingSync is added only when the env var below is set to a truthy value.
+  // NOTE(review): the environment is re-read on every AddCallBack call; cache if this is hot.
+  int flags = cudaEventDisableTiming;
+  if (ParseBooleanFromEnv("ONEFLOW_STREAM_CUDA_EVENT_FLAG_BLOCKING_SYNC", false)) {
+    flags |= cudaEventBlockingSync;
+  }
+  OF_CUDA_CHECK(cudaEventCreateWithFlags(&(cb_event.event), flags));
   OF_CUDA_CHECK(cudaEventRecord(cb_event.event, *cuda_stream()));
   cb_event_chan_->Send(cb_event);
 }
diff --git a/oneflow/core/profiler/kernel.cpp b/oneflow/core/profiler/kernel.cpp
index e82d3e8caa7ae25e8d2cdf7eb6b747099ba1662b..1909695e824ee1e1b0eab076b04f80f78b41262e 100644
--- a/oneflow/core/profiler/kernel.cpp
+++ b/oneflow/core/profiler/kernel.cpp
@@ -28,10 +28,15 @@ namespace {
 
 bool profile_cuda_memory_bandwidth = false;
 bool profile_kernel_forward_range = false;
-COMMAND(ParseBoolFlagFromEnv("ONEFLOW_PROFILER_KERNEL_PROFILE_CUDA_MEMORY_BANDWIDTH",
-                             &profile_cuda_memory_bandwidth));
-COMMAND(ParseBoolFlagFromEnv("ONEFLOW_PROFILER_KERNEL_PROFILE_KERNEL_FORWARD_RANGE",
-                             &profile_kernel_forward_range));
+// Loads both profiler switches from the environment; an unset variable leaves the switch off.
+void Init() {
+  profile_cuda_memory_bandwidth =
+      ParseBooleanFromEnv("ONEFLOW_PROFILER_KERNEL_PROFILE_CUDA_MEMORY_BANDWIDTH", false);
+  profile_kernel_forward_range =
+      ParseBooleanFromEnv("ONEFLOW_PROFILER_KERNEL_PROFILE_KERNEL_FORWARD_RANGE", false);
+}
+
+COMMAND(Init());
 
 #if defined(WITH_CUDA)
 thread_local cudaEvent_t cuda_memory_bandwidth_profile_start_event = nullptr;
diff --git a/oneflow/core/profiler/profiler.cpp b/oneflow/core/profiler/profiler.cpp
index b3502d340875e62982274ee2e7bffd9d307a08b9..dabe1f7d2ff3fee3feb858db3e83492847bd2a1c 100644
--- a/oneflow/core/profiler/profiler.cpp
+++ b/oneflow/core/profiler/profiler.cpp
@@ -27,27 +27,6 @@ namespace oneflow {
 
 namespace profiler {
 
-namespace {
-
-bool CaseInsensitiveStringEquals(const std::string& lhs, const std::string& rhs) {
-  return lhs.size() == rhs.size()
-         && std::equal(lhs.begin(), lhs.end(), rhs.begin(),
-                       [](char a, char b) { return std::tolower(a) == std::tolower(b); });
-}
-
-bool StringToBool(const std::string& str) {
-  return CaseInsensitiveStringEquals(str, "1") || CaseInsensitiveStringEquals(str, "true")
-         || CaseInsensitiveStringEquals(str, "yes") || CaseInsensitiveStringEquals(str, "on")
-         || CaseInsensitiveStringEquals(str, "y");
-}
-
-}  // namespace
-
-void ParseBoolFlagFromEnv(const std::string& env_var, bool* flag) {
-  const char* env_p = std::getenv(env_var.c_str());
-  *flag = (env_p != nullptr && StringToBool(env_p));
-}
-
 void NameThisHostThread(const std::string& name) {
 #ifdef OF_ENABLE_PROFILER
   nvtxNameOsThreadA(syscall(SYS_gettid), name.c_str());
diff --git a/oneflow/core/profiler/profiler.h b/oneflow/core/profiler/profiler.h
index 7a90fa8c5a943b41582e252046a812d1269ff636..b15f315a9de2b86793a52c4dcf7cd4047265c77b 100644
--- a/oneflow/core/profiler/profiler.h
+++ b/oneflow/core/profiler/profiler.h
@@ -22,8 +22,6 @@ namespace oneflow {
 
 namespace profiler {
 
-void ParseBoolFlagFromEnv(const std::string& env_var, bool* flag);
-
 void NameThisHostThread(const std::string& name);
 
 void RangePush(const std::string& name);