diff --git a/oneflow/core/common/util.cpp b/oneflow/core/common/util.cpp
index 674cd68353dddb2d2e71f8784c7a4f4a3526b7b5..d314150da8674ca402dab17f1b0b2162f6d15b65 100644
--- a/oneflow/core/common/util.cpp
+++ b/oneflow/core/common/util.cpp
@@ -104,4 +104,36 @@ size_t GetAvailableCpuMemSize() {
 
 bool IsKernelSafeInt32(int64_t n) { return n <= GetMaxVal<int32_t>() / 2; }
 
+namespace {
+
+// Returns true when lhs and rhs have the same length and match character by character after
+// lower-casing each character with std::tolower.
+bool CaseInsensitiveStringEquals(const std::string& lhs, const std::string& rhs) {
+  // std::tolower requires an argument representable as unsigned char (or EOF); a negative plain
+  // char is undefined behavior, so the lambda takes its operands as unsigned char.
+  return lhs.size() == rhs.size()
+         && std::equal(lhs.begin(), lhs.end(), rhs.begin(), [](unsigned char a, unsigned char b) {
+              return std::tolower(a) == std::tolower(b);
+            });
+}
+
+// Returns true for "1", "true", "yes", "on" and "y" (compared case-insensitively); every
+// other string, including the empty string, yields false.
+bool StringToBool(const std::string& str) {
+  return CaseInsensitiveStringEquals(str, "1") || CaseInsensitiveStringEquals(str, "true")
+         || CaseInsensitiveStringEquals(str, "yes") || CaseInsensitiveStringEquals(str, "on")
+         || CaseInsensitiveStringEquals(str, "y");
+}
+
+}  // namespace
+
+// Reads the environment variable named env_var. Returns default_value when the variable is not
+// set; otherwise returns StringToBool of its value, so a set-but-unrecognized value is false
+// regardless of default_value.
+bool ParseBooleanFromEnv(const std::string& env_var, bool default_value) {
+  const char* env_p = std::getenv(env_var.c_str());
+  if (env_p == nullptr) { return default_value; }
+  return StringToBool(env_p);
+}
+
 }  // namespace oneflow
diff --git a/oneflow/core/common/util.h b/oneflow/core/common/util.h
index 65f057db01f206f24864d04a36f78f77be643aac..5b411c2b9cf302c777c1101170ff8fe5151e4b1d 100644
--- a/oneflow/core/common/util.h
+++ b/oneflow/core/common/util.h
@@ -216,6 +216,11 @@ class RoundModeGuard final {
   int saved_mode_;
 };
 
+// Interprets the environment variable named `env_var` as a boolean flag.
+// Returns `default_value` when the variable is unset. When it is set, returns true only if the
+// value is "1", "true", "yes", "on" or "y" (case-insensitive); any other value yields false.
+bool ParseBooleanFromEnv(const std::string& env_var, bool default_value);
+
 }  // namespace oneflow
 
 #endif  // ONEFLOW_CORE_COMMON_UTIL_H_
diff --git a/oneflow/core/device/cuda_stream_handle.cpp b/oneflow/core/device/cuda_stream_handle.cpp
index b49812f7fc416e08208a87de3f86d8d79d59049a..7d053cf1e0974cb4e3147d23f6ec83eebe0f0e5d 100644
--- a/oneflow/core/device/cuda_stream_handle.cpp
+++ b/oneflow/core/device/cuda_stream_handle.cpp
@@ -89,7 +89,17 @@ const cudnnHandle_t* CudaStreamHandle::cudnn_handle() {
 void CudaStreamHandle::AddCallBack(std::function<void()> callback) {
   CudaCBEvent cb_event;
   cb_event.callback = std::move(callback);
-  OF_CUDA_CHECK(cudaEventCreateWithFlags(&(cb_event.event), cudaEventDisableTiming));
+  // The event flags depend only on the process environment, so they are computed once (function
+  // statics are initialized thread-safely) instead of calling getenv and building a std::string
+  // on every callback registration.
+  static const unsigned int event_flags = [] {
+    unsigned int flags = cudaEventDisableTiming;
+    if (ParseBooleanFromEnv("ONEFLOW_STREAM_CUDA_EVENT_FLAG_BLOCKING_SYNC", false)) {
+      flags |= cudaEventBlockingSync;
+    }
+    return flags;
+  }();
+  OF_CUDA_CHECK(cudaEventCreateWithFlags(&(cb_event.event), event_flags));
   OF_CUDA_CHECK(cudaEventRecord(cb_event.event, *cuda_stream()));
   cb_event_chan_->Send(cb_event);
 }
diff --git a/oneflow/core/profiler/kernel.cpp b/oneflow/core/profiler/kernel.cpp
index e82d3e8caa7ae25e8d2cdf7eb6b747099ba1662b..1909695e824ee1e1b0eab076b04f80f78b41262e 100644
--- a/oneflow/core/profiler/kernel.cpp
+++ b/oneflow/core/profiler/kernel.cpp
@@ -28,10 +28,18 @@ namespace {
 bool profile_cuda_memory_bandwidth = false;
 bool profile_kernel_forward_range = false;
 
-COMMAND(ParseBoolFlagFromEnv("ONEFLOW_PROFILER_KERNEL_PROFILE_CUDA_MEMORY_BANDWIDTH",
-                             &profile_cuda_memory_bandwidth));
-COMMAND(ParseBoolFlagFromEnv("ONEFLOW_PROFILER_KERNEL_PROFILE_KERNEL_FORWARD_RANGE",
-                             &profile_kernel_forward_range));
+// Reads both kernel profiling switches from the environment; an unset variable leaves the
+// corresponding switch false.
+void Init() {
+  const bool cuda_memory_bandwidth =
+      ParseBooleanFromEnv("ONEFLOW_PROFILER_KERNEL_PROFILE_CUDA_MEMORY_BANDWIDTH", false);
+  const bool kernel_forward_range =
+      ParseBooleanFromEnv("ONEFLOW_PROFILER_KERNEL_PROFILE_KERNEL_FORWARD_RANGE", false);
+  profile_cuda_memory_bandwidth = cuda_memory_bandwidth;
+  profile_kernel_forward_range = kernel_forward_range;
+}
+
+COMMAND(Init());
 
 #if defined(WITH_CUDA)
 thread_local cudaEvent_t cuda_memory_bandwidth_profile_start_event = nullptr;
diff --git a/oneflow/core/profiler/profiler.cpp b/oneflow/core/profiler/profiler.cpp
index b3502d340875e62982274ee2e7bffd9d307a08b9..dabe1f7d2ff3fee3feb858db3e83492847bd2a1c 100644
--- a/oneflow/core/profiler/profiler.cpp
+++ b/oneflow/core/profiler/profiler.cpp
@@ -27,27 +27,6 @@ namespace oneflow {
 
 namespace profiler {
 
-namespace {
-
-bool CaseInsensitiveStringEquals(const std::string& lhs, const std::string& rhs) {
-  return lhs.size() == rhs.size()
-         && std::equal(lhs.begin(), lhs.end(), rhs.begin(),
-                       [](char a, char b) { return std::tolower(a) == std::tolower(b); });
-}
-
-bool StringToBool(const std::string& str) {
-  return CaseInsensitiveStringEquals(str, "1") || CaseInsensitiveStringEquals(str, "true")
-         || CaseInsensitiveStringEquals(str, "yes") || CaseInsensitiveStringEquals(str, "on")
-         || CaseInsensitiveStringEquals(str, "y");
-}
-
-}  // namespace
-
-void ParseBoolFlagFromEnv(const std::string& env_var, bool* flag) {
-  const char* env_p = std::getenv(env_var.c_str());
-  *flag = (env_p != nullptr && StringToBool(env_p));
-}
-
 void NameThisHostThread(const std::string& name) {
 #ifdef OF_ENABLE_PROFILER
   nvtxNameOsThreadA(syscall(SYS_gettid), name.c_str());
diff --git a/oneflow/core/profiler/profiler.h b/oneflow/core/profiler/profiler.h
index 7a90fa8c5a943b41582e252046a812d1269ff636..b15f315a9de2b86793a52c4dcf7cd4047265c77b 100644
--- a/oneflow/core/profiler/profiler.h
+++ b/oneflow/core/profiler/profiler.h
@@ -22,8 +22,6 @@ namespace oneflow {
 
 namespace profiler {
 
-void ParseBoolFlagFromEnv(const std::string& env_var, bool* flag);
-
 void NameThisHostThread(const std::string& name);
 
 void RangePush(const std::string& name);