diff --git a/official/cv/resnet/gpu_resnet_benchmark.py b/official/cv/resnet/gpu_resnet_benchmark.py
index 094301e024b401a385f148cc565f4a6e04b02df2..d4ecf48255ee92775ec500b7bbb7774c31885229 100644
--- a/official/cv/resnet/gpu_resnet_benchmark.py
+++ b/official/cv/resnet/gpu_resnet_benchmark.py
@@ -66,7 +66,8 @@ class MyTimeMonitor(Callback):
         fps = self.batch_size / step_mseconds * 1000 * self.size
         print("epoch: [%s/%s] step: [%s/%s], loss is %s" % (cur_epoch_num, total_epochs,\
             cur_step_in_epoch, self.data_size, loss),\
-                "Epoch time: {:5.3f} ms, fps: {:d} img/sec.".format(step_mseconds, int(fps)), flush=True)
+                "Epoch time: {:5.3f} ms, per_step_time: {:.2f} ms, fps: {:d} img/sec.".format(
+                    step_mseconds, step_mseconds / self.size, int(fps)), flush=True)
 
 
 def create_dataset(dataset_path, do_train, repeat_num=1, batch_size=32, target="GPU", dtype="fp16",