diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a61e3425dd5ea14c818ca4cfb895a55b68c89a6e..c241ac1851324e85007590011e18a38bb9d0e755 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -550,6 +550,33 @@ jobs:
             -e ONEFLOW_TEST_DIR=$PWD/oneflow/python/test/graph \
             ${{ env.image_tag }} \
             bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test/generic_test.sh"
+      - name: Checkout Oneflow-Inc/models
+        uses: actions/checkout@v2
+        if: ${{ matrix.test_suite == 'cuda_new_interface' }}
+        with:
+          path: oneflow-models  # checkout directory, relative to the workspace
+          repository: Oneflow-Inc/models
+          ref: '830a6b91f10c0a04a68843370cea6319a21ed9c2'  # fixed commit, not a moving branch
+      - name: Speed test
+        if: ${{ matrix.test_suite == 'cuda_new_interface' }}
+        id: speed  # a later step reads steps.speed.outputs.stats
+        run: |
+          docker run ${{ env.extra_docker_args }} \
+            ${{ env.pip_cache_docker_args }} \
+            -e ONEFLOW_MODELS_DIR=$PWD/oneflow-models \
+            ${{ env.image_tag }} \
+            bash -c "python3 -m pip config set global.index-url ${{ env.pip_index_mirror }} && bash ci/test/try_install.sh && bash ci/test_multi_client/test_speed.sh"
+      - name: Post speed stats
+        if: matrix.test_suite == 'cuda_new_interface'
+        uses: actions/github-script@v4
+        env:
+          SPEED_STATS: ${{ steps.speed.outputs.stats }}  # handed over as data, never spliced into the script source
+        with:
+          script: |
+            // Line breaks arrive spelled out as "\n" / "\\n" (and "\r" / "\\r") text; turn them back into real ones.
+            const stats = (process.env.SPEED_STATS || '').replace(/\\{1,2}([nr])/g, (_, c) => (c === 'n' ? '\n' : '\r'))
+            const body = ['<details>', ' <summary>Speed stats:</summary>', '', ' ``` ', stats, ' ``` ', '', '</details>'].join('\n')
+            await github.issues.createComment({ ...context.repo, issue_number: context.issue.number, body })
       - name: Single client op test
         timeout-minutes: 45
         if: matrix.test_suite == 'cpu' || matrix.test_suite == 'cuda_op'
diff --git a/ci/test_multi_client/test_speed.sh b/ci/test_multi_client/test_speed.sh
new file mode 100755
index 0000000000000000000000000000000000000000..62f603aeda13fc33b0c7e7473154c6b62dd3a330
--- /dev/null
+++ b/ci/test_multi_client/test_speed.sh
@@ -0,0 +1,35 @@
+#!/usr/bin/env bash
+
+set -uxo pipefail  # -e is deliberately absent: commands after a failure still run
+
+rc=0  # status of the most recent failing command (kept up to date by the ERR trap)
+trap 'rc=$?' ERR
+
+cd "$ONEFLOW_MODELS_DIR" || exit 1  # stop here: the relative paths used below would not resolve
+
+check_relative_speed() {  # echoes stdin; exit 0 if last "Relative speed" value > $1, 1 if not, 2 if no such line
+  awk -F'[:(]' -v threshold=$1 'BEGIN { status = 2 } /Relative speed/ { status = ($2 > threshold) ? 0 : 1 } { print } END { exit status }'
+}
+
+write_to_file_and_print() {  # copies stdin to stdout and appends it to ./result
+  tee -a result
+  printf '\n' >>result  # one extra newline after each appended chunk
+}
+
+# Benchmark resnet50 at several batch sizes; each entry is <batch size>:<relative speed that must be exceeded>.
+for spec in 16:1.05 8:1.05 4:1 2:0.8 1:0.7; do
+  python3 scripts/compare_speed_with_pytorch.py resnet50/models/resnet50.py resnet50 "${spec%%:*}x3x224x224" --times 30 \
+    | check_relative_speed "${spec#*:}" | write_to_file_and_print
+done
+
+stats="GPU Name: $(nvidia-smi --query-gpu=name --format=csv,noheader -i 0) \n\n $(cat result)"
+# Escape newlines for GitHub Actions, which otherwise truncates multi-line outputs:
+# https://github.community/t/set-output-truncates-multiline-strings/16852/2
+# Note that \n and \r are escaped to \\n and \\r (i.e. raw string "\n" and "\r") instead of
+# %0A and %0D, so that they can be correctly handled in javascript code.
+stats="${stats//'%'/'%25'}"
+stats="${stats//$'\n'/'\\n'}"
+stats="${stats//$'\r'/'\\r'}"
+echo "::set-output name=stats::$stats"
+
+exit $rc