From 7641567b41946600571513f0c34dd0f1b1ff3ed8 Mon Sep 17 00:00:00 2001 From: tqchen Date: Sat, 18 Feb 2023 09:11:56 -0500 Subject: [PATCH 01/10] [MLC][CI] Do not upstream MLC local ci setup. --- ci/jenkins/mlc_jenkinsfile.groovy | 341 +++++++++++++++++++++ tests/scripts/mlc/task_mlc_build.sh | 22 ++ tests/scripts/mlc/task_mlc_lint_cleanup.sh | 22 ++ 3 files changed, 385 insertions(+) create mode 100644 ci/jenkins/mlc_jenkinsfile.groovy create mode 100755 tests/scripts/mlc/task_mlc_build.sh create mode 100755 tests/scripts/mlc/task_mlc_lint_cleanup.sh diff --git a/ci/jenkins/mlc_jenkinsfile.groovy b/ci/jenkins/mlc_jenkinsfile.groovy new file mode 100644 index 0000000000..42b44e15bd --- /dev/null +++ b/ci/jenkins/mlc_jenkinsfile.groovy @@ -0,0 +1,341 @@ +#!groovy +// -*- mode: groovy -*- + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// Jenkins pipeline +// See documents at https://jenkins.io/doc/book/pipeline/jenkinsfile/ + +// ============================= IMPORTANT NOTE ============================= +// To keep things simple +// This file is manually updated to maintain unity branch specific builds. +// Please do not send this file to main + + +import org.jenkinsci.plugins.pipeline.modeldefinition.Utils + +// NOTE: these lines are scanned by docker/dev_common.sh. Please update the regex as needed. --> +ci_lint = 'tlcpack/ci-lint:20240105-165030-51bdaec6' +ci_gpu = 'tlcpack/ci-gpu:20240105-165030-51bdaec6' +ci_cpu = 'tlcpack/ci-cpu:20240105-165030-51bdaec6' +ci_wasm = 'tlcpack/ci-wasm:v0.72' +ci_i386 = 'tlcpack/ci-i386:v0.75' +ci_qemu = 'tlcpack/ci-qemu:v0.11' +ci_arm = 'tlcpack/ci-arm:v0.08' +ci_hexagon = 'tlcpack/ci-hexagon:20240105-165030-51bdaec6' +// <--- End of regex-scanned config. + +// Parameters to allow overriding (in Jenkins UI), the images +// to be used by a given build. When provided, they take precedence +// over default values above. 
+properties([ + parameters([ + string(name: 'ci_lint_param', defaultValue: ''), + string(name: 'ci_cpu_param', defaultValue: ''), + string(name: 'ci_gpu_param', defaultValue: ''), + string(name: 'ci_wasm_param', defaultValue: ''), + string(name: 'ci_i386_param', defaultValue: ''), + string(name: 'ci_qemu_param', defaultValue: ''), + string(name: 'ci_arm_param', defaultValue: ''), + string(name: 'ci_hexagon_param', defaultValue: '') + ]) +]) + +// tvm libraries +tvm_runtime = 'build/libtvm_runtime.so, build/config.cmake' +tvm_lib = 'build/libtvm.so, ' + tvm_runtime +// LLVM upstream lib +tvm_multilib = 'build/libtvm.so, ' + + 'build/libvta_fsim.so, ' + + tvm_runtime + +tvm_multilib_tsim = 'build/libvta_tsim.so, ' + + tvm_multilib + +// command to start a docker container +docker_run = 'docker/bash.sh' +// timeout in minutes +max_time = 240 + +def per_exec_ws(folder) { + return "workspace/exec_${env.EXECUTOR_NUMBER}/" + folder +} + +// initialize source codes +def init_git() { + checkout scm + // Add more info about job node + sh ( + script: "echo NODE_NAME=${env.NODE_NAME}", + label: 'Show executor node info', + ) + retry(5) { + timeout(time: 5, unit: 'MINUTES') { + sh (script: 'git submodule update --init --recursive -f', label: 'Update git submodules') + } + } +} + +def should_skip_slow_tests(pr_number) { + withCredentials([string( + credentialsId: 'tvm-bot-jenkins-reader', + variable: 'GITHUB_TOKEN', + )]) { + // Exit code of 1 means run slow tests, exit code of 0 means skip slow tests + result = sh ( + returnStatus: true, + script: "./tests/scripts/should_run_slow_tests.py --pr '${pr_number}'", + label: 'Check if CI should run slow tests', + ) + } + return result == 0 +} + +def cancel_previous_build() { + // cancel previous build if it is not on main. + if (env.BRANCH_NAME != 'main') { + def buildNumber = env.BUILD_NUMBER as int + // Milestone API allows us to cancel previous build + // with the same milestone number + if (buildNumber > 1) milestone(buildNumber - 1) + milestone(buildNumber) + } +} + +def should_skip_ci(pr_number) { + withCredentials([string( + credentialsId: 'tvm-bot-jenkins-reader', + variable: 'TOKEN', + )]) { + // Exit code of 1 means run full CI (or the script had an error, so run + // full CI just in case). Exit code of 0 means skip CI. 
+ git_skip_ci_code = sh ( + returnStatus: true, + script: "./tests/scripts/git_skip_ci.py --pr '${pr_number}'", + label: 'Check if CI should be skipped', + ) + } + return git_skip_ci_code == 0 +} + +cancel_previous_build() + +def lint() { +stage('Prepare') { + node('CPU-SMALL') { + // When something is provided in ci_*_param, use it, otherwise default with ci_* + ci_lint = params.ci_lint_param ?: ci_lint + ci_cpu = params.ci_cpu_param ?: ci_cpu + ci_gpu = params.ci_gpu_param ?: ci_gpu + ci_wasm = params.ci_wasm_param ?: ci_wasm + ci_i386 = params.ci_i386_param ?: ci_i386 + ci_qemu = params.ci_qemu_param ?: ci_qemu + ci_arm = params.ci_arm_param ?: ci_arm + ci_hexagon = params.ci_hexagon_param ?: ci_hexagon + + sh (script: """ + echo "Docker images being used in this build:" + echo " ci_lint = ${ci_lint}" + echo " ci_cpu = ${ci_cpu}" + echo " ci_gpu = ${ci_gpu}" + echo " ci_wasm = ${ci_wasm}" + echo " ci_i386 = ${ci_i386}" + echo " ci_qemu = ${ci_qemu}" + echo " ci_arm = ${ci_arm}" + echo " ci_hexagon = ${ci_hexagon}" + """, label: 'Docker image names') + } +} + +stage('Sanity Check') { + timeout(time: max_time, unit: 'MINUTES') { + node('CPU') { + ws(per_exec_ws('tvm/sanity')) { + init_git() + is_docs_only_build = sh ( + returnStatus: true, + script: './tests/scripts/git_change_docs.sh', + label: 'Check for docs only changes', + ) + // skip_ci = should_skip_ci(env.CHANGE_ID) + // skip_slow_tests = should_skip_slow_tests(env.CHANGE_ID) + sh ( + script: "${docker_run} ${ci_lint} ./tests/scripts/mlc/task_mlc_lint_cleanup.sh", + label: 'Cleanup before linting', + ) + sh ( + script: "${docker_run} ${ci_lint} ./tests/scripts/task_lint.sh", + label: 'Run lint', + ) + sh ( + script: "${docker_run} ${ci_lint} ./tests/scripts/unity/task_extra_lint.sh", + label: 'Run extra lint', + ) + } + } + } +} +} + +lint() + +// Run make. First try to do an incremental make from a previous workspace in hope to +// accelerate the compilation. If something is wrong, clean the workspace and then +// build from scratch. +def make(docker_type, path, make_flag) { + timeout(time: max_time, unit: 'MINUTES') { + try { + cmake_build(docker_type, path, make_flag) + // always run cpp test when build + // sh "${docker_run} ${docker_type} ./tests/scripts/task_cpp_unittest.sh" + } catch (hudson.AbortException ae) { + // script exited due to user abort, directly throw instead of retry + if (ae.getMessage().contains('script returned exit code 143')) { + throw ae + } + echo 'Incremental compilation failed. Fall back to build from scratch' + sh ( + script: "${docker_run} ${docker_type} ./tests/scripts/task_clean.sh ${path}", + label: 'Clear old cmake workspace', + ) + cmake_build(docker_type, path, make_flag) + cpp_unittest(docker_type) + } + } +} + +// Specifications to Jenkins "stash" command for use with various pack_ and unpack_ functions. +tvm_runtime = 'build/libtvm_runtime.so, build/config.cmake' // use libtvm_runtime.so. +tvm_lib = 'build/libtvm.so, ' + tvm_runtime // use libtvm.so to run the full compiler. 
+// LLVM upstream lib +tvm_multilib = 'build/libtvm.so, ' + + 'build/libvta_fsim.so, ' + + tvm_runtime + +tvm_multilib_tsim = 'build/libvta_tsim.so, ' + + tvm_multilib + +microtvm_tar_gz = 'build/microtvm_template_projects.tar.gz' + +// pack libraries for later use +def pack_lib(name, libs) { + sh (script: """ + echo "Packing ${libs} into ${name}" + echo ${libs} | sed -e 's/,/ /g' | xargs md5sum + """, label: 'Stash libraries and show md5') + stash includes: libs, name: name +} + +// unpack libraries saved before +def unpack_lib(name, libs) { + unstash name + sh (script: """ + echo "Unpacked ${libs} from ${name}" + echo ${libs} | sed -e 's/,/ /g' | xargs md5sum + """, label: 'Unstash libraries and show md5') +} + +// compress microtvm template projects and pack the tar. +def pack_microtvm_template_projects(name) { + sh( + script: 'cd build && tar -czvf microtvm_template_projects.tar.gz microtvm_template_projects/', + label: 'Compress microtvm_template_projects' + ) + pack_lib(name + '-microtvm-libs', microtvm_tar_gz) +} + +def unpack_microtvm_template_projects(name) { + unpack_lib(name + '-microtvm-libs', microtvm_tar_gz) + sh( + script: 'cd build && tar -xzvf microtvm_template_projects.tar.gz', + label: 'Unpack microtvm_template_projects' + ) +} + +def ci_setup(image) { + sh ( + script: "${docker_run} ${image} ./tests/scripts/task_ci_setup.sh", + label: 'Set up CI environment', + ) +} + +def python_unittest(image) { + sh ( + script: "${docker_run} ${image} ./tests/scripts/task_python_unittest.sh", + label: 'Run Python unit tests', + ) +} + +def fsim_test(image) { + sh ( + script: "${docker_run} ${image} ./tests/scripts/task_python_vta_fsim.sh", + label: 'Run VTA tests in FSIM', + ) +} + +def cmake_build(image, path, make_flag) { + sh ( + script: "${docker_run} ${image} ./tests/scripts/mlc/task_mlc_build.sh", + label: 'Run cmake build', + ) +} + +def cpp_unittest(image) { + sh ( + script: "${docker_run} ${image} ./tests/scripts/task_cpp_unittest.sh", + label: 'Build and run C++ tests', + ) +} + +def add_hexagon_permissions() { + sh( + script: 'find build/hexagon_api_output -type f | xargs chmod +x', + label: 'Add execute permissions for hexagon files', + ) +} + +// NOTE: limit tests to relax folder for now to allow us to skip some of the tests +// that are mostly related to changes in main. +// This helps to speedup CI time and reduce CI cost. +stage('Build and Test') { + if (is_docs_only_build != 1) { + parallel 'BUILD: GPU': { + node('GPU') { + ws(per_exec_ws('tvm/build-gpu')) { + init_git() + sh "${docker_run} ${ci_gpu} nvidia-smi" + sh "${docker_run} ${ci_gpu} ./tests/scripts/task_config_build_gpu.sh build" + make("${ci_gpu}", 'build', '-j2') + sh "${docker_run} ${ci_gpu} ./tests/scripts/unity/task_python_relax_gpuonly.sh" + } + } + }, + 'BUILD: CPU': { + node('CPU') { + ws(per_exec_ws('tvm/build-cpu')) { + init_git() + sh "${docker_run} ${ci_cpu} ./tests/scripts/task_config_build_cpu.sh build" + make(ci_cpu, 'build', '-j2') + sh "${docker_run} ${ci_cpu} ./tests/scripts/unity/task_python_relax.sh" + } + } + } + } else { + Utils.markStageSkippedForConditional('BUILD: CPU') + } +} diff --git a/tests/scripts/mlc/task_mlc_build.sh b/tests/scripts/mlc/task_mlc_build.sh new file mode 100755 index 0000000000..c38832677c --- /dev/null +++ b/tests/scripts/mlc/task_mlc_build.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +set -euxo pipefail + +cd build +cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .. +make -j8 diff --git a/tests/scripts/mlc/task_mlc_lint_cleanup.sh b/tests/scripts/mlc/task_mlc_lint_cleanup.sh new file mode 100755 index 0000000000..a9cacb9805 --- /dev/null +++ b/tests/scripts/mlc/task_mlc_lint_cleanup.sh @@ -0,0 +1,22 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +set -euxo pipefail + +echo "Cleanup before linting..." +# Remove clang-format-index.locok +rm -f .git/clang-format-index.lock From 33de02f3b1179f8090e6ef0620402c9f9e07bf24 Mon Sep 17 00:00:00 2001 From: Ruihang Lai Date: Sat, 18 Feb 2023 11:27:55 -0500 Subject: [PATCH 02/10] [MLC][CI] Do not upstream - Win/Mac Building CI (#137) This PR adds CI for Windows and macOS building, which may take 90-100 mins. Co-authored-by: Siyuan Feng --- .github/workflows/mlc.yml | 98 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 .github/workflows/mlc.yml diff --git a/.github/workflows/mlc.yml b/.github/workflows/mlc.yml new file mode 100644 index 0000000000..d6bbfc2b76 --- /dev/null +++ b/.github/workflows/mlc.yml @@ -0,0 +1,98 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# GH actions. 
+# We use it to cover windows and mac builds +# Jenkins is still the primary CI + +name: CI + +on: + push: + branches: + - mlc + pull_request: + branches: + - mlc + workflow_dispatch: + +concurrency: + group: CI-${{ github.event.pull_request.number || github.sha }} + cancel-in-progress: true + +jobs: + MacOS: + if: ${{ github.repository == 'mlc-ai/relax' }} + runs-on: macOS-latest + steps: + - uses: actions/checkout@v2 + with: + submodules: 'recursive' + - name: Set up environment + uses: ./.github/actions/setup + - name: Conda Build + shell: bash -l {0} + run: >- + conda build --output-folder=conda/pkg conda/recipe && + conda install tvm -c ./conda/pkg + - name: Build iOS RPC + run: | + IOS_VERSION="14.0" + CMAKE_FLAGS="-DCMAKE_BUILD_TYPE=Release \ + -DCMAKE_SYSTEM_NAME=iOS \ + -DCMAKE_SYSTEM_VERSION=${IOS_VERSION} \ + -DCMAKE_OSX_SYSROOT=iphonesimulator \ + -DCMAKE_OSX_ARCHITECTURES=x86_64 \ + -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ + -DCMAKE_BUILD_WITH_INSTALL_NAME_DIR=ON \ + -DUSE_IOS_RPC=ON" + + mkdir build-ios-simulator + cd build-ios-simulator + cmake .. ${CMAKE_FLAGS} + cmake --build . --target ios_rpc + - name: Test + shell: bash -l {0} + run: >- + python -m pytest -v tests/python/all-platform-minimal-test + - name: Test iOS RPC + shell: bash -l {0} + run: >- + python -m pip install tornado psutil cloudpickle && + export PYTHONPATH=tests/python/contrib:${PYTHONPATH} && + export BUNDLE_ID=org.apache.tvmrpc && + export BUNDLE_PATH=build-ios-simulator/apps/ios_rpc/ios_rpc/src/ios_rpc-build/Release-iphonesimulator/tvmrpc.app && + python -m pytest -v tests/python/contrib/test_rpc_server_device.py + + Windows: + if: ${{ github.repository == 'mlc-ai/relax' }} + runs-on: windows-2019 + steps: + - uses: actions/checkout@v2 + with: + submodules: 'recursive' + - name: Set up environment + uses: ./.github/actions/setup + - name: Conda Build + shell: cmd /C call {0} + run: >- + conda build --output-folder=conda/pkg conda/recipe && + conda install tvm -c ./conda/pkg + - name: Test + shell: cmd /C call {0} + run: >- + python -m pytest -v tests/python/all-platform-minimal-test From 9eedccc5bbbe7e1fe2a7e72de9f599392c8fb967 Mon Sep 17 00:00:00 2001 From: Junru Shao Date: Sun, 17 Sep 2023 11:55:27 -0700 Subject: [PATCH 03/10] [CI] Add GitHub Action to Trigger Jenkins --- .github/workflows/github-command-test.yml | 54 +++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 .github/workflows/github-command-test.yml diff --git a/.github/workflows/github-command-test.yml b/.github/workflows/github-command-test.yml new file mode 100644 index 0000000000..b01f6db939 --- /dev/null +++ b/.github/workflows/github-command-test.yml @@ -0,0 +1,54 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+name: GitHub Command - \test + +on: + issue_comment: + types: [created] + +jobs: + run_command: + if: github.event.issue.pull_request && contains(github.event.comment.body, '\test') + runs-on: ubuntu-latest + steps: + - name: Get PR branch + uses: xt0rted/pull-request-comment-branch@v2 + id: comment-branch + - name: Set latest commit status as pending + uses: myrotvorets/set-commit-status-action@master + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + status: pending + - name: Checkout PR branch + uses: actions/checkout@v3 + - name: Trigger + env: + JENKINS_USER: junrushao + JENKINS_TOKEN: ${{ secrets.JENKINS_TOKEN }} + JENKINS_JOB: https://ci.mlc.ai/job/mlc/job/PR-${{ github.event.issue.number }} + run: | + set -euxo pipefail + BUILD_NUMBER=$(curl --fail -s -X GET $JENKINS_JOB/lastBuild/buildNumber) + curl --fail -X POST -u $JENKINS_USER:$JENKINS_TOKEN $JENKINS_JOB/$BUILD_NUMBER/input/1/proceedEmpty + - name: Set latest commit status as ${{ job.status }} + uses: myrotvorets/set-commit-status-action@master + if: always() + with: + sha: ${{ steps.comment-branch.outputs.head_sha }} + token: ${{ secrets.GITHUB_TOKEN }} + status: ${{ job.status }} From e13541ed4e62ff58c6a368ee5ae5cda3871efaad Mon Sep 17 00:00:00 2001 From: Ruihang Lai Date: Mon, 11 Dec 2023 13:45:55 -0500 Subject: [PATCH 04/10] [MLC][CI] Do not upstream - Skip MSC tests --- tests/scripts/unity/task_python_relax.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/scripts/unity/task_python_relax.sh b/tests/scripts/unity/task_python_relax.sh index 121ba1389a..6d6751aaa3 100755 --- a/tests/scripts/unity/task_python_relax.sh +++ b/tests/scripts/unity/task_python_relax.sh @@ -38,4 +38,4 @@ TVM_TEST_TARGETS="${TVM_RELAY_TEST_TARGETS:-llvm}" pytest tests/python/dlight # python3 ./apps/relax_examples/resnet.py # Test for MSC -pytest tests/python/contrib/test_msc +# pytest tests/python/contrib/test_msc From 8aacfc51277a568e3be2439d82fc7b8dae9df03d Mon Sep 17 00:00:00 2001 From: Lesheng Jin <34279105+LeshengJin@users.noreply.github.com> Date: Mon, 1 Jan 2024 23:42:38 +0800 Subject: [PATCH 05/10] [CMake] Add CMAKE_CUDA_ARCHITECTURES to CMakeLists.txt (#303) 1. Bump `3rdparty/cutlass_fpA_intB_gemm` 2. 
Set `CMAKE_CUDA_ARCHITECTURES=75;80;86;89;90` when it's not defined --- cmake/modules/CUDA.cmake | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/cmake/modules/CUDA.cmake b/cmake/modules/CUDA.cmake index 1284f85bec..1ebf4e2234 100644 --- a/cmake/modules/CUDA.cmake +++ b/cmake/modules/CUDA.cmake @@ -109,6 +109,30 @@ if(USE_CUDA) # Add CUDA builtins to RelaxVM tvm_file_glob(GLOB RELAX_VM_CUDA_BUILTIN_SRC_CC src/runtime/relax_vm/cuda/*.cc) list(APPEND RUNTIME_SRCS ${RELAX_VM_CUDA_BUILTIN_SRC_CC}) + + if(USE_CUTLASS) + if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc) + execute_process( + COMMAND ${CMAKE_CUDA_COMPILER} --version + OUTPUT_VARIABLE NVCC_VERSION_OUTPUT + ) + if(NVCC_VERSION_OUTPUT MATCHES "release ([^,]+),") + set(CUDA_VERSION "${CMAKE_MATCH_1}") + endif(NVCC_VERSION_OUTPUT MATCHES "release ([^,]+),") + message(STATUS "CUDA_VERSION=${CUDA_VERSION}") + + if(CUDA_VERSION VERSION_GREATER_EQUAL 11.8) + set(CMAKE_CUDA_ARCHITECTURES "75;80;86;89;90") + else(CUDA_VERSION VERSION_GREATER_EQUAL 11.8) + set(CMAKE_CUDA_ARCHITECTURES "75;80;86") + endif(CUDA_VERSION VERSION_GREATER_EQUAL 11.8) + message(STATUS "Set CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}") + else(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + message(STATUS "Found CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}") + endif(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) + endif(USE_CUTLASS) + else(USE_CUDA) list(APPEND COMPILER_SRCS src/target/opt/build_cuda_off.cc) endif(USE_CUDA) From 8358387ea3beb99f685a4a7f79c6205bb9eb3cb3 Mon Sep 17 00:00:00 2001 From: Ruihang Lai Date: Mon, 8 Jan 2024 14:22:16 -0500 Subject: [PATCH 06/10] Revert "[CMake][MSVC] Disable permissive mode for MSVC builds (#16343)" This reverts commit e3d031bc7cef6f61c287b1f642c0c928612c018c. --- CMakeLists.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index f7fd92e25a..f4e4c2fe01 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -168,11 +168,6 @@ if(MSVC) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /MP") add_compile_options(/bigobj) - # Use standard-conforming two-phase name resolution for templates. - # This minimizes the differences between g++/clang builds on Linux, - # and MSVC builds on Windows. - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /permissive-") - # MSVC already errors on undefined symbols, no additional flag needed. 
set(TVM_NO_UNDEFINED_SYMBOLS "") From 893d5f3d8fb1e763fc15b2d4c140069390b27d3d Mon Sep 17 00:00:00 2001 From: Ruihang Lai Date: Wed, 10 Jan 2024 19:06:05 -0500 Subject: [PATCH 07/10] [MLC][CI] Disable nnpack --- tests/scripts/task_config_build_cpu.sh | 2 +- tests/scripts/task_config_build_gpu.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/scripts/task_config_build_cpu.sh b/tests/scripts/task_config_build_cpu.sh index 0d6c0e2cae..b5c84d7aad 100755 --- a/tests/scripts/task_config_build_cpu.sh +++ b/tests/scripts/task_config_build_cpu.sh @@ -30,7 +30,7 @@ echo set\(USE_PROFILER ON\) >> config.cmake echo set\(USE_DNNL ON\) >> config.cmake echo set\(USE_ARM_COMPUTE_LIB ON\) >> config.cmake echo set\(USE_LLVM \"/usr/bin/llvm-config-15 --link-static\"\) >> config.cmake -echo set\(USE_NNPACK ON\) >> config.cmake +echo set\(USE_NNPACK OFF\) >> config.cmake echo set\(NNPACK_PATH /NNPACK/build/\) >> config.cmake echo set\(USE_ANTLR ON\) >> config.cmake echo set\(CMAKE_CXX_FLAGS \"-Werror -Wno-error=range-loop-construct\"\) >> config.cmake diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index 37ab0a87f1..8ec1763d67 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -33,7 +33,7 @@ echo set\(USE_OPENCL_GTEST \"/googletest\"\) >> config.cmake echo set\(USE_MICRO ON\) >> config.cmake echo set\(USE_MICRO_STANDALONE_RUNTIME ON\) >> config.cmake echo set\(USE_LLVM \"/usr/bin/llvm-config-15 --link-static\"\) >> config.cmake -echo set\(USE_NNPACK ON\) >> config.cmake +echo set\(USE_NNPACK OFF\) >> config.cmake echo set\(NNPACK_PATH /NNPACK/build/\) >> config.cmake echo set\(USE_RPC ON\) >> config.cmake echo set\(USE_SORT ON\) >> config.cmake From 3ffcf77a11a3456f0cad970165484ce969b369f9 Mon Sep 17 00:00:00 2001 From: Ruihang Lai Date: Wed, 10 Jan 2024 19:08:11 -0500 Subject: [PATCH 08/10] [MLC][CI] Set CMAKE_CUDA_ARCHITECTURES 75 --- tests/scripts/task_config_build_gpu.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/scripts/task_config_build_gpu.sh b/tests/scripts/task_config_build_gpu.sh index 8ec1763d67..6096169051 100755 --- a/tests/scripts/task_config_build_gpu.sh +++ b/tests/scripts/task_config_build_gpu.sh @@ -54,3 +54,4 @@ echo set\(USE_PIPELINE_EXECUTOR ON\) >> config.cmake echo set\(USE_CUTLASS ON\) >> config.cmake echo set\(USE_CMSISNN ON\) >> config.cmake echo set\(USE_MSC ON\) >> config.cmake +echo set\(CMAKE_CUDA_ARCHITECTURES 75\) >> config.cmake From 0b2a3517abd6d974bb0164422691e30da7b6436b Mon Sep 17 00:00:00 2001 From: Ruihang Lai Date: Wed, 10 Jan 2024 21:12:22 -0500 Subject: [PATCH 09/10] [MLC][CI] Disable TFLITE --- tests/scripts/task_config_build_cpu.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/scripts/task_config_build_cpu.sh b/tests/scripts/task_config_build_cpu.sh index b5c84d7aad..df5761b861 100755 --- a/tests/scripts/task_config_build_cpu.sh +++ b/tests/scripts/task_config_build_cpu.sh @@ -40,9 +40,9 @@ echo set\(HIDE_PRIVATE_SYMBOLS ON\) >> config.cmake # with the change in the way TFLite is built. 
It can be # removed once we migrate to TensorFlow and TFLite > 2.9.1 if [ -d "/opt/tflite" ]; then - echo set\(USE_TFLITE \"/opt/tflite\"\) >> config.cmake + echo set\(USE_TFLITE OFF\) >> config.cmake else - echo set\(USE_TFLITE ON\) >> config.cmake + echo set\(USE_TFLITE OFF\) >> config.cmake fi echo set\(USE_TENSORFLOW_PATH \"/tensorflow\"\) >> config.cmake From 71997ca185b4bbeada0df01fbbfb90045d743161 Mon Sep 17 00:00:00 2001 From: Ruihang Lai Date: Wed, 10 Jan 2024 22:06:50 -0500 Subject: [PATCH 10/10] Revert "[Unity] Add dlight.gpu.Fallback in DispatchSortScan, add argsort, topk, and cumprod (#16351)" This reverts commit e1d71b3720347ba566e5100a0dc7c4fc7fc054a5. --- include/tvm/relax/attrs/sort.h | 52 +++ include/tvm/relax/attrs/sorting.h | 99 ------ include/tvm/relax/attrs/statistical.h | 20 +- .../tvm/relax/backend/dispatch_sort_scan.py | 124 ++------ python/tvm/relax/op/__init__.py | 4 +- python/tvm/relax/op/op_attrs.py | 16 +- python/tvm/relax/op/sort.py | 45 +++ python/tvm/relax/op/sorting.py | 116 ------- python/tvm/relax/op/statistical.py | 72 +---- .../transform/legalize_ops/statistical.py | 11 +- python/tvm/script/ir_builder/relax/ir.py | 6 - src/relax/op/tensor/sort.cc | 56 ++++ src/relax/op/tensor/{sorting.h => sort.h} | 32 +- src/relax/op/tensor/sorting.cc | 155 --------- src/relax/op/tensor/statistical.cc | 55 +--- src/relax/op/tensor/statistical.h | 21 +- .../relax/test_backend_dispatch_sort_scan.py | 301 ++---------------- tests/python/relax/test_op_sort.py | 192 ----------- tests/python/relax/test_op_statistical.py | 53 ++- .../relax/test_tvmscript_parser_op_sort.py | 14 +- .../test_tvmscript_parser_op_statistical.py | 8 +- 21 files changed, 274 insertions(+), 1178 deletions(-) create mode 100644 include/tvm/relax/attrs/sort.h delete mode 100644 include/tvm/relax/attrs/sorting.h create mode 100644 python/tvm/relax/op/sort.py delete mode 100644 python/tvm/relax/op/sorting.py create mode 100644 src/relax/op/tensor/sort.cc rename src/relax/op/tensor/{sorting.h => sort.h} (55%) delete mode 100644 src/relax/op/tensor/sorting.cc diff --git a/include/tvm/relax/attrs/sort.h b/include/tvm/relax/attrs/sort.h new file mode 100644 index 0000000000..fc0c4e7189 --- /dev/null +++ b/include/tvm/relax/attrs/sort.h @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file tvm/relax/attrs/sort.h + * \brief Attributes for sorting operators. + */ +#ifndef TVM_RELAX_ATTRS_SORT_H_ +#define TVM_RELAX_ATTRS_SORT_H_ + +#include +#include + +namespace tvm { +namespace relax { + +/*! 
\brief Attributes used in sort operator */ +struct SortAttrs : public tvm::AttrsNode { + int axis; + bool descending; + + TVM_DECLARE_ATTRS(SortAttrs, "relax.attrs.SortAttrs") { + TVM_ATTR_FIELD(axis).set_default(-1).describe( + "Axis along which the sort is computed." + "The default the last axis is used."); + TVM_ATTR_FIELD(descending) + .set_default(false) + .describe( + "Whether to sort in descending order." + "If it is not specified, it defaults to the ascending order."); + } +}; // struct SortAttrs +} // namespace relax +} // namespace tvm + +#endif // TVM_RELAX_ATTRS_SORT_H_ diff --git a/include/tvm/relax/attrs/sorting.h b/include/tvm/relax/attrs/sorting.h deleted file mode 100644 index 4daf7a45b2..0000000000 --- a/include/tvm/relax/attrs/sorting.h +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file tvm/relax/attrs/sorting.h - * \brief Attributes for sorting operators. - */ -#ifndef TVM_RELAX_ATTRS_SORTING_H_ -#define TVM_RELAX_ATTRS_SORTING_H_ - -#include -#include - -namespace tvm { -namespace relax { - -/*! \brief Attributes used in sort operator */ -struct SortAttrs : public tvm::AttrsNode { - int axis; - bool descending; - - TVM_DECLARE_ATTRS(SortAttrs, "relax.attrs.SortAttrs") { - TVM_ATTR_FIELD(axis).set_default(-1).describe( - "Axis along which the sort is computed." - "The default the last axis is used."); - TVM_ATTR_FIELD(descending) - .set_default(false) - .describe( - "Whether to sort in descending order." - "If it is not specified, it defaults to the ascending order."); - } -}; // struct SortAttrs - -/*! \brief Attributes used in argsort operator */ -struct ArgsortAttrs : public tvm::AttrsNode { - int axis; - bool descending; - DataType dtype; - - TVM_DECLARE_ATTRS(ArgsortAttrs, "relax.attrs.ArgsortAttrs") { - TVM_ATTR_FIELD(axis).set_default(-1).describe( - "Axis along which the argsort is computed." - "The default the last axis is used."); - TVM_ATTR_FIELD(descending) - .set_default(false) - .describe( - "Whether to argsort in descending order." - "If it is not specified, it defaults to the ascending order."); - TVM_ATTR_FIELD(dtype) - .set_default(NullValue()) - .describe("DType of the output indices."); - } -}; // struct ArgsortAttrs - -/*! \brief Attributes used in topk operator */ -struct TopKAttrs : public tvm::AttrsNode { - int k; - int axis; - bool largest; - String ret_type; - DataType dtype; - - TVM_DECLARE_ATTRS(TopKAttrs, "relax.attrs.TopKAttrs") { - TVM_ATTR_FIELD(k).describe("Number of top elements to select"); - TVM_ATTR_FIELD(axis).set_default(-1).describe("Axis along which to sort the input tensor."); - TVM_ATTR_FIELD(ret_type).set_default("both").describe( - "The return type [both, values, indices]." - "both - return both top k data and indices." 
- "values - return top k data only." - "indices - return top k indices only."); - TVM_ATTR_FIELD(largest).set_default(true).describe( - "Whether to return largest or smallest elements." - "By default, return the largest k elements."); - TVM_ATTR_FIELD(dtype) - .set_default(NullValue()) - .describe("Data type of the output indices."); - } -}; // struct TopKAttrs - -} // namespace relax -} // namespace tvm - -#endif // TVM_RELAX_ATTRS_SORTING_H_ diff --git a/include/tvm/relax/attrs/statistical.h b/include/tvm/relax/attrs/statistical.h index 9f9a2fa870..d5d8d982b5 100644 --- a/include/tvm/relax/attrs/statistical.h +++ b/include/tvm/relax/attrs/statistical.h @@ -42,24 +42,20 @@ struct StatisticalAttrs : public tvm::AttrsNode { } }; // struct StatisticalAttrs -/*! \brief Attributes used in scan operators like cumsum, cumprod */ -struct ScanopAttrs : public tvm::AttrsNode { +/*! \brief Attributes used in cumsum operators */ +struct CumsumAttrs : public tvm::AttrsNode { Optional axis; DataType dtype; - Bool exclusive = Bool(false); - TVM_DECLARE_ATTRS(ScanopAttrs, "relax.attrs.ScanopAttrs") { + TVM_DECLARE_ATTRS(CumsumAttrs, "relax.attrs.CumsumAttrs") { TVM_ATTR_FIELD(axis).describe( - "The axis along which to perform the scan computation." - "The default (None) is to compute over the flattened array."); + "Axis along which the cumulative sum is computed." + "The default (None) is to compute the cumsum over the flattened array."); TVM_ATTR_FIELD(dtype).describe( - "The output data type." - "If dtype is not specified, it defaults to the dtype of input data."); - TVM_ATTR_FIELD(exclusive) - .describe("The first element is not included") - .set_default(Bool(false)); + "Type of the returned array and of the accumulator in which the elements are summed." + "If dtype is not specified, it defaults to the dtype of data."); } -}; // struct ScanopAttrs +}; // struct CumsumAttrs } // namespace relax } // namespace tvm diff --git a/python/tvm/relax/backend/dispatch_sort_scan.py b/python/tvm/relax/backend/dispatch_sort_scan.py index bb3f57ce96..f0f1aa9063 100644 --- a/python/tvm/relax/backend/dispatch_sort_scan.py +++ b/python/tvm/relax/backend/dispatch_sort_scan.py @@ -17,13 +17,13 @@ # pylint: disable=invalid-name, unused-argument, redefined-argument-from-local """Dispatch sort and scan operators to platform dependent implementation.""" -from tvm import topi, dlight, relax +from tvm import topi from tvm.ir import Op from tvm.ir.module import IRModule from tvm.ir.transform import PassContext, module_pass from tvm.target import Target from tvm.contrib.thrust import can_use_thrust -from tvm.relax import PyExprMutator, expr_functor +from tvm.relax import Expr, Function, Call, PyExprMutator, expr_functor, TensorStructInfo @expr_functor.mutator @@ -36,17 +36,13 @@ class SortScanDispatcher(PyExprMutator): def __init__(self, mod): super().__init__(mod) - def _get_target(self, sinfo: relax.StructInfo) -> Target: + def _get_target(self, expr: Expr) -> Target: + sinfo = expr.struct_info # Get target information from TensorStructInfo - if isinstance(sinfo, relax.TensorStructInfo): + if isinstance(sinfo, TensorStructInfo): vdevice = sinfo.vdevice if vdevice is not None: return vdevice.target - elif isinstance(sinfo, relax.TupleStructInfo): - for f in sinfo.fields: - tgt = self._get_target(f) - if tgt != Target.current(): - return tgt # Return the target in current context target = Target.current() if target is None: @@ -56,94 +52,38 @@ def _get_target(self, sinfo: relax.StructInfo) -> Target: ) return target - def 
_apply_dlight_gpu_fallback(self, target: Target, tir_call: relax.Call) -> None: - # Apply dlight.gpu.Fallback() on GPU - gvar = tir_call.args[0] - assert isinstance(gvar, relax.GlobalVar) - scan_prim_func = self.builder_.get()[gvar] - sch = dlight.base.transform._apply_rules( - scan_prim_func, - target, - [ - dlight.gpu.Fallback(), - ], - False, - ) - if sch is not None: - assert len(sch) == 1 - self.builder_.update_func(gvar, sch[0].mod["main"].with_attr("tir.is_scheduled", 1)) - - def visit_call_(self, call: relax.Call) -> relax.Expr: + def visit_call_(self, call: Call) -> Expr: if not isinstance(call.op, Op): return super().visit_call_(call) if call.op.name == "relax.sort": - tgt = self._get_target(call.struct_info) - te_func = topi.sort + tgt = self._get_target(call) with tgt: if can_use_thrust(tgt, "tvm.contrib.thrust.sort"): - te_func = topi.cuda.sort_thrust - elif tgt.kind.name == "cuda": - te_func = topi.cuda.sort - return self.builder_.call_te( - te_func, - call.args[0], - call.attrs.axis, - not call.attrs.descending, - ) - if call.op.name == "relax.argsort": - tgt = self._get_target(call.struct_info) - te_func = topi.argsort - with tgt: - if can_use_thrust(tgt, "tvm.contrib.thrust.sort"): - te_func = topi.cuda.argsort_thrust - elif tgt.kind.name == "cuda": - te_func = topi.cuda.argsort - return self.builder_.call_te( - te_func, - call.args[0], - axis=call.attrs.axis, - is_ascend=not call.attrs.descending, - dtype=call.attrs.dtype, - ) - if call.op.name == "relax.topk": - tgt = self._get_target(call.struct_info) - te_func = topi.topk - if can_use_thrust(tgt, "tvm.contrib.thrust.sort"): - te_func = topi.cuda.topk_thrust - elif tgt.kind.name == "cuda": - te_func = topi.cuda.topk - tir_call = self.builder_.call_te( - te_func, - call.args[0], - axis=call.attrs.axis, - ret_type=call.attrs.ret_type, - is_ascend=not call.attrs.largest, - dtype=call.attrs.dtype, - ) - if tgt.kind.name != "cuda": - return tir_call - # apply dlight gpu fallback - self._apply_dlight_gpu_fallback(tgt, tir_call) - return tir_call - if call.op.name in ("relax.cumprod", "relax.cumsum"): - tgt = self._get_target(call.struct_info) + return self.builder_.call_te( + topi.cuda.sort_thrust, + call.args[0], + call.attrs.axis, + not call.attrs.descending, + ) + return self.builder_.call_te( + topi.cuda.sort if tgt.kind.name == "cuda" else topi.sort, + call.args[0], + call.attrs.axis, + not call.attrs.descending, + ) + + if call.op.name == "relax.cumsum": + tgt = self._get_target(call) axis = int(call.attrs.axis) if call.attrs.axis is not None else call.attrs.axis - te_func = topi.cuda.cumsum if tgt.kind.name == "cuda" else topi.cumsum - if call.op.name == "relax.cumprod": - te_func = topi.cuda.cumprod if tgt.kind.name == "cuda" else topi.cumprod - tir_call = self.builder_.call_te( - te_func, - call.args[0], - axis, - call.attrs.dtype, - call.attrs.exclusive, - ) - if tgt.kind.name != "cuda": - return tir_call - # apply dlight gpu fallback - self._apply_dlight_gpu_fallback(tgt, tir_call) - return tir_call + with tgt: + return self.builder_.call_te( + topi.cuda.cumsum if tgt.kind.name == "cuda" else topi.cumsum, + call.args[0], + axis, + call.attrs.dtype, + ) + return super().visit_call_(call) @@ -156,7 +96,7 @@ class DispatchSortScan: def transform_module(self, mod: IRModule, ctx: PassContext) -> IRModule: sort_scan_dispater = SortScanDispatcher(mod) for gv, func in mod.functions_items(): - if isinstance(func, relax.Function): + if isinstance(func, Function): func = sort_scan_dispater.visit_expr(func) 
sort_scan_dispater.builder_.update_func(gv, func) - return sort_scan_dispater.builder_.finalize() + return sort_scan_dispater.builder_.get() diff --git a/python/tvm/relax/op/__init__.py b/python/tvm/relax/op/__init__.py index 5b585e18b4..085761f15d 100644 --- a/python/tvm/relax/op/__init__.py +++ b/python/tvm/relax/op/__init__.py @@ -99,8 +99,8 @@ from .qdq import quantize, dequantize from .search import argmax, argmin, where from .set import unique -from .sorting import sort, argsort, topk -from .statistical import cumsum, cumprod, max, mean, min, prod, std, sum, variance +from .sort import sort +from .statistical import cumsum, max, mean, min, prod, std, sum, variance from .ternary import ewise_fma from .unary import ( abs, diff --git a/python/tvm/relax/op/op_attrs.py b/python/tvm/relax/op/op_attrs.py index a3d46428c5..4dbbc17cf2 100644 --- a/python/tvm/relax/op/op_attrs.py +++ b/python/tvm/relax/op/op_attrs.py @@ -119,11 +119,6 @@ class SortAttrs(Attrs): """Attributes for sort operator""" -@tvm._ffi.register_object("relax.attrs.ArgsortAttrs") -class ArgsortAttrs(Attrs): - """Attributes for argsort operator""" - - @tvm._ffi.register_object("relax.attrs.SplitAttrs") class SplitAttrs(Attrs): """Attributes used in split operator""" @@ -159,14 +154,9 @@ class TileAttrs(Attrs): """Attributes for tile operator""" -@tvm._ffi.register_object("relax.attrs.ScanopAttrs") -class ScanopAttrs(Attrs): - """Attributes for scan operators""" - - -@tvm._ffi.register_object("relax.attrs.TopKAttrs") -class TopKAttrs(Attrs): - """Attributes for topk operators""" +@tvm._ffi.register_object("relax.attrs.CumsumAttrs") +class CumsumAttrs(Attrs): + """Attributes for cumsum operator""" @tvm._ffi.register_object("relax.attrs.EinsumAttrs") diff --git a/python/tvm/relax/op/sort.py b/python/tvm/relax/op/sort.py new file mode 100644 index 0000000000..b139eefcdf --- /dev/null +++ b/python/tvm/relax/op/sort.py @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +"""Sortings operators.""" + +from . import _ffi_api +from ..expr import Expr + + +def sort(x: Expr, axis: int = -1, descending: bool = False): + """Performs sorting along the given axis and returns an array + in sorted order. + + Parameters + ---------- + x : relax.Expr + The input tensor. + + axis : int + Axis along which to sort the input tensor. + By default the last axis of the input is used. + + descending : bool + Whether to sort in descending order, the default is False + + Returns + ------- + out : relax.Expr + Sorted tensor. 
+ + """ + return _ffi_api.sort(x, axis, descending) # type: ignore diff --git a/python/tvm/relax/op/sorting.py b/python/tvm/relax/op/sorting.py deleted file mode 100644 index 13937933c4..0000000000 --- a/python/tvm/relax/op/sorting.py +++ /dev/null @@ -1,116 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -"""Sortings operators.""" -from . import _ffi_api -from ..expr import Expr, Constant - - -def sort(x: Expr, axis: int = -1, descending: bool = False): - """Performs sorting along the given axis and returns an array - in sorted order. - - Parameters - ---------- - x : relax.Expr - The input tensor. - - axis : int - Axis along which to sort the input tensor. - By default the last axis of the input is used. - - descending : bool - Whether to sort in descending order, the default is False - - Returns - ------- - out : relax.Expr - Sorted tensor. - - """ - return _ffi_api.sort(x, axis, descending) # type: ignore - - -def argsort(data: Expr, axis: int = -1, descending: bool = False, dtype: str = "int32"): - """Performs sorting along the given axis and returns an array of indices - having same shape as an input array that index data in sorted order. - - Parameters - ---------- - data : relax.Expr - The input data tensor. - - axis : int - Axis long which to sort the input tensor. - - descending : bool - Whether to sort in descending order, the default is False - - dtype : str - The data type of the output indices. - - Returns - ------- - out : relax.Expr - Tensor with same shape as data. - """ - return _ffi_api.argsort(data, axis, descending, dtype) # type: ignore - - -def topk( - data: Expr, - k: int = 1, - axis: int = -1, - ret_type: str = "both", - largest: bool = True, - dtype: str = "int32", -): - """Get the top k elements in an input tensor along the given axis. - - ret_type specifies the return type, can be one of ("both", "values", "indices"). - - Parameters - ---------- - data : relax.Expr - The input data tensor. - - k : int - Number of top elements to select. Return all elements if k < 1. - - axis : int - Axis long which to sort the input tensor. - - ret_type: str - The return type [both, values, indices]. - "both": return both top k data and indices. - "values": return top k data only. - "indices": return top k indices only. - - largest : bool - Whether to return largest or smallest elements. - The k smallest elements are returned if largest is False. - - dtype : str - The data type of the indices output. - - Returns - ------- - out : relax.Expr or List[relax.Expr] - The computed result. 
- """ - if isinstance(k, Constant): - k = k.data.numpy().item() - return _ffi_api.topk(data, k, axis, ret_type, largest, dtype) # type: ignore diff --git a/python/tvm/relax/op/statistical.py b/python/tvm/relax/op/statistical.py index eb44696871..f187f9d456 100644 --- a/python/tvm/relax/op/statistical.py +++ b/python/tvm/relax/op/statistical.py @@ -191,71 +191,7 @@ def sum(x: Expr, axis: Optional[Union[int, List[int]]] = None, keepdims: bool = return _ffi_api.sum(x, axis, keepdims) # type: ignore -def cumprod( - data: Expr, - axis: Optional[int] = None, - dtype: Optional[Union[str, DataType]] = None, - exclusive: Optional[bool] = None, -): - """Numpy style cumprod op. Return the cumulative product of the elements along - a given axis. - - Parameters - ---------- - data : relax.Expr - The input data to the operator. - - axis : Optional[int] - Axis along which the cumulative product is computed. The default (None) is to compute - the cumprod over the flattened array. - - dtype : Optional[Union[str, DataType]] - Type of the returned array and of the accumulator in which the elements are computed. - If dtype is not specified, it defaults to the dtype of data. - - exclusive : Optional[bool] - If true will return exclusive sum in which the first element is not - included. - - Returns - ------- - result : relax.Expr - The result has the same size as data, and the same shape as data if axis is not None. - If axis is None, the result is a 1-d array. - - Examples - -------- - .. code-block:: python - - a = [[1, 2, 3], [4, 5, 6]] - - cumprod(a) # if axis is not provided, cumprod is done over the flattened input. - -> [ 1, 2, 6, 24, 120, 720] - - cumprod(a, dtype="float32") - -> [ 1., 2., 6., 24., 120., 720.] - - cumprod(a, axis=0) # multiply over rows for each of the 3 columns - -> [[1, 2, 3], - [4, 10, 18]] - - cumprod(a, axis=1) - -> [[ 1, 2, 6], - [ 4, 20, 120]] - - a = [1, 1, 1, 0, 1, 1, 0] # a is a boolean array - cumprod(a, dtype=int32) # dtype should be provided to get the expected results - -> [1, 1, 1, 0, 0, 0, 0] - """ - return _ffi_api.cumprod(data, axis, dtype, exclusive) # type: ignore - - -def cumsum( - data: Expr, - axis: Optional[int] = None, - dtype: Optional[Union[str, DataType]] = None, - exclusive: Optional[bool] = None, -): +def cumsum(data: Expr, axis: Optional[int] = None, dtype: Optional[Union[str, DataType]] = None): """Numpy style cumsum op. Return the cumulative inclusive sum of the elements along a given axis. @@ -272,10 +208,6 @@ def cumsum( Type of the returned array and of the accumulator in which the elements are summed. If dtype is not specified, it defaults to the dtype of data. - exclusive : Optional[bool] - If true will return exclusive sum in which the first element is not - included. 
- Returns ------- result : relax.Expr @@ -306,7 +238,7 @@ def cumsum( cumsum(a, dtype=int32) # dtype should be provided to get the expected results -> [1, 1, 2, 2, 3, 4, 4] """ - return _ffi_api.cumsum(data, axis, dtype, exclusive) # type: ignore + return _ffi_api.cumsum(data, axis, dtype) # type: ignore def variance(x: Expr, axis: Optional[Union[int, List[int]]] = None, keepdims: bool = False) -> Expr: diff --git a/python/tvm/relax/transform/legalize_ops/statistical.py b/python/tvm/relax/transform/legalize_ops/statistical.py index 1181b3b2a7..e1f273bda0 100644 --- a/python/tvm/relax/transform/legalize_ops/statistical.py +++ b/python/tvm/relax/transform/legalize_ops/statistical.py @@ -89,13 +89,4 @@ def _variance(bb: BlockBuilder, call: Call) -> Expr: @register_legalize("relax.cumsum") def _cumsum(bb: BlockBuilder, call: Call) -> Expr: - return bb.call_te( - topi.cumsum, call.args[0], call.attrs.axis, call.attrs.dtype, call.attrs.exclusive - ) - - -@register_legalize("relax.cumprod") -def _cumprod(bb: BlockBuilder, call: Call) -> Expr: - return bb.call_te( - topi.cumprod, call.args[0], call.attrs.axis, call.attrs.dtype, call.attrs.exclusive - ) + return bb.call_te(topi.cumsum, call.args[0], call.attrs.axis, call.attrs.dtype) diff --git a/python/tvm/script/ir_builder/relax/ir.py b/python/tvm/script/ir_builder/relax/ir.py index 9105fce00f..7c0be2a722 100644 --- a/python/tvm/script/ir_builder/relax/ir.py +++ b/python/tvm/script/ir_builder/relax/ir.py @@ -43,7 +43,6 @@ arange, argmax, argmin, - argsort, assert_op, astype, bitwise_and, @@ -66,7 +65,6 @@ concat, cos, cosh, - cumprod, cumsum, einsum, scatter_elements, @@ -147,7 +145,6 @@ tanh, erf, tile, - topk, tril, triu, unique, @@ -647,7 +644,6 @@ def dtype(value: Union[py_str, DataType]) -> Expr: "arg", "argmax", "argmin", - "argsort", "assert_op", "astype", "bitwise_and", @@ -674,7 +670,6 @@ def dtype(value: Union[py_str, DataType]) -> Expr: "const", "cpu", "cuda", - "cumprod", "cumsum", "einsum", "scatter_elements", @@ -778,7 +773,6 @@ def dtype(value: Union[py_str, DataType]) -> Expr: "tan", "tanh", "tile", - "topk", "to_vdevice", "tril", "triu", diff --git a/src/relax/op/tensor/sort.cc b/src/relax/op/tensor/sort.cc new file mode 100644 index 0000000000..31de102ccf --- /dev/null +++ b/src/relax/op/tensor/sort.cc @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file sort.cc + * \brief sorting operators. 
+ */ + +#include "sort.h" + +namespace tvm { +namespace relax { + +/* relax.sort */ +TVM_REGISTER_NODE_TYPE(SortAttrs); + +Expr sort(Expr data, int axis, bool descending) { + auto attrs = make_object(); + attrs->axis = std::move(axis); + attrs->descending = std::move(descending); + + static const Op& op = Op::Get("relax.sort"); + return Call(op, {std::move(data)}, Attrs{attrs}, {}); +} + +TVM_REGISTER_GLOBAL("relax.op.sort").set_body_typed(sort); + +StructInfo InferStructInfoSort(const Call& call, const BlockBuilder& ctx) { + return GetUnaryInputTensorStructInfo(call, ctx); +} + +TVM_REGISTER_OP("relax.sort") + .set_attrs_type() + .set_num_inputs(1) + .add_argument("data", "Tensor", "The input tensor.") + .set_attr("FInferStructInfo", InferStructInfoSort) + .set_attr("FPurity", Bool(true)); + +} // namespace relax +} // namespace tvm diff --git a/src/relax/op/tensor/sorting.h b/src/relax/op/tensor/sort.h similarity index 55% rename from src/relax/op/tensor/sorting.h rename to src/relax/op/tensor/sort.h index 8a785bc4e2..92203034aa 100644 --- a/src/relax/op/tensor/sorting.h +++ b/src/relax/op/tensor/sort.h @@ -18,13 +18,13 @@ */ /*! - * \file sorting.h + * \file sort.h * \brief The functions to make Relax tensor sorting operator calls. */ -#ifndef TVM_RELAX_OP_TENSOR_SORTING_H_ -#define TVM_RELAX_OP_TENSOR_SORTING_H_ +#ifndef TVM_RELAX_OP_TENSOR_SORT_H_ +#define TVM_RELAX_OP_TENSOR_SORT_H_ -#include +#include #include #include @@ -43,29 +43,7 @@ namespace relax { */ Expr sort(Expr data, int axis, bool descending); -/*! - * \brief Performs sorting along the given axis and returns an array of indices. - * \param data The input tensor. - * \param axis The axis to sort on. - * \param descending Whether to sort in descending order. - * \param dtype The data type of the output indices. - * \return The computed result. - */ -Expr argsort(Expr data, int axis, bool descending, DataType dtype); - -/*! - * \brief Get the top k elements in an input tensor along the given axis. - * \param data The input tensor. - * \param k Number of top elements. - * \param axis The axis to sort on. - * \param ret_type The return type, can be set to one of [both, values, indices]. - * \param largest Whether to return largest or smallest elements. - * \param dtype The data type of the indices output. - * \return The computed result. - */ -Expr topk(Expr data, int k, int axis, String ret_type, bool largest, DataType dtype); - } // namespace relax } // namespace tvm -#endif // TVM_RELAX_OP_TENSOR_SORTING_H_ +#endif // TVM_RELAX_OP_TENSOR_SORT_H_ diff --git a/src/relax/op/tensor/sorting.cc b/src/relax/op/tensor/sorting.cc deleted file mode 100644 index c4c4c5a614..0000000000 --- a/src/relax/op/tensor/sorting.cc +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -/*! - * \file sorting.cc - * \brief sorting operators. - */ - -#include "sorting.h" - -#include - -namespace tvm { -namespace relax { - -/* relax.sort */ -TVM_REGISTER_NODE_TYPE(SortAttrs); - -Expr sort(Expr data, int axis, bool descending) { - auto attrs = make_object(); - attrs->axis = std::move(axis); - attrs->descending = std::move(descending); - - static const Op& op = Op::Get("relax.sort"); - return Call(op, {std::move(data)}, Attrs{attrs}, {}); -} - -TVM_REGISTER_GLOBAL("relax.op.sort").set_body_typed(sort); - -StructInfo InferStructInfoSort(const Call& call, const BlockBuilder& ctx) { - return GetUnaryInputTensorStructInfo(call, ctx); -} - -TVM_REGISTER_OP("relax.sort") - .set_attrs_type() - .set_num_inputs(1) - .add_argument("data", "Tensor", "The input tensor.") - .set_attr("FInferStructInfo", InferStructInfoSort) - .set_attr("FPurity", Bool(true)); - -/* relax.argsort */ -TVM_REGISTER_NODE_TYPE(ArgsortAttrs); - -Expr argsort(Expr data, int axis, bool descending, DataType dtype) { - auto attrs = make_object(); - attrs->axis = std::move(axis); - attrs->descending = std::move(descending); - attrs->dtype = std::move(dtype); - - static const Op& op = Op::Get("relax.argsort"); - return Call(op, {std::move(data)}, Attrs{attrs}, {}); -} - -TVM_REGISTER_GLOBAL("relax.op.argsort").set_body_typed(argsort); - -StructInfo InferStructInfoArgsort(const Call& call, const BlockBuilder& ctx) { - TensorStructInfo data_sinfo = GetUnaryInputTensorStructInfo(call, ctx); - const auto* attrs = call->attrs.as(); - DataType out_type = attrs->dtype.is_void() ? data_sinfo->dtype : attrs->dtype; - if (data_sinfo->shape.defined()) { - return TensorStructInfo(data_sinfo->shape.value(), out_type, data_sinfo->vdevice); - } - return TensorStructInfo(out_type, data_sinfo->ndim, data_sinfo->vdevice); -} - -TVM_REGISTER_OP("relax.argsort") - .set_attrs_type() - .set_num_inputs(1) - .add_argument("data", "Tensor", "The input tensor.") - .set_attr("FInferStructInfo", InferStructInfoArgsort) - .set_attr("FPurity", Bool(true)); - -/* relax.topk */ -TVM_REGISTER_NODE_TYPE(TopKAttrs); - -Expr topk(Expr data, int k, int axis, String ret_type, bool largest, DataType dtype) { - auto attrs = make_object(); - attrs->k = std::move(k); - attrs->axis = std::move(axis); - attrs->ret_type = std::move(ret_type); - attrs->largest = std::move(largest); - attrs->dtype = std::move(dtype); - - static const Op& op = Op::Get("relax.topk"); - return Call(op, {std::move(data)}, Attrs{attrs}, {}); -} - -TVM_REGISTER_GLOBAL("relax.op.topk").set_body_typed(topk); - -StructInfo InferStructInfoTopK(const Call& call, const BlockBuilder& ctx) { - TensorStructInfo data_sinfo = GetUnaryInputTensorStructInfo(call, ctx); - const auto* data_shape = data_sinfo->shape.as(); - const auto* attrs = call->attrs.as(); - DataType indices_type = attrs->dtype.is_void() ? 
data_sinfo->dtype : attrs->dtype;
-  int ndim = data_sinfo->ndim;
-  int k = attrs->k;
-  String ret_type = attrs->ret_type;
-  int axis = attrs->axis;
-  if (axis < 0 && ndim > 0) {
-    axis += ndim;
-  }
-
-  std::vector<StructInfo> output_sinfos;
-  output_sinfos.reserve(2);
-  if (data_shape == nullptr) {
-    output_sinfos.push_back(
-        TensorStructInfo(data_sinfo->dtype, data_sinfo->ndim, data_sinfo->vdevice));
-    output_sinfos.push_back(TensorStructInfo(indices_type, data_sinfo->ndim, data_sinfo->vdevice));
-  } else {
-    Array<PrimExpr> out_shape = data_shape->values;
-    const auto* int_dim = out_shape[axis].as<IntImmNode>();
-    if (k > 0 && (int_dim == nullptr || k < int_dim->value)) {
-      out_shape.Set(axis, k);
-    }
-    output_sinfos.push_back(
-        TensorStructInfo(ShapeExpr(out_shape), data_sinfo->dtype, data_sinfo->vdevice));
-    output_sinfos.push_back(
-        TensorStructInfo(ShapeExpr(out_shape), indices_type, data_sinfo->vdevice));
-  }
-
-  if (ret_type == "both") {
-    return TupleStructInfo(output_sinfos);
-  } else if (ret_type == "values") {
-    return output_sinfos[0];
-  } else if (ret_type == "indices") {
-    return output_sinfos[1];
-  }
-  LOG(FATAL) << "Unsupported ret type: " << ret_type;
-}
-
-TVM_REGISTER_OP("relax.topk")
-    .set_attrs_type<TopKAttrs>()
-    .set_num_inputs(1)
-    .add_argument("data", "Tensor", "The input tensor.")
-    .set_attr<FInferStructInfo>("FInferStructInfo", InferStructInfoTopK)
-    .set_attr<Bool>("FPurity", Bool(true));
-
-} // namespace relax
-} // namespace tvm
diff --git a/src/relax/op/tensor/statistical.cc b/src/relax/op/tensor/statistical.cc
index 24ccde4559..b861aafe21 100644
--- a/src/relax/op/tensor/statistical.cc
+++ b/src/relax/op/tensor/statistical.cc
@@ -135,11 +135,23 @@ InferLayoutOutput InferLayoutStatistical(const Call& call,
                            Attrs(new_attrs));
 }
 
-TVM_REGISTER_NODE_TYPE(ScanopAttrs);
+/* relax.cumsum */
+TVM_REGISTER_NODE_TYPE(CumsumAttrs);
+
+Expr cumsum(Expr data, Optional<Integer> axis, DataType dtype) {
+  auto attrs = make_object<CumsumAttrs>();
+  attrs->axis = std::move(axis);
+  attrs->dtype = std::move(dtype);
+
+  static const Op& op = Op::Get("relax.cumsum");
+  return Call(op, {std::move(data)}, Attrs{attrs}, {});
+}
 
-StructInfo InferStructInfoScan(const Call& call, const BlockBuilder& ctx) {
+TVM_REGISTER_GLOBAL("relax.op.cumsum").set_body_typed(cumsum);
+
+StructInfo InferStructInfoCumsum(const Call& call, const BlockBuilder& ctx) {
   TensorStructInfo data_sinfo = GetUnaryInputTensorStructInfo(call, ctx);
-  const auto* attrs = call->attrs.as<ScanopAttrs>();
+  const auto* attrs = call->attrs.as<CumsumAttrs>();
   DataType out_type = attrs->dtype.is_void() ?
data_sinfo->dtype : attrs->dtype;
@@ -165,44 +177,11 @@ StructInfo InferStructInfoScan(const Call& call, const BlockBuilder& ctx) {
   }
 }
 
-/* relax.cumprod */
-Expr cumprod(Expr data, Optional<Integer> axis, DataType dtype, Bool exclusive) {
-  auto attrs = make_object<ScanopAttrs>();
-  attrs->axis = std::move(axis);
-  attrs->dtype = std::move(dtype);
-  attrs->exclusive = std::move(exclusive);
-
-  static const Op& op = Op::Get("relax.cumprod");
-  return Call(op, {std::move(data)}, Attrs{attrs}, {});
-}
-
-TVM_REGISTER_GLOBAL("relax.op.cumprod").set_body_typed(cumprod);
-
-TVM_REGISTER_OP("relax.cumprod")
-    .set_attrs_type<ScanopAttrs>()
-    .set_num_inputs(1)
-    .add_argument("data", "Tensor", "The input tensor.")
-    .set_attr<FInferStructInfo>("FInferStructInfo", InferStructInfoScan)
-    .set_attr<Bool>("FPurity", Bool(true));
-
-/* relax.cumsum */
-Expr cumsum(Expr data, Optional<Integer> axis, DataType dtype, Bool exclusive) {
-  auto attrs = make_object<ScanopAttrs>();
-  attrs->axis = std::move(axis);
-  attrs->dtype = std::move(dtype);
-  attrs->exclusive = std::move(exclusive);
-
-  static const Op& op = Op::Get("relax.cumsum");
-  return Call(op, {std::move(data)}, Attrs{attrs}, {});
-}
-
-TVM_REGISTER_GLOBAL("relax.op.cumsum").set_body_typed(cumsum);
-
 TVM_REGISTER_OP("relax.cumsum")
-    .set_attrs_type<ScanopAttrs>()
+    .set_attrs_type<CumsumAttrs>()
     .set_num_inputs(1)
     .add_argument("data", "Tensor", "The input tensor.")
-    .set_attr<FInferStructInfo>("FInferStructInfo", InferStructInfoScan)
+    .set_attr<FInferStructInfo>("FInferStructInfo", InferStructInfoCumsum)
     .set_attr<Bool>("FPurity", Bool(true));
 
 TVM_REGISTER_NODE_TYPE(StatisticalAttrs);
diff --git a/src/relax/op/tensor/statistical.h b/src/relax/op/tensor/statistical.h
index 310c87f7d6..23a6da99f1 100644
--- a/src/relax/op/tensor/statistical.h
+++ b/src/relax/op/tensor/statistical.h
@@ -85,22 +85,6 @@ Expr std(Expr x, Optional> axis, bool keepdims);
 /*! \brief Computes the sum of tensor elements over given axes. */
 Expr sum(Expr x, Optional> axis, bool keepdims);
 
-/*!
- * \brief Numpy style cumprod op. Return the cumulative inclusive product of the elements along
- * a given axis.
- * \param data The input tensor.
- * \param axis Axis along which the cumulative product is computed. The default (None) is to compute
- * the cumprod over the flattened array.
- * \param dtype Type of the returned array and of the accumulator in which the elements are
- * computed. If dtype is not specified, it defaults to the dtype of data.
- * \param exclusive Whehter the first element is exclusive. If true will return exclusive sum in
- * which the first element is not included.
- * \return The computed
- * result.
- */
-Expr cumprod(Expr data, Optional<Integer> axis = NullOpt, DataType dtype = DataType::Void(),
-             Bool exclusive = Bool(false));
-
 /*!
 * \brief Numpy style cumsum op. Return the cumulative inclusive sum of the elements along
 * a given axis.
@@ -109,12 +93,9 @@ Expr cumprod(Expr data, Optional axis = NullOpt, DataType dtype = DataT
 * the cumsum over the flattened array.
 * \param dtype Type of the returned array and of the accumulator in which the elements are summed.
 * If dtype is not specified, it defaults to the dtype of data.
- * \param exclusive Whehter the first element is exclusive. If true will return exclusive sum in
- * which the first element is not included.
 * \return The computed result.
 */
-Expr cumsum(Expr data, Optional<Integer> axis = NullOpt, DataType dtype = DataType::Void(),
-            Bool exclusive = Bool(false));
+Expr cumsum(Expr data, Optional<Integer> axis = NullOpt, DataType dtype = DataType::Void());
 
 /*! \brief Computes the variance of tensor elements over given axes.
*/ Expr variance(Expr x, Optional> axis, bool keepdims); diff --git a/tests/python/relax/test_backend_dispatch_sort_scan.py b/tests/python/relax/test_backend_dispatch_sort_scan.py index 8921372f2f..c21dd4882f 100644 --- a/tests/python/relax/test_backend_dispatch_sort_scan.py +++ b/tests/python/relax/test_backend_dispatch_sort_scan.py @@ -18,7 +18,7 @@ import pytest import tvm -from tvm import topi, relax, tir, dlight +from tvm import topi, relax, tir import tvm.script import tvm.testing from tvm.script import relax as R, tir as T, ir as I @@ -29,22 +29,21 @@ from tvm.ir.base import assert_structural_equal -def test_dispatch_scanop(): +def test_dispatch_cumsum(): @I.ir_module class Before: - I.module_global_infos({"vdevice": [I.vdevice("llvm", 0)]}) + I.module_global_infos({"vdevice": [I.vdevice("cuda", 0), I.vdevice("llvm", 0)]}) @R.function def foo(x: R.Tensor((2, 3), "float32", "llvm")): with R.dataflow(): - lv0 = R.cumsum(x, axis=1, dtype="float64", exclusive=False) - gv = R.cumprod(lv0, axis=1, dtype="float64", exclusive=False) + gv = R.cumsum(x, axis=1, dtype="float64") R.output(gv) return gv @I.ir_module class Expected: - I.module_global_infos({"vdevice": [I.vdevice("llvm", 0)]}) + I.module_global_infos({"vdevice": [I.vdevice("cuda", 0), I.vdevice("llvm", 0)]}) @T.prim_func(private=True) def cumsum(var_A: T.handle, out_buf: T.Buffer((T.int64(2), T.int64(3)), "float64")): @@ -73,44 +72,13 @@ def cumsum(var_A: T.handle, out_buf: T.Buffer((T.int64(2), T.int64(3)), "float64 ], ) - @T.prim_func(private=True) - def cumprod(var_A: T.handle, out_buf: T.Buffer((T.int64(2), T.int64(3)), "float64")): - T.func_attr({"tir.noalias": T.bool(True)}) - A = T.match_buffer(var_A, (T.int64(2), T.int64(3)), "float64", offset_factor=1) - with T.block("cumprod_generic"): - T.reads(A[T.int64(0) : T.int64(2), T.int64(0) : T.int64(3)]) - T.writes(out_buf[T.int64(0) : T.int64(2), T.int64(0) : T.int64(3)]) - for fused in T.parallel(T.int64(2)): - out_buf[fused * T.int64(3) // T.int64(3), fused * T.int64(3) % T.int64(3)] = A[ - fused * T.int64(3) // T.int64(3), fused * T.int64(3) % T.int64(3) - ] - for _k in range(T.int64(2)): - out_buf[ - (fused * T.int64(3) + (_k + T.int64(1))) // T.int64(3), - (fused * T.int64(3) + (_k + T.int64(1))) % T.int64(3), - ] = ( - out_buf[ - (fused * T.int64(3) + (_k + T.int64(1) - T.int64(1))) // T.int64(3), - (fused * T.int64(3) + (_k + T.int64(1) - T.int64(1))) % T.int64(3), - ] - * A[ - (fused * T.int64(3) + (_k + T.int64(1))) // T.int64(3), - (fused * T.int64(3) + (_k + T.int64(1))) % T.int64(3), - ] - ) - @R.function def foo( x: R.Tensor((2, 3), dtype="float32", vdevice="llvm") ) -> R.Tensor((2, 3), dtype="float64", vdevice="llvm"): cls = Expected with R.dataflow(): - lv0 = R.call_tir(cls.cumsum, (x,), out_sinfo=R.Tensor((2, 3), "float64", "llvm")) - gv = R.call_tir( - cls.cumprod, - (lv0,), - out_sinfo=R.Tensor((2, 3), dtype="float64", vdevice="llvm"), - ) + gv = R.call_tir(cls.cumsum, (x,), out_sinfo=R.Tensor((2, 3), "float64", "llvm")) R.output(gv) return gv @@ -118,39 +86,33 @@ def foo( assert_structural_equal(mod, Expected) -def test_dispatch_scanop_cuda(): +def test_dispatch_cumsum_cuda(): @I.ir_module class Before: - I.module_global_infos({"vdevice": [I.vdevice("cuda", 0)]}) + I.module_global_infos({"vdevice": [I.vdevice("cuda", 0), I.vdevice("llvm", 0)]}) @R.function def main(x: R.Tensor(("m", 3), "float32", "cuda")): with R.dataflow(): - lv0 = R.cumsum(x, axis=1) - lv1 = R.cumprod(lv0, axis=1) - gv = lv1 + lv = R.cumsum(x, axis=1) + gv = lv R.output(gv) return gv 
target = tvm.target.Target("cuda", host="llvm") - vdevices = [I.vdevice("cuda", 0)] + vdevices = [I.vdevice("cuda", 0), I.vdevice("llvm", 0)] m = tir.Var("m", "int64") x = relax.Var("x", R.Tensor((m, 3), "float32", vdevices[0])) bb = relax.BlockBuilder() with target: with bb.function("main", (x,), {"global_symbol": "main"}): with bb.dataflow(): - lv = bb.emit_te( + out = bb.emit_te( topi.cuda.cumsum, x, axis=1, ) - out = bb.emit_te( - topi.cuda.cumprod, - lv, - axis=1, - ) out = bb.emit_output(out) bb.emit_func_output(out) expected_mod = bb.finalize() @@ -158,15 +120,14 @@ def main(x: R.Tensor(("m", 3), "float32", "cuda")): with target: mod = DispatchSortScan()(Before) - expected_mod = dlight.ApplyDefaultSchedule(dlight.gpu.Fallback())(expected_mod) - assert_structural_equal(mod, expected_mod, map_free_vars=True) + assert_structural_equal(mod, expected_mod) def test_dispatch_sort(): @I.ir_module class Before: - I.module_global_infos({"vdevice": [I.vdevice("llvm", 0)]}) + I.module_global_infos({"vdevice": [I.vdevice("cuda", 0), I.vdevice("llvm", 0)]}) @R.function def foo(x: R.Tensor(("m", 3), "float32", "llvm")): @@ -178,7 +139,7 @@ def foo(x: R.Tensor(("m", 3), "float32", "llvm")): @I.ir_module class Expected: - I.module_global_infos({"vdevice": [I.vdevice("llvm", 0)]}) + I.module_global_infos({"vdevice": [I.vdevice("cuda", 0), I.vdevice("llvm", 0)]}) @T.prim_func(private=True) def sort(var_A: T.handle, var_sort_cpu: T.handle): @@ -231,7 +192,7 @@ def foo( def test_dispatch_sort_cuda(): @I.ir_module class Before: - I.module_global_infos({"vdevice": [I.vdevice("cuda")]}) + I.module_global_infos({"vdevice": [I.vdevice("cuda"), I.vdevice("llvm")]}) @R.function def foo(x: R.Tensor((2, 3), "float32", "cuda")): @@ -251,7 +212,7 @@ def foo2(y: R.Tensor((2, 3), "float32")): target = tvm.target.Target("cuda -libs=thrust", host="llvm") - vdevices = [I.vdevice("cuda", 0)] + vdevices = [I.vdevice("cuda", 0), I.vdevice("llvm", 0)] x = relax.Var("x", R.Tensor((2, 3), "float32", vdevices[0])) y = relax.Var("y", R.Tensor((2, 3), "float32")) bb = relax.BlockBuilder() @@ -283,233 +244,7 @@ def foo2(y: R.Tensor((2, 3), "float32")): with target: mod = DispatchSortScan()(Before) - assert_structural_equal(mod, expected_mod) - - -def test_dispatch_argsort(): - @I.ir_module - class Before: - I.module_global_infos({"vdevice": [I.vdevice("llvm", 0)]}) - - @R.function - def foo(x: R.Tensor(("m", 3), "float32", "llvm")): - m = T.int64() - with R.dataflow(): - gv = R.argsort(x, axis=1, descending=False) - R.output(gv) - return gv - - @I.ir_module - class Expected: - I.module_global_infos({"vdevice": [I.vdevice("llvm", 0)]}) - - @T.prim_func(private=True) - def argsort(var_A: T.handle, var_argsort_cpu: T.handle): - T.func_attr({"tir.noalias": T.bool(True)}) - m = T.int64() - data_buf = T.match_buffer(var_A, (m, T.int64(3)), align=8) - out_buf = T.match_buffer(var_argsort_cpu, (m, T.int64(3)), "int32", align=8) - with T.block("argsort_cpu"): - T.reads(data_buf[T.int64(0) : m, T.int64(0) : T.int64(3)]) - T.writes(out_buf[T.int64(0) : m, T.int64(0) : T.int64(3)]) - T.call_packed( - "tvm.contrib.sort.argsort", - T.tvm_stack_make_array( - data_buf.data, - T.tvm_stack_make_shape(m, T.int64(3)), - 0, - 2, - T.float32(0), - T.int64(0), - ), - T.tvm_stack_make_array( - out_buf.data, T.tvm_stack_make_shape(m, T.int64(3)), 0, 2, 0, T.int64(0) - ), - 1, - T.bool(True), - ) - - @R.function - def foo( - x: R.Tensor(("m", 3), dtype="float32", vdevice="llvm") - ) -> R.Tensor(("m", 3), dtype="int32", vdevice="llvm"): - m = T.int64() - 
cls = Expected - with R.dataflow(): - gv = R.call_tir( - cls.argsort, (x,), out_sinfo=R.Tensor((m, 3), dtype="int32", vdevice="llvm") - ) - R.output(gv) - return gv - - mod = DispatchSortScan()(Before) - assert_structural_equal(mod, Expected) - - -def test_dispatch_argsort_cuda(): - @I.ir_module - class Before: - I.module_global_infos({"vdevice": [I.vdevice("cuda")]}) - - @R.function - def foo(x: R.Tensor((2, 3), "float32", "cuda")): - with R.dataflow(): - lv = R.argsort(x, axis=1, descending=False) - gv = lv - R.output(gv) - return gv - - @R.function - def foo2(y: R.Tensor((2, 3), "float32")): - with R.dataflow(): - lv = R.argsort(y, axis=0, descending=True, dtype="int64") - gv = lv - R.output(gv) - return gv - - target = tvm.target.Target("cuda -libs=thrust", host="llvm") - - vdevices = [I.vdevice("cuda", 0)] - x = relax.Var("x", R.Tensor((2, 3), "float32", vdevices[0])) - y = relax.Var("y", R.Tensor((2, 3), "float32")) - bb = relax.BlockBuilder() - with target: - with bb.function("foo", (x,), {"global_symbol": "foo"}): - with bb.dataflow(): - out = bb.emit_te(topi.cuda.argsort, x, axis=1, is_ascend=True, dtype="int32") - out = bb.emit_output(out) - bb.emit_func_output(out) - with bb.function("foo2", (y,), {"global_symbol": "foo2"}): - with bb.dataflow(): - out = bb.emit_te( - topi.cuda.argsort_thrust - if can_use_thrust(target, "tvm.contrib.thrust.sort") - else topi.cuda.argsort, - y, - 0, - False, - "int64", - ) - out = bb.emit_output(out) - bb.emit_func_output(out) - expected_mod = bb.finalize() - expected_mod.update_global_info("vdevice", vdevices) - - with target: - mod = DispatchSortScan()(Before) - - assert_structural_equal(mod, expected_mod) - - -def test_dispatch_topk(): - @I.ir_module - class Before: - I.module_global_infos({"vdevice": [I.vdevice("llvm", 0)]}) - - @R.function - def foo(x: R.Tensor(("m", 3), "float32", "llvm")): - m = T.int64() - with R.dataflow(): - gv = R.topk(x, k=2, axis=1, largest=True) - R.output(gv) - return gv - - @I.ir_module - class Expected: - I.module_global_infos({"vdevice": [I.vdevice("llvm", 0)]}) - - @T.prim_func(private=True) - def topk(var_A: T.handle, var_topk_cpu_v0: T.handle, var_topk_cpu_v1: T.handle): - T.func_attr({"tir.noalias": T.bool(True)}) - m = T.int64() - data_buf = T.match_buffer(var_A, (m, T.int64(3)), align=8) - value_buf = T.match_buffer(var_topk_cpu_v0, (m, T.int64(1)), align=8) - indices_buf = T.match_buffer(var_topk_cpu_v1, (m, T.int64(1)), "int32", align=8) - with T.block("topk_cpu"): - T.reads(data_buf[T.int64(0) : m, T.int64(0) : T.int64(3)]) - T.writes( - value_buf[T.int64(0) : m, T.int64(0)], indices_buf[T.int64(0) : m, T.int64(0)] - ) - T.call_packed( - "tvm.contrib.sort.topk", - T.tvm_stack_make_array( - data_buf.data, - T.tvm_stack_make_shape(m, T.int64(3)), - 0, - 2, - T.float32(0), - T.int64(0), - ), - T.tvm_stack_make_array( - value_buf.data, T.tvm_stack_make_shape(m, 1), 0, 2, T.float32(0), T.int64(0) - ), - T.tvm_stack_make_array( - indices_buf.data, T.tvm_stack_make_shape(m, 1), 0, 2, 0, T.int64(0) - ), - 1, - 1, - "both", - T.bool(False), - ) - - @R.function - def foo( - x: R.Tensor(("m", 3), dtype="float32", vdevice="llvm") - ) -> R.Tuple( - R.Tensor(("m", 1), dtype="float32", vdevice="llvm"), - R.Tensor(("m", 1), dtype="int32", vdevice="llvm"), - ): - m = T.int64() - cls = Expected - with R.dataflow(): - gv = R.call_tir( - cls.topk, - (x,), - out_sinfo=[ - R.Tensor((m, 1), dtype="float32", vdevice="llvm"), - R.Tensor((m, 1), dtype="int32", vdevice="llvm"), - ], - ) - R.output(gv) - return gv - - mod = 
DispatchSortScan()(Before) - assert_structural_equal(mod, Expected) - - -def test_dispatch_topk_cuda(): - @I.ir_module - class Before: - I.module_global_infos({"vdevice": [I.vdevice("cuda")]}) - - @R.function - def foo(x: R.Tensor((2, 3), "float32", "cuda")): - with R.dataflow(): - lv = R.topk(x, k=2, axis=1, largest=True) - gv = lv - R.output(gv) - return gv - - target = tvm.target.Target("cuda -libs=thrust", host="llvm") - - vdevices = [I.vdevice("cuda", 0)] - x = relax.Var("x", R.Tensor((2, 3), "float32", vdevices[0])) - y = relax.Var("y", R.Tensor((2, 3), "float32")) - bb = relax.BlockBuilder() - with target: - with bb.function("foo", (x,), {"global_symbol": "foo"}): - with bb.dataflow(): - out = bb.emit_te(topi.cuda.topk, x, axis=1, is_ascend=False, dtype="int32") - out = bb.emit_output(out) - bb.emit_func_output(out) - expected_mod = bb.finalize() - expected_mod.update_global_info("vdevice", vdevices) - - with target: - mod = DispatchSortScan()(Before) - expected_mod = dlight.ApplyDefaultSchedule(dlight.gpu.Fallback())(expected_mod) - - assert_structural_equal(mod, expected_mod) + assert_structural_equal(mod, expected_mod, map_free_vars=True) if __name__ == "__main__": diff --git a/tests/python/relax/test_op_sort.py b/tests/python/relax/test_op_sort.py index ed47570b82..b6a064a641 100644 --- a/tests/python/relax/test_op_sort.py +++ b/tests/python/relax/test_op_sort.py @@ -26,8 +26,6 @@ def test_op_correctness(): x = relax.Var("x", R.Tensor((3, 4, 5), "float32")) assert relax.op.sort(x, axis=1).op == Op.get("relax.sort") - assert relax.op.argsort(x, axis=1).op == Op.get("relax.argsort") - assert relax.op.topk(x, k=1, axis=1).op == Op.get("relax.topk") def _check_inference(bb: relax.BlockBuilder, call: relax.Call, expected_sinfo: relax.StructInfo): @@ -100,195 +98,5 @@ def test_sort_wrong_input(): bb.normalize(relax.op.sort(x1, axis=1)) -def test_argsort_infer_struct_info(): - bb = relax.BlockBuilder() - vdev0 = VDevice("llvm") - x0 = relax.Var("x", R.Tensor((2, 10, 4), "float32")) - x1 = relax.Var("x", R.Tensor("float32", ndim=3)) - x2 = relax.Var("x", R.Tensor("float32")) - x3 = relax.Var("x", R.Tensor((2, 10, 4))) - x4 = relax.Var("x", R.Tensor(ndim=3)) - x5 = relax.Var("x", R.Tensor()) - x6 = relax.Var("x", R.Tensor((2, 10, 4), "float32", vdev0)) - - _check_inference( - bb, - relax.op.argsort(x0, axis=1, descending=False, dtype="int64"), - relax.TensorStructInfo((2, 10, 4), "int64"), - ) - _check_inference( - bb, relax.op.argsort(x6, axis=1), relax.TensorStructInfo((2, 10, 4), "int32", vdev0) - ) - _check_inference( - bb, relax.op.argsort(x1, axis=1), relax.TensorStructInfo(dtype="int32", ndim=3) - ) - _check_inference( - bb, relax.op.argsort(x2, axis=1, dtype="float16"), relax.TensorStructInfo(dtype="float16") - ) - _check_inference( - bb, relax.op.argsort(x3, axis=1), relax.TensorStructInfo((2, 10, 4), dtype="int32") - ) - _check_inference( - bb, relax.op.argsort(x4, axis=1), relax.TensorStructInfo(dtype="int32", ndim=3) - ) - _check_inference(bb, relax.op.argsort(x5, axis=1), relax.TensorStructInfo(dtype="int32")) - _check_inference(bb, relax.op.argsort(x0), relax.TensorStructInfo((2, 10, 4), "int32")) - _check_inference( - bb, - relax.op.argsort(x0, axis=1, descending=False), - relax.TensorStructInfo((2, 10, 4), "int32"), - ) - - -def test_argsort_infer_struct_info_shape_symbolic(): - bb = relax.BlockBuilder() - a = tir.Var("a", "int64") - b = tir.Var("b", "int64") - c = tir.Var("c", "int64") - x = relax.Var("x", R.Tensor((a, b, c), "float32")) - - _check_inference(bb, 
relax.op.argsort(x, axis=1), relax.TensorStructInfo((a, b, c), "int32")) - _check_inference(bb, relax.op.argsort(x), relax.TensorStructInfo((a, b, c), "int32")) - - -def test_topk_infer_struct_info(): - bb = relax.BlockBuilder() - vdev0 = VDevice("llvm") - x0 = relax.Var("x", R.Tensor((2, 10, 4), "float32")) - x1 = relax.Var("x", R.Tensor("float32", ndim=3)) - x2 = relax.Var("x", R.Tensor("float32")) - x3 = relax.Var("x", R.Tensor((2, 10, 4))) - x4 = relax.Var("x", R.Tensor(ndim=3)) - x5 = relax.Var("x", R.Tensor()) - x6 = relax.Var("x", R.Tensor((2, 10, 4), "float32", vdev0)) - - _check_inference( - bb, - relax.op.topk(x0, k=5, axis=1, ret_type="both", largest=False, dtype="int64"), - relax.TupleStructInfo( - [ - relax.TensorStructInfo((2, 5, 4), "float32"), - relax.TensorStructInfo((2, 5, 4), "int64"), - ] - ), - ) - _check_inference( - bb, - relax.op.topk(x6), - relax.TupleStructInfo( - [ - relax.TensorStructInfo((2, 10, 1), "float32", vdev0), - relax.TensorStructInfo((2, 10, 1), "int32", vdev0), - ] - ), - ) - _check_inference( - bb, - relax.op.topk(x1, k=3, axis=1), - relax.TupleStructInfo( - [ - relax.TensorStructInfo(dtype="float32", ndim=3), - relax.TensorStructInfo(dtype="int32", ndim=3), - ] - ), - ) - _check_inference( - bb, - relax.op.topk(x2), - relax.TupleStructInfo( - [relax.TensorStructInfo(dtype="float32"), relax.TensorStructInfo(dtype="int32")] - ), - ) - _check_inference( - bb, - relax.op.topk(x3, axis=0), - relax.TupleStructInfo( - [ - relax.TensorStructInfo((1, 10, 4), None), - relax.TensorStructInfo((1, 10, 4), dtype="int32"), - ] - ), - ) - _check_inference( - bb, - relax.op.topk(x4, axis=1), - relax.TupleStructInfo( - [ - relax.TensorStructInfo(ndim=3, dtype=None), - relax.TensorStructInfo(dtype="int32", ndim=3), - ] - ), - ) - _check_inference( - bb, - relax.op.topk(x5, axis=1), - relax.TupleStructInfo( - [ - relax.TensorStructInfo(dtype=None), - relax.TensorStructInfo(dtype="int32"), - ] - ), - ) - _check_inference( - bb, - relax.op.topk(x0), - relax.TupleStructInfo( - [ - relax.TensorStructInfo((2, 10, 1), "float32"), - relax.TensorStructInfo((2, 10, 1), "int32"), - ] - ), - ) - _check_inference( - bb, - relax.op.topk(x0, k=-1), - relax.TupleStructInfo( - [ - relax.TensorStructInfo((2, 10, 4), "float32"), - relax.TensorStructInfo((2, 10, 4), "int32"), - ] - ), - ) - _check_inference( - bb, - relax.op.topk(x0, k=6), - relax.TupleStructInfo( - [ - relax.TensorStructInfo((2, 10, 4), "float32"), - relax.TensorStructInfo((2, 10, 4), "int32"), - ] - ), - ) - - -def test_topk_infer_struct_info_shape_symbolic(): - bb = relax.BlockBuilder() - a = tir.Var("a", "int64") - b = tir.Var("b", "int64") - c = tir.Var("c", "int64") - x = relax.Var("x", R.Tensor((a, b, c), "float32")) - - _check_inference( - bb, - relax.op.topk(x, axis=1), - relax.TupleStructInfo( - [ - relax.TensorStructInfo((a, 1, c), "float32"), - relax.TensorStructInfo((a, 1, c), "int32"), - ] - ), - ) - _check_inference( - bb, - relax.op.topk(x, k=3), - relax.TupleStructInfo( - [ - relax.TensorStructInfo((a, b, 3), "float32"), - relax.TensorStructInfo((a, b, 3), "int32"), - ] - ), - ) - - if __name__ == "__main__": tvm.testing.main() diff --git a/tests/python/relax/test_op_statistical.py b/tests/python/relax/test_op_statistical.py index 0f32c964f4..5c7d56556c 100644 --- a/tests/python/relax/test_op_statistical.py +++ b/tests/python/relax/test_op_statistical.py @@ -14,7 +14,6 @@ # KIND, either express or implied. 
See the License for the # specific language governing permissions and limitations # under the License. -from typing import Callable import pytest import tvm import tvm.testing @@ -206,13 +205,7 @@ def test_statistical_infer_struct_info_wrong_input_type(): bb.normalize(relax.op.variance(x1)) -(scan_op,) = tvm.testing.parameters( - (relax.op.cumprod,), - (relax.op.cumsum,), -) - - -def test_scan_op_infer_struct_info(scan_op: Callable): +def test_cumsum_infer_struct_info(): bb = relax.BlockBuilder() vdev0 = VDevice("llvm") x0 = relax.Var("x", R.Tensor((2, 10, 4), "float32")) @@ -223,56 +216,60 @@ def test_scan_op_infer_struct_info(scan_op: Callable): x5 = relax.Var("x", R.Tensor()) x6 = relax.Var("x", R.Tensor((2, 10, 4), "float32", vdev0)) - _check_inference(bb, scan_op(x0, axis=1), relax.TensorStructInfo((2, 10, 4), "float32")) - _check_inference(bb, scan_op(x6, axis=1), relax.TensorStructInfo((2, 10, 4), "float32", vdev0)) - _check_inference(bb, scan_op(x1, axis=1), relax.TensorStructInfo(dtype="float32", ndim=3)) - _check_inference(bb, scan_op(x2, axis=1), relax.TensorStructInfo(dtype="float32")) - _check_inference(bb, scan_op(x3, axis=1), relax.TensorStructInfo((2, 10, 4), dtype="")) - _check_inference(bb, scan_op(x4, axis=1), relax.TensorStructInfo(dtype="", ndim=3)) - _check_inference(bb, scan_op(x5, axis=1), relax.TensorStructInfo(dtype="")) - _check_inference(bb, scan_op(x0), relax.TensorStructInfo((80,), "float32")) + _check_inference(bb, relax.op.cumsum(x0, axis=1), relax.TensorStructInfo((2, 10, 4), "float32")) + _check_inference( + bb, relax.op.cumsum(x6, axis=1), relax.TensorStructInfo((2, 10, 4), "float32", vdev0) + ) + _check_inference( + bb, relax.op.cumsum(x1, axis=1), relax.TensorStructInfo(dtype="float32", ndim=3) + ) + _check_inference(bb, relax.op.cumsum(x2, axis=1), relax.TensorStructInfo(dtype="float32")) + _check_inference(bb, relax.op.cumsum(x3, axis=1), relax.TensorStructInfo((2, 10, 4), dtype="")) + _check_inference(bb, relax.op.cumsum(x4, axis=1), relax.TensorStructInfo(dtype="", ndim=3)) + _check_inference(bb, relax.op.cumsum(x5, axis=1), relax.TensorStructInfo(dtype="")) + _check_inference(bb, relax.op.cumsum(x0), relax.TensorStructInfo((80,), "float32")) _check_inference( - bb, scan_op(x0, axis=1, dtype="int32"), relax.TensorStructInfo((2, 10, 4), "int32") + bb, relax.op.cumsum(x0, axis=1, dtype="int32"), relax.TensorStructInfo((2, 10, 4), "int32") ) -def test_scan_op_infer_struct_info_shape_symbolic(scan_op: Callable): +def test_cumsum_infer_struct_info_shape_symbolic(): bb = relax.BlockBuilder() a = tir.Var("a", "int64") b = tir.Var("b", "int64") c = tir.Var("c", "int64") x = relax.Var("x", R.Tensor((a, b, c), "float32")) - _check_inference(bb, scan_op(x, axis=1), relax.TensorStructInfo((a, b, c), "float32")) - _check_inference(bb, scan_op(x), relax.TensorStructInfo((a * b * c,), "float32")) + _check_inference(bb, relax.op.cumsum(x, axis=1), relax.TensorStructInfo((a, b, c), "float32")) + _check_inference(bb, relax.op.cumsum(x), relax.TensorStructInfo((a * b * c,), "float32")) -def test_scan_op_infer_struct_info_more_input_dtype(scan_op: Callable): +def test_cumsum_infer_struct_info_more_input_dtype(): bb = relax.BlockBuilder() x0 = relax.Var("x", R.Tensor((2, 3, 4), "float16")) x1 = relax.Var("x", R.Tensor((2, 3, 4), "int8")) - _check_inference(bb, scan_op(x0, axis=1), relax.TensorStructInfo((2, 3, 4), "float16")) - _check_inference(bb, scan_op(x1, axis=1), relax.TensorStructInfo((2, 3, 4), "int8")) + _check_inference(bb, relax.op.cumsum(x0, axis=1), 
relax.TensorStructInfo((2, 3, 4), "float16")) + _check_inference(bb, relax.op.cumsum(x1, axis=1), relax.TensorStructInfo((2, 3, 4), "int8")) -def test_scan_op_wrong_input_number(scan_op: Callable): +def test_cumsum_wrong_input_number(): x = relax.Var("x", R.Tensor((3, 4, 5), "float32")) y = relax.Var("y", R.Tensor((2, 3, 4), "float32")) with pytest.raises(TVMError): - scan_op(x, y) + relax.op.cumsum(x, y) -def test_scan_opinfer_struct_info_wrong_input_type(scan_op: Callable): +def test_cumsum_infer_struct_info_wrong_input_type(): bb = relax.BlockBuilder() x0 = relax.Var("x", relax.ShapeStructInfo((2, 3, 4, 5))) x1 = relax.Var("x", relax.FuncStructInfo([], R.Tensor((2, 3, 4, 5), "float32"))) with pytest.raises(TVMError): - bb.normalize(scan_op(x0, axis=1)) + bb.normalize(relax.op.cumsum(x0, axis=1)) with pytest.raises(TVMError): - bb.normalize(scan_op(x1, axis=1)) + bb.normalize(relax.op.cumsum(x1, axis=1)) if __name__ == "__main__": diff --git a/tests/python/relax/test_tvmscript_parser_op_sort.py b/tests/python/relax/test_tvmscript_parser_op_sort.py index 044fba3d8d..8b94fa0ab9 100644 --- a/tests/python/relax/test_tvmscript_parser_op_sort.py +++ b/tests/python/relax/test_tvmscript_parser_op_sort.py @@ -37,21 +37,15 @@ def _check( def test_sort(): @R.function - def foo( - x: R.Tensor((2, 3), "int32") - ) -> R.Tuple(R.Tensor((2, 2), dtype="int32"), R.Tensor((2, 2), dtype="int32")): - lv0 = R.sort(x, axis=1) - lv1 = R.argsort(lv0) - r = R.topk(lv1, axis=1, k=2) + def foo(x: R.Tensor((2, 3), "int32")) -> R.Tensor((2, 3), "int32"): + r = R.sort(x, axis=1) return r x = relax.Var("x", R.Tensor((2, 3), "int32")) bb = relax.BlockBuilder() with bb.function("foo", (x,)): - lv0 = bb.emit(relax.op.sort(x, axis=1)) - lv1 = bb.emit(relax.op.argsort(lv0)) - r = bb.emit(relax.op.topk(lv1, axis=1, k=2)) - bb.emit_func_output(r) + tensor = bb.emit(relax.op.sort(x, axis=1)) + bb.emit_func_output(tensor) _check(foo, bb.get()["foo"]) diff --git a/tests/python/relax/test_tvmscript_parser_op_statistical.py b/tests/python/relax/test_tvmscript_parser_op_statistical.py index 910c08bf1e..87446cedf3 100644 --- a/tests/python/relax/test_tvmscript_parser_op_statistical.py +++ b/tests/python/relax/test_tvmscript_parser_op_statistical.py @@ -170,18 +170,16 @@ def foo(x: R.Tensor((1, 2, 3, 4), "float32")) -> R.Tensor((1, 3, 4), "float32"): _check(foo, bb.get()["foo"]) -def test_scan(): +def test_cumsum(): @R.function def foo(x: R.Tensor((2, 3, 4), "float32")): - lv = R.cumsum(x, axis=1, dtype="int32") - gv = R.cumprod(lv, axis=1, dtype="int32") + gv = R.cumsum(x, axis=1, dtype="int32") return gv x = relax.Var("x", R.Tensor((2, 3, 4), "float32")) bb = relax.BlockBuilder() with bb.function("foo", [x]): - lv = bb.emit(relax.op.cumsum(x, axis=1, dtype="int32")) - gv = bb.emit(relax.op.cumprod(lv, axis=1, dtype="int32")) + gv = bb.emit(relax.op.cumsum(x, axis=1, dtype="int32")) bb.emit_func_output(gv) _check(foo, bb.get()["foo"])