diff --git a/jvm/core/src/main/java/org/apache/tvm/Device.java b/jvm/core/src/main/java/org/apache/tvm/Device.java
index 3447c692b5ef..6b10a26ed3b3 100644
--- a/jvm/core/src/main/java/org/apache/tvm/Device.java
+++ b/jvm/core/src/main/java/org/apache/tvm/Device.java
@@ -28,7 +28,7 @@ public class Device {
 
   static {
     MASK2STR.put(1, "cpu");
-    MASK2STR.put(2, "gpu");
+    MASK2STR.put(2, "cuda");
     MASK2STR.put(4, "opencl");
     MASK2STR.put(7, "vulkan");
     MASK2STR.put(8, "metal");
@@ -36,7 +36,6 @@ public class Device {
     MASK2STR.put(14, "hexagon");
 
     STR2MASK.put("cpu", 1);
-    STR2MASK.put("gpu", 2);
     STR2MASK.put("cuda", 2);
     STR2MASK.put("cl", 4);
     STR2MASK.put("opencl", 4);
@@ -60,16 +59,16 @@ public static Device cpu() {
   }
 
   /**
-   * Construct a GPU device.
+   * Construct a CUDA GPU device.
    * @param devId The device id
    * @return The created device
    */
-  public static Device gpu(int devId) {
+  public static Device cuda(int devId) {
     return new Device(2, devId);
   }
 
-  public static Device gpu() {
-    return gpu(0);
+  public static Device cuda() {
+    return cuda(0);
   }
 
   /**
diff --git a/jvm/core/src/main/java/org/apache/tvm/Module.java b/jvm/core/src/main/java/org/apache/tvm/Module.java
index 874daa4029dc..0682a6595a3e 100644
--- a/jvm/core/src/main/java/org/apache/tvm/Module.java
+++ b/jvm/core/src/main/java/org/apache/tvm/Module.java
@@ -148,8 +148,8 @@ public static Module load(String path) {
 
   /**
    * Whether module runtime is enabled for target,
-   * e.g., The following code checks if gpu is enabled.
-   * Module.enabled("gpu")
+   * e.g., the following code checks whether cuda is enabled:
+   * Module.enabled("cuda")
    * @param target The target device type.
    * @return Whether runtime is enabled.
    */
diff --git a/jvm/core/src/main/java/org/apache/tvm/rpc/RPCSession.java b/jvm/core/src/main/java/org/apache/tvm/rpc/RPCSession.java
index 1d3f38627926..7acafa6cfbe8 100644
--- a/jvm/core/src/main/java/org/apache/tvm/rpc/RPCSession.java
+++ b/jvm/core/src/main/java/org/apache/tvm/rpc/RPCSession.java
@@ -110,20 +110,20 @@ public Device cpu() {
   }
 
   /**
-   * Construct remote GPU device.
+   * Construct remote CUDA GPU device.
    * @param devId device id.
-   * @return Remote GPU device.
+   * @return Remote CUDA GPU device.
    */
-  public Device gpu(int devId) {
+  public Device cuda(int devId) {
     return device(2, devId);
   }
 
   /**
-   * Construct remote GPU device.
-   * @return Remote GPU device.
+   * Construct remote CUDA GPU device.
+   * @return Remote CUDA GPU device.
    */
-  public Device gpu() {
-    return gpu(0);
+  public Device cuda() {
+    return cuda(0);
   }
 
   /**
diff --git a/jvm/core/src/test/java/org/apache/tvm/ModuleTest.java b/jvm/core/src/test/java/org/apache/tvm/ModuleTest.java
index 666cbac6afee..b9538ca96b5d 100644
--- a/jvm/core/src/test/java/org/apache/tvm/ModuleTest.java
+++ b/jvm/core/src/test/java/org/apache/tvm/ModuleTest.java
@@ -61,17 +61,17 @@ public void test_load_add_func_cpu() {
   }
 
   @Test
-  public void test_load_add_func_gpu() {
+  public void test_load_add_func_cuda() {
     final Random RND = new Random(0);
 
-    Device dev = new Device("gpu", 0);
+    Device dev = new Device("cuda", 0);
     if (!dev.exist()) {
-      logger.warn("GPU does not exist. Skip the test.");
+      logger.warn("CUDA GPU does not exist. Skip the test.");
       return;
     }
 
-    Module fadd = Module.load(loadingDir + File.separator + "add_gpu.so");
-    Module faddDev = Module.load(loadingDir + File.separator + "add_gpu.ptx");
+    Module fadd = Module.load(loadingDir + File.separator + "add_cuda.so");
+    Module faddDev = Module.load(loadingDir + File.separator + "add_cuda.ptx");
     fadd.importModule(faddDev);
 
     final int dim = 100;
diff --git a/jvm/core/src/test/scripts/test_add_gpu.py b/jvm/core/src/test/scripts/test_add_gpu.py
index 040a447c3c27..aed338f15845 100644
--- a/jvm/core/src/test/scripts/test_add_gpu.py
+++ b/jvm/core/src/test/scripts/test_add_gpu.py
@@ -37,10 +37,10 @@ def test_add(target_dir):
     s[C].bind(tx, te.thread_axis("threadIdx.x"))
     fadd_cuda = tvm.build(s, [A, B, C], "cuda", target_host="llvm", name="myadd")
 
-    fadd_cuda.save(os.path.join(target_dir, "add_gpu.o"))
-    fadd_cuda.imported_modules[0].save(os.path.join(target_dir, "add_gpu.ptx"))
+    fadd_cuda.save(os.path.join(target_dir, "add_cuda.o"))
+    fadd_cuda.imported_modules[0].save(os.path.join(target_dir, "add_cuda.ptx"))
     cc.create_shared(
-        os.path.join(target_dir, "add_gpu.so"), [os.path.join(target_dir, "add_gpu.o")]
+        os.path.join(target_dir, "add_cuda.so"), [os.path.join(target_dir, "add_cuda.o")]
     )
 
 
diff --git a/python/tvm/contrib/nvcc.py b/python/tvm/contrib/nvcc.py
index 0124d00dda0c..30b5e6dffdc2 100644
--- a/python/tvm/contrib/nvcc.py
+++ b/python/tvm/contrib/nvcc.py
@@ -64,9 +64,9 @@ def compile_cuda(code, target="ptx", arch=None, options=None, path_target=None):
         out_file.write(code)
 
     if arch is None:
-        if nd.gpu(0).exist:
+        if nd.cuda(0).exist:
             # auto detect the compute arch argument
-            arch = "sm_" + "".join(nd.gpu(0).compute_version.split("."))
+            arch = "sm_" + "".join(nd.cuda(0).compute_version.split("."))
         else:
             raise ValueError("arch(sm_xy) is not passed, and we cannot detect it from env")
 
diff --git a/python/tvm/contrib/peak.py b/python/tvm/contrib/peak.py
index 8e8e158b0740..195f3dc9d81e 100644
--- a/python/tvm/contrib/peak.py
+++ b/python/tvm/contrib/peak.py
@@ -377,7 +377,7 @@ def measure_peak_all(target, target_host, host, port):
     if str(target).startswith("opencl"):
         dev = remote.cl()
     elif str(target).startswith("cuda"):
-        dev = remote.gpu()
+        dev = remote.cuda()
     elif str(target).startswith("metal"):
         dev = remote.metal()
     else:
diff --git a/python/tvm/driver/tvmc/runner.py b/python/tvm/driver/tvmc/runner.py
index b15a16a9fd38..9bd31644197d 100644
--- a/python/tvm/driver/tvmc/runner.py
+++ b/python/tvm/driver/tvmc/runner.py
@@ -379,8 +379,8 @@ def run_module(
 
     # TODO expand to other supported devices, as listed in tvm.rpc.client (@leandron)
     logger.debug("Device is %s.", device)
-    if device == "gpu":
-        dev = session.gpu()
+    if device == "cuda":
+        dev = session.cuda()
     elif device == "cl":
         dev = session.cl()
     else:
diff --git a/python/tvm/rpc/client.py b/python/tvm/rpc/client.py
index 6fcd3713f566..4531ceca2ce9 100644
--- a/python/tvm/rpc/client.py
+++ b/python/tvm/rpc/client.py
@@ -201,8 +201,8 @@ def cpu(self, dev_id=0):
         """Construct CPU device."""
         return self.device(1, dev_id)
 
-    def gpu(self, dev_id=0):
-        """Construct GPU device."""
+    def cuda(self, dev_id=0):
+        """Construct CUDA GPU device."""
         return self.device(2, dev_id)
 
     def cl(self, dev_id=0):
diff --git a/rust/tvm-rt/src/lib.rs b/rust/tvm-rt/src/lib.rs
index c43264da9e5b..ce2d709c2a6c 100644
--- a/rust/tvm-rt/src/lib.rs
+++ b/rust/tvm-rt/src/lib.rs
@@ -149,7 +149,7 @@ mod tests {
 
     #[test]
     fn device() {
-        let c = Device::from_str("gpu").unwrap();
+        let c = Device::from_str("cuda").unwrap();
         let tvm: Device = RetValue::from(c).try_into().unwrap();
         assert_eq!(tvm, c);
     }
diff --git a/rust/tvm-rt/src/module.rs b/rust/tvm-rt/src/module.rs
index 6109819939af..343f0dce8f98 100644
--- a/rust/tvm-rt/src/module.rs
+++ b/rust/tvm-rt/src/module.rs
@@ -85,7 +85,7 @@ impl Module {
         Ok(Function::new(fhandle))
     }
 
-    /// Imports a dependent module such as `.ptx` for gpu.
+    /// Imports a dependent module such as `.ptx` for a CUDA GPU.
     pub fn import_module(&self, dependent_module: Module) {
         check_call!(ffi::TVMModImport(self.handle(), dependent_module.handle()))
     }
diff --git a/rust/tvm-sys/src/device.rs b/rust/tvm-sys/src/device.rs
index 7b659efb6b44..1da64fd60483 100644
--- a/rust/tvm-sys/src/device.rs
+++ b/rust/tvm-sys/src/device.rs
@@ -65,7 +65,7 @@ use thiserror::Error;
 #[repr(i64)]
 pub enum DeviceType {
     CPU = 1,
-    GPU,
+    CUDA,
     CUDAHost,
     OpenCL,
     Vulkan,
@@ -101,7 +101,7 @@ impl Display for DeviceType {
             "{}",
             match self {
                 DeviceType::CPU => "cpu",
-                DeviceType::GPU => "cuda",
+                DeviceType::CUDA => "cuda",
                 DeviceType::CUDAHost => "cuda_host",
                 DeviceType::OpenCL => "opencl",
                 DeviceType::Vulkan => "vulkan",
@@ -121,9 +121,8 @@ impl<'a> From<&'a str> for DeviceType {
             "cpu" => DeviceType::CPU,
             "llvm" => DeviceType::CPU,
             "stackvm" => DeviceType::CPU,
-            "gpu" => DeviceType::GPU,
-            "cuda" => DeviceType::GPU,
-            "nvptx" => DeviceType::GPU,
+            "cuda" => DeviceType::CUDA,
+            "nvptx" => DeviceType::CUDA,
             "cl" => DeviceType::OpenCL,
             "opencl" => DeviceType::OpenCL,
             "metal" => DeviceType::Metal,
@@ -179,7 +178,7 @@ pub struct UnsupportedDeviceError(String);
 
 macro_rules! impl_tvm_device {
     ( $( $dev_type:ident : [ $( $dev_name:ident ),+ ] ),+ ) => {
-        /// Creates a Device from a string (e.g., "cpu", "gpu", "ext_dev")
+        /// Creates a Device from a string (e.g., "cpu", "cuda", "ext_dev")
         impl FromStr for Device {
             type Err = UnsupportedDeviceError;
             fn from_str(type_str: &str) -> Result<Self, Self::Err> {
@@ -210,7 +209,7 @@ macro_rules! impl_tvm_device {
 
 impl_tvm_device!(
     DLDeviceType_kDLCPU: [cpu, llvm, stackvm],
-    DLDeviceType_kDLCUDA: [gpu, cuda, nvptx],
+    DLDeviceType_kDLCUDA: [cuda, nvptx],
     DLDeviceType_kDLOpenCL: [cl],
     DLDeviceType_kDLMetal: [metal],
     DLDeviceType_kDLVPI: [vpi],
@@ -287,9 +286,9 @@ mod tests {
         println!("device: {}", dev);
         let default_dev = Device::new(DeviceType::CPU, 0);
         assert_eq!(dev.clone(), default_dev);
-        assert_ne!(dev, Device::gpu(0));
+        assert_ne!(dev, Device::cuda(0));
 
-        let str_dev = Device::new(DeviceType::GPU, 0);
+        let str_dev = Device::new(DeviceType::CUDA, 0);
         assert_eq!(str_dev.clone(), str_dev);
         assert_ne!(str_dev, Device::new(DeviceType::CPU, 0));
     }
diff --git a/rust/tvm-sys/src/value.rs b/rust/tvm-sys/src/value.rs
index 1b4f773b851f..f69172f41221 100644
--- a/rust/tvm-sys/src/value.rs
+++ b/rust/tvm-sys/src/value.rs
@@ -55,7 +55,7 @@ pub struct UnsupportedDeviceError(String);
 
 macro_rules! impl_tvm_device {
     ( $( $dev_type:ident : [ $( $dev_name:ident ),+ ] ),+ ) => {
-        /// Creates a DLDevice from a string (e.g., "cpu", "gpu", "ext_dev")
+        /// Creates a DLDevice from a string (e.g., "cpu", "cuda", "ext_dev")
         impl FromStr for DLDevice {
             type Err = UnsupportedDeviceError;
             fn from_str(type_str: &str) -> Result<Self, Self::Err> {
@@ -86,7 +86,7 @@ macro_rules! impl_tvm_device {
 
 impl_tvm_device!(
     DLDeviceType_kDLCPU: [cpu, llvm, stackvm],
-    DLDeviceType_kDLCUDA: [gpu, cuda, nvptx],
+    DLDeviceType_kDLCUDA: [cuda, nvptx],
     DLDeviceType_kDLOpenCL: [cl],
     DLDeviceType_kDLMetal: [metal],
     DLDeviceType_kDLVPI: [vpi],
diff --git a/rust/tvm/tests/basics/src/main.rs b/rust/tvm/tests/basics/src/main.rs
index d1cfbcf78dc7..2e0f5b5255a1 100644
--- a/rust/tvm/tests/basics/src/main.rs
+++ b/rust/tvm/tests/basics/src/main.rs
@@ -28,7 +28,7 @@ fn main() {
     let (dev, dev_name) = if cfg!(feature = "cpu") {
         (Device::cpu(0), "cpu")
     } else {
-        (Device::gpu(0), "gpu")
+        (Device::cuda(0), "cuda")
     };
 
     let dtype = DataType::from_str("float32").unwrap();
@@ -40,7 +40,7 @@ fn main() {
         return;
     }
 
-    if cfg!(feature = "gpu") {
+    if cfg!(feature = "cuda") {
         fadd.import_module(Module::load(&concat!(env!("OUT_DIR"), "/test_add.ptx")).unwrap());
     }
 
diff --git a/tests/python/unittest/test_runtime_module_based_interface.py b/tests/python/unittest/test_runtime_module_based_interface.py
index 3100414aee73..ff1e7ff42f9c 100644
--- a/tests/python/unittest/test_runtime_module_based_interface.py
+++ b/tests/python/unittest/test_runtime_module_based_interface.py
@@ -280,7 +280,7 @@ def verify_rpc_gpu_export(obj_format):
         remote.upload(path_lib)
         loaded_lib = remote.load_module(path_lib)
         data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
-        dev = remote.gpu()
+        dev = remote.cuda()
 
         # raw api
         gmod = loaded_lib["default"](dev)
@@ -484,7 +484,7 @@ def verify_rpc_gpu_remove_package_params(obj_format):
         remote.upload(path_lib)
         loaded_lib = remote.load_module(path_lib)
         data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
-        dev = remote.gpu()
+        dev = remote.cuda()
 
         # raw api
         gmod = loaded_lib["default"](dev)
diff --git a/web/src/runtime.ts b/web/src/runtime.ts
index a76096ebba4d..60a28d53f361 100644
--- a/web/src/runtime.ts
+++ b/web/src/runtime.ts
@@ -174,7 +174,7 @@ class PackedFuncCell implements Disposable {
 
 const DeviceEnumToStr: Record<number, string> = {
   1: "cpu",
-  2: "gpu",
+  2: "cuda",
   4: "opencl",
   8: "metal",
   15: "webgpu"
@@ -182,7 +182,6 @@ const DeviceEnumToStr: Record<number, string> = {
 
 const DeviceStrToEnum: Record<string, number> = {
   cpu: 1,
-  gpu: 2,
   cuda: 2,
   cl: 4,
   opencl: 4,