From a41e233d16435bf3ed80be123107bad274b26e90 Mon Sep 17 00:00:00 2001
From: jjsjann123 <alex.jann2012@gmail.com>
Date: Mon, 5 Feb 2024 12:51:01 -0800
Subject: [PATCH 01/10] Add FusionTensorRankLimit C++ test (12-D tensor sum)

---
 test/test_gpu3.cpp | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
diff --git a/test/test_gpu3.cpp b/test/test_gpu3.cpp
index 8f53a851eac..dcf0295b7af 100644
--- a/test/test_gpu3.cpp
+++ b/test/test_gpu3.cpp
@@ -8664,6 +8664,31 @@ TEST_F(NVFuserTest, Reduction3DConstantIterationDomain) {
       executor_cache.fusion(), cg_outputs, inputs, {ref}, __LINE__, __FILE__);
 }
 
+TEST_F(NVFuserTest, FusionTensorRankLimit) {
+  auto fusion = std::make_unique<Fusion>();
+  FusionGuard fg(fusion.get());
+
+  std::vector<int64_t> input_shape;
+  for (auto i : c10::irange(12)) {
+    input_shape.append(3);
+  }
+
+  auto tv0 = makeSymbolicTensor(input_shape.size());
+  fusion->addInput(tv0);
+  auto tv1 = sum(tv0, {0});
+  fusion->addOutput(tv1);
+
+  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
+  at::Tensor t0 = at::randn(input_shape, options);
+  std::vector<c10::IValue> aten_inputs({t0});
+
+  FusionExecutorCache executor_cache(std::move(fusion));
+  auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs);
+
+  testValidate(
+      executor_cache.fusion(), cg_outputs, {t0, t1, t2}, __LINE__, __FILE__);
+}
+
 // Test file size should be up to 10K LoC. Create a new file for more tests.
 
 } // namespace nvfuser

From 88f6abb9b96819a679e8ef218d1f289308e5b0f4 Mon Sep 17 00:00:00 2001
From: jjsjann123 <alex.jann2012@gmail.com>
Date: Mon, 5 Feb 2024 12:54:58 -0800
Subject: [PATCH 02/10] Fix testValidate arguments in FusionTensorRankLimit

---
 test/test_gpu3.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_gpu3.cpp b/test/test_gpu3.cpp
index dcf0295b7af..5fc88be221f 100644
--- a/test/test_gpu3.cpp
+++ b/test/test_gpu3.cpp
@@ -8686,7 +8686,7 @@ TEST_F(NVFuserTest, FusionTensorRankLimit) {
   auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs);
 
   testValidate(
-      executor_cache.fusion(), cg_outputs, {t0, t1, t2}, __LINE__, __FILE__);
+      executor_cache.fusion(), cg_outputs, aten_inputs, __LINE__, __FILE__);
 }
 
 // Test file size should be up to 10K LoC. Create a new file for more tests.

From ae868ae68d48c37a8bef427a0831797da8b0a19c Mon Sep 17 00:00:00 2001
From: jjsjann123 <alex.jann2012@gmail.com>
Date: Mon, 5 Feb 2024 13:26:44 -0800
Subject: [PATCH 03/10] fixing cpp tests; adding python tests

---
 python_tests/test_python_frontend.py | 20 ++++++++++++++++++++
 test/test_gpu3.cpp                   |  6 +++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/python_tests/test_python_frontend.py b/python_tests/test_python_frontend.py
index da91381017c..8dbef706270 100644
--- a/python_tests/test_python_frontend.py
+++ b/python_tests/test_python_frontend.py
@@ -519,6 +519,26 @@ def nvfuser_fusion(
 
         self.assertEqual(eager_out, nvf_out[0])
 
+    def test_tensor_ndim(self):
+        shape = []
+        for i in range(12):
+            shape.append(3)
+
+        inputs = [
+            torch.randn(shape, device="cuda"),
+        ]
+
+        def fusion_func(fd: FusionDefinition):
+            t0 = fd.from_pytorch(inputs[0])
+
+            t1 = fd.ops.sum(t0, axes=[3])
+
+            fd.add_output(t1)
+
+        nvf_out, _ = self.exec_nvfuser(fusion_func, inputs)
+        eager_out = torch.sum(inputs[0], dim=3)
+        self.assertEqual(eager_out, nvf_out[0])
+
     # Testing a scenario where a broadcast requires a symbolic output shape
     def test_tensor_shape(self):
         inputs = [
diff --git a/test/test_gpu3.cpp b/test/test_gpu3.cpp
index 5fc88be221f..9afcd40a2ae 100644
--- a/test/test_gpu3.cpp
+++ b/test/test_gpu3.cpp
@@ -8669,13 +8669,13 @@ TEST_F(NVFuserTest, FusionTensorRankLimit) {
   FusionGuard fg(fusion.get());
 
   std::vector<int64_t> input_shape;
-  for (auto i : c10::irange(12)) {
-    input_shape.append(3);
+  for (__attribute__((unused)) auto i : c10::irange(12)) {
+    input_shape.push_back(3);
   }
 
   auto tv0 = makeSymbolicTensor(input_shape.size());
   fusion->addInput(tv0);
-  auto tv1 = sum(tv0, {0});
+  auto tv1 = sum(tv0, {3});
   fusion->addOutput(tv1);
 
   auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);

From 35b55e401d86626f4dae381321465fce7ef88fd0 Mon Sep 17 00:00:00 2001
From: jjsjann123 <alex.jann2012@gmail.com>
Date: Mon, 5 Feb 2024 13:49:44 -0800
Subject: [PATCH 04/10] test_tensor_ndim: reshape to 10-D before reducing

---
 python_tests/test_python_frontend.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/python_tests/test_python_frontend.py b/python_tests/test_python_frontend.py
index 8dbef706270..7096af481cb 100644
--- a/python_tests/test_python_frontend.py
+++ b/python_tests/test_python_frontend.py
@@ -520,23 +520,26 @@ def nvfuser_fusion(
         self.assertEqual(eager_out, nvf_out[0])
 
     def test_tensor_ndim(self):
-        shape = []
-        for i in range(12):
-            shape.append(3)
+        shape = [2 for i in range(12)]
+        new_shape = shape[:9]
+        new_shape.append(8)
 
         inputs = [
             torch.randn(shape, device="cuda"),
+            new_shape
         ]
 
         def fusion_func(fd: FusionDefinition):
             t0 = fd.from_pytorch(inputs[0])
+            n_shape = fd.define_vector(10)
 
-            t1 = fd.ops.sum(t0, axes=[3])
+            t1 = fd.ops.reshape(t0, n_shape)
+            t2 = fd.ops.sum(t1, axes=[3])
 
             fd.add_output(t1)
 
         nvf_out, _ = self.exec_nvfuser(fusion_func, inputs)
-        eager_out = torch.sum(inputs[0], dim=3)
+        eager_out = torch.sum(inputs[0].reshape(new_shape), dim=3)
         self.assertEqual(eager_out, nvf_out[0])
 
     # Testing a scenario where a broadcast requires a symbolic output shape

From d5264299c114a2c3838b8245eec789b8fa2e4c68 Mon Sep 17 00:00:00 2001
From: jjsjann123 <alex.jann2012@gmail.com>
Date: Mon, 5 Feb 2024 13:52:03 -0800
Subject: [PATCH 05/10] Remove vector size limit checks from define_vector

---
 csrc/python_frontend/python_bindings.cpp | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp
index 08029a1e205..d03e97c6f77 100644
--- a/csrc/python_frontend/python_bindings.cpp
+++ b/csrc/python_frontend/python_bindings.cpp
@@ -55,9 +55,6 @@ Vector define_vector_fn(
   std::vector<Scalar> args;
   size_t idx = 0;
   for (const auto& item : values) {
-    NVF_CHECK(
-        idx < 8,
-        "The specified vector size exceeds the max tensor size for nvfuser.");
     if (py::isinstance<py::int_>(item)) {
       auto int_value = py::cast<int64_t>(item);
       NVF_CHECK(
@@ -865,9 +862,6 @@ void initNvFuserPythonBindings(PyObject* module) {
   fusion_def.def(
       "define_vector",
       [](FusionDefinition& self, size_t size) -> Vector {
-        NVF_CHECK(
-            size < 8,
-            "The specified vector size exceeds the max tensor size for nvfuser.");
         std::vector<Scalar> args;
         args.reserve(size);
         for (size_t i = 0; i < size; ++i) {

From 4569820056132517bdd5b462e1f9bcd97a197f50 Mon Sep 17 00:00:00 2001
From: jjsjann123 <alex.jann2012@gmail.com>
Date: Mon, 5 Feb 2024 13:57:31 -0800
Subject: [PATCH 06/10] test_tensor_ndim: output the reduced tensor t2

---
 python_tests/test_python_frontend.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/python_tests/test_python_frontend.py b/python_tests/test_python_frontend.py
index 7096af481cb..a0f759cc2f0 100644
--- a/python_tests/test_python_frontend.py
+++ b/python_tests/test_python_frontend.py
@@ -536,7 +536,7 @@ def fusion_func(fd: FusionDefinition):
             t1 = fd.ops.reshape(t0, n_shape)
             t2 = fd.ops.sum(t1, axes=[3])
 
-            fd.add_output(t1)
+            fd.add_output(t2)
 
         nvf_out, _ = self.exec_nvfuser(fusion_func, inputs)
         eager_out = torch.sum(inputs[0].reshape(new_shape), dim=3)

From 04c4f3a41c74d242e8c891979cf0aeaa7dcf9221 Mon Sep 17 00:00:00 2001
From: jjsjann123 <alex.jann2012@gmail.com>
Date: Mon, 5 Feb 2024 14:27:14 -0800
Subject: [PATCH 07/10] Apply black formatting to test_tensor_ndim

---
 python_tests/test_python_frontend.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/python_tests/test_python_frontend.py b/python_tests/test_python_frontend.py
index a0f759cc2f0..48d3b9aae82 100644
--- a/python_tests/test_python_frontend.py
+++ b/python_tests/test_python_frontend.py
@@ -524,10 +524,7 @@ def test_tensor_ndim(self):
         new_shape = shape[:9]
         new_shape.append(8)
 
-        inputs = [
-            torch.randn(shape, device="cuda"),
-            new_shape
-        ]
+        inputs = [torch.randn(shape, device="cuda"), new_shape]
 
         def fusion_func(fd: FusionDefinition):
             t0 = fd.from_pytorch(inputs[0])

From 6e6ca6c995932ba825faf62f2efe336b4d20d15a Mon Sep 17 00:00:00 2001
From: jjsjann123 <alex.jann2012@gmail.com>
Date: Mon, 5 Feb 2024 15:37:54 -0800
Subject: [PATCH 08/10] moving cpp test to test_pointwise

---
 test/test_gpu3.cpp      | 25 -------------------------
 test/test_pointwise.cpp | 25 +++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/test/test_gpu3.cpp b/test/test_gpu3.cpp
index 9afcd40a2ae..8f53a851eac 100644
--- a/test/test_gpu3.cpp
+++ b/test/test_gpu3.cpp
@@ -8664,31 +8664,6 @@ TEST_F(NVFuserTest, Reduction3DConstantIterationDomain) {
       executor_cache.fusion(), cg_outputs, inputs, {ref}, __LINE__, __FILE__);
 }
 
-TEST_F(NVFuserTest, FusionTensorRankLimit) {
-  auto fusion = std::make_unique<Fusion>();
-  FusionGuard fg(fusion.get());
-
-  std::vector<int64_t> input_shape;
-  for (__attribute__((unused)) auto i : c10::irange(12)) {
-    input_shape.push_back(3);
-  }
-
-  auto tv0 = makeSymbolicTensor(input_shape.size());
-  fusion->addInput(tv0);
-  auto tv1 = sum(tv0, {3});
-  fusion->addOutput(tv1);
-
-  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
-  at::Tensor t0 = at::randn(input_shape, options);
-  std::vector<c10::IValue> aten_inputs({t0});
-
-  FusionExecutorCache executor_cache(std::move(fusion));
-  auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs);
-
-  testValidate(
-      executor_cache.fusion(), cg_outputs, aten_inputs, __LINE__, __FILE__);
-}
-
 // Test file size should be up to 10K LoC. Create a new file for more tests.
 
 } // namespace nvfuser
diff --git a/test/test_pointwise.cpp b/test/test_pointwise.cpp
index ebae2ff9f7d..c28ddac2f19 100644
--- a/test/test_pointwise.cpp
+++ b/test/test_pointwise.cpp
@@ -430,4 +430,29 @@ TEST_F(PointwiseTest, VIssue1567ectorizationFactorAnalysisCase3) {
   testValidate(fusion, cg_outputs, aten_inputs, __LINE__, __FILE__);
 }
 
+TEST_F(NVFuserTest, FusionTensorRankLimit) {
+  auto fusion = std::make_unique<Fusion>();
+  FusionGuard fg(fusion.get());
+
+  std::vector<int64_t> input_shape;
+  for (__attribute__((unused)) auto i : c10::irange(12)) {
+    input_shape.push_back(3);
+  }
+
+  auto tv0 = makeSymbolicTensor(input_shape.size());
+  fusion->addInput(tv0);
+  auto tv1 = sum(tv0, {3});
+  fusion->addOutput(tv1);
+
+  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
+  at::Tensor t0 = at::randn(input_shape, options);
+  std::vector<c10::IValue> aten_inputs({t0});
+
+  FusionExecutorCache executor_cache(std::move(fusion));
+  auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs);
+
+  testValidate(
+      executor_cache.fusion(), cg_outputs, aten_inputs, __LINE__, __FILE__);
+}
+
 } // namespace nvfuser

From c6576771dd809f765e0fe2f6dc39580cd7ca8c4f Mon Sep 17 00:00:00 2001
From: jjsjann123 <alex.jann2012@gmail.com>
Date: Mon, 5 Feb 2024 15:47:11 -0800
Subject: [PATCH 09/10] Move FusionTensorRankLimit to test_gpu_fused_reduction

---
 test/test_gpu_fused_reduction.cpp | 25 +++++++++++++++++++++++++
 test/test_pointwise.cpp           | 25 -------------------------
 2 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/test/test_gpu_fused_reduction.cpp b/test/test_gpu_fused_reduction.cpp
index a06d6d54f49..476da1981ba 100644
--- a/test/test_gpu_fused_reduction.cpp
+++ b/test/test_gpu_fused_reduction.cpp
@@ -2559,4 +2559,29 @@ TEST_F(NVFuserTest, FusionCrossEntropyGatherPattern_CUDA) {
   testValidate(&fusion, cg_outputs, inputs, {ref}, __LINE__, __FILE__);
 }
 
+TEST_F(NVFuserTest, FusionTensorRankLimit) { // sums a 12-D input over one axis
+  auto fusion = std::make_unique<Fusion>();
+  FusionGuard fg(fusion.get());
+
+  std::vector<int64_t> input_shape; // filled below: 12 dimensions of extent 3
+  for (__attribute__((unused)) auto i : c10::irange(12)) {
+    input_shape.push_back(3);
+  }
+
+  auto tv0 = makeSymbolicTensor(input_shape.size()); // rank taken from shape
+  fusion->addInput(tv0);
+  auto tv1 = sum(tv0, {3}); // reduce over axis 3 only
+  fusion->addOutput(tv1);
+
+  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
+  at::Tensor t0 = at::randn(input_shape, options);
+  std::vector<c10::IValue> aten_inputs({t0});
+
+  FusionExecutorCache executor_cache(std::move(fusion));
+  auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs);
+
+  testValidate( // no explicit reference tensor is passed to this call
+      executor_cache.fusion(), cg_outputs, aten_inputs, __LINE__, __FILE__);
+}
+
 } // namespace nvfuser
diff --git a/test/test_pointwise.cpp b/test/test_pointwise.cpp
index c28ddac2f19..ebae2ff9f7d 100644
--- a/test/test_pointwise.cpp
+++ b/test/test_pointwise.cpp
@@ -430,29 +430,4 @@ TEST_F(PointwiseTest, VIssue1567ectorizationFactorAnalysisCase3) {
   testValidate(fusion, cg_outputs, aten_inputs, __LINE__, __FILE__);
 }
 
-TEST_F(NVFuserTest, FusionTensorRankLimit) {
-  auto fusion = std::make_unique<Fusion>();
-  FusionGuard fg(fusion.get());
-
-  std::vector<int64_t> input_shape;
-  for (__attribute__((unused)) auto i : c10::irange(12)) {
-    input_shape.push_back(3);
-  }
-
-  auto tv0 = makeSymbolicTensor(input_shape.size());
-  fusion->addInput(tv0);
-  auto tv1 = sum(tv0, {3});
-  fusion->addOutput(tv1);
-
-  auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);
-  at::Tensor t0 = at::randn(input_shape, options);
-  std::vector<c10::IValue> aten_inputs({t0});
-
-  FusionExecutorCache executor_cache(std::move(fusion));
-  auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs);
-
-  testValidate(
-      executor_cache.fusion(), cg_outputs, aten_inputs, __LINE__, __FILE__);
-}
-
 } // namespace nvfuser

From 89acedac518ba4854b4e0e4d42bab0ae735081ab Mon Sep 17 00:00:00 2001
From: jjsjann123 <jiej@nvidia.com>
Date: Tue, 6 Feb 2024 12:29:58 -0800
Subject: [PATCH 10/10] Remove define_vector max-size error tests

---
 python_tests/pytest_input_generators.py | 32 -------------------------
 python_tests/pytest_opinfos.py          | 10 --------
 2 files changed, 42 deletions(-)

diff --git a/python_tests/pytest_input_generators.py b/python_tests/pytest_input_generators.py
index a963620bb00..ac282584eea 100644
--- a/python_tests/pytest_input_generators.py
+++ b/python_tests/pytest_input_generators.py
@@ -26,7 +26,6 @@
 MINIMUM_SYMBOLIC_SIZE = -1
 INT64_MAX = 2**63 - 1
 MAX_TENSOR_DIMS = 8
-MAX_VECTOR_SIZE = 8
 
 
 # Determine if a number is with desired Domain [low, high)
@@ -468,42 +467,11 @@ def define_vector_constant_error_generator(
         "The value -2 at index 0 was neither symbolic(-1), zero_element(0), broadcast(1), or static(>1)",
     )
 
-    check_max_vector_size = ErrorSample(
-        {
-            "values": [-1 for _ in range(MAX_VECTOR_SIZE + 1)],
-        },
-        "The specified vector size exceeds the max tensor size for nvfuser.",
-    )
-
     error_cases = [
         # FIXME: The above_size_range case gives a non-sensical error message.
         # "Unable to cast Python instance to C++ type (#define PYBIND11_DETAILED_ER"
         # check_above_size_range,
         check_below_size_range,
-        check_max_vector_size,
-    ]
-
-    for es in error_cases:
-        yield SampleInput(**es.kwargs), es.ex_type, es.ex_str
-
-
-def define_vector_input_error_generator(
-    op: OpInfo, dtype: torch.dtype, requires_grad: bool = False, **kwargs
-):
-    """
-    "define_vector",
-    [](FusionDefinition& self, size_t size) -> Vector {
-    """
-
-    check_max_vector_size = ErrorSample(
-        {
-            "size": (MAX_VECTOR_SIZE + 1),
-        },
-        "The specified vector size exceeds the max tensor size for nvfuser.",
-    )
-
-    error_cases = [
-        check_max_vector_size,
     ]
 
     for es in error_cases:
diff --git a/python_tests/pytest_opinfos.py b/python_tests/pytest_opinfos.py
index 53d6f0f80b3..e52eb7fc183 100644
--- a/python_tests/pytest_opinfos.py
+++ b/python_tests/pytest_opinfos.py
@@ -22,7 +22,6 @@
     define_tensor_generator,
     define_tensor_error_generator,
     define_vector_constant_error_generator,
-    define_vector_input_error_generator,
     elementwise_binary_generator,
     _elementwise_binary_torch,
     elementwise_unary_generator,
@@ -90,15 +89,6 @@
 )
 fusion_input_ops.append(define_vector_constant_opinfo)
 
-define_vector_input_opinfo = OpInfo(
-    lambda fd: fd.define_vector,
-    "define_vector_input",
-    sample_input_generator=None,
-    error_input_generator=define_vector_input_error_generator,
-    fd_error_input_fn=api_test_fd_fn,
-)
-fusion_input_ops.append(define_vector_input_opinfo)
-
 """ End Fusion Input Operations """
 
 """ Start Unary-Float Operations """