From a41e233d16435bf3ed80be123107bad274b26e90 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 5 Feb 2024 12:51:01 -0800 Subject: [PATCH 01/10] quick cpp test --- test/test_gpu3.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/test_gpu3.cpp b/test/test_gpu3.cpp index 8f53a851eac..dcf0295b7af 100644 --- a/test/test_gpu3.cpp +++ b/test/test_gpu3.cpp @@ -8664,6 +8664,31 @@ TEST_F(NVFuserTest, Reduction3DConstantIterationDomain) { executor_cache.fusion(), cg_outputs, inputs, {ref}, __LINE__, __FILE__); } +TEST_F(NVFuserTest, FusionTensorRankLimit) { + auto fusion = std::make_unique(); + FusionGuard fg(fusion.get()); + + std::vector input_shape; + for (auto i : c10::irange(12)) { + input_shape.append(3); + } + + auto tv0 = makeSymbolicTensor(input_shape.size()); + fusion->addInput(tv0); + auto tv1 = sum(tv0, {0}); + fusion->addOutput(tv1); + + auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); + at::Tensor t0 = at::randn(input_shape, options); + std::vector aten_inputs({t0}); + + FusionExecutorCache executor_cache(std::move(fusion)); + auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs); + + testValidate( + executor_cache.fusion(), cg_outputs, {t0, t1, t2}, __LINE__, __FILE__); +} + // Test file size should be up to 10K LoC. Create a new file for more tests. } // namespace nvfuser From 88f6abb9b96819a679e8ef218d1f289308e5b0f4 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 5 Feb 2024 12:54:58 -0800 Subject: [PATCH 02/10] fixing tests --- test/test_gpu3.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/test_gpu3.cpp b/test/test_gpu3.cpp index dcf0295b7af..5fc88be221f 100644 --- a/test/test_gpu3.cpp +++ b/test/test_gpu3.cpp @@ -8686,7 +8686,7 @@ TEST_F(NVFuserTest, FusionTensorRankLimit) { auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs); testValidate( - executor_cache.fusion(), cg_outputs, {t0, t1, t2}, __LINE__, __FILE__); + executor_cache.fusion(), cg_outputs, aten_inputs, __LINE__, __FILE__); } // Test file size should be up to 10K LoC. Create a new file for more tests. From ae868ae68d48c37a8bef427a0831797da8b0a19c Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 5 Feb 2024 13:26:44 -0800 Subject: [PATCH 03/10] fixing cpp tests; adding python tests --- python_tests/test_python_frontend.py | 20 ++++++++++++++++++++ test/test_gpu3.cpp | 6 +++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/python_tests/test_python_frontend.py b/python_tests/test_python_frontend.py index da91381017c..8dbef706270 100644 --- a/python_tests/test_python_frontend.py +++ b/python_tests/test_python_frontend.py @@ -519,6 +519,26 @@ def nvfuser_fusion( self.assertEqual(eager_out, nvf_out[0]) + def test_tensor_ndim(self): + shape = [] + for i in range(12): + shape.append(3) + + inputs = [ + torch.randn(shape, device="cuda"), + ] + + def fusion_func(fd: FusionDefinition): + t0 = fd.from_pytorch(inputs[0]) + + t1 = fd.ops.sum(t0, axes=[3]) + + fd.add_output(t1) + + nvf_out, _ = self.exec_nvfuser(fusion_func, inputs) + eager_out = torch.sum(inputs[0], dim=3) + self.assertEqual(eager_out, nvf_out[0]) + # Testing a scenario where a broadcast requires a symbolic output shape def test_tensor_shape(self): inputs = [ diff --git a/test/test_gpu3.cpp b/test/test_gpu3.cpp index 5fc88be221f..9afcd40a2ae 100644 --- a/test/test_gpu3.cpp +++ b/test/test_gpu3.cpp @@ -8669,13 +8669,13 @@ TEST_F(NVFuserTest, FusionTensorRankLimit) { FusionGuard fg(fusion.get()); std::vector input_shape; - for (auto i : c10::irange(12)) { - input_shape.append(3); + for (__attribute__((unused)) auto i : c10::irange(12)) { + input_shape.push_back(3); } auto tv0 = makeSymbolicTensor(input_shape.size()); fusion->addInput(tv0); - auto tv1 = sum(tv0, {0}); + auto tv1 = sum(tv0, {3}); fusion->addOutput(tv1); auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); From 35b55e401d86626f4dae381321465fce7ef88fd0 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 5 Feb 2024 13:49:44 -0800 Subject: [PATCH 04/10] updating tests --- python_tests/test_python_frontend.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/python_tests/test_python_frontend.py b/python_tests/test_python_frontend.py index 8dbef706270..7096af481cb 100644 --- a/python_tests/test_python_frontend.py +++ b/python_tests/test_python_frontend.py @@ -520,23 +520,26 @@ def nvfuser_fusion( self.assertEqual(eager_out, nvf_out[0]) def test_tensor_ndim(self): - shape = [] - for i in range(12): - shape.append(3) + shape = [2 for i in range(12)] + new_shape = shape[:9] + new_shape.append(8) inputs = [ torch.randn(shape, device="cuda"), + new_shape ] def fusion_func(fd: FusionDefinition): t0 = fd.from_pytorch(inputs[0]) + n_shape = fd.define_vector(10) - t1 = fd.ops.sum(t0, axes=[3]) + t1 = fd.ops.reshape(t0, n_shape) + t2 = fd.ops.sum(t1, axes=[3]) fd.add_output(t1) nvf_out, _ = self.exec_nvfuser(fusion_func, inputs) - eager_out = torch.sum(inputs[0], dim=3) + eager_out = torch.sum(inputs[0].reshape(new_shape), dim=3) self.assertEqual(eager_out, nvf_out[0]) # Testing a scenario where a broadcast requires a symbolic output shape From d5264299c114a2c3838b8245eec789b8fa2e4c68 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 5 Feb 2024 13:52:03 -0800 Subject: [PATCH 05/10] lifting restrictions --- csrc/python_frontend/python_bindings.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/csrc/python_frontend/python_bindings.cpp b/csrc/python_frontend/python_bindings.cpp index 08029a1e205..d03e97c6f77 100644 --- a/csrc/python_frontend/python_bindings.cpp +++ b/csrc/python_frontend/python_bindings.cpp @@ -55,9 +55,6 @@ Vector define_vector_fn( std::vector args; size_t idx = 0; for (const auto& item : values) { - NVF_CHECK( - idx < 8, - "The specified vector size exceeds the max tensor size for nvfuser."); if (py::isinstance(item)) { auto int_value = py::cast(item); NVF_CHECK( @@ -865,9 +862,6 @@ void initNvFuserPythonBindings(PyObject* module) { fusion_def.def( "define_vector", [](FusionDefinition& self, size_t size) -> Vector { - NVF_CHECK( - size < 8, - "The specified vector size exceeds the max tensor size for nvfuser."); std::vector args; args.reserve(size); for (size_t i = 0; i < size; ++i) { From 4569820056132517bdd5b462e1f9bcd97a197f50 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 5 Feb 2024 13:57:31 -0800 Subject: [PATCH 06/10] fixing python tests --- python_tests/test_python_frontend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python_tests/test_python_frontend.py b/python_tests/test_python_frontend.py index 7096af481cb..a0f759cc2f0 100644 --- a/python_tests/test_python_frontend.py +++ b/python_tests/test_python_frontend.py @@ -536,7 +536,7 @@ def fusion_func(fd: FusionDefinition): t1 = fd.ops.reshape(t0, n_shape) t2 = fd.ops.sum(t1, axes=[3]) - fd.add_output(t1) + fd.add_output(t2) nvf_out, _ = self.exec_nvfuser(fusion_func, inputs) eager_out = torch.sum(inputs[0].reshape(new_shape), dim=3) From 04c4f3a41c74d242e8c891979cf0aeaa7dcf9221 Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 5 Feb 2024 14:27:14 -0800 Subject: [PATCH 07/10] black --- python_tests/test_python_frontend.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python_tests/test_python_frontend.py b/python_tests/test_python_frontend.py index a0f759cc2f0..48d3b9aae82 100644 --- a/python_tests/test_python_frontend.py +++ b/python_tests/test_python_frontend.py @@ -524,10 +524,7 @@ def test_tensor_ndim(self): new_shape = shape[:9] new_shape.append(8) - inputs = [ - torch.randn(shape, device="cuda"), - new_shape - ] + inputs = [torch.randn(shape, device="cuda"), new_shape] def fusion_func(fd: FusionDefinition): t0 = fd.from_pytorch(inputs[0]) From 6e6ca6c995932ba825faf62f2efe336b4d20d15a Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 5 Feb 2024 15:37:54 -0800 Subject: [PATCH 08/10] moving cpp test to test_pointwise --- test/test_gpu3.cpp | 25 ------------------------- test/test_pointwise.cpp | 25 +++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/test/test_gpu3.cpp b/test/test_gpu3.cpp index 9afcd40a2ae..8f53a851eac 100644 --- a/test/test_gpu3.cpp +++ b/test/test_gpu3.cpp @@ -8664,31 +8664,6 @@ TEST_F(NVFuserTest, Reduction3DConstantIterationDomain) { executor_cache.fusion(), cg_outputs, inputs, {ref}, __LINE__, __FILE__); } -TEST_F(NVFuserTest, FusionTensorRankLimit) { - auto fusion = std::make_unique(); - FusionGuard fg(fusion.get()); - - std::vector input_shape; - for (__attribute__((unused)) auto i : c10::irange(12)) { - input_shape.push_back(3); - } - - auto tv0 = makeSymbolicTensor(input_shape.size()); - fusion->addInput(tv0); - auto tv1 = sum(tv0, {3}); - fusion->addOutput(tv1); - - auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); - at::Tensor t0 = at::randn(input_shape, options); - std::vector aten_inputs({t0}); - - FusionExecutorCache executor_cache(std::move(fusion)); - auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs); - - testValidate( - executor_cache.fusion(), cg_outputs, aten_inputs, __LINE__, __FILE__); -} - // Test file size should be up to 10K LoC. Create a new file for more tests. } // namespace nvfuser diff --git a/test/test_pointwise.cpp b/test/test_pointwise.cpp index ebae2ff9f7d..c28ddac2f19 100644 --- a/test/test_pointwise.cpp +++ b/test/test_pointwise.cpp @@ -430,4 +430,29 @@ TEST_F(PointwiseTest, VIssue1567ectorizationFactorAnalysisCase3) { testValidate(fusion, cg_outputs, aten_inputs, __LINE__, __FILE__); } +TEST_F(NVFuserTest, FusionTensorRankLimit) { + auto fusion = std::make_unique(); + FusionGuard fg(fusion.get()); + + std::vector input_shape; + for (__attribute__((unused)) auto i : c10::irange(12)) { + input_shape.push_back(3); + } + + auto tv0 = makeSymbolicTensor(input_shape.size()); + fusion->addInput(tv0); + auto tv1 = sum(tv0, {3}); + fusion->addOutput(tv1); + + auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); + at::Tensor t0 = at::randn(input_shape, options); + std::vector aten_inputs({t0}); + + FusionExecutorCache executor_cache(std::move(fusion)); + auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs); + + testValidate( + executor_cache.fusion(), cg_outputs, aten_inputs, __LINE__, __FILE__); +} + } // namespace nvfuser From c6576771dd809f765e0fe2f6dc39580cd7ca8c4f Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Mon, 5 Feb 2024 15:47:11 -0800 Subject: [PATCH 09/10] moving test yet again --- test/test_gpu_fused_reduction.cpp | 25 +++++++++++++++++++++++++ test/test_pointwise.cpp | 25 ------------------------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/test/test_gpu_fused_reduction.cpp b/test/test_gpu_fused_reduction.cpp index a06d6d54f49..476da1981ba 100644 --- a/test/test_gpu_fused_reduction.cpp +++ b/test/test_gpu_fused_reduction.cpp @@ -2559,4 +2559,29 @@ TEST_F(NVFuserTest, FusionCrossEntropyGatherPattern_CUDA) { testValidate(&fusion, cg_outputs, inputs, {ref}, __LINE__, __FILE__); } +TEST_F(NVFuserTest, FusionTensorRankLimit) { + auto fusion = std::make_unique(); + FusionGuard fg(fusion.get()); + + std::vector input_shape; + for (__attribute__((unused)) auto i : c10::irange(12)) { + input_shape.push_back(3); + } + + auto tv0 = makeSymbolicTensor(input_shape.size()); + fusion->addInput(tv0); + auto tv1 = sum(tv0, {3}); + fusion->addOutput(tv1); + + auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); + at::Tensor t0 = at::randn(input_shape, options); + std::vector aten_inputs({t0}); + + FusionExecutorCache executor_cache(std::move(fusion)); + auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs); + + testValidate( + executor_cache.fusion(), cg_outputs, aten_inputs, __LINE__, __FILE__); +} + } // namespace nvfuser diff --git a/test/test_pointwise.cpp b/test/test_pointwise.cpp index c28ddac2f19..ebae2ff9f7d 100644 --- a/test/test_pointwise.cpp +++ b/test/test_pointwise.cpp @@ -430,29 +430,4 @@ TEST_F(PointwiseTest, VIssue1567ectorizationFactorAnalysisCase3) { testValidate(fusion, cg_outputs, aten_inputs, __LINE__, __FILE__); } -TEST_F(NVFuserTest, FusionTensorRankLimit) { - auto fusion = std::make_unique(); - FusionGuard fg(fusion.get()); - - std::vector input_shape; - for (__attribute__((unused)) auto i : c10::irange(12)) { - input_shape.push_back(3); - } - - auto tv0 = makeSymbolicTensor(input_shape.size()); - fusion->addInput(tv0); - auto tv1 = sum(tv0, {3}); - fusion->addOutput(tv1); - - auto options = at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0); - at::Tensor t0 = at::randn(input_shape, options); - std::vector aten_inputs({t0}); - - FusionExecutorCache executor_cache(std::move(fusion)); - auto cg_outputs = executor_cache.runFusionWithInputs(aten_inputs); - - testValidate( - executor_cache.fusion(), cg_outputs, aten_inputs, __LINE__, __FILE__); -} - } // namespace nvfuser From 89acedac518ba4854b4e0e4d42bab0ae735081ab Mon Sep 17 00:00:00 2001 From: jjsjann123 Date: Tue, 6 Feb 2024 12:29:58 -0800 Subject: [PATCH 10/10] fixing tests --- python_tests/pytest_input_generators.py | 32 ------------------------- python_tests/pytest_opinfos.py | 10 -------- 2 files changed, 42 deletions(-) diff --git a/python_tests/pytest_input_generators.py b/python_tests/pytest_input_generators.py index a963620bb00..ac282584eea 100644 --- a/python_tests/pytest_input_generators.py +++ b/python_tests/pytest_input_generators.py @@ -26,7 +26,6 @@ MINIMUM_SYMBOLIC_SIZE = -1 INT64_MAX = 2**63 - 1 MAX_TENSOR_DIMS = 8 -MAX_VECTOR_SIZE = 8 # Determine if a number is with desired Domain [low, high) @@ -468,42 +467,11 @@ def define_vector_constant_error_generator( "The value -2 at index 0 was neither symbolic(-1), zero_element(0), broadcast(1), or static(>1)", ) - check_max_vector_size = ErrorSample( - { - "values": [-1 for _ in range(MAX_VECTOR_SIZE + 1)], - }, - "The specified vector size exceeds the max tensor size for nvfuser.", - ) - error_cases = [ # FIXME: The above_size_range case gives a non-sensical error message. # "Unable to cast Python instance to C++ type (#define PYBIND11_DETAILED_ER" # check_above_size_range, check_below_size_range, - check_max_vector_size, - ] - - for es in error_cases: - yield SampleInput(**es.kwargs), es.ex_type, es.ex_str - - -def define_vector_input_error_generator( - op: OpInfo, dtype: torch.dtype, requires_grad: bool = False, **kwargs -): - """ - "define_vector", - [](FusionDefinition& self, size_t size) -> Vector { - """ - - check_max_vector_size = ErrorSample( - { - "size": (MAX_VECTOR_SIZE + 1), - }, - "The specified vector size exceeds the max tensor size for nvfuser.", - ) - - error_cases = [ - check_max_vector_size, ] for es in error_cases: diff --git a/python_tests/pytest_opinfos.py b/python_tests/pytest_opinfos.py index 53d6f0f80b3..e52eb7fc183 100644 --- a/python_tests/pytest_opinfos.py +++ b/python_tests/pytest_opinfos.py @@ -22,7 +22,6 @@ define_tensor_generator, define_tensor_error_generator, define_vector_constant_error_generator, - define_vector_input_error_generator, elementwise_binary_generator, _elementwise_binary_torch, elementwise_unary_generator, @@ -90,15 +89,6 @@ ) fusion_input_ops.append(define_vector_constant_opinfo) -define_vector_input_opinfo = OpInfo( - lambda fd: fd.define_vector, - "define_vector_input", - sample_input_generator=None, - error_input_generator=define_vector_input_error_generator, - fd_error_input_fn=api_test_fd_fn, -) -fusion_input_ops.append(define_vector_input_opinfo) - """ End Fusion Input Operations """ """ Start Unary-Float Operations """