Skip to content

Commit

Permalink
Add back FusionReductionWithTrivialReduction_CUDA (#2030)
Browse files Browse the repository at this point in the history
  • Loading branch information
zasdfgbnm authored Oct 4, 2022
1 parent fbd97e5 commit ddd1cf7
Showing 1 changed file with 68 additions and 0 deletions.
68 changes: 68 additions & 0 deletions torch/csrc/jit/codegen/cuda/test/test_gpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7853,6 +7853,74 @@ TEST_F(NVFuserTest, FusionReductionScheduler_CUDA) {
lparams);
}

// This test checks that our system correctly handles the case where both a
// reduction and a trivial reduction exist in the fusion. Trivial reductions
// deserve testing because they are handled more like a broadcast than like a
// reduction.
TEST_F(NVFuserTest, FusionReductionWithTrivialReduction_CUDA) {
  // For every 3-D shape that has exactly one extent-1 axis, build a fusion
  // that sums the input over each non-empty subset of its axes, run it, and
  // validate all outputs against the equivalent ATen sums.
  constexpr int bid_x = 80;
  constexpr int tid_x = 4096;

  // -1 is a placeholder that is replaced, in order, by bid_x and tid_x when
  // the concrete ATen input is created below.
  const std::vector<std::vector<int64_t>> shapes = {
      {-1, -1, 1}, {-1, 1, -1}, {1, -1, -1}};

  // Every non-empty subset of the three axes. This table does not depend on
  // the shape, so it is built once rather than once per loop iteration.
  const std::vector<std::vector<int64_t>> reduction_dims = {
      {0},
      {1},
      {2},
      {0, 1},
      {0, 2},
      {1, 2},
      {0, 1, 2},
  };

  // Also independent of the shape under test.
  const auto options =
      at::TensorOptions().dtype(at::kFloat).device(at::kCUDA, 0);

  for (const auto& shape : shapes) {
    std::unique_ptr<Fusion> fusion_ptr = std::make_unique<Fusion>();
    Fusion& fusion = *fusion_ptr;
    FusionGuard fg(&fusion);

    // Set up your input tensor views
    TensorView* tv0 = makeConcreteTensor(shape);
    fusion.addInput(tv0);

    // One fusion output per combination of reduction axes.
    for (const auto& rdims : reduction_dims) {
      // The axes are handed to sum() as std::vector<int>, so convert from
      // the int64_t representation that the ATen reference below uses.
      const std::vector<int> rdims_(rdims.begin(), rdims.end());
      auto tv = sum(tv0, rdims_);
      fusion.addOutput(tv);
    }

    // Substitute the -1 placeholders, front to back, with bid_x then tid_x.
    auto concrete_shape = shape;
    std::deque<int64_t> concrete_values = {bid_x, tid_x};
    for (auto& s : concrete_shape) {
      if (s == -1) {
        s = concrete_values.front();
        concrete_values.pop_front();
      }
    }

    // Reference results, in the same order as the fusion outputs.
    at::Tensor aten_input = at::randn(concrete_shape, options);
    std::vector<at::Tensor> aten_outputs;
    aten_outputs.reserve(reduction_dims.size());
    for (const auto& rdims : reduction_dims) {
      aten_outputs.push_back(aten_input.sum(rdims));
    }

    // NOTE(review): `fusion` keeps being used after fusion_ptr is moved from;
    // this presumably relies on executor_cache keeping the Fusion alive —
    // confirm against FusionExecutorCache.
    FusionExecutorCache executor_cache(std::move(fusion_ptr));
    auto cg_outputs = executor_cache.runFusionWithInputs({aten_input});

    testValidate(
        &fusion,
        cg_outputs,
        {aten_input},
        aten_outputs,
        __LINE__,
        __FILE__,
        "");
  }
}

// Simple reduction parallelized on a symbolic size.
TEST_F(NVFuserTest, FusionSymbolicReduction_CUDA) {
Fusion fusion;
Expand Down

0 comments on commit ddd1cf7

Please sign in to comment.