llvm · qedawkins · Aug 29, 2022
diff --git a/e2e_testing/xfail_sets.py b/e2e_testing/xfail_sets.py
@@ -31,7 +31,6 @@
     "SqueezeDimModule_unitDim",
     "MeanModule_basic",
     "MeanDynamicSizesModule_basic",
-    "MeanDimEmptyDimModule_basic",
     "NumToTensorFloatModule_basic",
     "AtenToDeviceModule_basic",
     "AvgPool2dStaticModule_basic",

diff --git a/lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp b/lib/Dialect/Torch/Transforms/DecomposeComplexOps.cpp
@@ -1214,9 +1214,18 @@ class DecomposeAtenMeanDimOp : public OpRewritePattern<AtenMeanDimOp> {
           op, "expected `dim` to be `None` or constructed from list construct");
     }
 
+    // Upcast any non-`F64` input (including 64-bit integers) to `F64` so the
+    // sum and division below are computed at higher precision.
+    BaseTensorType outputTensorType = outputType.cast<BaseTensorType>();
+    Type outputTypeAsF64 = outputTensorType.getWithSizesAndDtype(
+        outputTensorType.getSizes(), rewriter.getF64Type());
+    if (!inputType.getDtype().isF64()) {
+      input = convertTensorToDtype(rewriter, loc, input, rewriter.getF64Type());
+    }
+
     // Compute sum along dimensions specified in `dimList`.
     Value sumAlongDims = rewriter.create<AtenSumDimIntListOp>(
-        loc, outputType, input, dimList, keepDim, dtype);
+        loc, outputTypeAsF64, input, dimList, keepDim, dtype);
 
     // `productDimSize` is product of sizes of dimensions to be reduced.
     Value productDimSize;
@@ -1232,8 +1241,12 @@ class DecomposeAtenMeanDimOp : public OpRewritePattern<AtenMeanDimOp> {
             rewriter.create<AtenMulIntOp>(loc, productDimSize, dimSize);
       }
     }
-    rewriter.replaceOpWithNewOp<AtenDivScalarOp>(op, outputType, sumAlongDims,
+    Value result = rewriter.create<AtenDivScalarOp>(loc, outputTypeAsF64, sumAlongDims,
                                                  productDimSize);
+    if (!outputTensorType.getDtype().isF64()) // Cast back to the result dtype.
+      result = convertTensorToDtype(rewriter, loc, result,
+                                    outputTensorType.getDtype());
+    rewriter.replaceOp(op, result);
     return success();
   }
 };

diff --git a/python/torch_mlir_e2e_test/test_suite/stats.py b/python/torch_mlir_e2e_test/test_suite/stats.py
@@ -221,6 +221,46 @@ def MeanDimNoneDimModule_basic(module, tu: TestUtils):
 
 # ==============================================================================
 
+class MeanDimLargeInputModule(torch.nn.Module):
+    """Mean over dims (2, 3) of a rank-4 float32 input, with no explicit `dtype`."""
+    def __init__(self):
+        super().__init__()
+
+    @export
+    @annotate_args([
+        None,
+        ([-1, -1, -1, -1], torch.float32, True),
+    ])
+    def forward(self, x):
+        return torch.ops.aten.mean(x, (2, 3))
+
+
+@register_test_case(module_factory=lambda: MeanDimLargeInputModule())
+def MeanDimLargeInputModule_basic(module, tu: TestUtils):
+    module.forward(100 + tu.rand(3, 4, 1024, 8192))  # 1024 * 8192 values per mean
+
+# ==============================================================================
+
+class MeanDimLargeInputDtypeModule(torch.nn.Module):
+    """Mean over dims (2, 3) of a rank-4 float32 input, with dtype=torch.float32."""
+    def __init__(self):
+        super().__init__()
+
+    @export
+    @annotate_args([
+        None,
+        ([-1, -1, -1, -1], torch.float32, True),
+    ])
+    def forward(self, x):
+        return torch.ops.aten.mean(x, (2, 3), dtype=torch.float32)
+
+
+@register_test_case(module_factory=lambda: MeanDimLargeInputDtypeModule())
+def MeanDimLargeInputDtypeModule_basic(module, tu: TestUtils):
+    module.forward(100 + tu.rand(3, 4, 1024, 8192))  # 1024 * 8192 values per mean
+
+# ==============================================================================
+
 class VarUnbiasedModule(torch.nn.Module):
     def __init__(self):
         super().__init__()