diff --git a/codegen/codegen_outofplacebatching.py b/codegen/codegen_outofplacebatching.py
index bfe039d6d..8d0ee4aba 100644
--- a/codegen/codegen_outofplacebatching.py
+++ b/codegen/codegen_outofplacebatching.py
@@ -155,7 +155,7 @@ def parse_return(return_t):
     return tuple([x.strip() for x in m.group(1).split(',')])
 
 def parse_args(args_t):
-    args = args_t.split(',')
+    args = args_t.split(', ')
     result = []
     for arg in args:
         split_idx = arg.rfind(' ')
@@ -170,8 +170,6 @@ def get_signatures(path='build/aten/src/ATen/RegistrationDeclarations.h', include_op=False):
     for line in lines:
         if 'void' in line:
             continue
-        if 'std::array' in line:
-            continue
         m = re.match(r'(.*) \w+\((.*)\); // {"schema": "aten::(\w+\.?\w*)\(.*', line)
         if m is None:
             continue
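An aside on the codegen change above: RegistrationDeclarations.h separates arguments with ", ", while the comma inside a type such as ::std::array<bool,3> has no trailing space, so splitting on ', ' keeps those types in one piece, which is presumably why the 'std::array' skip in get_signatures could be dropped. A quick illustration (the signature string below is made up, but mirrors the RegistrationDeclarations.h style):

    args_t = "const Tensor & grad_output, const Tensor & input, ::std::array<bool,3> output_mask"

    print(args_t.split(','))
    # ['const Tensor & grad_output', ' const Tensor & input',
    #  ' ::std::array<bool', '3> output_mask']   <- the array type is torn in two

    print(args_t.split(', '))
    # ['const Tensor & grad_output', 'const Tensor & input',
    #  '::std::array<bool,3> output_mask']       <- stays intact
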
diff --git a/functorch/csrc/BatchRulesModules.cpp b/functorch/csrc/BatchRulesModules.cpp
index 127c10ea0..f599d18c3 100644
--- a/functorch/csrc/BatchRulesModules.cpp
+++ b/functorch/csrc/BatchRulesModules.cpp
@@ -255,6 +255,202 @@ grid_sample_batch_rule(const Tensor& input, optional<int64_t> input_bdim, const
   return result;
 }
 
+Tensor expand_reshape_dim_into(int64_t batch_size, int64_t dst, const Tensor& x) {
+  auto x_ = x.unsqueeze(0);
+  VmapDimVector new_shape(x_.sizes().begin(), x_.sizes().end());
+  new_shape[0] = batch_size;
+  x_ = x_.expand(new_shape);
+  return reshape_dim_into(0, dst, x_);
+}
+
+
+std::tuple<Tensor, Tensor, optional<int64_t>, Tensor, optional<int64_t>, int64_t>
+grid_sample_backward_helper_in(
+    const Tensor& grad_output, optional<int64_t> grad_output_bdim,
+    const Tensor& input, optional<int64_t> input_bdim,
+    const Tensor& grid, optional<int64_t> grid_bdim) {
+  auto new_grad_output = grad_output;
+  auto new_input = input;
+  auto new_grid = grid;
+
+  optional<int64_t> grad_input_out_bdim = nullopt;
+  optional<int64_t> grad_grid_out_bdim = nullopt;
+  int64_t bdim_size = 0;
+
+  if (grad_output_bdim) {
+
+    bdim_size = grad_output.sizes()[*grad_output_bdim];
+
+    if (input_bdim && grid_bdim) {
+      // case 1: (grad_output is batched, input is batched, grid is batched)
+      // grad_output: (BN)CH_{out}W_{out}, input: (BN)CH_{in}W_{in}, grid: (BN)H_{out}W_{out}2
+      // grad_input: (BN)CH_{in}W_{in}
+
+      new_grad_output = reshape_dim_into(*grad_output_bdim, 0, grad_output);
+      new_input = reshape_dim_into(*input_bdim, 0, input);
+      new_grid = reshape_dim_into(*grid_bdim, 0, grid);
+      grad_input_out_bdim = 0;
+      grad_grid_out_bdim = 0;
+    } else if (input_bdim && !grid_bdim) {
+      // case 2: (grad_output is batched, input is batched, grid is not batched)
+      // IF PUT BATCH DIM TO CHANNEL -> backward produces wrong grad_grid
+      //
+      // grad_output: (BN)CH_{out}W_{out}, input: (BN)CH_{in}W_{in}, grid: NH_{out}W_{out}2
+      // -> grid: (BN)H_{out}W_{out}2
+      // grad_input: (BN)CH_{in}W_{in}
+
+      new_grad_output = reshape_dim_into(*grad_output_bdim, 0, grad_output);
+      new_input = reshape_dim_into(*input_bdim, 0, input);
+      grad_input_out_bdim = 0;
+      new_grid = expand_reshape_dim_into(bdim_size, 0, grid);
+      grad_grid_out_bdim = 0;
+    } else if (!input_bdim && grid_bdim) {
+      // case 3: (grad_output is batched, input is not batched, grid is batched)
+      // IF PUT BATCH DIM TO H_out -> backward produces wrong grad_grid
+      //
+      // grad_output: (BN)CH_{out}W_{out}, input: NCH_{in}W_{in}, grid: (BN)H_{out}W_{out}2
+      // -> input: (BN)CH_{in}W_{in}
+      // grad_input: (BN)CH_{in}W_{in}
+
+      new_grad_output = reshape_dim_into(*grad_output_bdim, 0, grad_output);
+      new_grid = reshape_dim_into(*grid_bdim, 0, grid);
+      grad_grid_out_bdim = 0;
+      // expand input to (BN)CH_{in}W_{in}
+      new_input = expand_reshape_dim_into(bdim_size, 0, new_input);
+      grad_input_out_bdim = 0;
+    } else {
+      // case 4: (grad_output is batched, input is not batched, grid is not batched)
+      // IF PUT BATCH DIM TO H_out -> backward produces wrong grad_grid
+      //
+      // grad_output: (BN)CH_{out}W_{out}, input: NCH_{in}W_{in}, grid: NH_{out}W_{out}2
+      // -> grid: (BN)H_{out}W_{out}2
+      // -> input: (BN)CH_{in}W_{in}
+      // grad_input: NCH_{in}W_{in}
+
+      new_grad_output = reshape_dim_into(*grad_output_bdim, 0, grad_output);
+      // expand grid to (BN)H_{out}W_{out}2
+      new_grid = expand_reshape_dim_into(bdim_size, 0, grid);
+      grad_grid_out_bdim = 0;
+      // expand input to (BN)CH_{in}W_{in}
+      new_input = expand_reshape_dim_into(bdim_size, 0, input);
+      grad_input_out_bdim = 0;
+    }
+  } else {
+    if (input_bdim && grid_bdim) {
+      // case 5: (grad_output is not batched, input is batched, grid is batched)
+      // grad_output: NCH_{out}W_{out}, input: (BN)CH_{in}W_{in}, grid: (BN)H_{out}W_{out}2
+      // -> grad_output: (BN)CH_{out}W_{out}
+      // grad_input: (BN)CH_{in}W_{in}
+
+      bdim_size = input.sizes()[*input_bdim];
+      // expand new_grad_output to (BN)CH_{out}W_{out}
+      new_grad_output = expand_reshape_dim_into(bdim_size, 0, new_grad_output);
+      new_input = reshape_dim_into(*input_bdim, 0, input);
+      grad_input_out_bdim = 0;
+      new_grid = reshape_dim_into(*grid_bdim, 0, grid);
+      grad_grid_out_bdim = 0;
+    } else if (input_bdim && !grid_bdim) {
+      // case 6: (grad_output is not batched, input is batched, grid is not batched)
+      // grad_output: NCH_{out}W_{out}, input: (BN)CH_{in}W_{in}, grid: NH_{out}W_{out}2
+      // -> grad_output: (BN)CH_{out}W_{out}
+      // -> grid: (BN)H_{out}W_{out}2
+      // grad_input: (BN)CH_{in}W_{in}
+
+      bdim_size = input.sizes()[*input_bdim];
+      // expand new_grad_output to (BN)CH_{out}W_{out}
+      new_grad_output = expand_reshape_dim_into(bdim_size, 0, new_grad_output);
+      new_input = reshape_dim_into(*input_bdim, 0, input);
+      grad_input_out_bdim = 0;
+      // expand new_grid to (BN)H_{out}W_{out}2
+      new_grid = expand_reshape_dim_into(bdim_size, 0, grid);
+      grad_grid_out_bdim = 0;
+    } else if (!input_bdim && grid_bdim) {
+      // case 7: (grad_output is not batched, input is not batched, grid is batched)
+      // IF PUT BATCH DIM TO H_out -> backward produces wrong grad_grid
+      //
+      // grad_output: NCH_{out}W_{out}, input: NCH_{in}W_{in}, grid: (BN)H_{out}W_{out}2
+      // -> grad_output: (BN)CH_{out}W_{out}
+      // -> input: (BN)CH_{in}W_{in}
+      // grad_input: NCH_{in}W_{in}
+
+      bdim_size = grid.sizes()[*grid_bdim];
+      // expand new_grad_output to (BN)CH_{out}W_{out}
+      new_grad_output = expand_reshape_dim_into(bdim_size, 0, new_grad_output);
+      // expand new_input to (BN)CH_{in}W_{in}
+      new_input = expand_reshape_dim_into(bdim_size, 0, new_input);
+      grad_input_out_bdim = 0;
+      new_grid = reshape_dim_into(*grid_bdim, 0, grid);
+      grad_grid_out_bdim = 0;
+    } // case 8 can be ignored
+  }
+  return std::make_tuple(
+      new_grad_output, new_input, grad_input_out_bdim, new_grid, grad_grid_out_bdim, bdim_size);
+}
+
+std::tuple<Tensor, optional<int64_t>, Tensor, optional<int64_t>>
+grid_sample_backward_helper_out(
+    const std::tuple<Tensor, Tensor> & bw_out,
+    optional<int64_t> grad_input_out_bdim,
+    optional<int64_t> grad_grid_out_bdim,
+    int64_t bdim_size) {
+  auto grad_input = std::get<0>(bw_out);
+  auto grad_grid = std::get<1>(bw_out);
+  if (grad_input_out_bdim) {
+    grad_input = reshape_dim_outof(*grad_input_out_bdim, bdim_size, grad_input);
+  }
+  if (grad_grid_out_bdim) {
+    grad_grid = reshape_dim_outof(*grad_grid_out_bdim, bdim_size, grad_grid);
+  }
+  auto result = std::make_tuple(grad_input, grad_input_out_bdim, grad_grid, grad_grid_out_bdim);
+  return result;
+}
+
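+// Reviewer note (illustrative shapes only): both helpers above just move
+// dimensions around. E.g. in case 2 with a vmap batch of size B, grad_output
+// (B, N, C, H_out, W_out) and input (B, N, C, H_in, W_in) are folded to
+// (B*N, C, H_out, W_out) / (B*N, C, H_in, W_in) via reshape_dim_into, the shared
+// grid (N, H_out, W_out, 2) is broadcast and folded to (B*N, H_out, W_out, 2) via
+// expand_reshape_dim_into, the plain backward kernel runs once on that flattened
+// batch, and grid_sample_backward_helper_out unflattens both gradients with
+// reshape_dim_outof, reporting dim 0 as the output bdim.
+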
+template <typename F, F Func, typename... ExtraArgs>
+std::tuple<Tensor, optional<int64_t>, Tensor, optional<int64_t>>
+grid_sample_backward_batch_rule(
+    const Tensor& grad_output, optional<int64_t> grad_output_bdim,
+    const Tensor& input, optional<int64_t> input_bdim,
+    const Tensor& grid, optional<int64_t> grid_bdim,
+    ExtraArgs... extra_args) {
+
+  auto new_bw_input = grid_sample_backward_helper_in(
+      grad_output, grad_output_bdim, input, input_bdim, grid, grid_bdim);
+
+  auto new_grad_output = std::get<0>(new_bw_input);
+  auto new_input = std::get<1>(new_bw_input);
+  auto grad_input_out_bdim = std::get<2>(new_bw_input);
+  auto new_grid = std::get<3>(new_bw_input);
+  auto grad_grid_out_bdim = std::get<4>(new_bw_input);
+  int64_t bdim_size = std::get<5>(new_bw_input);
+
+  auto bw_out = Func(new_grad_output, new_input, new_grid, std::forward<ExtraArgs>(extra_args)...);
+
+  return grid_sample_backward_helper_out(bw_out, grad_input_out_bdim, grad_grid_out_bdim, bdim_size);
+}
+
+template <typename F, F Func>
+std::tuple<Tensor, optional<int64_t>, Tensor, optional<int64_t>>
+cudnn_grid_sample_backward_batch_rule(
+    const Tensor& input, optional<int64_t> input_bdim,
+    const Tensor& grid, optional<int64_t> grid_bdim,
+    const Tensor& grad_output, optional<int64_t> grad_output_bdim) {
+
+  auto new_bw_input = grid_sample_backward_helper_in(
+      grad_output, grad_output_bdim, input, input_bdim, grid, grid_bdim);
+
+  auto new_grad_output = std::get<0>(new_bw_input);
+  auto new_input = std::get<1>(new_bw_input);
+  auto grad_input_out_bdim = std::get<2>(new_bw_input);
+  auto new_grid = std::get<3>(new_bw_input);
+  auto grad_grid_out_bdim = std::get<4>(new_bw_input);
+  int64_t bdim_size = std::get<5>(new_bw_input);
+
+  auto bw_out = Func(new_input, new_grid, new_grad_output);
+
+  return grid_sample_backward_helper_out(bw_out, grad_input_out_bdim, grad_grid_out_bdim, bdim_size);
+}
+
 std::tuple<Tensor,optional<int64_t>> cross_batch_rule(
     const Tensor& self, optional<int64_t> self_bdim,
     const Tensor& other, optional<int64_t> other_bdim,
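An aside for reviewers: a minimal sketch of the user-level call these backward rules are meant to support. It assumes the functorch Python API (vmap, grad); the sizes and the helper name sampled_sum are made up, and the shared-grid setup corresponds to case 2 above (grad_output and input batched, grid not batched):

    import torch
    import torch.nn.functional as F
    from functorch import vmap, grad

    B, N, C, H, W = 3, 2, 4, 5, 5
    H_out, W_out = 6, 6

    def sampled_sum(inp, grid):
        # scalar loss so grad() differentiates w.r.t. the per-example input
        return F.grid_sample(inp, grid, align_corners=True).sum()

    inputs = torch.randn(B, N, C, H, W)            # batched over dim 0 by vmap
    grid = torch.rand(N, H_out, W_out, 2) * 2 - 1  # shared across the vmap batch

    # The grad of grid_sample w.r.t. its input goes through grid_sampler_2d_backward,
    # which the batch rule above flattens into a single (B*N, ...) call.
    grad_inputs = vmap(grad(sampled_sum), in_dims=(0, None))(inputs, grid)
    print(grad_inputs.shape)  # torch.Size([3, 2, 4, 5, 5])
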
@@ -370,12 +566,53 @@ struct GridSampleBatchRuleHelper<F, Func, typelist<A, B, T...>> {
   }
 };
 
+template <typename F, F Func, typename A, typename B, typename C, typename... T>
+struct GridSampleBackwardBatchRuleHelper;
+
+template <typename F, F Func, typename A, typename B, typename C, typename... T>
+struct GridSampleBackwardBatchRuleHelper<F, Func, typelist<A, B, C, T...>> {
+  static std::tuple<Tensor, optional<int64_t>, Tensor, optional<int64_t>> apply(
+      const Tensor& grad_output, optional<int64_t> grad_output_batch_dim,
+      const Tensor& input, optional<int64_t> input_batch_dim,
+      const Tensor& grid, optional<int64_t> grid_batch_dim,
+      T... extra_args) {
+    return grid_sample_backward_batch_rule<F, Func, T...>(
+        grad_output, grad_output_batch_dim,
+        input, input_batch_dim,
+        grid, grid_batch_dim,
+        std::forward<T>(extra_args)...);
+  }
+};
+
+template <typename F, F Func>
+struct CudnnGridSampleBackwardBatchRuleHelper {
+  static std::tuple<Tensor, optional<int64_t>, Tensor, optional<int64_t>> apply(
+      const Tensor& input, optional<int64_t> input_batch_dim,
+      const Tensor& grid, optional<int64_t> grid_batch_dim,
+      const Tensor& grad_output, optional<int64_t> grad_output_batch_dim) {
+    return cudnn_grid_sample_backward_batch_rule<F, Func>(
+        input, input_batch_dim,
+        grid, grid_batch_dim,
+        grad_output, grad_output_batch_dim
+    );
+  }
+};
+
 #define GRID_SAMPLE_BATCH_RULE(fn) SINGLE_ARG(\
     GridSampleBatchRuleHelper<\
       decltype(&ATEN_FN(fn)),\
       &ATEN_FN(fn),\
       c10::guts::function_traits<decltype(ATEN_FN(fn))>::parameter_types>::apply)
 
+#define GRID_SAMPLE_BW_BATCH_RULE(fn) SINGLE_ARG(\
+    GridSampleBackwardBatchRuleHelper<\
+      decltype(&ATEN_FN(fn)),\
+      &ATEN_FN(fn),\
+      c10::guts::function_traits<decltype(ATEN_FN(fn))>::parameter_types>::apply)
+
+#define CUDNN_GRID_SAMPLE_BW_BATCH_RULE(fn)\
+    CudnnGridSampleBackwardBatchRuleHelper<\
+      decltype(&ATEN_FN(fn)),\
+      &ATEN_FN(fn)>::apply
+
 #define UPSAMPLE_BACKWARD(op, overload) VMAP_SUPPORT(#op"."#overload, SINGLE_ARG(\
     UpsampleBackwardBatchRuleHelper<\
       decltype(&ATEN_FN2(op, overload)),\
@@ -386,6 +623,12 @@ struct GridSampleBatchRuleHelper<F, Func, typelist<A, B, T...>> {
   EXISTING_BDIM2(op, vec); \
   EXISTING_BDIM(op);
 
+Tensor this_grid_sampler_3d_backward_cpu(const Tensor& grad_output, const Tensor& input, const Tensor& grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners) {
+  return input;
+}
+
+
+
 TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
   VMAP_SUPPORT("convolution", convolution_batch_rule);
   // m.impl("conv_transpose2d", convNd_transpose_decomp);
@@ -400,7 +643,12 @@ TORCH_LIBRARY_IMPL(aten, FT_BATCHED_KEY, m) {
   EXISTING_BDIM(im2col_backward);
 
   VMAP_SUPPORT("grid_sampler_2d", GRID_SAMPLE_BATCH_RULE(grid_sampler));
+  VMAP_SUPPORT("grid_sampler_2d_backward", GRID_SAMPLE_BW_BATCH_RULE(grid_sampler_2d_backward));
+
   VMAP_SUPPORT("grid_sampler_3d", GRID_SAMPLE_BATCH_RULE(grid_sampler));
+  VMAP_SUPPORT("grid_sampler_3d_backward", GRID_SAMPLE_BW_BATCH_RULE(grid_sampler_3d_backward));
+
+  VMAP_SUPPORT("cudnn_grid_sampler_backward", CUDNN_GRID_SAMPLE_BW_BATCH_RULE(cudnn_grid_sampler_backward));
   VMAP_SUPPORT("cudnn_grid_sampler", GRID_SAMPLE_BATCH_RULE(cudnn_grid_sampler));
 
   VMAP_SUPPORT("cross", cross_batch_rule);
diff --git a/functorch/csrc/OutOfPlacePlumbing.cpp b/functorch/csrc/OutOfPlacePlumbing.cpp
index dbb974d14..a28532781 100644
--- a/functorch/csrc/OutOfPlacePlumbing.cpp
+++ b/functorch/csrc/OutOfPlacePlumbing.cpp
@@ -24,16 +24,16 @@ typedef std::tuple<Tensor,c10::optional<int64_t>> (*batch_rule_1_t)(const Tensor &, c10::optional<int64_t>);
 template <>
 Tensor lowerToNextLayer<batch_rule_1_t,Tensor,const Tensor &>(
   batch_rule_1_t batch_rule,
-  const Tensor & input
+  const Tensor & self
 ) {
   c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
   auto maybe_layer = maybeCurrentDynamicLayer();
   TORCH_INTERNAL_ASSERT(maybe_layer.has_value());
   int64_t cur_level = maybe_layer->layerId();
-  Tensor input_value;
-  optional<int64_t> input_bdim;
-  std::tie(input_value, input_bdim) = unwrapTensorAtLevel(input, cur_level);
-  auto results = batch_rule(input_value, input_bdim);
+  Tensor self_value;
+  optional<int64_t> self_bdim;
+  std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level);
+  auto results = batch_rule(self_value, self_bdim);
   return makeBatched(std::get<0>(results), std::get<1>(results), cur_level);
 }
 
@@ -92,19 +92,19 @@ typedef std::tuple<Tensor,c10::optional<int64_t>> (*batch_rule_5_t)(const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, int64_t);
 template <>
 Tensor lowerToNextLayer<batch_rule_5_t,Tensor,const Tensor &, const Tensor &, int64_t>(
   batch_rule_5_t batch_rule,
-  const Tensor & grad_output, const Tensor & self, int64_t dim
+ const Tensor & self, const Tensor & other, int64_t dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); auto maybe_layer = maybeCurrentDynamicLayer(); TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); int64_t cur_level = maybe_layer->layerId(); - Tensor grad_output_value; - optional grad_output_bdim; - std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); Tensor self_value; optional self_bdim; std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); - auto results = batch_rule(grad_output_value, grad_output_bdim, self_value, self_bdim, dim); + Tensor other_value; + optional other_bdim; + std::tie(other_value, other_bdim) = unwrapTensorAtLevel(other, cur_level); + auto results = batch_rule(self_value, self_bdim, other_value, other_bdim, dim); return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } @@ -297,7 +297,7 @@ typedef std::tuple> (*batch_rule_16_t)(const Tenso template <> Tensor lowerToNextLayer( batch_rule_16_t batch_rule, - const Tensor & self, double rcond, bool hermitian + const Tensor & self, double tol, bool hermitian ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); auto maybe_layer = maybeCurrentDynamicLayer(); @@ -306,7 +306,7 @@ Tensor lowerToNextLayer( Tensor self_value; optional self_bdim; std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); - auto results = batch_rule(self_value, self_bdim, rcond, hermitian); + auto results = batch_rule(self_value, self_bdim, tol, hermitian); return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } @@ -633,10 +633,58 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level), makeBatched(std::get<6>(results), std::get<7>(results), cur_level), std::get<8>(results)); } -typedef std::tuple> (*batch_rule_32_t)(const Tensor &, c10::optional, c10::optional); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_32_t)(int64_t, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, bool, double, ::std::array, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer>( +std::tuple lowerToNextLayer,int64_t, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, const c10::optional &, const c10::optional &, const c10::optional &, bool, double, ::std::array, const Tensor &>( batch_rule_32_t batch_rule, + int64_t impl_index, const Tensor & input, const Tensor & grad_output, const c10::optional & weight, const c10::optional & running_mean, const c10::optional & running_var, const c10::optional & save_mean, const c10::optional & save_var_transform, bool train, double eps, ::std::array output_mask, const Tensor & reservedSpace +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor input_value; + optional input_bdim; + std::tie(input_value, input_bdim) = unwrapTensorAtLevel(input, cur_level); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor 
reservedSpace_value; + optional reservedSpace_bdim; + std::tie(reservedSpace_value, reservedSpace_bdim) = unwrapTensorAtLevel(reservedSpace, cur_level); + optional weight_value; + optional weight_bdim; + if (weight) { + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight.value(), cur_level); + } + optional running_mean_value; + optional running_mean_bdim; + if (running_mean) { + std::tie(running_mean_value, running_mean_bdim) = unwrapTensorAtLevel(running_mean.value(), cur_level); + } + optional running_var_value; + optional running_var_bdim; + if (running_var) { + std::tie(running_var_value, running_var_bdim) = unwrapTensorAtLevel(running_var.value(), cur_level); + } + optional save_mean_value; + optional save_mean_bdim; + if (save_mean) { + std::tie(save_mean_value, save_mean_bdim) = unwrapTensorAtLevel(save_mean.value(), cur_level); + } + optional save_var_transform_value; + optional save_var_transform_bdim; + if (save_var_transform) { + std::tie(save_var_transform_value, save_var_transform_bdim) = unwrapTensorAtLevel(save_var_transform.value(), cur_level); + } + auto results = batch_rule(impl_index, input_value, input_bdim, grad_output_value, grad_output_bdim, weight_value, weight_bdim, running_mean_value, running_mean_bdim, running_var_value, running_var_bdim, save_mean_value, save_mean_bdim, save_var_transform_value, save_var_transform_bdim, train, eps, output_mask, reservedSpace_value, reservedSpace_bdim); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); +} + +typedef std::tuple> (*batch_rule_33_t)(const Tensor &, c10::optional, c10::optional); +template <> +Tensor lowerToNextLayer>( + batch_rule_33_t batch_rule, const Tensor & self, c10::optional generator ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -650,10 +698,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_33_t)(const Tensor &, c10::optional, double, c10::optional); +typedef std::tuple> (*batch_rule_34_t)(const Tensor &, c10::optional, double, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_33_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_34_t batch_rule, const Tensor & mean, double std, c10::optional generator ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -667,10 +715,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_34_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional); +typedef std::tuple> (*batch_rule_35_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional); template <> -Tensor lowerToNextLayer &>( - batch_rule_34_t batch_rule, +Tensor lowerToNextLayer &>( + batch_rule_35_t batch_rule, const Tensor & input1, const Tensor & input2, const Tensor & weight, const c10::optional & bias ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -695,10 +743,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_35_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_36_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional 
&, c10::optional, int64_t); template <> -Tensor lowerToNextLayer &, int64_t>( - batch_rule_35_t batch_rule, +Tensor lowerToNextLayer &, int64_t>( + batch_rule_36_t batch_rule, const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -720,10 +768,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_36_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_37_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t); template <> -Tensor lowerToNextLayer &, int64_t>( - batch_rule_36_t batch_rule, +Tensor lowerToNextLayer &, int64_t>( + batch_rule_37_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -748,10 +796,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_37_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_38_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, int64_t); template <> -Tensor lowerToNextLayer &, const c10::optional &, int64_t>( - batch_rule_37_t batch_rule, +Tensor lowerToNextLayer &, const c10::optional &, int64_t>( + batch_rule_38_t batch_rule, const Tensor & self, const Tensor & target, const c10::optional & weight, const c10::optional & pos_weight, int64_t reduction ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -778,10 +826,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_38_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_39_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, int64_t); template <> -Tensor lowerToNextLayer &, const c10::optional &, int64_t>( - batch_rule_38_t batch_rule, +Tensor lowerToNextLayer &, const c10::optional &, int64_t>( + batch_rule_39_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & target, const c10::optional & weight, const c10::optional & pos_weight, int64_t reduction ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -811,10 +859,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_39_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_40_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t); template <> -Tensor lowerToNextLayer &, int64_t>( - batch_rule_39_t batch_rule, +Tensor lowerToNextLayer &, int64_t>( + batch_rule_40_t batch_rule, const Tensor & self, const c10::optional & weights, int64_t minlength ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -833,10 +881,10 @@ Tensor lowerToNextLayer(results), 
std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_40_t)(const Tensor &, c10::optional, const Scalar &); +typedef std::tuple> (*batch_rule_41_t)(const Tensor &, c10::optional, const Scalar &); template <> -Tensor lowerToNextLayer( - batch_rule_40_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_41_t batch_rule, const Tensor & self, const Scalar & other ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -850,10 +898,10 @@ Tensor lowerToNextLayer( return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple<::std::vector,c10::optional> (*batch_rule_41_t)(const Tensor &, c10::optional, int64_t, int64_t); +typedef std::tuple<::std::vector,c10::optional> (*batch_rule_42_t)(const Tensor &, c10::optional, int64_t, int64_t); template <> -::std::vector lowerToNextLayer,const Tensor &, int64_t, int64_t>( - batch_rule_41_t batch_rule, +::std::vector lowerToNextLayer,const Tensor &, int64_t, int64_t>( + batch_rule_42_t batch_rule, const Tensor & self, int64_t split_size, int64_t dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -867,10 +915,10 @@ ::std::vector lowerToNextLayer,con return makeBatchedVector(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple<::std::vector,c10::optional> (*batch_rule_42_t)(const Tensor &, c10::optional, IntArrayRef, int64_t); +typedef std::tuple<::std::vector,c10::optional> (*batch_rule_43_t)(const Tensor &, c10::optional, IntArrayRef, int64_t); template <> -::std::vector lowerToNextLayer,const Tensor &, IntArrayRef, int64_t>( - batch_rule_42_t batch_rule, +::std::vector lowerToNextLayer,const Tensor &, IntArrayRef, int64_t>( + batch_rule_43_t batch_rule, const Tensor & self, IntArrayRef split_sizes, int64_t dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -884,10 +932,10 @@ ::std::vector lowerToNextLayer,con return makeBatchedVector(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple<::std::vector,c10::optional> (*batch_rule_43_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t); +typedef std::tuple<::std::vector,c10::optional> (*batch_rule_44_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t); template <> -::std::vector lowerToNextLayer,const Tensor &, const Tensor &, int64_t>( - batch_rule_43_t batch_rule, +::std::vector lowerToNextLayer,const Tensor &, const Tensor &, int64_t>( + batch_rule_44_t batch_rule, const Tensor & self, const Tensor & tensor_indices_or_sections, int64_t dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -904,10 +952,10 @@ ::std::vector lowerToNextLayer,con return makeBatchedVector(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_44_t)(const Tensor &, c10::optional, const c10::optional &, const c10::optional &); +typedef std::tuple> (*batch_rule_45_t)(const Tensor &, c10::optional, const c10::optional &, const c10::optional &); template <> -Tensor lowerToNextLayer &, const c10::optional &>( - batch_rule_44_t batch_rule, +Tensor lowerToNextLayer &, const c10::optional &>( + batch_rule_45_t batch_rule, const Tensor & self, const c10::optional & min, const c10::optional & max ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -921,10 +969,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_45_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional); +typedef std::tuple> 
(*batch_rule_46_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional); template <> -Tensor lowerToNextLayer &, const c10::optional &>( - batch_rule_45_t batch_rule, +Tensor lowerToNextLayer &, const c10::optional &>( + batch_rule_46_t batch_rule, const Tensor & self, const c10::optional & min, const c10::optional & max ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -948,10 +996,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_46_t)(const Tensor &, c10::optional, IntArrayRef, const Scalar &); +typedef std::tuple> (*batch_rule_47_t)(const Tensor &, c10::optional, IntArrayRef, const Scalar &); template <> -Tensor lowerToNextLayer( - batch_rule_46_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_47_t batch_rule, const Tensor & self, IntArrayRef pad, const Scalar & value ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -965,10 +1013,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_47_t)(const Tensor &, c10::optional, MemoryFormat); +typedef std::tuple> (*batch_rule_48_t)(const Tensor &, c10::optional, MemoryFormat); template <> -Tensor lowerToNextLayer( - batch_rule_47_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_48_t batch_rule, const Tensor & self, MemoryFormat memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -982,10 +1030,10 @@ Tensor lowerToNextLayer( return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_48_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t); +typedef std::tuple> (*batch_rule_49_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t); template <> -Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t>( - batch_rule_48_t batch_rule, +Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t>( + batch_rule_49_t batch_rule, const Tensor & input, const Tensor & weight, const c10::optional & bias, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool transposed, IntArrayRef output_padding, int64_t groups ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1007,10 +1055,33 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_49_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool, bool); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_50_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, ::std::array); template <> -Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool, bool>( - batch_rule_49_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, ::std::array>( + batch_rule_50_t batch_rule, + const Tensor & grad_output, const Tensor & input, const Tensor & weight, IntArrayRef 
stride, IntArrayRef padding, IntArrayRef dilation, bool transposed, IntArrayRef output_padding, int64_t groups, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor input_value; + optional input_bdim; + std::tie(input_value, input_bdim) = unwrapTensorAtLevel(input, cur_level); + Tensor weight_value; + optional weight_bdim; + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); + auto results = batch_rule(grad_output_value, grad_output_bdim, input_value, input_bdim, weight_value, weight_bdim, stride, padding, dilation, transposed, output_padding, groups, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); +} + +typedef std::tuple> (*batch_rule_51_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool, bool); +template <> +Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool, bool>( + batch_rule_51_t batch_rule, const Tensor & input, const Tensor & weight, const c10::optional & bias, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool transposed, IntArrayRef output_padding, int64_t groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32 ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1032,10 +1103,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_50_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool); +typedef std::tuple> (*batch_rule_52_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool); template <> -Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool>( - batch_rule_50_t batch_rule, +Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool>( + batch_rule_52_t batch_rule, const Tensor & input, const Tensor & weight, const c10::optional & bias, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool transposed, IntArrayRef output_padding, int64_t groups, bool benchmark, bool deterministic, bool cudnn_enabled ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1057,10 +1128,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_51_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, c10::string_view, IntArrayRef, int64_t); +typedef std::tuple> (*batch_rule_53_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, c10::string_view, IntArrayRef, int64_t); template <> 
-Tensor lowerToNextLayer &, IntArrayRef, c10::string_view, IntArrayRef, int64_t>( - batch_rule_51_t batch_rule, +Tensor lowerToNextLayer &, IntArrayRef, c10::string_view, IntArrayRef, int64_t>( + batch_rule_53_t batch_rule, const Tensor & input, const Tensor & weight, const c10::optional & bias, IntArrayRef stride, c10::string_view padding, IntArrayRef dilation, int64_t groups ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1082,10 +1153,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_52_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef); +typedef std::tuple> (*batch_rule_54_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef); template <> -Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef>( - batch_rule_52_t batch_rule, +Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef>( + batch_rule_54_t batch_rule, const Tensor & input, const Tensor & weight, const c10::optional & bias, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool transposed, IntArrayRef output_padding ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1107,10 +1178,48 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_53_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_55_t)(const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool, bool, ::std::array); template <> -Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t>( - batch_rule_53_t batch_rule, +std::tuple lowerToNextLayer,const c10::optional &, const c10::optional &, const c10::optional &, const Tensor &, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, bool, IntArrayRef, int64_t, bool, bool, bool, bool, ::std::array>( + batch_rule_55_t batch_rule, + const c10::optional & ggI, const c10::optional & ggW, const c10::optional & ggb, const Tensor & gO, const Tensor & weight, const Tensor & self, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool transposed, IntArrayRef output_padding, int64_t groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor gO_value; + optional gO_bdim; + std::tie(gO_value, gO_bdim) = unwrapTensorAtLevel(gO, cur_level); + Tensor weight_value; + optional weight_bdim; + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + optional ggI_value; + optional ggI_bdim; + if (ggI) { + std::tie(ggI_value, ggI_bdim) = unwrapTensorAtLevel(ggI.value(), 
cur_level); + } + optional ggW_value; + optional ggW_bdim; + if (ggW) { + std::tie(ggW_value, ggW_bdim) = unwrapTensorAtLevel(ggW.value(), cur_level); + } + optional ggb_value; + optional ggb_bdim; + if (ggb) { + std::tie(ggb_value, ggb_bdim) = unwrapTensorAtLevel(ggb.value(), cur_level); + } + auto results = batch_rule(ggI_value, ggI_bdim, ggW_value, ggW_bdim, ggb_value, ggb_bdim, gO_value, gO_bdim, weight_value, weight_bdim, self_value, self_bdim, stride, padding, dilation, transposed, output_padding, groups, benchmark, deterministic, cudnn_enabled, allow_tf32, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); +} + +typedef std::tuple> (*batch_rule_56_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t); +template <> +Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t>( + batch_rule_56_t batch_rule, const Tensor & self, const Tensor & weight, const c10::optional & bias, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1132,10 +1241,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_54_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_57_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_54_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_57_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1155,10 +1264,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_55_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_58_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, int64_t>( - batch_rule_55_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, int64_t>( + batch_rule_58_t batch_rule, const Tensor & self, const Tensor & input, const Tensor & weight, const Tensor & bias, int64_t pad ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1181,10 +1290,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple> (*batch_rule_56_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, IntArrayRef); +typedef std::tuple> (*batch_rule_59_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, 
IntArrayRef, int64_t, IntArrayRef); template <> -Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, IntArrayRef>( - batch_rule_56_t batch_rule, +Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, IntArrayRef>( + batch_rule_59_t batch_rule, const Tensor & input, const Tensor & weight, const c10::optional & bias, IntArrayRef stride, IntArrayRef padding, IntArrayRef output_padding, int64_t groups, IntArrayRef dilation ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1206,10 +1315,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_57_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool); +typedef std::tuple> (*batch_rule_60_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool); template <> -Tensor lowerToNextLayer( - batch_rule_57_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_60_t batch_rule, const Tensor & input, const Tensor & tol, bool hermitian ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1226,10 +1335,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_58_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, double, int64_t); +typedef std::tuple> (*batch_rule_61_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, double, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_58_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_61_t batch_rule, const Tensor & input1, const Tensor & input2, const Tensor & target, double margin, int64_t reduction ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1249,10 +1358,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_59_t)(const Tensor &, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_62_t)(const Tensor &, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_59_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_62_t batch_rule, const Tensor & repeats, c10::optional output_size ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1266,10 +1375,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_60_t)(const Tensor &, c10::optional, int64_t, const c10::optional &, c10::optional, const c10::optional &, c10::optional); +typedef std::tuple> (*batch_rule_63_t)(const Tensor &, c10::optional, int64_t, const c10::optional &, c10::optional, const c10::optional &, c10::optional); template <> -Tensor lowerToNextLayer &, const c10::optional &>( - batch_rule_60_t batch_rule, +Tensor lowerToNextLayer &, const c10::optional &>( + batch_rule_63_t batch_rule, const Tensor & self, int64_t correction, const c10::optional & fweights, const c10::optional & aweights ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1293,10 +1402,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_61_t)(const Tensor &, c10::optional, int64_t, int64_t, int64_t, int64_t); +typedef std::tuple> (*batch_rule_64_t)(const Tensor &, c10::optional, int64_t, int64_t, int64_t, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_61_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_64_t batch_rule, const Tensor & grad, int64_t N, int64_t C, int64_t H, int64_t W ) { c10::impl::ExcludeDispatchKeyGuard 
guard(kBatchedKey); @@ -1310,10 +1419,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_62_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, bool, double, double); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_65_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, bool, double, double); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, const c10::optional &, bool, double, double>( - batch_rule_62_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, const c10::optional &, bool, double, double>( + batch_rule_65_t batch_rule, const Tensor & input, const Tensor & weight, const c10::optional & bias, const c10::optional & running_mean, const c10::optional & running_var, bool training, double exponential_average_factor, double epsilon ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1345,10 +1454,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level), makeBatched(std::get<6>(results), std::get<7>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_63_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double, const Tensor &, c10::optional); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_66_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double, const Tensor &, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, const c10::optional &, const c10::optional &, double, const Tensor &>( - batch_rule_63_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, const c10::optional &, const c10::optional &, double, const Tensor &>( + batch_rule_66_t batch_rule, const Tensor & input, const Tensor & grad_output, const Tensor & weight, const c10::optional & running_mean, const c10::optional & running_var, const c10::optional & save_mean, const c10::optional & save_var, double epsilon, const Tensor & reserveSpace ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1391,10 +1500,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple> (*batch_rule_64_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, 
IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool); +typedef std::tuple> (*batch_rule_67_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool); template <> -Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool>( - batch_rule_64_t batch_rule, +Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool>( + batch_rule_67_t batch_rule, const Tensor & self, const Tensor & weight, const c10::optional & bias, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1416,10 +1525,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_65_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool); +typedef std::tuple> (*batch_rule_68_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_65_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_68_t batch_rule, const Tensor & grad_output, const Tensor & weight, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1436,10 +1545,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_66_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, bool); +typedef std::tuple> (*batch_rule_69_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_66_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_69_t batch_rule, const Tensor & grad_output, const Tensor & weight, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32 ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1456,10 +1565,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_67_t)(IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, bool); +typedef std::tuple> (*batch_rule_70_t)(IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_67_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_70_t batch_rule, IntArrayRef weight_size, const Tensor & grad_output, const Tensor & self, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32 ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1476,10 +1585,33 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_68_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_71_t)(const Tensor &, 
c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, bool, ::std::array); template <> -Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool>( - batch_rule_68_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, bool, ::std::array>( + batch_rule_71_t batch_rule, + const Tensor & self, const Tensor & grad_output, const Tensor & weight, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor weight_value; + optional weight_bdim; + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); + auto results = batch_rule(self_value, self_bdim, grad_output_value, grad_output_bdim, weight_value, weight_bdim, padding, stride, dilation, groups, benchmark, deterministic, allow_tf32, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); +} + +typedef std::tuple> (*batch_rule_72_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool); +template <> +Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool>( + batch_rule_72_t batch_rule, const Tensor & self, const Tensor & weight, const c10::optional & bias, IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1501,10 +1633,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_69_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool); +typedef std::tuple> (*batch_rule_73_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_69_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_73_t batch_rule, const Tensor & self, const Tensor & weight, IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1521,10 +1653,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_70_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, bool); +typedef std::tuple> (*batch_rule_74_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, 
IntArrayRef, IntArrayRef, int64_t, bool, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_70_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_74_t batch_rule, const Tensor & self, const Tensor & weight, IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32 ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1541,10 +1673,33 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_71_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_75_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, bool, ::std::array); template <> -Tensor lowerToNextLayer &, const c10::optional &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t>( - batch_rule_71_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, bool, ::std::array>( + batch_rule_75_t batch_rule, + const Tensor & self, const Tensor & grad_output, const Tensor & weight, IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, bool allow_tf32, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor weight_value; + optional weight_bdim; + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); + auto results = batch_rule(self_value, self_bdim, grad_output_value, grad_output_bdim, weight_value, weight_bdim, padding, output_padding, stride, dilation, groups, benchmark, deterministic, allow_tf32, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); +} + +typedef std::tuple> (*batch_rule_76_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t); +template <> +Tensor lowerToNextLayer &, const c10::optional &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t>( + batch_rule_76_t batch_rule, const Tensor & self, const Tensor & weight, const Tensor & z, const c10::optional & alpha, const c10::optional & bias, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, int64_t groups ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1569,10 +1724,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_72_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional); +typedef std::tuple,Tensor,c10::optional> 
(*batch_rule_77_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &>( - batch_rule_72_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &>( + batch_rule_77_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & weight ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1592,10 +1747,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_73_t)(const Tensor &, c10::optional, Dimname); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_78_t)(const Tensor &, c10::optional, Dimname); template <> -std::tuple lowerToNextLayer,const Tensor &, Dimname>( - batch_rule_73_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, Dimname>( + batch_rule_78_t batch_rule, const Tensor & self, Dimname dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1609,10 +1764,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_74_t)(const Tensor &, c10::optional, int64_t, c10::optional); +typedef std::tuple> (*batch_rule_79_t)(const Tensor &, c10::optional, int64_t, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_74_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_79_t batch_rule, const Tensor & self, int64_t dim, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1626,10 +1781,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_75_t)(const Tensor &, c10::optional, Dimname, c10::optional); +typedef std::tuple> (*batch_rule_80_t)(const Tensor &, c10::optional, Dimname, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_75_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_80_t batch_rule, const Tensor & self, Dimname dim, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1643,11 +1798,11 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_76_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, ScalarType); +typedef std::tuple> (*batch_rule_81_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_76_t batch_rule, - const Tensor & grad_output, const Tensor & output, int64_t dim, ScalarType input_dtype +Tensor lowerToNextLayer( + batch_rule_81_t batch_rule, + const Tensor & grad_output, const Tensor & output, int64_t dim, const Tensor & self ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); auto maybe_layer = maybeCurrentDynamicLayer(); @@ -1659,14 +1814,17 @@ Tensor lowerToNextLayer output_bdim; std::tie(output_value, output_bdim) = unwrapTensorAtLevel(output, cur_level); - auto results = batch_rule(grad_output_value, grad_output_bdim, output_value, output_bdim, dim, input_dtype); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + auto results = batch_rule(grad_output_value, grad_output_bdim, output_value, output_bdim, dim, self_value, self_bdim); return makeBatched(std::get<0>(results), 
std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_77_t)(const Tensor &, c10::optional, const Scalar &, int64_t); +typedef std::tuple> (*batch_rule_82_t)(const Tensor &, c10::optional, const Scalar &, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_77_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_82_t batch_rule, const Tensor & y, const Scalar & dx, int64_t dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1680,10 +1838,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_78_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, int64_t, int64_t, bool); +typedef std::tuple> (*batch_rule_83_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, int64_t, int64_t, bool); template <> -Tensor lowerToNextLayer( - batch_rule_78_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_83_t batch_rule, const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank, int64_t reduction, bool zero_infinity ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1700,10 +1858,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_79_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, bool); +typedef std::tuple> (*batch_rule_84_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, bool); template <> -Tensor lowerToNextLayer( - batch_rule_79_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_84_t batch_rule, const Tensor & log_probs, const Tensor & targets, const Tensor & input_lengths, const Tensor & target_lengths, int64_t blank, int64_t reduction, bool zero_infinity ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1726,10 +1884,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_80_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, int64_t, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_85_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, int64_t, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, int64_t, bool>( - batch_rule_80_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, int64_t, bool>( + batch_rule_85_t batch_rule, const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, int64_t blank, bool zero_infinity ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1746,10 +1904,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_81_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, bool); +typedef std::tuple> (*batch_rule_86_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, 
int64_t, bool); template <> -Tensor lowerToNextLayer( - batch_rule_81_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_86_t batch_rule, const Tensor & grad, const Tensor & log_probs, const Tensor & targets, IntArrayRef input_lengths, IntArrayRef target_lengths, const Tensor & neg_log_likelihood, const Tensor & log_alpha, int64_t blank, bool zero_infinity ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1775,10 +1933,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_82_t)(const Tensor &, c10::optional, int64_t, int64_t, int64_t); +typedef std::tuple> (*batch_rule_87_t)(const Tensor &, c10::optional, int64_t, int64_t, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_82_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_87_t batch_rule, const Tensor & self, int64_t level, int64_t batch_size, int64_t out_dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1792,10 +1950,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_83_t)(const Tensor &, c10::optional, Dimname, Dimname, Dimname, int64_t); +typedef std::tuple> (*batch_rule_88_t)(const Tensor &, c10::optional, Dimname, Dimname, Dimname, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_83_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_88_t batch_rule, const Tensor & self, Dimname outdim, Dimname dim1, Dimname dim2, int64_t offset ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1809,10 +1967,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_84_t)(const Tensor &, c10::optional, IntArrayRef, int64_t, int64_t, int64_t); +typedef std::tuple> (*batch_rule_89_t)(const Tensor &, c10::optional, IntArrayRef, int64_t, int64_t, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_84_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_89_t batch_rule, const Tensor & grad_in, IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1826,10 +1984,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_85_t)(const Tensor &, c10::optional, int64_t, int64_t, const c10::optional &, c10::optional, const c10::optional &, c10::optional); +typedef std::tuple> (*batch_rule_90_t)(const Tensor &, c10::optional, int64_t, int64_t, const c10::optional &, c10::optional, const c10::optional &, c10::optional); template <> -Tensor lowerToNextLayer &, const c10::optional &>( - batch_rule_85_t batch_rule, +Tensor lowerToNextLayer &, const c10::optional &>( + batch_rule_90_t batch_rule, const Tensor & self, int64_t n, int64_t dim, const c10::optional & prepend, const c10::optional & append ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1853,10 +2011,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple<::std::vector,c10::optional> (*batch_rule_86_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t); +typedef std::tuple<::std::vector,c10::optional> (*batch_rule_91_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t); template <> -::std::vector lowerToNextLayer,const Tensor &, const c10::optional &, c10::optional, int64_t>( - batch_rule_86_t batch_rule, +::std::vector lowerToNextLayer,const Tensor &, const c10::optional &, c10::optional, int64_t>( + batch_rule_91_t batch_rule, const 
Tensor & self, const c10::optional & spacing, c10::optional dim, int64_t edge_order ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1870,10 +2028,10 @@ ::std::vector lowerToNextLayer,con return makeBatchedVector(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple<::std::vector,c10::optional> (*batch_rule_87_t)(const Tensor &, c10::optional, const Scalar &, IntArrayRef, int64_t); +typedef std::tuple<::std::vector,c10::optional> (*batch_rule_92_t)(const Tensor &, c10::optional, const Scalar &, IntArrayRef, int64_t); template <> -::std::vector lowerToNextLayer,const Tensor &, const Scalar &, IntArrayRef, int64_t>( - batch_rule_87_t batch_rule, +::std::vector lowerToNextLayer,const Tensor &, const Scalar &, IntArrayRef, int64_t>( + batch_rule_92_t batch_rule, const Tensor & self, const Scalar & spacing, IntArrayRef dim, int64_t edge_order ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1887,10 +2045,10 @@ ::std::vector lowerToNextLayer,con return makeBatchedVector(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple<::std::vector,c10::optional> (*batch_rule_88_t)(const Tensor &, c10::optional, ArrayRef, c10::optional, int64_t); +typedef std::tuple<::std::vector,c10::optional> (*batch_rule_93_t)(const Tensor &, c10::optional, ArrayRef, c10::optional, int64_t); template <> -::std::vector lowerToNextLayer,const Tensor &, ArrayRef, c10::optional, int64_t>( - batch_rule_88_t batch_rule, +::std::vector lowerToNextLayer,const Tensor &, ArrayRef, c10::optional, int64_t>( + batch_rule_93_t batch_rule, const Tensor & self, ArrayRef spacing, c10::optional dim, int64_t edge_order ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1904,10 +2062,10 @@ ::std::vector lowerToNextLayer,con return makeBatchedVector(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple<::std::vector,c10::optional> (*batch_rule_89_t)(const Tensor &, c10::optional, ArrayRef, IntArrayRef, int64_t); +typedef std::tuple<::std::vector,c10::optional> (*batch_rule_94_t)(const Tensor &, c10::optional, ArrayRef, IntArrayRef, int64_t); template <> -::std::vector lowerToNextLayer,const Tensor &, ArrayRef, IntArrayRef, int64_t>( - batch_rule_89_t batch_rule, +::std::vector lowerToNextLayer,const Tensor &, ArrayRef, IntArrayRef, int64_t>( + batch_rule_94_t batch_rule, const Tensor & self, ArrayRef spacing, IntArrayRef dim, int64_t edge_order ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1921,10 +2079,10 @@ ::std::vector lowerToNextLayer,con return makeBatchedVector(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_90_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_95_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_90_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_95_t batch_rule, const Tensor & self, const Tensor & other, c10::optional rounding_mode ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1941,10 +2099,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_91_t)(const Tensor &, c10::optional, const Scalar &, c10::optional); +typedef std::tuple> (*batch_rule_96_t)(const Tensor &, c10::optional, const Scalar &, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_91_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_96_t 
batch_rule, const Tensor & self, const Scalar & other, c10::optional rounding_mode ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1958,10 +2116,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_92_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, bool, bool); +typedef std::tuple> (*batch_rule_97_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_92_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_97_t batch_rule, const Tensor & weight, const Tensor & indices, int64_t padding_idx, bool scale_grad_by_freq, bool sparse ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1978,10 +2136,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_93_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, bool, bool); +typedef std::tuple> (*batch_rule_98_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_93_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_98_t batch_rule, const Tensor & grad, const Tensor & indices, int64_t num_weights, int64_t padding_idx, bool scale_grad_by_freq, bool sparse ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -1998,10 +2156,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_94_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, bool); +typedef std::tuple> (*batch_rule_99_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, bool); template <> -Tensor lowerToNextLayer( - batch_rule_94_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_99_t batch_rule, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2018,10 +2176,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_95_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool, int64_t, bool, const c10::optional &, c10::optional, bool, int64_t); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_100_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool, int64_t, bool, const c10::optional &, c10::optional, bool, int64_t); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, bool, int64_t, bool, const c10::optional &, bool, int64_t>( - batch_rule_95_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, bool, int64_t, bool, const c10::optional &, bool, int64_t>( + batch_rule_100_t batch_rule, const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const c10::optional & per_sample_weights, bool include_last_offset, int64_t padding_idx ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2046,10 +2204,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), 
std::get<5>(results), cur_level), makeBatched(std::get<6>(results), std::get<7>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_96_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, ScalarType); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_101_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, ScalarType); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, ScalarType>( - batch_rule_96_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, ScalarType>( + batch_rule_101_t batch_rule, const Tensor & weight, const Tensor & mask, ScalarType compressed_indices_dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2066,10 +2224,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_97_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool, int64_t, bool, const c10::optional &, c10::optional, bool); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_102_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool, int64_t, bool, const c10::optional &, c10::optional, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, bool, int64_t, bool, const c10::optional &, bool>( - batch_rule_97_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, bool, int64_t, bool, const c10::optional &, bool>( + batch_rule_102_t batch_rule, const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const c10::optional & per_sample_weights, bool include_last_offset ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2094,10 +2252,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level), makeBatched(std::get<6>(results), std::get<7>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_98_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool, int64_t, bool, const c10::optional &, c10::optional, bool, c10::optional); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_103_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool, int64_t, bool, const c10::optional &, c10::optional, bool, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, bool, int64_t, bool, const c10::optional &, bool, c10::optional>( - batch_rule_98_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, bool, int64_t, bool, const c10::optional &, bool, c10::optional>( + batch_rule_103_t batch_rule, const Tensor & weight, const Tensor & indices, const Tensor & offsets, bool scale_grad_by_freq, int64_t mode, bool sparse, const c10::optional & per_sample_weights, bool include_last_offset, c10::optional padding_idx ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2122,10 +2280,10 @@ std::tuple 
lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level), makeBatched(std::get<6>(results), std::get<7>(results), cur_level)); } -typedef std::tuple> (*batch_rule_99_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, bool, int64_t, bool, const c10::optional &, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_104_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, bool, int64_t, bool, const c10::optional &, c10::optional, int64_t); template <> -Tensor lowerToNextLayer &, int64_t>( - batch_rule_99_t batch_rule, +Tensor lowerToNextLayer &, int64_t>( + batch_rule_104_t batch_rule, const Tensor & grad, const Tensor & indices, const Tensor & offsets, const Tensor & offset2bag, const Tensor & bag_size, const Tensor & maximum_indices, int64_t num_weights, bool scale_grad_by_freq, int64_t mode, bool sparse, const c10::optional & per_sample_weights, int64_t padding_idx ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2159,10 +2317,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_100_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, bool, int64_t, const c10::optional &, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_105_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, bool, int64_t, const c10::optional &, c10::optional, int64_t); template <> -Tensor lowerToNextLayer &, int64_t>( - batch_rule_100_t batch_rule, +Tensor lowerToNextLayer &, int64_t>( + batch_rule_105_t batch_rule, const Tensor & grad, const Tensor & indices, const Tensor & offset2bag, const Tensor & bag_size, const Tensor & maximum_indices, int64_t num_weights, bool scale_grad_by_freq, int64_t mode, const c10::optional & per_sample_weights, int64_t padding_idx ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2193,10 +2351,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_101_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t); +typedef std::tuple> (*batch_rule_106_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_101_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_106_t batch_rule, const Tensor & grad, const Tensor & weight, const Tensor & indices, const Tensor & offsets, const Tensor & offset2bag, int64_t mode, int64_t padding_idx ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2222,10 +2380,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_102_t)(const Tensor &, c10::optional, IntArrayRef, c10::optional, c10::optional, c10::optional, 
c10::optional); +typedef std::tuple> (*batch_rule_107_t)(const Tensor &, c10::optional, IntArrayRef, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( - batch_rule_102_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( + batch_rule_107_t batch_rule, const Tensor & self, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2239,10 +2397,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_103_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_108_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( - batch_rule_103_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( + batch_rule_108_t batch_rule, const Tensor & self, IntArrayRef size, IntArrayRef stride, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2256,10 +2414,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_104_t)(const Tensor &, c10::optional, IntArrayRef, const Scalar &, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_109_t)(const Tensor &, c10::optional, IntArrayRef, const Scalar &, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( - batch_rule_104_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( + batch_rule_109_t batch_rule, const Tensor & self, IntArrayRef size, const Scalar & fill_value, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2273,10 +2431,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_105_t)(IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_110_t)(IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, c10::optional>( - batch_rule_105_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, c10::optional>( + batch_rule_110_t batch_rule, IntArrayRef size, const Tensor & scales, const Tensor & zero_points, int64_t axis, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2293,10 +2451,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_106_t)(IntArrayRef, const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> 
(*batch_rule_111_t)(IntArrayRef, const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, c10::optional>( - batch_rule_106_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, c10::optional>( + batch_rule_111_t batch_rule, IntArrayRef size, const Tensor & qtensor, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2310,10 +2468,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_107_t)(const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_112_t)(const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, c10::optional>( - batch_rule_107_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, c10::optional>( + batch_rule_112_t batch_rule, const Tensor & self, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2327,10 +2485,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_108_t)(const Tensor &, c10::optional, int64_t, int64_t); +typedef std::tuple> (*batch_rule_113_t)(const Tensor &, c10::optional, int64_t, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_108_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_113_t batch_rule, const Tensor & dummy, int64_t a, int64_t b ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2344,10 +2502,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_109_t)(const Tensor &, c10::optional, int64_t, int64_t, Dimname); +typedef std::tuple> (*batch_rule_114_t)(const Tensor &, c10::optional, int64_t, int64_t, Dimname); template <> -Tensor lowerToNextLayer( - batch_rule_109_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_114_t batch_rule, const Tensor & self, int64_t start_dim, int64_t end_dim, Dimname out_dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2361,10 +2519,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_110_t)(const Tensor &, c10::optional, Dimname, Dimname, Dimname); +typedef std::tuple> (*batch_rule_115_t)(const Tensor &, c10::optional, Dimname, Dimname, Dimname); template <> -Tensor lowerToNextLayer( - batch_rule_110_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_115_t batch_rule, const Tensor & self, Dimname start_dim, Dimname end_dim, Dimname out_dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2378,10 +2536,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_111_t)(const Tensor &, c10::optional, DimnameList, Dimname); +typedef std::tuple> (*batch_rule_116_t)(const Tensor &, c10::optional, DimnameList, Dimname); template <> -Tensor lowerToNextLayer( - batch_rule_111_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_116_t batch_rule, const Tensor & self, DimnameList dims, Dimname out_dim ) { 
c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2395,10 +2553,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_112_t)(const Tensor &, c10::optional, int64_t, IntArrayRef, c10::optional); +typedef std::tuple> (*batch_rule_117_t)(const Tensor &, c10::optional, int64_t, IntArrayRef, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_112_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_117_t batch_rule, const Tensor & self, int64_t dim, IntArrayRef sizes, c10::optional names ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2412,10 +2570,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_113_t)(const Tensor &, c10::optional, Dimname, IntArrayRef, DimnameList); +typedef std::tuple> (*batch_rule_118_t)(const Tensor &, c10::optional, Dimname, IntArrayRef, DimnameList); template <> -Tensor lowerToNextLayer( - batch_rule_113_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_118_t batch_rule, const Tensor & self, Dimname dim, IntArrayRef sizes, DimnameList names ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2429,10 +2587,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_114_t)(const Tensor &, c10::optional, const Scalar &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_119_t)(const Tensor &, c10::optional, const Scalar &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, c10::optional>( - batch_rule_114_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, c10::optional>( + batch_rule_119_t batch_rule, const Tensor & self, const Scalar & fill_value, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2446,10 +2604,33 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_115_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_120_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, bool, ::std::array); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, bool>( - batch_rule_115_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, bool, ::std::array>( + batch_rule_120_t batch_rule, + const Tensor & grad_output, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor input_value; + optional input_bdim; + std::tie(input_value, input_bdim) = unwrapTensorAtLevel(input, cur_level); + Tensor grid_value; + optional grid_bdim; + 
std::tie(grid_value, grid_bdim) = unwrapTensorAtLevel(grid, cur_level); + auto results = batch_rule(grad_output_value, grad_output_bdim, input_value, input_bdim, grid_value, grid_bdim, interpolation_mode, padding_mode, align_corners, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); +} + +typedef std::tuple,Tensor,c10::optional> (*batch_rule_121_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, bool); +template <> +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, bool>( + batch_rule_121_t batch_rule, const Tensor & grad_output, const Tensor & input, const Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2469,10 +2650,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_116_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, double, int64_t); +typedef std::tuple> (*batch_rule_122_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, double, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_116_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_122_t batch_rule, const Tensor & self, const Tensor & target, double margin, int64_t reduction ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2489,10 +2670,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_117_t)(const Tensor &, c10::optional, int64_t, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double, bool); +typedef std::tuple> (*batch_rule_123_t)(const Tensor &, c10::optional, int64_t, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double, bool); template <> -Tensor lowerToNextLayer &, const c10::optional &, double, bool>( - batch_rule_117_t batch_rule, +Tensor lowerToNextLayer &, const c10::optional &, double, bool>( + batch_rule_123_t batch_rule, const Tensor & input, int64_t num_groups, const c10::optional & weight, const c10::optional & bias, double eps, bool cudnn_enabled ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2516,10 +2697,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_118_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, int64_t, int64_t, int64_t, int64_t, double); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_124_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, int64_t, int64_t, int64_t, int64_t, double); template <> -std::tuple lowerToNextLayer,const Tensor &, const c10::optional &, const c10::optional &, int64_t, int64_t, int64_t, int64_t, double>( - batch_rule_118_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const c10::optional &, const c10::optional &, int64_t, int64_t, int64_t, int64_t, double>( + batch_rule_124_t batch_rule, const Tensor & input, const c10::optional & weight, const c10::optional & bias, int64_t N, int64_t C, int64_t HxW, int64_t group, double eps ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); 
@@ -2543,10 +2724,41 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple> (*batch_rule_119_t)(const Tensor &, c10::optional, IntArrayRef, int64_t, bool); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_125_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t, int64_t, int64_t, int64_t, ::std::array); template <> -Tensor lowerToNextLayer( - batch_rule_119_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, const c10::optional &, int64_t, int64_t, int64_t, int64_t, ::std::array>( + batch_rule_125_t batch_rule, + const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & rstd, const c10::optional & weight, int64_t N, int64_t C, int64_t HxW, int64_t group, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor grad_out_value; + optional grad_out_bdim; + std::tie(grad_out_value, grad_out_bdim) = unwrapTensorAtLevel(grad_out, cur_level); + Tensor input_value; + optional input_bdim; + std::tie(input_value, input_bdim) = unwrapTensorAtLevel(input, cur_level); + Tensor mean_value; + optional mean_bdim; + std::tie(mean_value, mean_bdim) = unwrapTensorAtLevel(mean, cur_level); + Tensor rstd_value; + optional rstd_bdim; + std::tie(rstd_value, rstd_bdim) = unwrapTensorAtLevel(rstd, cur_level); + optional weight_value; + optional weight_bdim; + if (weight) { + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight.value(), cur_level); + } + auto results = batch_rule(grad_out_value, grad_out_bdim, input_value, input_bdim, mean_value, mean_bdim, rstd_value, rstd_bdim, weight_value, weight_bdim, N, C, HxW, group, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); +} + +typedef std::tuple> (*batch_rule_126_t)(const Tensor &, c10::optional, IntArrayRef, int64_t, bool); +template <> +Tensor lowerToNextLayer( + batch_rule_126_t batch_rule, const Tensor & self, IntArrayRef dim, int64_t normalization, bool forward ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2560,10 +2772,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_120_t)(const Tensor &, c10::optional, IntArrayRef, int64_t, int64_t); +typedef std::tuple> (*batch_rule_127_t)(const Tensor &, c10::optional, IntArrayRef, int64_t, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_120_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_127_t batch_rule, const Tensor & grad_output, IntArrayRef input_sizes, int64_t dim, int64_t index ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2577,10 +2789,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_121_t)(const Tensor &, c10::optional, const c10::List> &); +typedef std::tuple> (*batch_rule_128_t)(const Tensor &, c10::optional, const c10::List> &); template <> -Tensor 
lowerToNextLayer> &>( - batch_rule_121_t batch_rule, +Tensor lowerToNextLayer> &>( + batch_rule_128_t batch_rule, const Tensor & self, const c10::List> & indices ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2594,10 +2806,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_122_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_129_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_122_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_129_t batch_rule, const Tensor & self, int64_t dim, const Tensor & index, const Tensor & grad ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2617,10 +2829,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_123_t)(const Tensor &, c10::optional, Dimname, const Tensor &, c10::optional, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_130_t)(const Tensor &, c10::optional, Dimname, const Tensor &, c10::optional, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_123_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_130_t batch_rule, const Tensor & self, Dimname dim, const Tensor & index, const Tensor & src ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2640,10 +2852,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_124_t)(const Tensor &, c10::optional, const c10::List> &, const Tensor &, c10::optional, bool); +typedef std::tuple> (*batch_rule_131_t)(const Tensor &, c10::optional, const c10::List> &, const Tensor &, c10::optional, bool); template <> -Tensor lowerToNextLayer> &, const Tensor &, bool>( - batch_rule_124_t batch_rule, +Tensor lowerToNextLayer> &, const Tensor &, bool>( + batch_rule_131_t batch_rule, const Tensor & self, const c10::List> & indices, const Tensor & values, bool accumulate ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2660,10 +2872,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_125_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, double, double, bool); +typedef std::tuple> (*batch_rule_132_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, double, double, bool); template <> -Tensor lowerToNextLayer( - batch_rule_125_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_132_t batch_rule, const Tensor & x1, const Tensor & x2, double p, double eps, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2680,47 +2892,47 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_126_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool); +typedef std::tuple> (*batch_rule_133_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_126_t batch_rule, - const Tensor & sorted_sequence, const Tensor & self, bool out_int32, bool right +Tensor lowerToNextLayer( + batch_rule_133_t batch_rule, + const Tensor & self, const Tensor & boundaries, bool out_int32, bool right ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); auto maybe_layer = maybeCurrentDynamicLayer(); TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); 
int64_t cur_level = maybe_layer->layerId(); - Tensor sorted_sequence_value; - optional sorted_sequence_bdim; - std::tie(sorted_sequence_value, sorted_sequence_bdim) = unwrapTensorAtLevel(sorted_sequence, cur_level); Tensor self_value; optional self_bdim; std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); - auto results = batch_rule(sorted_sequence_value, sorted_sequence_bdim, self_value, self_bdim, out_int32, right); + Tensor boundaries_value; + optional boundaries_bdim; + std::tie(boundaries_value, boundaries_bdim) = unwrapTensorAtLevel(boundaries, cur_level); + auto results = batch_rule(self_value, self_bdim, boundaries_value, boundaries_bdim, out_int32, right); return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_127_t)(const Tensor &, c10::optional, const Scalar &, bool, bool); +typedef std::tuple> (*batch_rule_134_t)(const Tensor &, c10::optional, const Scalar &, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_127_t batch_rule, - const Tensor & sorted_sequence, const Scalar & self, bool out_int32, bool right +Tensor lowerToNextLayer( + batch_rule_134_t batch_rule, + const Tensor & elements, const Scalar & test_element, bool assume_unique, bool invert ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); auto maybe_layer = maybeCurrentDynamicLayer(); TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); int64_t cur_level = maybe_layer->layerId(); - Tensor sorted_sequence_value; - optional sorted_sequence_bdim; - std::tie(sorted_sequence_value, sorted_sequence_bdim) = unwrapTensorAtLevel(sorted_sequence, cur_level); - auto results = batch_rule(sorted_sequence_value, sorted_sequence_bdim, self, out_int32, right); + Tensor elements_value; + optional elements_bdim; + std::tie(elements_value, elements_bdim) = unwrapTensorAtLevel(elements, cur_level); + auto results = batch_rule(elements_value, elements_bdim, test_element, assume_unique, invert); return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_128_t)(const Scalar &, const Tensor &, c10::optional, bool, bool); +typedef std::tuple> (*batch_rule_135_t)(const Scalar &, const Tensor &, c10::optional, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_128_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_135_t batch_rule, const Scalar & self, const Tensor & boundaries, bool out_int32, bool right ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2734,10 +2946,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple (*batch_rule_129_t)(const Tensor &, c10::optional, const Tensor &, c10::optional); +typedef std::tuple (*batch_rule_136_t)(const Tensor &, c10::optional, const Tensor &, c10::optional); template <> -bool lowerToNextLayer( - batch_rule_129_t batch_rule, +bool lowerToNextLayer( + batch_rule_136_t batch_rule, const Tensor & self, const Tensor & other ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2754,10 +2966,10 @@ bool lowerToNextLayer( return std::get<0>(results); } -typedef std::tuple> (*batch_rule_130_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, bool); +typedef std::tuple> (*batch_rule_137_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, bool); template <> -Tensor lowerToNextLayer( - batch_rule_130_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_137_t batch_rule, const Tensor & self, const Tensor & target, int64_t reduction, bool log_target 
) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2774,10 +2986,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_131_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, bool); +typedef std::tuple> (*batch_rule_138_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, bool); template <> -Tensor lowerToNextLayer( - batch_rule_131_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_138_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, bool log_target ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2797,10 +3009,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_132_t)(const Tensor &, c10::optional, int64_t, int64_t, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_139_t)(const Tensor &, c10::optional, int64_t, int64_t, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, int64_t, int64_t, bool>( - batch_rule_132_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, int64_t, int64_t, bool>( + batch_rule_139_t batch_rule, const Tensor & self, int64_t k, int64_t dim, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2814,10 +3026,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_133_t)(const Tensor &, c10::optional, int64_t, Dimname, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_140_t)(const Tensor &, c10::optional, int64_t, Dimname, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, int64_t, Dimname, bool>( - batch_rule_133_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, int64_t, Dimname, bool>( + batch_rule_140_t batch_rule, const Tensor & self, int64_t k, Dimname dim, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2831,10 +3043,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_134_t)(const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double, bool); +typedef std::tuple> (*batch_rule_141_t)(const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double, bool); template <> -Tensor lowerToNextLayer &, const c10::optional &, double, bool>( - batch_rule_134_t batch_rule, +Tensor lowerToNextLayer &, const c10::optional &, double, bool>( + batch_rule_141_t batch_rule, const Tensor & input, IntArrayRef normalized_shape, const c10::optional & weight, const c10::optional & bias, double eps, bool cudnn_enable ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2858,10 +3070,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_135_t)(const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_142_t)(const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, const c10::optional 
&, c10::optional, double); template <> -std::tuple lowerToNextLayer,const Tensor &, IntArrayRef, const c10::optional &, const c10::optional &, double>( - batch_rule_135_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, IntArrayRef, const c10::optional &, const c10::optional &, double>( + batch_rule_142_t batch_rule, const Tensor & input, IntArrayRef normalized_shape, const c10::optional & weight, const c10::optional & bias, double eps ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2885,10 +3097,46 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple> (*batch_rule_136_t)(const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_143_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, ::std::array); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional>( - batch_rule_136_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, IntArrayRef, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, ::std::array>( + batch_rule_143_t batch_rule, + const Tensor & grad_out, const Tensor & input, IntArrayRef normalized_shape, const Tensor & mean, const Tensor & rstd, const c10::optional & weight, const c10::optional & bias, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor grad_out_value; + optional grad_out_bdim; + std::tie(grad_out_value, grad_out_bdim) = unwrapTensorAtLevel(grad_out, cur_level); + Tensor input_value; + optional input_bdim; + std::tie(input_value, input_bdim) = unwrapTensorAtLevel(input, cur_level); + Tensor mean_value; + optional mean_bdim; + std::tie(mean_value, mean_bdim) = unwrapTensorAtLevel(mean, cur_level); + Tensor rstd_value; + optional rstd_bdim; + std::tie(rstd_value, rstd_bdim) = unwrapTensorAtLevel(rstd, cur_level); + optional weight_value; + optional weight_bdim; + if (weight) { + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight.value(), cur_level); + } + optional bias_value; + optional bias_bdim; + if (bias) { + std::tie(bias_value, bias_bdim) = unwrapTensorAtLevel(bias.value(), cur_level); + } + auto results = batch_rule(grad_out_value, grad_out_bdim, input_value, input_bdim, normalized_shape, mean_value, mean_bdim, rstd_value, rstd_bdim, weight_value, weight_bdim, bias_value, bias_bdim, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); +} + +typedef std::tuple> (*batch_rule_144_t)(const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional); +template <> +Tensor lowerToNextLayer, c10::optional, c10::optional>( + batch_rule_144_t batch_rule, const Tensor & self, c10::optional nan, c10::optional posinf, c10::optional neginf ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2902,10 +3150,10 @@ Tensor lowerToNextLayer(results), 
std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_137_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional); +typedef std::tuple> (*batch_rule_145_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional); template <> -Tensor lowerToNextLayer &>( - batch_rule_137_t batch_rule, +Tensor lowerToNextLayer &>( + batch_rule_145_t batch_rule, const Tensor & self, const Tensor & weight, const c10::optional & bias ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2927,10 +3175,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_138_t)(IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_146_t)(IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_138_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_146_t batch_rule, IntArrayRef input_size, const Tensor & grad_output, const Tensor & weight ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2947,10 +3195,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_139_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_147_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, bool>( - batch_rule_139_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, bool>( + batch_rule_147_t batch_rule, const Tensor & grad_output, const Tensor & input, const Tensor & weight, bool bias_defined ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2970,10 +3218,33 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_140_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, const Tensor &, c10::optional); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_148_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, ::std::array); template <> -Tensor lowerToNextLayer( - batch_rule_140_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, ::std::array>( + batch_rule_148_t batch_rule, + const Tensor & self, const Tensor & grad_output, const Tensor & weight, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor weight_value; + optional weight_bdim; + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); + auto results = batch_rule(self_value, self_bdim, grad_output_value, grad_output_bdim, 
weight_value, weight_bdim, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); +} + +typedef std::tuple> (*batch_rule_149_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, const Tensor &, c10::optional); +template <> +Tensor lowerToNextLayer( + batch_rule_149_t batch_rule, const Tensor & input, const Tensor & weight, const Tensor & packed, const Tensor & col_offsets, const Scalar & weight_scale, const Scalar & weight_zero_point, const Tensor & bias ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -2999,10 +3270,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional,double,int64_t> (*batch_rule_141_t)(const Tensor &, c10::optional); +typedef std::tuple,Tensor,c10::optional,double,int64_t> (*batch_rule_150_t)(const Tensor &, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &>( - batch_rule_141_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &>( + batch_rule_150_t batch_rule, const Tensor & input ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3016,10 +3287,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), std::get<4>(results), std::get<5>(results)); } -typedef std::tuple> (*batch_rule_142_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_151_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_142_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_151_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & indices ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3039,10 +3310,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_143_t)(const Scalar &, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_152_t)(const Scalar &, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_143_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_152_t batch_rule, const Scalar & self, const Tensor & other ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3056,10 +3327,30 @@ Tensor lowerToNextLayer( return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_144_t)(const Tensor &, c10::optional, Dimname); +typedef std::tuple> (*batch_rule_153_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, ScalarType); template <> -Tensor lowerToNextLayer( - batch_rule_144_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_153_t batch_rule, + const Tensor & grad_output, const Tensor & output, int64_t dim, ScalarType input_dtype +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor output_value; + optional 
output_bdim; + std::tie(output_value, output_bdim) = unwrapTensorAtLevel(output, cur_level); + auto results = batch_rule(grad_output_value, grad_output_bdim, output_value, output_bdim, dim, input_dtype); + return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); +} + +typedef std::tuple> (*batch_rule_154_t)(const Tensor &, c10::optional, Dimname); +template <> +Tensor lowerToNextLayer( + batch_rule_154_t batch_rule, const Tensor & self, Dimname dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3073,10 +3364,10 @@ Tensor lowerToNextLayer( return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_145_t)(const Tensor &, c10::optional, DimnameList, bool); +typedef std::tuple> (*batch_rule_155_t)(const Tensor &, c10::optional, DimnameList, bool); template <> -Tensor lowerToNextLayer( - batch_rule_145_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_155_t batch_rule, const Tensor & self, DimnameList dim, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3090,10 +3381,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_146_t)(const Tensor &, c10::optional); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_156_t)(const Tensor &, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &>( - batch_rule_146_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &>( + batch_rule_156_t batch_rule, const Tensor & self ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3107,10 +3398,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_147_t)(const Tensor &, c10::optional, int64_t, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_157_t)(const Tensor &, c10::optional, int64_t, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, int64_t, bool>( - batch_rule_147_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, int64_t, bool>( + batch_rule_157_t batch_rule, const Tensor & self, int64_t dim, bool descending ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3124,10 +3415,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_148_t)(const Tensor &, c10::optional, c10::optional, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_158_t)(const Tensor &, c10::optional, c10::optional, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, c10::optional, bool>( - batch_rule_148_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, c10::optional, bool>( + batch_rule_158_t batch_rule, const Tensor & self, c10::optional dim, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3141,10 +3432,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_149_t)(const Tensor &, c10::optional, Dimname, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_159_t)(const Tensor &, c10::optional, Dimname, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, Dimname, bool>( - batch_rule_149_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, Dimname, 
bool>( + batch_rule_159_t batch_rule, const Tensor & self, Dimname dim, bool descending ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3158,10 +3449,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_150_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, IntArrayRef, bool); +typedef std::tuple> (*batch_rule_160_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, IntArrayRef, bool); template <> -Tensor lowerToNextLayer( - batch_rule_150_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_160_t batch_rule, const Tensor & grad, int64_t dim, const Tensor & indices, IntArrayRef sizes, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3178,10 +3469,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_151_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_161_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, bool>( - batch_rule_151_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, bool>( + batch_rule_161_t batch_rule, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3195,10 +3486,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_152_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, bool); +typedef std::tuple> (*batch_rule_162_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, bool); template <> -Tensor lowerToNextLayer( - batch_rule_152_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_162_t batch_rule, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3212,10 +3503,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_153_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, bool); +typedef std::tuple> (*batch_rule_163_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, bool); template <> -Tensor lowerToNextLayer( - batch_rule_153_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_163_t batch_rule, const Tensor & grad_output, const Tensor & output, const Tensor & input, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3235,10 +3526,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_154_t)(const Tensor &, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_164_t)(const Tensor &, c10::optional, 
c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_154_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_164_t batch_rule, const Tensor & self, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3252,10 +3543,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_155_t)(const Tensor &, c10::optional, IntArrayRef, bool, c10::optional); +typedef std::tuple> (*batch_rule_165_t)(const Tensor &, c10::optional, IntArrayRef, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_155_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_165_t batch_rule, const Tensor & self, IntArrayRef dim, bool keepdim, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3269,10 +3560,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_156_t)(const Tensor &, c10::optional, DimnameList, bool, c10::optional); +typedef std::tuple> (*batch_rule_166_t)(const Tensor &, c10::optional, DimnameList, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_156_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_166_t batch_rule, const Tensor & self, DimnameList dim, bool keepdim, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3286,10 +3577,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_157_t)(IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool); +typedef std::tuple> (*batch_rule_167_t)(IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool); template <> -Tensor lowerToNextLayer( - batch_rule_157_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_167_t batch_rule, IntArrayRef self_size, const Tensor & grad_output, const Tensor & weight, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool bias_defined ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3306,10 +3597,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_158_t)(IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_168_t)(IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool); template <> -std::tuple lowerToNextLayer,IntArrayRef, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool>( - batch_rule_158_t batch_rule, +std::tuple lowerToNextLayer,IntArrayRef, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool>( + batch_rule_168_t batch_rule, IntArrayRef weight_size, const Tensor & grad_output, const Tensor & self, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool bias_defined ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3326,20 +3617,43 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_159_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, 
c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, bool, double, double); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_169_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, ::std::array); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, const c10::optional &, bool, double, double>( - batch_rule_159_t batch_rule, - const Tensor & input, const Tensor & weight, const c10::optional & bias, const c10::optional & running_mean, const c10::optional & running_var, bool training, double exponential_average_factor, double epsilon +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, ::std::array>( + batch_rule_169_t batch_rule, + const Tensor & self, const Tensor & grad_output, const Tensor & weight, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, ::std::array output_mask ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); auto maybe_layer = maybeCurrentDynamicLayer(); TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); int64_t cur_level = maybe_layer->layerId(); - Tensor input_value; - optional input_bdim; - std::tie(input_value, input_bdim) = unwrapTensorAtLevel(input, cur_level); - Tensor weight_value; + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor weight_value; + optional weight_bdim; + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); + auto results = batch_rule(self_value, self_bdim, grad_output_value, grad_output_bdim, weight_value, weight_bdim, padding, stride, dilation, groups, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); +} + +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_170_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, bool, double, double); +template <> +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, const c10::optional &, bool, double, double>( + batch_rule_170_t batch_rule, + const Tensor & input, const Tensor & weight, const c10::optional & bias, const c10::optional & running_mean, const c10::optional & running_var, bool training, double exponential_average_factor, double epsilon +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor input_value; + optional input_bdim; + std::tie(input_value, input_bdim) = unwrapTensorAtLevel(input, cur_level); + Tensor weight_value; optional weight_bdim; std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); optional bias_value; @@ -3361,10 +3675,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), 
std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_160_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_171_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, const c10::optional &, const c10::optional &, double>( - batch_rule_160_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, const c10::optional &, const c10::optional &, double>( + batch_rule_171_t batch_rule, const Tensor & input, const Tensor & grad_output, const Tensor & weight, const c10::optional & running_mean, const c10::optional & running_var, const c10::optional & save_mean, const c10::optional & save_var, double epsilon ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3404,10 +3718,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple> (*batch_rule_161_t)(IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool); +typedef std::tuple> (*batch_rule_172_t)(IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_161_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_172_t batch_rule, IntArrayRef weight_size, const Tensor & grad_output, const Tensor & self, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3424,10 +3738,56 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_162_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, int64_t); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_173_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, ::std::array); template <> -Tensor lowerToNextLayer( - batch_rule_162_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, ::std::array>( + batch_rule_173_t batch_rule, + const Tensor & self, const Tensor & grad_output, const Tensor & weight, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + 
int64_t cur_level = maybe_layer->layerId();
+  Tensor self_value;
+  optional<int64_t> self_bdim;
+  std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level);
+  Tensor grad_output_value;
+  optional<int64_t> grad_output_bdim;
+  std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level);
+  Tensor weight_value;
+  optional<int64_t> weight_bdim;
+  std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level);
+  auto results = batch_rule(self_value, self_bdim, grad_output_value, grad_output_bdim, weight_value, weight_bdim, padding, stride, dilation, groups, benchmark, deterministic, output_mask);
+  return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level));
+}
+
+typedef std::tuple<Tensor,c10::optional<int64_t>,Tensor,c10::optional<int64_t>,Tensor,c10::optional<int64_t>> (*batch_rule_174_t)(const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, ::std::array<bool,3>);
+template <>
+std::tuple<Tensor,Tensor,Tensor> lowerToNextLayer<batch_rule_174_t,std::tuple<Tensor,Tensor,Tensor>,const Tensor &, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t, bool, bool, ::std::array<bool,3>>(
+  batch_rule_174_t batch_rule,
+  const Tensor & self, const Tensor & grad_output, const Tensor & weight, IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool benchmark, bool deterministic, ::std::array<bool,3> output_mask
+) {
+  c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
+  auto maybe_layer = maybeCurrentDynamicLayer();
+  TORCH_INTERNAL_ASSERT(maybe_layer.has_value());
+  int64_t cur_level = maybe_layer->layerId();
+  Tensor self_value;
+  optional<int64_t> self_bdim;
+  std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level);
+  Tensor grad_output_value;
+  optional<int64_t> grad_output_bdim;
+  std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level);
+  Tensor weight_value;
+  optional<int64_t> weight_bdim;
+  std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level);
+  auto results = batch_rule(self_value, self_bdim, grad_output_value, grad_output_bdim, weight_value, weight_bdim, padding, output_padding, stride, dilation, groups, benchmark, deterministic, output_mask);
+  return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level));
+}
+
+typedef std::tuple<Tensor,c10::optional<int64_t>> (*batch_rule_175_t)(const Tensor &, c10::optional<int64_t>, int64_t, const Tensor &, c10::optional<int64_t>, int64_t);
+template <>
+Tensor lowerToNextLayer<batch_rule_175_t,Tensor,const Tensor &, int64_t, const Tensor &, int64_t>(
+  batch_rule_175_t batch_rule,
   const Tensor & self, int64_t dim, const Tensor & start, int64_t length
 ) {
   c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
@@ -3444,10 +3804,10 @@ Tensor lowerToNextLayer<batch_rule_162_t,Tensor,const 
   return makeBatched(std::get<0>(results), std::get<1>(results), cur_level);
 }
-typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_163_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, 
c10::optional, const c10::optional &, c10::optional, bool, double, double); template <> -std::tuple lowerToNextLayer,const Tensor &, const c10::optional &, const c10::optional &, const c10::optional &, const c10::optional &, bool, double, double>( - batch_rule_163_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const c10::optional &, const c10::optional &, const c10::optional &, const c10::optional &, bool, double, double>( + batch_rule_176_t batch_rule, const Tensor & input, const c10::optional & weight, const c10::optional & bias, const c10::optional & running_mean, const c10::optional & running_var, bool training, double momentum, double eps ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3481,10 +3841,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_164_t)(const Tensor &, c10::optional, double); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_177_t)(const Tensor &, c10::optional, double); template <> -std::tuple lowerToNextLayer,const Tensor &, double>( - batch_rule_164_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, double>( + batch_rule_177_t batch_rule, const Tensor & input, double eps ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3498,10 +3858,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_165_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, double); +typedef std::tuple> (*batch_rule_178_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, double); template <> -Tensor lowerToNextLayer &, const c10::optional &, const Tensor &, const Tensor &, double>( - batch_rule_165_t batch_rule, +Tensor lowerToNextLayer &, const c10::optional &, const Tensor &, const Tensor &, double>( + batch_rule_178_t batch_rule, const Tensor & input, const c10::optional & weight, const c10::optional & bias, const Tensor & mean, const Tensor & invstd, double eps ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3531,10 +3891,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_166_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double, double, int64_t); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_179_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double, double, int64_t); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, double, double, int64_t>( - batch_rule_166_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, double, double, int64_t>( + batch_rule_179_t batch_rule, const Tensor & input, const Tensor & mean, const Tensor & invstd, const 
c10::optional & running_mean, const c10::optional & running_var, double momentum, double eps, int64_t count ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3564,10 +3924,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_167_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double, double, const Tensor &, c10::optional); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_180_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double, double, const Tensor &, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, double, double, const Tensor &>( - batch_rule_167_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, double, double, const Tensor &>( + batch_rule_180_t batch_rule, const Tensor & input, const Tensor & mean, const Tensor & invstd, const c10::optional & running_mean, const c10::optional & running_var, double momentum, double eps, const Tensor & counts ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3600,10 +3960,55 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_168_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, bool, bool, bool); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_181_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, bool, double, ::std::array); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, const c10::optional &, bool, bool, bool>( - batch_rule_168_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, const c10::optional &, const c10::optional &, const c10::optional &, bool, double, ::std::array>( + batch_rule_181_t batch_rule, + const Tensor & grad_out, const Tensor & input, const c10::optional & weight, const c10::optional & running_mean, const c10::optional & running_var, const c10::optional & save_mean, const c10::optional & save_invstd, bool train, double eps, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor grad_out_value; + optional grad_out_bdim; + std::tie(grad_out_value, grad_out_bdim) = unwrapTensorAtLevel(grad_out, cur_level); + Tensor input_value; + optional input_bdim; + std::tie(input_value, input_bdim) = unwrapTensorAtLevel(input, cur_level); + optional weight_value; + optional weight_bdim; + if (weight) { + 
std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight.value(), cur_level);
+  }
+  optional<Tensor> running_mean_value;
+  optional<int64_t> running_mean_bdim;
+  if (running_mean) {
+    std::tie(running_mean_value, running_mean_bdim) = unwrapTensorAtLevel(running_mean.value(), cur_level);
+  }
+  optional<Tensor> running_var_value;
+  optional<int64_t> running_var_bdim;
+  if (running_var) {
+    std::tie(running_var_value, running_var_bdim) = unwrapTensorAtLevel(running_var.value(), cur_level);
+  }
+  optional<Tensor> save_mean_value;
+  optional<int64_t> save_mean_bdim;
+  if (save_mean) {
+    std::tie(save_mean_value, save_mean_bdim) = unwrapTensorAtLevel(save_mean.value(), cur_level);
+  }
+  optional<Tensor> save_invstd_value;
+  optional<int64_t> save_invstd_bdim;
+  if (save_invstd) {
+    std::tie(save_invstd_value, save_invstd_bdim) = unwrapTensorAtLevel(save_invstd.value(), cur_level);
+  }
+  auto results = batch_rule(grad_out_value, grad_out_bdim, input_value, input_bdim, weight_value, weight_bdim, running_mean_value, running_mean_bdim, running_var_value, running_var_bdim, save_mean_value, save_mean_bdim, save_invstd_value, save_invstd_bdim, train, eps, output_mask);
+  return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level));
+}
+
+typedef std::tuple<Tensor,c10::optional<int64_t>,Tensor,c10::optional<int64_t>,Tensor,c10::optional<int64_t>,Tensor,c10::optional<int64_t>> (*batch_rule_182_t)(const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const c10::optional<Tensor> &, c10::optional<int64_t>, bool, bool, bool);
+template <>
+std::tuple<Tensor,Tensor,Tensor,Tensor> lowerToNextLayer<batch_rule_182_t,std::tuple<Tensor,Tensor,Tensor,Tensor>,const Tensor &, const Tensor &, const Tensor &, const Tensor &, const c10::optional<Tensor> &, bool, bool, bool>(
+  batch_rule_182_t batch_rule,
   const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & invstd, const c10::optional<Tensor> & weight, bool input_g, bool weight_g, bool bias_g
 ) {
   c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
@@ -3631,10 +4036,10 @@ std::tuple<Tensor,Tensor,Tensor,Tensor> lowerToNextLayer<batch_rule_168_t,std::t
   return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level), makeBatched(std::get<6>(results), std::get<7>(results), cur_level));
 }
-typedef std::tuple<Tensor,c10::optional<int64_t>> (*batch_rule_169_t)(const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const c10::optional<Tensor> &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>);
+typedef std::tuple<Tensor,c10::optional<int64_t>> (*batch_rule_183_t)(const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const c10::optional<Tensor> &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>, const Tensor &, c10::optional<int64_t>);
 template <>
-Tensor lowerToNextLayer<batch_rule_169_t,Tensor,const Tensor &, const Tensor &, const Tensor &, const Tensor &, const c10::optional<Tensor> &, const Tensor &, const Tensor &, const Tensor &>(
-  batch_rule_169_t batch_rule,
+Tensor lowerToNextLayer<batch_rule_183_t,Tensor,const Tensor &, const Tensor &, const Tensor &, const Tensor &, const c10::optional<Tensor> &, const Tensor &, const Tensor &, const Tensor &>(
+  batch_rule_183_t batch_rule,
   const Tensor & grad_out, const Tensor & input, const Tensor & mean, const Tensor & invstd, const c10::optional<Tensor> & weight, const Tensor & mean_dy, const Tensor & mean_dy_xmu, const Tensor & count
 ) {
   c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
@@ -3671,10 +4076,10 @@ Tensor lowerToNextLayer<batch_rule_169_t,Tensor,const 
   return makeBatched(std::get<0>(results), std::get<1>(results), cur_level);
 }
-typedef std::tuple,Tensor,c10::optional> 
(*batch_rule_170_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_184_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, double); template <> -std::tuple lowerToNextLayer,const Tensor &, const c10::optional &, const c10::optional &, double>( - batch_rule_170_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const c10::optional &, const c10::optional &, double>( + batch_rule_184_t batch_rule, const Tensor & input, const c10::optional & running_mean, const c10::optional & running_var, double momentum ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3698,10 +4103,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_171_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef); +typedef std::tuple> (*batch_rule_185_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef); template <> -Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef>( - batch_rule_171_t batch_rule, +Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef>( + batch_rule_185_t batch_rule, const Tensor & input, const Tensor & weight, const c10::optional & bias, IntArrayRef padding, IntArrayRef stride ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3723,10 +4128,33 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_172_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_186_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, ::std::array); template <> -Tensor lowerToNextLayer( - batch_rule_172_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, IntArrayRef, ::std::array>( + batch_rule_186_t batch_rule, + const Tensor & input, const Tensor & grad_output, const Tensor & weight, IntArrayRef padding, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor input_value; + optional input_bdim; + std::tie(input_value, input_bdim) = unwrapTensorAtLevel(input, cur_level); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor weight_value; + optional weight_bdim; + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); + auto results = batch_rule(input_value, input_bdim, grad_output_value, grad_output_bdim, weight_value, weight_bdim, padding, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); +} + +typedef std::tuple> (*batch_rule_187_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef); +template <> +Tensor 
lowerToNextLayer( + batch_rule_187_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & indices, IntArrayRef output_size ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3746,10 +4174,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_173_t)(const Tensor &, c10::optional, IntArrayRef, const Tensor &, c10::optional, IntArrayRef); +typedef std::tuple> (*batch_rule_188_t)(const Tensor &, c10::optional, IntArrayRef, const Tensor &, c10::optional, IntArrayRef); template <> -Tensor lowerToNextLayer( - batch_rule_173_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_188_t batch_rule, const Tensor & input, IntArrayRef weightsize, const Tensor & grad_output, IntArrayRef padding ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3766,10 +4194,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_174_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, double, c10::optional); +typedef std::tuple> (*batch_rule_189_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, double, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_174_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_189_t batch_rule, const Tensor & x1, const Tensor & x2, double p, c10::optional compute_mode ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3786,10 +4214,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_175_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, double, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_190_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, double, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_175_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_190_t batch_rule, const Tensor & grad, const Tensor & x1, const Tensor & x2, double p, const Tensor & cdist ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3812,10 +4240,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_176_t)(const Tensor &, c10::optional, double); +typedef std::tuple> (*batch_rule_191_t)(const Tensor &, c10::optional, double); template <> -Tensor lowerToNextLayer( - batch_rule_176_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_191_t batch_rule, const Tensor & self, double rcond ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3829,10 +4257,10 @@ Tensor lowerToNextLayer( return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_177_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, double, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_192_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, double, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_177_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_192_t batch_rule, const Tensor & grad, const Tensor & self, double p, const Tensor & pdist ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3852,10 +4280,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_178_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, double); +typedef std::tuple> 
(*batch_rule_193_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, double); template <> -Tensor lowerToNextLayer( - batch_rule_178_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_193_t batch_rule, const Tensor & self, const Tensor & target, int64_t reduction, double delta ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3872,10 +4300,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_179_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef); +typedef std::tuple> (*batch_rule_194_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef); template <> -Tensor lowerToNextLayer( - batch_rule_179_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_194_t batch_rule, const Tensor & self, IntArrayRef shifts, IntArrayRef dims ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3889,10 +4317,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple (*batch_rule_180_t)(const Tensor &, c10::optional, c10::optional); +typedef std::tuple (*batch_rule_195_t)(const Tensor &, c10::optional, c10::optional); template <> -bool lowerToNextLayer>( - batch_rule_180_t batch_rule, +bool lowerToNextLayer>( + batch_rule_195_t batch_rule, const Tensor & self, c10::optional device ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3906,10 +4334,10 @@ bool lowerToNextLayer(results); } -typedef std::tuple> (*batch_rule_181_t)(const Tensor &, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_196_t)(const Tensor &, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_181_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_196_t batch_rule, const Tensor & self, c10::optional device ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3923,10 +4351,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_182_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool, double, int64_t); +typedef std::tuple> (*batch_rule_197_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool, double, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_182_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_197_t batch_rule, const Tensor & input, const Tensor & target, bool log_input, bool full, double eps, int64_t reduction ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3943,10 +4371,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_183_t)(const Tensor &, c10::optional, int64_t, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_198_t)(const Tensor &, c10::optional, int64_t, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, c10::optional>( - batch_rule_183_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, c10::optional>( + batch_rule_198_t batch_rule, const Tensor & self, int64_t high, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3960,10 +4388,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_184_t)(const Tensor &, c10::optional, int64_t, int64_t, 
c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_199_t)(const Tensor &, c10::optional, int64_t, int64_t, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, c10::optional>( - batch_rule_184_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, c10::optional>( + batch_rule_199_t batch_rule, const Tensor & self, int64_t low, int64_t high, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, c10::optional memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3977,10 +4405,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_185_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_200_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional>( - batch_rule_185_t batch_rule, +Tensor lowerToNextLayer, c10::optional>( + batch_rule_200_t batch_rule, const Tensor & self, const Tensor & repeats, c10::optional dim, c10::optional output_size ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -3997,10 +4425,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_186_t)(const Tensor &, c10::optional, int64_t, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_201_t)(const Tensor &, c10::optional, int64_t, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional>( - batch_rule_186_t batch_rule, +Tensor lowerToNextLayer, c10::optional>( + batch_rule_201_t batch_rule, const Tensor & self, int64_t repeats, c10::optional dim, c10::optional output_size ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4014,10 +4442,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_187_t)(const Tensor &, c10::optional, const Scalar &, const Scalar &, bool, c10::optional); +typedef std::tuple> (*batch_rule_202_t)(const Tensor &, c10::optional, const Scalar &, const Scalar &, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_187_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_202_t batch_rule, const Tensor & self, const Scalar & lower, const Scalar & upper, bool training, c10::optional generator ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4031,10 +4459,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_188_t)(const Tensor &, c10::optional, Dimname, int64_t); +typedef std::tuple> (*batch_rule_203_t)(const Tensor &, c10::optional, Dimname, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_188_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_203_t batch_rule, const Tensor & self, Dimname dim, int64_t index ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4048,10 +4476,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_189_t)(const Tensor &, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_204_t)(const Tensor &, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_189_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_204_t batch_rule, const 
Tensor & self, c10::optional eps ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4065,10 +4493,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple (*batch_rule_190_t)(const Tensor &, c10::optional, int64_t); +typedef std::tuple (*batch_rule_205_t)(const Tensor &, c10::optional, int64_t); template <> -int64_t lowerToNextLayer( - batch_rule_190_t batch_rule, +int64_t lowerToNextLayer( + batch_rule_205_t batch_rule, const Tensor & self, int64_t dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4082,10 +4510,10 @@ int64_t lowerToNextLayer( return std::get<0>(results); } -typedef std::tuple (*batch_rule_191_t)(const Tensor &, c10::optional, Dimname); +typedef std::tuple (*batch_rule_206_t)(const Tensor &, c10::optional, Dimname); template <> -int64_t lowerToNextLayer( - batch_rule_191_t batch_rule, +int64_t lowerToNextLayer( + batch_rule_206_t batch_rule, const Tensor & self, Dimname dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4099,10 +4527,10 @@ int64_t lowerToNextLayer( return std::get<0>(results); } -typedef std::tuple> (*batch_rule_192_t)(const Tensor &, c10::optional, int64_t, c10::optional, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_207_t)(const Tensor &, c10::optional, int64_t, c10::optional, c10::optional, int64_t); template <> -Tensor lowerToNextLayer, c10::optional, int64_t>( - batch_rule_192_t batch_rule, +Tensor lowerToNextLayer, c10::optional, int64_t>( + batch_rule_207_t batch_rule, const Tensor & self, int64_t dim, c10::optional start, c10::optional end, int64_t step ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4116,10 +4544,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_193_t)(const Tensor &, c10::optional, IntArrayRef, int64_t, int64_t, int64_t, int64_t); +typedef std::tuple> (*batch_rule_208_t)(const Tensor &, c10::optional, IntArrayRef, int64_t, int64_t, int64_t, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_193_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_208_t batch_rule, const Tensor & grad_output, IntArrayRef input_sizes, int64_t dim, int64_t start, int64_t end, int64_t step ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4133,10 +4561,70 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple<::std::vector,c10::optional> (*batch_rule_194_t)(const Tensor &, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_209_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, c10::optional, c10::optional, int64_t); template <> -::std::vector lowerToNextLayer,const Tensor &, int64_t>( - batch_rule_194_t batch_rule, +Tensor lowerToNextLayer, c10::optional, int64_t>( + batch_rule_209_t batch_rule, + const Tensor & self, const Tensor & src, int64_t dim, c10::optional start, c10::optional end, int64_t step +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + Tensor src_value; + optional src_bdim; + std::tie(src_value, src_bdim) = unwrapTensorAtLevel(src, cur_level); + auto results = batch_rule(self_value, self_bdim, src_value, src_bdim, dim, start, end, step); + return makeBatched(std::get<0>(results), std::get<1>(results), 
cur_level); +} + +typedef std::tuple> (*batch_rule_210_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t); +template <> +Tensor lowerToNextLayer( + batch_rule_210_t batch_rule, + const Tensor & self, const Tensor & src, int64_t dim, int64_t index +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + Tensor src_value; + optional src_bdim; + std::tie(src_value, src_bdim) = unwrapTensorAtLevel(src, cur_level); + auto results = batch_rule(self_value, self_bdim, src_value, src_bdim, dim, index); + return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); +} + +typedef std::tuple> (*batch_rule_211_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, int64_t); +template <> +Tensor lowerToNextLayer( + batch_rule_211_t batch_rule, + const Tensor & self, const Tensor & src, int64_t offset, int64_t dim1, int64_t dim2 +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + Tensor src_value; + optional src_bdim; + std::tie(src_value, src_bdim) = unwrapTensorAtLevel(src, cur_level); + auto results = batch_rule(self_value, self_bdim, src_value, src_bdim, offset, dim1, dim2); + return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); +} + +typedef std::tuple<::std::vector,c10::optional> (*batch_rule_212_t)(const Tensor &, c10::optional, int64_t); +template <> +::std::vector lowerToNextLayer,const Tensor &, int64_t>( + batch_rule_212_t batch_rule, const Tensor & self, int64_t dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4150,10 +4638,10 @@ ::std::vector lowerToNextLayer,co return makeBatchedVector(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple<::std::vector,c10::optional> (*batch_rule_195_t)(const Tensor &, c10::optional, IntArrayRef); +typedef std::tuple<::std::vector,c10::optional> (*batch_rule_213_t)(const Tensor &, c10::optional, IntArrayRef); template <> -::std::vector lowerToNextLayer,const Tensor &, IntArrayRef>( - batch_rule_195_t batch_rule, +::std::vector lowerToNextLayer,const Tensor &, IntArrayRef>( + batch_rule_213_t batch_rule, const Tensor & self, IntArrayRef indices ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4167,10 +4655,10 @@ ::std::vector lowerToNextLayer,co return makeBatchedVector(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_196_t)(const Tensor &, c10::optional, int64_t, c10::optional, c10::optional, const c10::optional &, c10::optional, bool, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_214_t)(const Tensor &, c10::optional, int64_t, c10::optional, c10::optional, const c10::optional &, c10::optional, bool, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, const c10::optional &, bool, c10::optional, c10::optional>( - batch_rule_196_t batch_rule, +Tensor lowerToNextLayer, c10::optional, const c10::optional &, bool, c10::optional, c10::optional>( + batch_rule_214_t batch_rule, const Tensor & 
self, int64_t n_fft, c10::optional hop_length, c10::optional win_length, const c10::optional & window, bool normalized, c10::optional onesided, c10::optional return_complex ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4189,10 +4677,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_197_t)(const Tensor &, c10::optional, int64_t, c10::optional, c10::optional, const c10::optional &, c10::optional, bool, bool, c10::optional, c10::optional, bool); +typedef std::tuple> (*batch_rule_215_t)(const Tensor &, c10::optional, int64_t, c10::optional, c10::optional, const c10::optional &, c10::optional, bool, bool, c10::optional, c10::optional, bool); template <> -Tensor lowerToNextLayer, c10::optional, const c10::optional &, bool, bool, c10::optional, c10::optional, bool>( - batch_rule_197_t batch_rule, +Tensor lowerToNextLayer, c10::optional, const c10::optional &, bool, bool, c10::optional, c10::optional, bool>( + batch_rule_215_t batch_rule, const Tensor & self, int64_t n_fft, c10::optional hop_length, c10::optional win_length, const c10::optional & window, bool center, bool normalized, c10::optional onesided, c10::optional length, bool return_complex ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4211,10 +4699,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_198_t)(const Tensor &, c10::optional, IntArrayRef, bool, bool); +typedef std::tuple> (*batch_rule_216_t)(const Tensor &, c10::optional, IntArrayRef, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_198_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_216_t batch_rule, const Tensor & self, IntArrayRef dim, bool unbiased, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4228,10 +4716,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_199_t)(const Tensor &, c10::optional, c10::optional, c10::optional, bool); +typedef std::tuple> (*batch_rule_217_t)(const Tensor &, c10::optional, c10::optional, c10::optional, bool); template <> -Tensor lowerToNextLayer, c10::optional, bool>( - batch_rule_199_t batch_rule, +Tensor lowerToNextLayer, c10::optional, bool>( + batch_rule_217_t batch_rule, const Tensor & self, c10::optional dim, c10::optional correction, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4245,10 +4733,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_200_t)(const Tensor &, c10::optional, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_218_t)(const Tensor &, c10::optional, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, bool>( - batch_rule_200_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, bool>( + batch_rule_218_t batch_rule, const Tensor & self, bool check_errors ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4262,10 +4750,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_201_t)(const Tensor &, c10::optional, IntArrayRef, bool, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_219_t)(const Tensor &, c10::optional, IntArrayRef, bool, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, IntArrayRef, bool, bool>( - batch_rule_201_t batch_rule, 
+std::tuple lowerToNextLayer,const Tensor &, IntArrayRef, bool, bool>( + batch_rule_219_t batch_rule, const Tensor & self, IntArrayRef dim, bool unbiased, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4279,10 +4767,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_202_t)(const Tensor &, c10::optional, c10::optional, c10::optional, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_220_t)(const Tensor &, c10::optional, c10::optional, c10::optional, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, c10::optional, c10::optional, bool>( - batch_rule_202_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, c10::optional, c10::optional, bool>( + batch_rule_220_t batch_rule, const Tensor & self, c10::optional dim, c10::optional correction, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4296,10 +4784,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_203_t)(const Tensor &, c10::optional, DimnameList, bool, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_221_t)(const Tensor &, c10::optional, DimnameList, bool, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, DimnameList, bool, bool>( - batch_rule_203_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, DimnameList, bool, bool>( + batch_rule_221_t batch_rule, const Tensor & self, DimnameList dim, bool unbiased, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4313,10 +4801,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_204_t)(const Tensor &, c10::optional, DimnameList, c10::optional, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_222_t)(const Tensor &, c10::optional, DimnameList, c10::optional, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, DimnameList, c10::optional, bool>( - batch_rule_204_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, DimnameList, c10::optional, bool>( + batch_rule_222_t batch_rule, const Tensor & self, DimnameList dim, c10::optional correction, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4330,10 +4818,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_205_t)(const Tensor &, c10::optional, DimnameList, bool, bool); +typedef std::tuple> (*batch_rule_223_t)(const Tensor &, c10::optional, DimnameList, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_205_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_223_t batch_rule, const Tensor & self, DimnameList dim, bool unbiased, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4347,10 +4835,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_206_t)(const Tensor &, c10::optional, DimnameList, c10::optional, bool); +typedef std::tuple> (*batch_rule_224_t)(const Tensor &, c10::optional, DimnameList, c10::optional, bool); template <> -Tensor lowerToNextLayer, bool>( - batch_rule_206_t 
batch_rule, +Tensor lowerToNextLayer, bool>( + batch_rule_224_t batch_rule, const Tensor & self, DimnameList dim, c10::optional correction, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4364,10 +4852,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_207_t)(const Tensor &, c10::optional, int64_t, bool, c10::optional); +typedef std::tuple> (*batch_rule_225_t)(const Tensor &, c10::optional, int64_t, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_207_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_225_t batch_rule, const Tensor & self, int64_t dim, bool keepdim, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4381,10 +4869,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_208_t)(const Tensor &, c10::optional, Dimname, bool, c10::optional); +typedef std::tuple> (*batch_rule_226_t)(const Tensor &, c10::optional, Dimname, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_208_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_226_t batch_rule, const Tensor & self, Dimname dim, bool keepdim, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4398,10 +4886,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_209_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef); +typedef std::tuple> (*batch_rule_227_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef); template <> -Tensor lowerToNextLayer( - batch_rule_209_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_227_t batch_rule, const Tensor & self, const Tensor & other, IntArrayRef dims_self, IntArrayRef dims_other ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4418,10 +4906,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_210_t)(const Tensor &, c10::optional, Dimname, Dimname); +typedef std::tuple> (*batch_rule_228_t)(const Tensor &, c10::optional, Dimname, Dimname); template <> -Tensor lowerToNextLayer( - batch_rule_210_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_228_t batch_rule, const Tensor & self, Dimname dim0, Dimname dim1 ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4435,10 +4923,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_211_t)(const Tensor &, c10::optional, int64_t, IntArrayRef); +typedef std::tuple> (*batch_rule_229_t)(const Tensor &, c10::optional, int64_t, IntArrayRef); template <> -Tensor lowerToNextLayer( - batch_rule_211_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_229_t batch_rule, const Tensor & self, int64_t k, IntArrayRef dims ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4452,10 +4940,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_212_t)(const Tensor &, c10::optional, double, int64_t); +typedef std::tuple> (*batch_rule_230_t)(const Tensor &, c10::optional, double, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_212_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_230_t batch_rule, const Tensor & self, double scale, int64_t zero_point ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4469,10 +4957,10 @@ Tensor lowerToNextLayer return 
makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_213_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t); +typedef std::tuple> (*batch_rule_231_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_213_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_231_t batch_rule, const Tensor & i1, const Tensor & i2, const Tensor & i3, IntArrayRef expand1, IntArrayRef expand2, IntArrayRef expand3, IntArrayRef sumdim, int64_t unroll_dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4492,10 +4980,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_214_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, double, double, double, bool, int64_t); +typedef std::tuple> (*batch_rule_232_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, double, double, double, bool, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_214_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_232_t batch_rule, const Tensor & anchor, const Tensor & positive, const Tensor & negative, double margin, double p, double eps, bool swap, int64_t reduction ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4515,10 +5003,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_215_t)(const Tensor &, c10::optional, bool, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_233_t)(const Tensor &, c10::optional, bool, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, bool, bool>( - batch_rule_215_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, bool, bool>( + batch_rule_233_t batch_rule, const Tensor & self, bool upper, bool check_errors ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4532,10 +5020,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_216_t)(const Tensor &, c10::optional, int64_t, bool, bool, bool); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_234_t)(const Tensor &, c10::optional, int64_t, bool, bool, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, int64_t, bool, bool, bool>( - batch_rule_216_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, int64_t, bool, bool, bool>( + batch_rule_234_t batch_rule, const Tensor & self, int64_t dim, bool sorted, bool return_inverse, bool return_counts ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4549,10 +5037,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_217_t)(const Tensor &, c10::optional, bool, bool, c10::optional); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_235_t)(const Tensor &, c10::optional, bool, bool, c10::optional); template <> -std::tuple 
lowerToNextLayer,const Tensor &, bool, bool, c10::optional>( - batch_rule_217_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, bool, bool, c10::optional>( + batch_rule_235_t batch_rule, const Tensor & self, bool return_inverse, bool return_counts, c10::optional dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4566,10 +5054,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_218_t)(const Tensor &, c10::optional, int64_t, bool, bool); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_236_t)(const Tensor &, c10::optional, int64_t, bool, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, int64_t, bool, bool>( - batch_rule_218_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, int64_t, bool, bool>( + batch_rule_236_t batch_rule, const Tensor & self, int64_t dim, bool return_inverse, bool return_counts ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4583,10 +5071,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_219_t)(const Tensor &, c10::optional, bool, bool, bool); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_237_t)(const Tensor &, c10::optional, bool, bool, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, bool, bool, bool>( - batch_rule_219_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, bool, bool, bool>( + batch_rule_237_t batch_rule, const Tensor & self, bool sorted, bool return_inverse, bool return_counts ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4600,10 +5088,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple> (*batch_rule_220_t)(const Tensor &, c10::optional, const Scalar &, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_238_t)(const Tensor &, c10::optional, const Scalar &, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_220_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_238_t batch_rule, const Tensor & condition, const Scalar & self, const Tensor & other ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4620,10 +5108,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple<::std::vector,c10::optional> (*batch_rule_221_t)(const Tensor &, c10::optional); +typedef std::tuple<::std::vector,c10::optional> (*batch_rule_239_t)(const Tensor &, c10::optional); template <> -::std::vector lowerToNextLayer,const Tensor &>( - batch_rule_221_t batch_rule, +::std::vector lowerToNextLayer,const Tensor &>( + batch_rule_239_t batch_rule, const Tensor & self ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4637,10 +5125,10 @@ ::std::vector lowerToNextLayer,co return makeBatchedVector(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_222_t)(const Tensor &, c10::optional, const Tensor &, 
c10::optional, int64_t); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_240_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, int64_t>( - batch_rule_222_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, int64_t>( + batch_rule_240_t batch_rule, const Tensor & self, const Tensor & target, int64_t reduction ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4657,10 +5145,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_223_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_241_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, int64_t>( - batch_rule_223_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, int64_t>( + batch_rule_241_t batch_rule, const Tensor & grad_w, const Tensor & saved_v, const Tensor & saved_g, const Tensor & saved_norms, int64_t dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4683,10 +5171,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_224_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_242_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_224_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_242_t batch_rule, const Tensor & mean, const Tensor & std, c10::optional generator ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4703,10 +5191,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_225_t)(const Tensor &, c10::optional, const c10::optional &, IntArrayRef, bool, c10::optional); +typedef std::tuple> (*batch_rule_243_t)(const Tensor &, c10::optional, const c10::optional &, IntArrayRef, bool, c10::optional); template <> -Tensor lowerToNextLayer &, IntArrayRef, bool, c10::optional>( - batch_rule_225_t batch_rule, +Tensor lowerToNextLayer &, IntArrayRef, bool, c10::optional>( + batch_rule_243_t batch_rule, const Tensor & self, const c10::optional & p, IntArrayRef dim, bool keepdim, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4720,10 +5208,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_226_t)(const Tensor &, c10::optional, ScalarType); +typedef std::tuple> (*batch_rule_244_t)(const Tensor &, c10::optional, ScalarType); template <> -Tensor lowerToNextLayer( - batch_rule_226_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_244_t batch_rule, const Tensor & self, ScalarType dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4737,10 +5225,10 @@ Tensor lowerToNextLayer( return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_227_t)(const Tensor &, 
c10::optional, IntArrayRef, ScalarType); +typedef std::tuple> (*batch_rule_245_t)(const Tensor &, c10::optional, IntArrayRef, ScalarType); template <> -Tensor lowerToNextLayer( - batch_rule_227_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_245_t batch_rule, const Tensor & self, IntArrayRef dim, ScalarType dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4754,10 +5242,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_228_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef); +typedef std::tuple> (*batch_rule_246_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef); template <> -Tensor lowerToNextLayer( - batch_rule_228_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_246_t batch_rule, const Tensor & grad_output, const Tensor & self, IntArrayRef padding ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4774,10 +5262,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_229_t)(const Tensor &, c10::optional, const c10::optional &, ScalarType); +typedef std::tuple> (*batch_rule_247_t)(const Tensor &, c10::optional, const c10::optional &, ScalarType); template <> -Tensor lowerToNextLayer &, ScalarType>( - batch_rule_229_t batch_rule, +Tensor lowerToNextLayer &, ScalarType>( + batch_rule_247_t batch_rule, const Tensor & self, const c10::optional & p, ScalarType dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4791,10 +5279,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_230_t)(const Tensor &, c10::optional, const c10::optional &, IntArrayRef, bool, ScalarType); +typedef std::tuple> (*batch_rule_248_t)(const Tensor &, c10::optional, const c10::optional &, IntArrayRef, bool, ScalarType); template <> -Tensor lowerToNextLayer &, IntArrayRef, bool, ScalarType>( - batch_rule_230_t batch_rule, +Tensor lowerToNextLayer &, IntArrayRef, bool, ScalarType>( + batch_rule_248_t batch_rule, const Tensor & self, const c10::optional & p, IntArrayRef dim, bool keepdim, ScalarType dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4808,10 +5296,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_231_t)(const Tensor &, c10::optional, const c10::optional &, IntArrayRef, bool); +typedef std::tuple> (*batch_rule_249_t)(const Tensor &, c10::optional, const c10::optional &, IntArrayRef, bool); template <> -Tensor lowerToNextLayer &, IntArrayRef, bool>( - batch_rule_231_t batch_rule, +Tensor lowerToNextLayer &, IntArrayRef, bool>( + batch_rule_249_t batch_rule, const Tensor & self, const c10::optional & p, IntArrayRef dim, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4825,10 +5313,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_232_t)(const Tensor &, c10::optional, const c10::optional &, DimnameList, bool, ScalarType); +typedef std::tuple> (*batch_rule_250_t)(const Tensor &, c10::optional, const c10::optional &, DimnameList, bool, ScalarType); template <> -Tensor lowerToNextLayer &, DimnameList, bool, ScalarType>( - batch_rule_232_t batch_rule, +Tensor lowerToNextLayer &, DimnameList, bool, ScalarType>( + batch_rule_250_t batch_rule, const Tensor & self, const c10::optional & p, DimnameList dim, bool keepdim, ScalarType dtype ) { c10::impl::ExcludeDispatchKeyGuard 
guard(kBatchedKey); @@ -4842,10 +5330,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_233_t)(const Tensor &, c10::optional, const c10::optional &, DimnameList, bool); +typedef std::tuple> (*batch_rule_251_t)(const Tensor &, c10::optional, const c10::optional &, DimnameList, bool); template <> -Tensor lowerToNextLayer &, DimnameList, bool>( - batch_rule_233_t batch_rule, +Tensor lowerToNextLayer &, DimnameList, bool>( + batch_rule_251_t batch_rule, const Tensor & self, const c10::optional & p, DimnameList dim, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4859,10 +5347,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_234_t)(const Tensor &, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_252_t)(const Tensor &, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_234_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_252_t batch_rule, const Tensor & self, c10::optional memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4876,10 +5364,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_235_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_253_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( - batch_rule_235_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( + batch_rule_253_t batch_rule, const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4899,10 +5387,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_236_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_254_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( - batch_rule_236_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( + batch_rule_254_t batch_rule, const Tensor & crow_indices, const Tensor & col_indices, const Tensor & values, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4922,10 +5410,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_237_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_255_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, 
c10::optional>( - batch_rule_237_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( + batch_rule_255_t batch_rule, const Tensor & indices, const Tensor & values, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4942,10 +5430,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_238_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_256_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( - batch_rule_238_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( + batch_rule_256_t batch_rule, const Tensor & indices, const Tensor & values, IntArrayRef size, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4962,10 +5450,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_239_t)(int64_t, int64_t, IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_257_t)(int64_t, int64_t, IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( - batch_rule_239_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional>( + batch_rule_257_t batch_rule, int64_t sparse_dim, int64_t dense_dim, IntArrayRef size, const Tensor & indices, const Tensor & values, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4982,10 +5470,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple<::std::vector,c10::optional> (*batch_rule_240_t)(const Tensor &, c10::optional, Dimname); +typedef std::tuple<::std::vector,c10::optional> (*batch_rule_258_t)(const Tensor &, c10::optional, Dimname); template <> -::std::vector lowerToNextLayer,const Tensor &, Dimname>( - batch_rule_240_t batch_rule, +::std::vector lowerToNextLayer,const Tensor &, Dimname>( + batch_rule_258_t batch_rule, const Tensor & self, Dimname dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -4999,10 +5487,10 @@ ::std::vector lowerToNextLayer,co return makeBatchedVector(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_241_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t); +typedef std::tuple> (*batch_rule_259_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_241_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_259_t batch_rule, const Tensor & self, IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5016,10 +5504,27 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef 
std::tuple> (*batch_rule_242_t)(const Tensor &, c10::optional, double, int64_t, ScalarType); +typedef std::tuple> (*batch_rule_260_t)(const Tensor &, c10::optional, ScalarType, bool); template <> -Tensor lowerToNextLayer( - batch_rule_242_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_260_t batch_rule, + const Tensor & self, ScalarType dtype, bool reduce_range +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + auto results = batch_rule(self_value, self_bdim, dtype, reduce_range); + return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); +} + +typedef std::tuple> (*batch_rule_261_t)(const Tensor &, c10::optional, double, int64_t, ScalarType); +template <> +Tensor lowerToNextLayer( + batch_rule_261_t batch_rule, const Tensor & self, double scale, int64_t zero_point, ScalarType dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5033,10 +5538,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_243_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, ScalarType); +typedef std::tuple> (*batch_rule_262_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, ScalarType); template <> -Tensor lowerToNextLayer( - batch_rule_243_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_262_t batch_rule, const Tensor & self, const Tensor & scale, const Tensor & zero_point, ScalarType dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5056,10 +5561,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_244_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, ScalarType); +typedef std::tuple> (*batch_rule_263_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, ScalarType); template <> -Tensor lowerToNextLayer( - batch_rule_244_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_263_t batch_rule, const Tensor & self, const Tensor & scales, const Tensor & zero_points, int64_t axis, ScalarType dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5079,10 +5584,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple (*batch_rule_245_t)(const Tensor &, c10::optional); +typedef std::tuple (*batch_rule_264_t)(const Tensor &, c10::optional); template <> -double lowerToNextLayer( - batch_rule_245_t batch_rule, +double lowerToNextLayer( + batch_rule_264_t batch_rule, const Tensor & self ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5096,10 +5601,10 @@ double lowerToNextLayer( return std::get<0>(results); } -typedef std::tuple (*batch_rule_246_t)(const Tensor &, c10::optional); +typedef std::tuple (*batch_rule_265_t)(const Tensor &, c10::optional); template <> -QScheme lowerToNextLayer( - batch_rule_246_t batch_rule, +QScheme lowerToNextLayer( + batch_rule_265_t batch_rule, const Tensor & self ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5113,10 +5618,10 @@ QScheme lowerToNextLayer( return std::get<0>(results); } -typedef std::tuple> (*batch_rule_247_t)(const Tensor &, c10::optional, double, int64_t, 
int64_t, int64_t); +typedef std::tuple> (*batch_rule_266_t)(const Tensor &, c10::optional, double, int64_t, int64_t, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_247_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_266_t batch_rule, const Tensor & self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5130,10 +5635,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_248_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t); +typedef std::tuple> (*batch_rule_267_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_248_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_267_t batch_rule, const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t quant_min, int64_t quant_max ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5153,10 +5658,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_249_t)(const Tensor &, c10::optional, double, int64_t, int64_t, int64_t); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_268_t)(const Tensor &, c10::optional, double, int64_t, int64_t, int64_t); template <> -std::tuple lowerToNextLayer,const Tensor &, double, int64_t, int64_t, int64_t>( - batch_rule_249_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, double, int64_t, int64_t, int64_t>( + batch_rule_268_t batch_rule, const Tensor & self, double scale, int64_t zero_point, int64_t quant_min, int64_t quant_max ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5170,10 +5675,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_250_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_269_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t>( - batch_rule_250_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t>( + batch_rule_269_t batch_rule, const Tensor & self, const Tensor & scale, const Tensor & zero_point, const Tensor & fake_quant_enabled, int64_t quant_min, int64_t quant_max ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5196,10 +5701,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_251_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, double); +typedef std::tuple> (*batch_rule_270_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, double); template <> -Tensor lowerToNextLayer( - batch_rule_251_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_270_t batch_rule, const Tensor & self, const 
Tensor & scale, const Tensor & zero_point, int64_t quant_min, int64_t quant_max, double grad_factor ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5219,10 +5724,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_252_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, double); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_271_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, double); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, double>( - batch_rule_252_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, double>( + batch_rule_271_t batch_rule, const Tensor & grad, const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t quant_min, int64_t quant_max, double grad_factor ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5245,10 +5750,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple> (*batch_rule_253_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, int64_t); +typedef std::tuple> (*batch_rule_272_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_253_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_272_t batch_rule, const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5268,10 +5773,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_254_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, int64_t); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_273_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, int64_t); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, int64_t>( - batch_rule_254_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, int64_t>( + batch_rule_273_t batch_rule, const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5291,10 +5796,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_255_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, int64_t, double); +typedef std::tuple> (*batch_rule_274_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, int64_t, double); template <> 
-Tensor lowerToNextLayer( - batch_rule_255_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_274_t batch_rule, const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5314,10 +5819,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_256_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, int64_t, double); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_275_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, int64_t, int64_t, double); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, int64_t, double>( - batch_rule_256_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, int64_t, int64_t, int64_t, double>( + batch_rule_275_t batch_rule, const Tensor & grad, const Tensor & self, const Tensor & scale, const Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5340,10 +5845,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple (*batch_rule_257_t)(const Tensor &, c10::optional, bool); +typedef std::tuple (*batch_rule_276_t)(const Tensor &, c10::optional, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, bool>( - batch_rule_257_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, bool>( + batch_rule_276_t batch_rule, const Tensor & self, bool reduce_range ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5357,10 +5862,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_258_t)(const Tensor &, c10::optional, int64_t, int64_t, double, int64_t); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_277_t)(const Tensor &, c10::optional, int64_t, int64_t, double, int64_t); template <> -std::tuple lowerToNextLayer,const Tensor &, int64_t, int64_t, double, int64_t>( - batch_rule_258_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, int64_t, int64_t, double, int64_t>( + batch_rule_277_t batch_rule, const Tensor & input, int64_t numel, int64_t n_bins, double ratio, int64_t bit_width ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5374,10 +5879,44 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_259_t)(const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional, bool, c10::optional); +typedef std::tuple> (*batch_rule_278_t)(const Tensor &, c10::optional, bool, bool, ScalarType, ScalarType); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, bool, c10::optional>( - batch_rule_259_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_278_t batch_rule, + const Tensor & self, bool cuda_enabled, bool cpu_enabled, ScalarType cuda_dtype, 
ScalarType cpu_dtype +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + auto results = batch_rule(self_value, self_bdim, cuda_enabled, cpu_enabled, cuda_dtype, cpu_dtype); + return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); +} + +typedef std::tuple> (*batch_rule_279_t)(const Tensor &, c10::optional, bool, bool); +template <> +Tensor lowerToNextLayer( + batch_rule_279_t batch_rule, + const Tensor & self, bool cuda_enabled, bool cpu_enabled +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + auto results = batch_rule(self_value, self_bdim, cuda_enabled, cpu_enabled); + return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); +} + +typedef std::tuple> (*batch_rule_280_t)(const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional, bool, c10::optional); +template <> +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, bool, c10::optional>( + batch_rule_280_t batch_rule, const Tensor & self, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, bool non_blocking, c10::optional memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5391,10 +5930,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_260_t)(const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional, bool, bool, c10::optional); +typedef std::tuple> (*batch_rule_281_t)(const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional, c10::optional, bool, bool, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, bool, bool, c10::optional>( - batch_rule_260_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional, c10::optional, bool, bool, c10::optional>( + batch_rule_281_t batch_rule, const Tensor & self, c10::optional dtype, c10::optional layout, c10::optional device, c10::optional pin_memory, bool non_blocking, bool copy, c10::optional memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5408,10 +5947,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_261_t)(const Tensor &, c10::optional, Device, ScalarType, bool, bool, c10::optional); +typedef std::tuple> (*batch_rule_282_t)(const Tensor &, c10::optional, Device, ScalarType, bool, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_261_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_282_t batch_rule, const Tensor & self, Device device, ScalarType dtype, bool non_blocking, bool copy, c10::optional memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5425,10 +5964,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_262_t)(const Tensor &, c10::optional, ScalarType, bool, bool, c10::optional); +typedef std::tuple> (*batch_rule_283_t)(const Tensor &, 
c10::optional, ScalarType, bool, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_262_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_283_t batch_rule, const Tensor & self, ScalarType dtype, bool non_blocking, bool copy, c10::optional memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5442,10 +5981,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_263_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool, c10::optional); +typedef std::tuple> (*batch_rule_284_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_263_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_284_t batch_rule, const Tensor & self, const Tensor & other, bool non_blocking, bool copy, c10::optional memory_format ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5462,10 +6001,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple (*batch_rule_264_t)(const Tensor &, c10::optional); +typedef std::tuple (*batch_rule_285_t)(const Tensor &, c10::optional); template <> -Scalar lowerToNextLayer( - batch_rule_264_t batch_rule, +Scalar lowerToNextLayer( + batch_rule_285_t batch_rule, const Tensor & self ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5479,10 +6018,10 @@ Scalar lowerToNextLayer( return std::get<0>(results); } -typedef std::tuple (*batch_rule_265_t)(const Tensor &, c10::optional, const Tensor &, c10::optional); +typedef std::tuple (*batch_rule_286_t)(const Tensor &, c10::optional, const Tensor &, c10::optional); template <> -ScalarType lowerToNextLayer( - batch_rule_265_t batch_rule, +ScalarType lowerToNextLayer( + batch_rule_286_t batch_rule, const Tensor & tensor, const Tensor & other ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5499,10 +6038,10 @@ ScalarType lowerToNextLayer(results); } -typedef std::tuple (*batch_rule_266_t)(const Tensor &, c10::optional, const Scalar &); +typedef std::tuple (*batch_rule_287_t)(const Tensor &, c10::optional, const Scalar &); template <> -ScalarType lowerToNextLayer( - batch_rule_266_t batch_rule, +ScalarType lowerToNextLayer( + batch_rule_287_t batch_rule, const Tensor & tensor, const Scalar & other ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5516,10 +6055,10 @@ ScalarType lowerToNextLayer(results); } -typedef std::tuple (*batch_rule_267_t)(const Scalar &, const Tensor &, c10::optional); +typedef std::tuple (*batch_rule_288_t)(const Scalar &, const Tensor &, c10::optional); template <> -ScalarType lowerToNextLayer( - batch_rule_267_t batch_rule, +ScalarType lowerToNextLayer( + batch_rule_288_t batch_rule, const Scalar & scalar, const Tensor & tensor ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5533,10 +6072,10 @@ ScalarType lowerToNextLayer(results); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_268_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_289_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const 
Tensor &, const c10::optional &, const c10::optional &>( - batch_rule_268_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &>( + batch_rule_289_t batch_rule, const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & cx, const c10::optional & input_bias, const c10::optional & hidden_bias ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5566,10 +6105,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_269_t)(const c10::optional &, c10::optional, const c10::optional &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_290_t)(const c10::optional &, c10::optional, const c10::optional &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool); template <> -std::tuple lowerToNextLayer,const c10::optional &, const c10::optional &, const Tensor &, const Tensor &, const Tensor &, bool>( - batch_rule_269_t batch_rule, +std::tuple lowerToNextLayer,const c10::optional &, const c10::optional &, const Tensor &, const Tensor &, const Tensor &, bool>( + batch_rule_290_t batch_rule, const c10::optional & grad_hy, const c10::optional & grad_cy, const Tensor & cx, const Tensor & cy, const Tensor & workspace, bool has_bias ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5599,10 +6138,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level), makeBatched(std::get<6>(results), std::get<7>(results), cur_level), makeBatched(std::get<8>(results), std::get<9>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_270_t)(const c10::optional &, c10::optional, const c10::optional &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_291_t)(const c10::optional &, c10::optional, const c10::optional &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional); template <> -std::tuple lowerToNextLayer,const c10::optional &, const c10::optional &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, const Tensor &, const Tensor &>( - batch_rule_270_t batch_rule, +std::tuple lowerToNextLayer,const c10::optional &, const c10::optional &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &, const Tensor &, const Tensor &>( + batch_rule_291_t batch_rule, const c10::optional & grad_hy, const c10::optional & grad_cy, const Tensor & input_gates, const Tensor & 
hidden_gates, const c10::optional & input_bias, const c10::optional & hidden_bias, const Tensor & cx, const Tensor & cy ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5645,10 +6184,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level), makeBatched(std::get<6>(results), std::get<7>(results), cur_level), makeBatched(std::get<8>(results), std::get<9>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_271_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_292_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &>( - batch_rule_271_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &>( + batch_rule_292_t batch_rule, const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & hx, const c10::optional & input_bias, const c10::optional & hidden_bias ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5678,10 +6217,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_272_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_293_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, bool>( - batch_rule_272_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, bool>( + batch_rule_293_t batch_rule, const Tensor & grad_hy, const Tensor & workspace, bool has_bias ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5698,10 +6237,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level), makeBatched(std::get<6>(results), std::get<7>(results), cur_level), makeBatched(std::get<8>(results), std::get<9>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_273_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_294_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, 
const c10::optional &, const c10::optional &>( - batch_rule_273_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, const Tensor &, const c10::optional &, const c10::optional &>( + batch_rule_294_t batch_rule, const Tensor & grad_hy, const Tensor & input_gates, const Tensor & hidden_gates, const Tensor & hx, const c10::optional & input_bias, const c10::optional & hidden_bias ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5734,10 +6273,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level), makeBatched(std::get<6>(results), std::get<7>(results), cur_level), makeBatched(std::get<8>(results), std::get<9>(results), cur_level)); } -typedef std::tuple> (*batch_rule_274_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional); +typedef std::tuple> (*batch_rule_295_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional); template <> -Tensor lowerToNextLayer &, const c10::optional &>( - batch_rule_274_t batch_rule, +Tensor lowerToNextLayer &, const c10::optional &>( + batch_rule_295_t batch_rule, const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const c10::optional & b_ih, const c10::optional & b_hh ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5770,10 +6309,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_275_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, const Scalar &, const Scalar &); +typedef std::tuple> (*batch_rule_296_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, const Scalar &, const Scalar &); template <> -Tensor lowerToNextLayer( - batch_rule_275_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_296_t batch_rule, const Tensor & input, const Tensor & hx, const Tensor & w_ih, const Tensor & w_hh, const Tensor & b_ih, const Tensor & b_hh, const Tensor & packed_ih, const Tensor & packed_hh, const Tensor & col_offsets_ih, const Tensor & col_offsets_hh, const Scalar & scale_ih, const Scalar & scale_hh, const Scalar & zero_point_ih, const Scalar & zero_point_hh ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5814,10 +6353,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_276_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_297_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool); template <> -std::tuple 
lowerToNextLayer,const Tensor &, const Tensor &, bool>( - batch_rule_276_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, bool>( + batch_rule_297_t batch_rule, const Tensor & input, const Tensor & lengths, bool batch_first ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5834,10 +6373,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_277_t)(const Tensor &, c10::optional, IntArrayRef, const Tensor &, c10::optional, bool); +typedef std::tuple> (*batch_rule_298_t)(const Tensor &, c10::optional, IntArrayRef, const Tensor &, c10::optional, bool); template <> -Tensor lowerToNextLayer( - batch_rule_277_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_298_t batch_rule, const Tensor & grad, IntArrayRef input_size, const Tensor & batch_sizes, bool batch_first ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5854,10 +6393,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_278_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, const Scalar &, int64_t); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_299_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, const Scalar &, int64_t); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, bool, const Scalar &, int64_t>( - batch_rule_278_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, bool, const Scalar &, int64_t>( + batch_rule_299_t batch_rule, const Tensor & data, const Tensor & batch_sizes, bool batch_first, const Scalar & padding_value, int64_t total_length ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5874,10 +6413,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_279_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool); +typedef std::tuple> (*batch_rule_300_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool); template <> -Tensor lowerToNextLayer( - batch_rule_279_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_300_t batch_rule, const Tensor & self, const Tensor & index, const Tensor & source, bool accumulate ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5897,10 +6436,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_280_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &); +typedef std::tuple> (*batch_rule_301_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &); template <> -Tensor lowerToNextLayer( - batch_rule_280_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_301_t batch_rule, const Tensor & self, int64_t dim, const Tensor & index, const Tensor & source, const Scalar & alpha ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5920,10 +6459,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_281_t)(const Tensor &, c10::optional, Dimname, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &); +typedef std::tuple> (*batch_rule_302_t)(const 
Tensor &, c10::optional, Dimname, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &); template <> -Tensor lowerToNextLayer( - batch_rule_281_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_302_t batch_rule, const Tensor & self, Dimname dim, const Tensor & index, const Tensor & source, const Scalar & alpha ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5943,10 +6482,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_282_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, const Scalar &); +typedef std::tuple> (*batch_rule_303_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, const Scalar &); template <> -Tensor lowerToNextLayer( - batch_rule_282_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_303_t batch_rule, const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5963,10 +6502,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_283_t)(const Tensor &, c10::optional, Dimname, const Tensor &, c10::optional, const Scalar &); +typedef std::tuple> (*batch_rule_304_t)(const Tensor &, c10::optional, Dimname, const Tensor &, c10::optional, const Scalar &); template <> -Tensor lowerToNextLayer( - batch_rule_283_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_304_t batch_rule, const Tensor & self, Dimname dim, const Tensor & index, const Scalar & value ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -5983,10 +6522,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_284_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, const Tensor &, c10::optional, c10::string_view); +typedef std::tuple> (*batch_rule_305_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, const Tensor &, c10::optional, c10::string_view); template <> -Tensor lowerToNextLayer( - batch_rule_284_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_305_t batch_rule, const Tensor & self, int64_t dim, const Tensor & index, const Tensor & src, c10::string_view reduce ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6006,10 +6545,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_285_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, const Scalar &, c10::string_view); +typedef std::tuple> (*batch_rule_306_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, const Scalar &, c10::string_view); template <> -Tensor lowerToNextLayer( - batch_rule_285_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_306_t batch_rule, const Tensor & self, int64_t dim, const Tensor & index, const Scalar & value, c10::string_view reduce ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6026,10 +6565,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_286_t)(const Tensor &, c10::optional, IntArrayRef, int64_t); +typedef std::tuple> (*batch_rule_307_t)(const Tensor &, c10::optional, IntArrayRef, int64_t); template <> -Tensor lowerToNextLayer( - batch_rule_286_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_307_t batch_rule, const Tensor & grad, IntArrayRef input_sizes, int64_t diagonal ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6043,10 
+6582,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_287_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_308_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_287_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_308_t batch_rule, const Tensor & self, const Tensor & indices, c10::optional dim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6063,10 +6602,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_288_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_309_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_288_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_309_t batch_rule, const Tensor & self, int64_t dim, const Tensor & index ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6083,10 +6622,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_289_t)(const Tensor &, c10::optional, Dimname, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_310_t)(const Tensor &, c10::optional, Dimname, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_289_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_310_t batch_rule, const Tensor & self, Dimname dim, const Tensor & index ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6103,10 +6642,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_290_t)(const Tensor &, c10::optional, IntArrayRef, int64_t, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_311_t)(const Tensor &, c10::optional, IntArrayRef, int64_t, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_290_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_311_t batch_rule, const Tensor & grad, IntArrayRef self_sizes, int64_t dim, const Tensor & index ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6123,10 +6662,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_291_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, bool); +typedef std::tuple> (*batch_rule_312_t)(const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, bool); template <> -Tensor lowerToNextLayer( - batch_rule_291_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_312_t batch_rule, const Tensor & self, int64_t dim, const Tensor & index, bool sparse_grad ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6143,10 +6682,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_292_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, bool); +typedef std::tuple> (*batch_rule_313_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional, bool); template <> -Tensor lowerToNextLayer( - batch_rule_292_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_313_t batch_rule, const Tensor & grad, const Tensor & self, int64_t dim, const Tensor & index, bool sparse_grad ) { c10::impl::ExcludeDispatchKeyGuard 
guard(kBatchedKey); @@ -6166,10 +6705,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_293_t)(const Tensor &, c10::optional, Dimname, const Tensor &, c10::optional, bool); +typedef std::tuple> (*batch_rule_314_t)(const Tensor &, c10::optional, Dimname, const Tensor &, c10::optional, bool); template <> -Tensor lowerToNextLayer( - batch_rule_293_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_314_t batch_rule, const Tensor & self, Dimname dim, const Tensor & index, bool sparse_grad ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6186,10 +6725,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_294_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &); +typedef std::tuple> (*batch_rule_315_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &); template <> -Tensor lowerToNextLayer( - batch_rule_294_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_315_t batch_rule, const Tensor & self, const Tensor & tensor1, const Tensor & tensor2, const Scalar & value ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6209,10 +6748,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_295_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t, int64_t, double); +typedef std::tuple> (*batch_rule_316_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t, int64_t, double); template <> -Tensor lowerToNextLayer &, int64_t, int64_t, double>( - batch_rule_295_t batch_rule, +Tensor lowerToNextLayer &, int64_t, int64_t, double>( + batch_rule_316_t batch_rule, const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, int64_t ignore_index, double label_smoothing ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6234,10 +6773,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_296_t)(const Tensor &, c10::optional, const Tensor &, c10::optional); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_317_t)(const Tensor &, c10::optional, const Tensor &, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &>( - batch_rule_296_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &>( + batch_rule_317_t batch_rule, const Tensor & self, const Tensor & A ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6254,10 +6793,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_297_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_318_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, bool, bool, bool>( - batch_rule_297_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, bool, bool, bool>( + batch_rule_318_t batch_rule, const Tensor & self, const Tensor & A, bool upper, bool transpose, bool unitriangular ) { c10::impl::ExcludeDispatchKeyGuard 
guard(kBatchedKey); @@ -6274,10 +6813,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_298_t)(const Tensor &, c10::optional, bool, bool); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_319_t)(const Tensor &, c10::optional, bool, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, bool, bool>( - batch_rule_298_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, bool, bool>( + batch_rule_319_t batch_rule, const Tensor & self, bool pivot, bool check_errors ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6291,10 +6830,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple> (*batch_rule_299_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool); +typedef std::tuple> (*batch_rule_320_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_299_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_320_t batch_rule, const Tensor & self, const Tensor & input2, const Tensor & input3, bool left, bool transpose ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6314,10 +6853,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_300_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_321_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, bool, bool>( - batch_rule_300_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, bool, bool>( + batch_rule_321_t batch_rule, const Tensor & LU_data, const Tensor & LU_pivots, bool unpack_data, bool unpack_pivots ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6334,10 +6873,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple> (*batch_rule_301_t)(const Tensor &, c10::optional, int64_t, bool, c10::optional); +typedef std::tuple> (*batch_rule_322_t)(const Tensor &, c10::optional, int64_t, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_301_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_322_t batch_rule, const Tensor & self, int64_t num_samples, bool replacement, c10::optional generator ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6351,10 +6890,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_302_t)(int64_t, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_323_t)(int64_t, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_302_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_323_t batch_rule, int64_t n, const Tensor & self ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6368,10 +6907,10 @@ Tensor 
lowerToNextLayer( return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_303_t)(const Tensor &, c10::optional, int64_t, const Scalar &, const Scalar &); +typedef std::tuple> (*batch_rule_324_t)(const Tensor &, c10::optional, int64_t, const Scalar &, const Scalar &); template <> -Tensor lowerToNextLayer( - batch_rule_303_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_324_t batch_rule, const Tensor & self, int64_t bins, const Scalar & min, const Scalar & max ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6385,10 +6924,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_304_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_325_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const c10::optional &, bool>( - batch_rule_304_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const c10::optional &, bool>( + batch_rule_325_t batch_rule, const Tensor & self, const Tensor & bins, const c10::optional & weight, bool density ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6410,10 +6949,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_305_t)(const Tensor &, c10::optional, int64_t, c10::optional>, const c10::optional &, c10::optional, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_326_t)(const Tensor &, c10::optional, int64_t, c10::optional>, const c10::optional &, c10::optional, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, int64_t, c10::optional>, const c10::optional &, bool>( - batch_rule_305_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, int64_t, c10::optional>, const c10::optional &, bool>( + batch_rule_326_t batch_rule, const Tensor & self, int64_t bins, c10::optional> range, const c10::optional & weight, bool density ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6432,10 +6971,54 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_306_t)(const Tensor &, c10::optional, double, c10::optional, bool); +typedef std::tuple<::std::vector,c10::optional> (*batch_rule_327_t)(const Tensor &, c10::optional, IntArrayRef, c10::optional>, const c10::optional &, c10::optional, bool); template <> -Tensor lowerToNextLayer, bool>( - batch_rule_306_t batch_rule, +::std::vector lowerToNextLayer,const Tensor &, IntArrayRef, c10::optional>, const c10::optional &, bool>( + batch_rule_327_t batch_rule, + const Tensor & self, IntArrayRef bins, c10::optional> range, const c10::optional & weight, bool density +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + optional weight_value; + optional weight_bdim; + if (weight) { + std::tie(weight_value, weight_bdim) = 
unwrapTensorAtLevel(weight.value(), cur_level); + } + auto results = batch_rule(self_value, self_bdim, bins, range, weight_value, weight_bdim, density); + return makeBatchedVector(std::get<0>(results), std::get<1>(results), cur_level); +} + +typedef std::tuple> (*batch_rule_328_t)(const Tensor &, c10::optional, IntArrayRef, c10::optional>, const c10::optional &, c10::optional, bool); +template <> +Tensor lowerToNextLayer>, const c10::optional &, bool>( + batch_rule_328_t batch_rule, + const Tensor & self, IntArrayRef bins, c10::optional> range, const c10::optional & weight, bool density +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + optional weight_value; + optional weight_bdim; + if (weight) { + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight.value(), cur_level); + } + auto results = batch_rule(self_value, self_bdim, bins, range, weight_value, weight_bdim, density); + return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); +} + +typedef std::tuple> (*batch_rule_329_t)(const Tensor &, c10::optional, double, c10::optional, bool); +template <> +Tensor lowerToNextLayer, bool>( + batch_rule_329_t batch_rule, const Tensor & self, double q, c10::optional dim, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6449,10 +7032,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_307_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, bool); +typedef std::tuple> (*batch_rule_330_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, bool); template <> -Tensor lowerToNextLayer, bool>( - batch_rule_307_t batch_rule, +Tensor lowerToNextLayer, bool>( + batch_rule_330_t batch_rule, const Tensor & self, const Tensor & q, c10::optional dim, bool keepdim ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6469,10 +7052,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_308_t)(const Tensor &, c10::optional, double, c10::optional, bool, c10::string_view); +typedef std::tuple> (*batch_rule_331_t)(const Tensor &, c10::optional, double, c10::optional, bool, c10::string_view); template <> -Tensor lowerToNextLayer, bool, c10::string_view>( - batch_rule_308_t batch_rule, +Tensor lowerToNextLayer, bool, c10::string_view>( + batch_rule_331_t batch_rule, const Tensor & self, double q, c10::optional dim, bool keepdim, c10::string_view interpolation ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6486,10 +7069,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_309_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, bool, c10::string_view); +typedef std::tuple> (*batch_rule_332_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, bool, c10::string_view); template <> -Tensor lowerToNextLayer, bool, c10::string_view>( - batch_rule_309_t batch_rule, +Tensor lowerToNextLayer, bool, c10::string_view>( + batch_rule_332_t batch_rule, const Tensor & self, const Tensor & q, c10::optional dim, bool keepdim, c10::string_view interpolation ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); 
@@ -6506,10 +7089,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_310_t)(const Tensor &, c10::optional, c10::optional, int64_t, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_333_t)(const Tensor &, c10::optional, c10::optional, int64_t, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, c10::optional, int64_t, bool>( - batch_rule_310_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, c10::optional, int64_t, bool>( + batch_rule_333_t batch_rule, const Tensor & self, c10::optional stable, int64_t dim, bool descending ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6523,10 +7106,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_311_t)(const Tensor &, c10::optional, c10::optional, Dimname, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_334_t)(const Tensor &, c10::optional, c10::optional, Dimname, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, c10::optional, Dimname, bool>( - batch_rule_311_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, c10::optional, Dimname, bool>( + batch_rule_334_t batch_rule, const Tensor & self, c10::optional stable, Dimname dim, bool descending ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6540,10 +7123,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_312_t)(const Tensor &, c10::optional, int64_t, int64_t, bool, bool); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_335_t)(const Tensor &, c10::optional, int64_t, int64_t, bool, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, int64_t, int64_t, bool, bool>( - batch_rule_312_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, int64_t, int64_t, bool, bool>( + batch_rule_335_t batch_rule, const Tensor & self, int64_t k, int64_t dim, bool largest, bool sorted ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6557,10 +7140,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_313_t)(const Tensor &, c10::optional, const Scalar &, int64_t, const Scalar &); +typedef std::tuple> (*batch_rule_336_t)(const Tensor &, c10::optional, const Scalar &, int64_t, const Scalar &); template <> -Tensor lowerToNextLayer( - batch_rule_313_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_336_t batch_rule, const Tensor & self, const Scalar & p, int64_t dim, const Scalar & maxnorm ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6574,27 +7157,74 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_314_t)(double, const Tensor &, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_337_t)(double, const Tensor &, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_314_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_337_t batch_rule, double mean, const Tensor & std, c10::optional generator ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); auto maybe_layer = maybeCurrentDynamicLayer(); TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); int64_t cur_level = 
maybe_layer->layerId(); - Tensor std_value; - optional std_bdim; - std::tie(std_value, std_bdim) = unwrapTensorAtLevel(std, cur_level); - auto results = batch_rule(mean, std_value, std_bdim, generator); + Tensor std_value; + optional std_bdim; + std::tie(std_value, std_bdim) = unwrapTensorAtLevel(std, cur_level); + auto results = batch_rule(mean, std_value, std_bdim, generator); + return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); +} + +typedef std::tuple> (*batch_rule_338_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, bool, bool, c10::optional, const c10::optional &, c10::optional); +template <> +Tensor lowerToNextLayer, const c10::optional &>( + batch_rule_338_t batch_rule, + const Tensor & sorted_sequence, const Tensor & self, bool out_int32, bool right, c10::optional side, const c10::optional & sorter +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor sorted_sequence_value; + optional sorted_sequence_bdim; + std::tie(sorted_sequence_value, sorted_sequence_bdim) = unwrapTensorAtLevel(sorted_sequence, cur_level); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + optional sorter_value; + optional sorter_bdim; + if (sorter) { + std::tie(sorter_value, sorter_bdim) = unwrapTensorAtLevel(sorter.value(), cur_level); + } + auto results = batch_rule(sorted_sequence_value, sorted_sequence_bdim, self_value, self_bdim, out_int32, right, side, sorter_value, sorter_bdim); + return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); +} + +typedef std::tuple> (*batch_rule_339_t)(const Tensor &, c10::optional, const Scalar &, bool, bool, c10::optional, const c10::optional &, c10::optional); +template <> +Tensor lowerToNextLayer, const c10::optional &>( + batch_rule_339_t batch_rule, + const Tensor & sorted_sequence, const Scalar & self, bool out_int32, bool right, c10::optional side, const c10::optional & sorter +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor sorted_sequence_value; + optional sorted_sequence_bdim; + std::tie(sorted_sequence_value, sorted_sequence_bdim) = unwrapTensorAtLevel(sorted_sequence, cur_level); + optional sorter_value; + optional sorter_bdim; + if (sorter) { + std::tie(sorter_value, sorter_bdim) = unwrapTensorAtLevel(sorter.value(), cur_level); + } + auto results = batch_rule(sorted_sequence_value, sorted_sequence_bdim, self, out_int32, right, side, sorter_value, sorter_bdim); return makeBatched(std::get<0>(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_315_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, const c10::optional &, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_340_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, const c10::optional &, c10::optional, int64_t); template <> -Tensor lowerToNextLayer &, int64_t>( - batch_rule_315_t batch_rule, +Tensor lowerToNextLayer &, int64_t>( + batch_rule_340_t batch_rule, const Tensor & self, const Tensor & target, const Scalar & p, const Scalar & margin, const c10::optional & weight, int64_t reduction ) { c10::impl::ExcludeDispatchKeyGuard 
guard(kBatchedKey); @@ -6616,10 +7246,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_316_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, const c10::optional &, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_341_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, const c10::optional &, c10::optional, int64_t); template <> -Tensor lowerToNextLayer &, int64_t>( - batch_rule_316_t batch_rule, +Tensor lowerToNextLayer &, int64_t>( + batch_rule_341_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & target, const Scalar & p, const Scalar & margin, const c10::optional & weight, int64_t reduction ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6644,10 +7274,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_317_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_342_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_317_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_342_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, const Tensor & is_target ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6670,10 +7300,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_318_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t, int64_t); +typedef std::tuple> (*batch_rule_343_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t, int64_t); template <> -Tensor lowerToNextLayer &, int64_t, int64_t>( - batch_rule_318_t batch_rule, +Tensor lowerToNextLayer &, int64_t, int64_t>( + batch_rule_343_t batch_rule, const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, int64_t ignore_index ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6695,10 +7325,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_319_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t, int64_t); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_344_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t, int64_t); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const c10::optional &, int64_t, int64_t>( - batch_rule_319_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const c10::optional &, int64_t, int64_t>( + batch_rule_344_t batch_rule, const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, int64_t ignore_index ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6720,10 +7350,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_320_t)(const Tensor &, 
c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t, int64_t, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_345_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const c10::optional &, c10::optional, int64_t, int64_t, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer &, int64_t, int64_t, const Tensor &>( - batch_rule_320_t batch_rule, +Tensor lowerToNextLayer &, int64_t, int64_t, const Tensor &>( + batch_rule_345_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & target, const c10::optional & weight, int64_t reduction, int64_t ignore_index, const Tensor & total_weight ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6751,10 +7381,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_321_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, double); +typedef std::tuple> (*batch_rule_346_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, int64_t, double); template <> -Tensor lowerToNextLayer( - batch_rule_321_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_346_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & target, int64_t reduction, double delta ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6774,10 +7404,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_322_t)(const Tensor &, c10::optional, const Scalar &, const Scalar &, const Scalar &); +typedef std::tuple> (*batch_rule_347_t)(const Tensor &, c10::optional, const Scalar &, const Scalar &, const Scalar &); template <> -Tensor lowerToNextLayer( - batch_rule_322_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_347_t batch_rule, const Tensor & self, const Scalar & alpha, const Scalar & scale, const Scalar & input_scale ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6791,10 +7421,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_323_t)(const Tensor &, c10::optional, const Scalar &, const Scalar &, const Scalar &, bool, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_348_t)(const Tensor &, c10::optional, const Scalar &, const Scalar &, const Scalar &, bool, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_323_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_348_t batch_rule, const Tensor & grad_output, const Scalar & alpha, const Scalar & scale, const Scalar & input_scale, bool is_result, const Tensor & self_or_result ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6811,10 +7441,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_324_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &); +typedef std::tuple> (*batch_rule_349_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &); template <> -Tensor lowerToNextLayer( - batch_rule_324_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_349_t batch_rule, const Tensor & grad_output, const Tensor & self, const Scalar & min_val, const Scalar & max_val ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6831,10 +7461,10 @@ Tensor 
lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_325_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, bool); +typedef std::tuple> (*batch_rule_350_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, bool); template <> -Tensor lowerToNextLayer( - batch_rule_325_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_350_t batch_rule, const Tensor & grad_output, const Tensor & self, const Scalar & negative_slope, bool self_is_result ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6851,10 +7481,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_326_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, bool, c10::optional); +typedef std::tuple> (*batch_rule_351_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_326_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_351_t batch_rule, const Tensor & self, const Tensor & noise, const Scalar & lower, const Scalar & upper, bool training, c10::optional generator ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6871,10 +7501,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_327_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, bool, bool); +typedef std::tuple> (*batch_rule_352_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, bool, bool); template <> -Tensor lowerToNextLayer( - batch_rule_327_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_352_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & noise, const Scalar & lower, const Scalar & upper, bool training, bool self_is_result ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6894,10 +7524,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_328_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_353_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Scalar &, const Scalar &, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_328_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_353_t batch_rule, const Tensor & grad_output, const Tensor & self, const Scalar & beta, const Scalar & threshold, const Tensor & output ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6917,10 +7547,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_329_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, bool, c10::optional); +typedef std::tuple> (*batch_rule_354_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_329_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_354_t batch_rule, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional divisor_override ) { c10::impl::ExcludeDispatchKeyGuard 
guard(kBatchedKey); @@ -6934,10 +7564,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_330_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, bool, c10::optional); +typedef std::tuple> (*batch_rule_355_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, bool, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_330_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_355_t batch_rule, const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, bool ceil_mode, bool count_include_pad, c10::optional divisor_override ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6954,10 +7584,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_331_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, const Tensor &, c10::optional); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_356_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, const Tensor &, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &, IntArrayRef, IntArrayRef, const Tensor &>( - batch_rule_331_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, IntArrayRef, IntArrayRef, const Tensor &>( + batch_rule_356_t batch_rule, const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & random_samples ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6974,10 +7604,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_332_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_357_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_332_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_357_t batch_rule, const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef output_size, const Tensor & indices ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -6997,10 +7627,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_333_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, bool, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_358_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, bool, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_333_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_358_t batch_rule, const Tensor & grad_output, const Tensor & self, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, bool ceil_mode, const Tensor & indices ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7020,10 +7650,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_334_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef); +typedef std::tuple> (*batch_rule_359_t)(const 
Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef); template <> -Tensor lowerToNextLayer( - batch_rule_334_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_359_t batch_rule, const Tensor & self, const Tensor & indices, IntArrayRef output_size, IntArrayRef stride, IntArrayRef padding ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7040,10 +7670,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_335_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef); +typedef std::tuple> (*batch_rule_360_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef); template <> -Tensor lowerToNextLayer( - batch_rule_335_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_360_t batch_rule, const Tensor & grad_output, const Tensor & self, const Tensor & indices, IntArrayRef output_size, IntArrayRef stride, IntArrayRef padding ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7063,10 +7693,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_336_t)(const Tensor &, c10::optional, c10::optional, bool, c10::optional>); +typedef std::tuple> (*batch_rule_361_t)(const Tensor &, c10::optional, c10::optional, bool, c10::optional>); template <> -Tensor lowerToNextLayer, bool, c10::optional>>( - batch_rule_336_t batch_rule, +Tensor lowerToNextLayer, bool, c10::optional>>( + batch_rule_361_t batch_rule, const Tensor & input, c10::optional output_size, bool align_corners, c10::optional> scale_factors ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7080,10 +7710,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_337_t)(const Tensor &, c10::optional, c10::optional, IntArrayRef, bool, c10::optional>); +typedef std::tuple> (*batch_rule_362_t)(const Tensor &, c10::optional, c10::optional, IntArrayRef, bool, c10::optional>); template <> -Tensor lowerToNextLayer, IntArrayRef, bool, c10::optional>>( - batch_rule_337_t batch_rule, +Tensor lowerToNextLayer, IntArrayRef, bool, c10::optional>>( + batch_rule_362_t batch_rule, const Tensor & grad_output, c10::optional output_size, IntArrayRef input_size, bool align_corners, c10::optional> scale_factors ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7097,10 +7727,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_338_t)(const Tensor &, c10::optional, c10::optional, c10::optional>); +typedef std::tuple> (*batch_rule_363_t)(const Tensor &, c10::optional, c10::optional, c10::optional>); template <> -Tensor lowerToNextLayer, c10::optional>>( - batch_rule_338_t batch_rule, +Tensor lowerToNextLayer, c10::optional>>( + batch_rule_363_t batch_rule, const Tensor & input, c10::optional output_size, c10::optional> scale_factors ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7114,10 +7744,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_339_t)(const Tensor &, c10::optional, c10::optional, IntArrayRef, c10::optional>); +typedef std::tuple> (*batch_rule_364_t)(const Tensor &, c10::optional, c10::optional, IntArrayRef, c10::optional>); template <> -Tensor lowerToNextLayer, IntArrayRef, c10::optional>>( - batch_rule_339_t batch_rule, 
+Tensor lowerToNextLayer, IntArrayRef, c10::optional>>( + batch_rule_364_t batch_rule, const Tensor & grad_output, c10::optional output_size, IntArrayRef input_size, c10::optional> scale_factors ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7131,10 +7761,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_340_t)(const Tensor &, c10::optional, IntArrayRef, bool, c10::optional); +typedef std::tuple> (*batch_rule_365_t)(const Tensor &, c10::optional, IntArrayRef, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_340_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_365_t batch_rule, const Tensor & self, IntArrayRef output_size, bool align_corners, c10::optional scales ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7148,10 +7778,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_341_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, bool, c10::optional); +typedef std::tuple> (*batch_rule_366_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_341_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_366_t batch_rule, const Tensor & grad_output, IntArrayRef output_size, IntArrayRef input_size, bool align_corners, c10::optional scales ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7165,10 +7795,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_342_t)(const Tensor &, c10::optional, IntArrayRef, bool, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_367_t)(const Tensor &, c10::optional, IntArrayRef, bool, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional>( - batch_rule_342_t batch_rule, +Tensor lowerToNextLayer, c10::optional>( + batch_rule_367_t batch_rule, const Tensor & self, IntArrayRef output_size, bool align_corners, c10::optional scales_h, c10::optional scales_w ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7182,10 +7812,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_343_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, bool, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_368_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, bool, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional>( - batch_rule_343_t batch_rule, +Tensor lowerToNextLayer, c10::optional>( + batch_rule_368_t batch_rule, const Tensor & grad_output, IntArrayRef output_size, IntArrayRef input_size, bool align_corners, c10::optional scales_h, c10::optional scales_w ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7199,10 +7829,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_344_t)(const Tensor &, c10::optional, IntArrayRef, bool, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_369_t)(const Tensor &, c10::optional, IntArrayRef, bool, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional>( - batch_rule_344_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional>( + batch_rule_369_t batch_rule, const Tensor & self, IntArrayRef output_size, bool align_corners, c10::optional scales_d, c10::optional scales_h, c10::optional 
scales_w ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7216,10 +7846,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_345_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, bool, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_370_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, bool, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional>( - batch_rule_345_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional>( + batch_rule_370_t batch_rule, const Tensor & grad_output, IntArrayRef output_size, IntArrayRef input_size, bool align_corners, c10::optional scales_d, c10::optional scales_h, c10::optional scales_w ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7233,10 +7863,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_346_t)(const Tensor &, c10::optional, IntArrayRef, c10::optional); +typedef std::tuple> (*batch_rule_371_t)(const Tensor &, c10::optional, IntArrayRef, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_346_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_371_t batch_rule, const Tensor & self, IntArrayRef output_size, c10::optional scales ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7250,10 +7880,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_347_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, c10::optional); +typedef std::tuple> (*batch_rule_372_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_347_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_372_t batch_rule, const Tensor & grad_output, IntArrayRef output_size, IntArrayRef input_size, c10::optional scales ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7267,10 +7897,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_348_t)(const Tensor &, c10::optional, IntArrayRef, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_373_t)(const Tensor &, c10::optional, IntArrayRef, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional>( - batch_rule_348_t batch_rule, +Tensor lowerToNextLayer, c10::optional>( + batch_rule_373_t batch_rule, const Tensor & self, IntArrayRef output_size, c10::optional scales_h, c10::optional scales_w ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7284,10 +7914,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_349_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_374_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional>( - batch_rule_349_t batch_rule, +Tensor lowerToNextLayer, c10::optional>( + batch_rule_374_t batch_rule, const Tensor & grad_output, IntArrayRef output_size, IntArrayRef input_size, c10::optional scales_h, c10::optional scales_w ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7301,10 +7931,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_350_t)(const Tensor &, c10::optional, IntArrayRef, 
c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_375_t)(const Tensor &, c10::optional, IntArrayRef, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional>( - batch_rule_350_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional>( + batch_rule_375_t batch_rule, const Tensor & self, IntArrayRef output_size, c10::optional scales_d, c10::optional scales_h, c10::optional scales_w ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7318,10 +7948,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_351_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_376_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional>( - batch_rule_351_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional>( + batch_rule_376_t batch_rule, const Tensor & grad_output, IntArrayRef output_size, IntArrayRef input_size, c10::optional scales_d, c10::optional scales_h, c10::optional scales_w ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7335,10 +7965,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_352_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_377_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_352_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_377_t batch_rule, const Tensor & grad_output, const Tensor & self, c10::optional eps ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7355,10 +7985,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_353_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef); +typedef std::tuple> (*batch_rule_378_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef); template <> -Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef>( - batch_rule_353_t batch_rule, +Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef>( + batch_rule_378_t batch_rule, const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, const c10::optional & bias, IntArrayRef stride, IntArrayRef padding, IntArrayRef output_padding, IntArrayRef dilation ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7380,10 +8010,39 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_354_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_379_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, ::std::array); template <> -Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef>( 
-  batch_rule_354_t batch_rule,
+std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, const Tensor &, const Tensor &, ::std::array>(
+  batch_rule_379_t batch_rule,
+  const Tensor & grad_output, const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef output_padding, IntArrayRef dilation, const Tensor & finput, const Tensor & fgrad_input, ::std::array output_mask
+) {
+  c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
+  auto maybe_layer = maybeCurrentDynamicLayer();
+  TORCH_INTERNAL_ASSERT(maybe_layer.has_value());
+  int64_t cur_level = maybe_layer->layerId();
+  Tensor grad_output_value;
+  optional grad_output_bdim;
+  std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level);
+  Tensor self_value;
+  optional self_bdim;
+  std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level);
+  Tensor weight_value;
+  optional weight_bdim;
+  std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level);
+  Tensor finput_value;
+  optional finput_bdim;
+  std::tie(finput_value, finput_bdim) = unwrapTensorAtLevel(finput, cur_level);
+  Tensor fgrad_input_value;
+  optional fgrad_input_bdim;
+  std::tie(fgrad_input_value, fgrad_input_bdim) = unwrapTensorAtLevel(fgrad_input, cur_level);
+  auto results = batch_rule(grad_output_value, grad_output_bdim, self_value, self_bdim, weight_value, weight_bdim, kernel_size, stride, padding, output_padding, dilation, finput_value, finput_bdim, fgrad_input_value, fgrad_input_bdim, output_mask);
+  return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level));
+}
+
+typedef std::tuple> (*batch_rule_380_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef);
+template <>
+Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef>(
+  batch_rule_380_t batch_rule,
   const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, const c10::optional & bias, IntArrayRef stride, IntArrayRef padding
 ) {
   c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
@@ -7405,10 +8064,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level);
 }
 
-typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_355_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef);
+typedef std::tuple,Tensor,c10::optional> (*batch_rule_381_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef);
 template <>
-std::tuple lowerToNextLayer,const Tensor &, const Tensor &, IntArrayRef, const c10::optional &, IntArrayRef, IntArrayRef>(
-  batch_rule_355_t batch_rule,
+std::tuple lowerToNextLayer,const Tensor &, const Tensor &, IntArrayRef, const c10::optional &, IntArrayRef, IntArrayRef>(
+  batch_rule_381_t batch_rule,
   const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, const c10::optional & bias, IntArrayRef stride, IntArrayRef padding
 ) {
   c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
@@ -7427,13 +8086,39 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results),
cur_level)); +} + +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_382_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, const Tensor &, c10::optional, ::std::array); +template <> +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, const Tensor &, ::std::array>( + batch_rule_382_t batch_rule, + const Tensor & grad_output, const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, const Tensor & finput, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + Tensor weight_value; + optional weight_bdim; + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); + Tensor finput_value; + optional finput_bdim; + std::tie(finput_value, finput_bdim) = unwrapTensorAtLevel(finput, cur_level); + auto results = batch_rule(grad_output_value, grad_output_bdim, self_value, self_bdim, weight_value, weight_bdim, kernel_size, stride, padding, finput_value, finput_bdim, output_mask); return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple> (*batch_rule_356_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef); +typedef std::tuple> (*batch_rule_383_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef); template <> -Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef>( - batch_rule_356_t batch_rule, +Tensor lowerToNextLayer &, IntArrayRef, IntArrayRef, IntArrayRef>( + batch_rule_383_t batch_rule, const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, const c10::optional & bias, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7455,10 +8140,110 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_357_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_384_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, ::std::array); template <> -Tensor lowerToNextLayer( - batch_rule_357_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, ::std::array>( + batch_rule_384_t batch_rule, + const Tensor & grad_output, const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard 
guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + Tensor weight_value; + optional weight_bdim; + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); + auto results = batch_rule(grad_output_value, grad_output_bdim, self_value, self_bdim, weight_value, weight_bdim, kernel_size, stride, padding, dilation, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); +} + +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_385_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, ::std::array); +template <> +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, ::std::array>( + batch_rule_385_t batch_rule, + const Tensor & grad_output, const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, IntArrayRef dilation, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + Tensor weight_value; + optional weight_bdim; + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); + auto results = batch_rule(grad_output_value, grad_output_bdim, self_value, self_bdim, weight_value, weight_bdim, kernel_size, stride, padding, dilation, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); +} + +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_386_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, const c10::optional &, c10::optional, IntArrayRef, IntArrayRef); +template <> +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, IntArrayRef, const c10::optional &, IntArrayRef, IntArrayRef>( + batch_rule_386_t batch_rule, + const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, const c10::optional & bias, IntArrayRef stride, IntArrayRef padding +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + Tensor weight_value; + optional weight_bdim; + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); + optional bias_value; + optional bias_bdim; + if 
(bias) { + std::tie(bias_value, bias_bdim) = unwrapTensorAtLevel(bias.value(), cur_level); + } + auto results = batch_rule(self_value, self_bdim, weight_value, weight_bdim, kernel_size, bias_value, bias_bdim, stride, padding); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); +} + +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_387_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, const Tensor &, c10::optional, const Tensor &, c10::optional, ::std::array); +template <> +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, const Tensor &, IntArrayRef, IntArrayRef, IntArrayRef, const Tensor &, const Tensor &, ::std::array>( + batch_rule_387_t batch_rule, + const Tensor & grad_output, const Tensor & self, const Tensor & weight, IntArrayRef kernel_size, IntArrayRef stride, IntArrayRef padding, const Tensor & finput, const Tensor & fgrad_input, ::std::array output_mask +) { + c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); + auto maybe_layer = maybeCurrentDynamicLayer(); + TORCH_INTERNAL_ASSERT(maybe_layer.has_value()); + int64_t cur_level = maybe_layer->layerId(); + Tensor grad_output_value; + optional grad_output_bdim; + std::tie(grad_output_value, grad_output_bdim) = unwrapTensorAtLevel(grad_output, cur_level); + Tensor self_value; + optional self_bdim; + std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level); + Tensor weight_value; + optional weight_bdim; + std::tie(weight_value, weight_bdim) = unwrapTensorAtLevel(weight, cur_level); + Tensor finput_value; + optional finput_bdim; + std::tie(finput_value, finput_bdim) = unwrapTensorAtLevel(finput, cur_level); + Tensor fgrad_input_value; + optional fgrad_input_bdim; + std::tie(fgrad_input_value, fgrad_input_bdim) = unwrapTensorAtLevel(fgrad_input, cur_level); + auto results = batch_rule(grad_output_value, grad_output_bdim, self_value, self_bdim, weight_value, weight_bdim, kernel_size, stride, padding, finput_value, finput_bdim, fgrad_input_value, fgrad_input_bdim, output_mask); + return std::make_tuple(makeBatched(std::get<0>(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); +} + +typedef std::tuple> (*batch_rule_388_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef); +template <> +Tensor lowerToNextLayer( + batch_rule_388_t batch_rule, const Tensor & grad_output, IntArrayRef input_size, IntArrayRef kernel_size, IntArrayRef dilation, IntArrayRef padding, IntArrayRef stride ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7472,10 +8257,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_358_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef); +typedef std::tuple> (*batch_rule_389_t)(const Tensor &, c10::optional, IntArrayRef, IntArrayRef, IntArrayRef, IntArrayRef); template <> -Tensor lowerToNextLayer( - batch_rule_358_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_389_t batch_rule, const Tensor & self, IntArrayRef kernel_size, IntArrayRef dilation, IntArrayRef padding, IntArrayRef stride ) { c10::impl::ExcludeDispatchKeyGuard 
guard(kBatchedKey); @@ -7489,10 +8274,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_359_t)(const Tensor &, c10::optional, c10::optional, int64_t, c10::optional); +typedef std::tuple> (*batch_rule_390_t)(const Tensor &, c10::optional, c10::optional, int64_t, c10::optional); template <> -Tensor lowerToNextLayer, int64_t, c10::optional>( - batch_rule_359_t batch_rule, +Tensor lowerToNextLayer, int64_t, c10::optional>( + batch_rule_390_t batch_rule, const Tensor & self, c10::optional n, int64_t dim, c10::optional norm ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7506,10 +8291,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_360_t)(const Tensor &, c10::optional, c10::optional, IntArrayRef, c10::optional); +typedef std::tuple> (*batch_rule_391_t)(const Tensor &, c10::optional, c10::optional, IntArrayRef, c10::optional); template <> -Tensor lowerToNextLayer, IntArrayRef, c10::optional>( - batch_rule_360_t batch_rule, +Tensor lowerToNextLayer, IntArrayRef, c10::optional>( + batch_rule_391_t batch_rule, const Tensor & self, c10::optional s, IntArrayRef dim, c10::optional norm ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7523,10 +8308,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_361_t)(const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_392_t)(const Tensor &, c10::optional, c10::optional, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer, c10::optional, c10::optional>( - batch_rule_361_t batch_rule, +Tensor lowerToNextLayer, c10::optional, c10::optional>( + batch_rule_392_t batch_rule, const Tensor & self, c10::optional s, c10::optional dim, c10::optional norm ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7540,10 +8325,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_362_t)(const Tensor &, c10::optional, c10::optional); +typedef std::tuple> (*batch_rule_393_t)(const Tensor &, c10::optional, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_362_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_393_t batch_rule, const Tensor & values, c10::optional addends ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7557,10 +8342,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_363_t)(const Tensor &, c10::optional); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_394_t)(const Tensor &, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &>( - batch_rule_363_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &>( + batch_rule_394_t batch_rule, const Tensor & self ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7574,10 +8359,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple> (*batch_rule_364_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional); +typedef std::tuple> (*batch_rule_395_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const 
Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional); template <> -Tensor lowerToNextLayer( - batch_rule_364_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_395_t batch_rule, const Tensor & det_grad, const Tensor & det, const Tensor & self, const Tensor & lu, const Tensor & pivs ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7603,10 +8388,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_365_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, c10::optional); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_396_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional, c10::optional); template <> -std::tuple lowerToNextLayer,const Tensor &, const Tensor &, c10::optional, c10::optional>( - batch_rule_365_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, const Tensor &, c10::optional, c10::optional>( + batch_rule_396_t batch_rule, const Tensor & self, const Tensor & b, c10::optional rcond, c10::optional driver ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7623,10 +8408,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level), makeBatched(std::get<6>(results), std::get<7>(results), cur_level)); } -typedef std::tuple,Tensor,c10::optional> (*batch_rule_366_t)(const Tensor &, c10::optional, c10::string_view); +typedef std::tuple,Tensor,c10::optional> (*batch_rule_397_t)(const Tensor &, c10::optional, c10::string_view); template <> -std::tuple lowerToNextLayer,const Tensor &, c10::string_view>( - batch_rule_366_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, c10::string_view>( + batch_rule_397_t batch_rule, const Tensor & self, c10::string_view mode ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7640,10 +8425,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level)); } -typedef std::tuple> (*batch_rule_367_t)(const Tensor &, c10::optional, c10::string_view); +typedef std::tuple> (*batch_rule_398_t)(const Tensor &, c10::optional, c10::string_view); template <> -Tensor lowerToNextLayer( - batch_rule_367_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_398_t batch_rule, const Tensor & self, c10::string_view p ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7657,10 +8442,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_368_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, bool, c10::optional); +typedef std::tuple> (*batch_rule_399_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, bool, c10::optional); template <> -Tensor lowerToNextLayer &, c10::optional, bool, c10::optional>( - batch_rule_368_t batch_rule, +Tensor lowerToNextLayer &, c10::optional, bool, c10::optional>( + batch_rule_399_t batch_rule, const Tensor & self, const c10::optional & ord, c10::optional dim, bool keepdim, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7674,10 +8459,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_369_t)(const Tensor &, 
c10::optional, c10::string_view, c10::optional, bool, c10::optional); +typedef std::tuple> (*batch_rule_400_t)(const Tensor &, c10::optional, c10::string_view, c10::optional, bool, c10::optional); template <> -Tensor lowerToNextLayer, bool, c10::optional>( - batch_rule_369_t batch_rule, +Tensor lowerToNextLayer, bool, c10::optional>( + batch_rule_400_t batch_rule, const Tensor & self, c10::string_view ord, c10::optional dim, bool keepdim, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7691,10 +8476,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_370_t)(const Tensor &, c10::optional, const Scalar &, c10::optional, bool, c10::optional); +typedef std::tuple> (*batch_rule_401_t)(const Tensor &, c10::optional, const Scalar &, c10::optional, bool, c10::optional); template <> -Tensor lowerToNextLayer, bool, c10::optional>( - batch_rule_370_t batch_rule, +Tensor lowerToNextLayer, bool, c10::optional>( + batch_rule_401_t batch_rule, const Tensor & self, const Scalar & ord, c10::optional dim, bool keepdim, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7708,10 +8493,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_371_t)(const Tensor &, c10::optional, const Scalar &, IntArrayRef, bool, c10::optional); +typedef std::tuple> (*batch_rule_402_t)(const Tensor &, c10::optional, const Scalar &, IntArrayRef, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_371_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_402_t batch_rule, const Tensor & self, const Scalar & ord, IntArrayRef dim, bool keepdim, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7725,10 +8510,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_372_t)(const Tensor &, c10::optional, c10::string_view, IntArrayRef, bool, c10::optional); +typedef std::tuple> (*batch_rule_403_t)(const Tensor &, c10::optional, c10::string_view, IntArrayRef, bool, c10::optional); template <> -Tensor lowerToNextLayer>( - batch_rule_372_t batch_rule, +Tensor lowerToNextLayer>( + batch_rule_403_t batch_rule, const Tensor & self, c10::string_view ord, IntArrayRef dim, bool keepdim, c10::optional dtype ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7742,10 +8527,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_373_t)(const Tensor &, c10::optional, bool); +typedef std::tuple,Tensor,c10::optional,Tensor,c10::optional> (*batch_rule_404_t)(const Tensor &, c10::optional, bool); template <> -std::tuple lowerToNextLayer,const Tensor &, bool>( - batch_rule_373_t batch_rule, +std::tuple lowerToNextLayer,const Tensor &, bool>( + batch_rule_404_t batch_rule, const Tensor & self, bool full_matrices ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7759,10 +8544,10 @@ std::tuple lowerToNextLayer(results), std::get<1>(results), cur_level), makeBatched(std::get<2>(results), std::get<3>(results), cur_level), makeBatched(std::get<4>(results), std::get<5>(results), cur_level)); } -typedef std::tuple> (*batch_rule_374_t)(const Tensor &, c10::optional, const c10::optional &); +typedef std::tuple> (*batch_rule_405_t)(const Tensor &, c10::optional, const c10::optional &); template <> -Tensor lowerToNextLayer &>( - batch_rule_374_t batch_rule, +Tensor 
lowerToNextLayer &>(
+  batch_rule_405_t batch_rule,
   const Tensor & self, const c10::optional & p
 ) {
   c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
@@ -7776,11 +8561,38 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level);
 }
 
-typedef std::tuple> (*batch_rule_375_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional);
+typedef std::tuple> (*batch_rule_406_t)(const Tensor &, c10::optional, const c10::optional &, c10::optional, const c10::optional &, c10::optional, bool);
 template <>
-Tensor lowerToNextLayer>(
-  batch_rule_375_t batch_rule,
-  const Tensor & self, const Tensor & other, c10::optional dims
+Tensor lowerToNextLayer &, const c10::optional &, bool>(
+  batch_rule_406_t batch_rule,
+  const Tensor & input, const c10::optional & atol, const c10::optional & rtol, bool hermitian
+) {
+  c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
+  auto maybe_layer = maybeCurrentDynamicLayer();
+  TORCH_INTERNAL_ASSERT(maybe_layer.has_value());
+  int64_t cur_level = maybe_layer->layerId();
+  Tensor input_value;
+  optional input_bdim;
+  std::tie(input_value, input_bdim) = unwrapTensorAtLevel(input, cur_level);
+  optional atol_value;
+  optional atol_bdim;
+  if (atol) {
+    std::tie(atol_value, atol_bdim) = unwrapTensorAtLevel(atol.value(), cur_level);
+  }
+  optional rtol_value;
+  optional rtol_bdim;
+  if (rtol) {
+    std::tie(rtol_value, rtol_bdim) = unwrapTensorAtLevel(rtol.value(), cur_level);
+  }
+  auto results = batch_rule(input_value, input_bdim, atol_value, atol_bdim, rtol_value, rtol_bdim, hermitian);
+  return makeBatched(std::get<0>(results), std::get<1>(results), cur_level);
+}
+
+typedef std::tuple> (*batch_rule_407_t)(const Tensor &, c10::optional, c10::optional, c10::optional, bool);
+template <>
+Tensor lowerToNextLayer, c10::optional, bool>(
+  batch_rule_407_t batch_rule,
+  const Tensor & self, c10::optional atol, c10::optional rtol, bool hermitian
 ) {
   c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
   auto maybe_layer = maybeCurrentDynamicLayer();
@@ -7789,18 +8601,15 @@ Tensor lowerToNextLayer self_bdim;
   std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level);
-  Tensor other_value;
-  optional other_bdim;
-  std::tie(other_value, other_bdim) = unwrapTensorAtLevel(other, cur_level);
-  auto results = batch_rule(self_value, self_bdim, other_value, other_bdim, dims);
+  auto results = batch_rule(self_value, self_bdim, atol, rtol, hermitian);
   return makeBatched(std::get<0>(results), std::get<1>(results), cur_level);
 }
 
-typedef std::tuple> (*batch_rule_376_t)(const Tensor &, c10::optional, c10::optional, bool);
+typedef std::tuple> (*batch_rule_408_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, c10::optional);
 template <>
-Tensor lowerToNextLayer, bool>(
-  batch_rule_376_t batch_rule,
-  const Tensor & self, c10::optional tol, bool hermitian
+Tensor lowerToNextLayer>(
+  batch_rule_408_t batch_rule,
+  const Tensor & self, const Tensor & other, c10::optional dims
 ) {
   c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey);
   auto maybe_layer = maybeCurrentDynamicLayer();
@@ -7809,14 +8618,17 @@ Tensor lowerToNextLayer self_bdim;
   std::tie(self_value, self_bdim) = unwrapTensorAtLevel(self, cur_level);
-  auto results = batch_rule(self_value, self_bdim, tol, hermitian);
+  Tensor other_value;
+  optional other_bdim;
+  std::tie(other_value, other_bdim) = unwrapTensorAtLevel(other, cur_level);
+  auto results = batch_rule(self_value, self_bdim, other_value, other_bdim, dims);
   return makeBatched(std::get<0>(results),
std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_377_t)(const Tensor &, c10::optional, c10::optional>); +typedef std::tuple> (*batch_rule_409_t)(const Tensor &, c10::optional, c10::optional>); template <> -Tensor lowerToNextLayer>>( - batch_rule_377_t batch_rule, +Tensor lowerToNextLayer>>( + batch_rule_409_t batch_rule, const Tensor & values, c10::optional> addends ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7830,10 +8642,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_378_t)(const Tensor &, c10::optional, c10::string_view, c10::string_view); +typedef std::tuple> (*batch_rule_410_t)(const Tensor &, c10::optional, c10::string_view, c10::string_view); template <> -Tensor lowerToNextLayer( - batch_rule_378_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_410_t batch_rule, const Tensor & dummy, c10::string_view a, c10::string_view b ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7847,10 +8659,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_379_t)(const Tensor &, c10::optional, int64_t, c10::string_view); +typedef std::tuple> (*batch_rule_411_t)(const Tensor &, c10::optional, int64_t, c10::string_view); template <> -Tensor lowerToNextLayer( - batch_rule_379_t batch_rule, +Tensor lowerToNextLayer( + batch_rule_411_t batch_rule, const Tensor & dummy, int64_t a, c10::string_view b ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7864,10 +8676,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_380_t)(const Tensor &, c10::optional, c10::string_view, const c10::optional &, c10::optional, const c10::optional &, c10::optional, int64_t, bool, const c10::optional &); +typedef std::tuple> (*batch_rule_412_t)(const Tensor &, c10::optional, c10::string_view, const c10::optional &, c10::optional, const c10::optional &, c10::optional, int64_t, bool, const c10::optional &); template <> -Tensor lowerToNextLayer &, const c10::optional &, int64_t, bool, const c10::optional &>( - batch_rule_380_t batch_rule, +Tensor lowerToNextLayer &, const c10::optional &, int64_t, bool, const c10::optional &>( + batch_rule_412_t batch_rule, const Tensor & data, c10::string_view reduce, const c10::optional & lengths, const c10::optional & indices, int64_t axis, bool unsafe, const c10::optional & initial ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); @@ -7891,10 +8703,10 @@ Tensor lowerToNextLayer(results), std::get<1>(results), cur_level); } -typedef std::tuple> (*batch_rule_381_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, c10::string_view, const c10::optional &, c10::optional, int64_t); +typedef std::tuple> (*batch_rule_413_t)(const Tensor &, c10::optional, const Tensor &, c10::optional, const Tensor &, c10::optional, c10::string_view, const c10::optional &, c10::optional, int64_t); template <> -Tensor lowerToNextLayer &, int64_t>( - batch_rule_381_t batch_rule, +Tensor lowerToNextLayer &, int64_t>( + batch_rule_413_t batch_rule, const Tensor & grad, const Tensor & output, const Tensor & data, c10::string_view reduce, const c10::optional & lengths, int64_t axis ) { c10::impl::ExcludeDispatchKeyGuard guard(kBatchedKey); diff --git a/test/test_ops.py b/test/test_ops.py index 9008c031e..78f88be3f 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -455,7 +455,6 @@ def test_vmapvjp(self, device, dtype, op): 
         xfail('nanquantile'),
         xfail('nn.functional.conv_transpose2d'),
         xfail('nn.functional.gelu'),
-        xfail('nn.functional.grid_sample'),
         xfail('nn.functional.pad', 'circular'),
         xfail('norm', 'fro'),
         xfail('norm', 'inf'),
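
With the grid_sample entry no longer expected to fail, test_vmapvjp now exercises vmap composed over a vjp of torch.nn.functional.grid_sample. A minimal standalone sketch of that composition follows; the shapes, the align_corners setting, and the functorch import path are illustrative assumptions and are not taken from this diff:

import torch
import torch.nn.functional as F
from functorch import vjp, vmap

# One unbatched grid_sample call: input is (N, C, H_in, W_in), grid is (N, H_out, W_out, 2).
inp = torch.randn(2, 3, 5, 5)
grid = torch.rand(2, 4, 4, 2) * 2 - 1  # sampling locations in [-1, 1]

out, vjp_fn = vjp(lambda x, g: F.grid_sample(x, g, align_corners=False), inp, grid)

# vmap pushes an extra leading dimension of cotangents through grid_sample's
# backward, which is the case the batched-gradient plumbing above must handle.
cotangents = torch.randn(7, *out.shape)
grad_inp, grad_grid = vmap(vjp_fn)(cotangents)
# grad_inp: (7, 2, 3, 5, 5), grad_grid: (7, 2, 4, 4, 2)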